app/vmselect/promql: return expected results from increase() over the beginning of time series, which start from big value

Examples for such counters: OS-level counters for network or cpu stats.
app/victoria-metrics: check for error arg passed to filepath.Walk callback
2026-06-09 11:54:31 +03:00 · 2020-01-28 16:30:11 +02:00 · 2020-01-27 20:56:45 +02:00 · 2020-01-27 20:25:28 +02:00 · 2020-01-27 18:44:21 +02:00 · 2020-01-27 18:38:22 +02:00
400 changed files with 26582 additions and 8523 deletions
--- a/2
+++ b/2
@@ -175,7 +175,7 @@

   END OF TERMS AND CONDITIONS

-   Copyright 2019 VictoriaMetrics, Inc.
+   Copyright 2019-2020 VictoriaMetrics, Inc.

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
--- a/8
+++ b/8
@@ -79,16 +79,16 @@ install-errcheck:
 check-all: fmt vet lint errcheck golangci-lint

 test:
-	GO111MODULE=on go test -tags=integration -mod=vendor ./lib/... ./app/...
+	GO111MODULE=on go test -mod=vendor ./lib/... ./app/...

 test-pure:
-	GO111MODULE=on CGO_ENABLED=0 go test -tags=integration -mod=vendor ./lib/... ./app/...
+	GO111MODULE=on CGO_ENABLED=0 go test -mod=vendor ./lib/... ./app/...

 test-full:
-	GO111MODULE=on go test -tags=integration -mod=vendor -coverprofile=coverage.txt -covermode=atomic ./lib/... ./app/...
+	GO111MODULE=on go test -mod=vendor -coverprofile=coverage.txt -covermode=atomic ./lib/... ./app/...

 test-full-386:
-	GO111MODULE=on GOARCH=386 go test -tags=integration -mod=vendor -coverprofile=coverage.txt -covermode=atomic ./lib/... ./app/...
+	GO111MODULE=on GOARCH=386 go test -mod=vendor -coverprofile=coverage.txt -covermode=atomic ./lib/... ./app/...

 benchmark:
 	GO111MODULE=on go test -mod=vendor -bench=. ./lib/...
--- a/README.md
+++ b/README.md
@@ -1,4 +1,5 @@
 [![Latest Release](https://img.shields.io/github/release/VictoriaMetrics/VictoriaMetrics.svg?style=flat-square)](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest)
+[![Docker Pulls](https://img.shields.io/docker/pulls/victoriametrics/victoria-metrics.svg?maxAge=604800)](https://hub.docker.com/r/victoriametrics/victoria-metrics)
 [![Slack](https://img.shields.io/badge/join%20slack-%23victoriametrics-brightgreen.svg)](http://slack.victoriametrics.com/)
 [![GitHub license](https://img.shields.io/github/license/VictoriaMetrics/VictoriaMetrics.svg)](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/LICENSE)
 [![Go Report](https://goreportcard.com/badge/github.com/VictoriaMetrics/VictoriaMetrics)](https://goreportcard.com/report/github.com/VictoriaMetrics/VictoriaMetrics)
@@ -7,20 +8,29 @@

 <img alt="Victoria Metrics" src="logo.png">

-## Single-node VictoriaMetrics
+## VictoriaMetrics

 VictoriaMetrics is fast, cost-effective and scalable time-series database. It can be used as long-term remote storage for Prometheus.
 It is available in [binary releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases),
 [docker images](https://hub.docker.com/r/victoriametrics/victoria-metrics/) and
-in [source code](https://github.com/VictoriaMetrics/VictoriaMetrics).
+in [source code](https://github.com/VictoriaMetrics/VictoriaMetrics). Just download VictoriaMetrics and see [how to start it](#how-to-start-victoriametrics).

 Cluster version is available [here](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/cluster).


+## Case studies and talks
+
+* [Adidas](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/CaseStudies#adidas)
+* [COLOPL](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/CaseStudies#colopl)
+* [Wix.com](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/CaseStudies#wixcom)
+* [Wedos.com](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/CaseStudies#wedoscom)
+* [Dreamteam](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/CaseStudies#dreamteam)
+
+
 ## Prominent features

 * Supports [Prometheus querying API](https://prometheus.io/docs/prometheus/latest/querying/api/), so it can be used as Prometheus drop-in replacement in Grafana.
-  Additionally, VictoriaMetrics extends PromQL with opt-in [useful features](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/ExtendedPromQL).
+  VictoriaMetrics implements [MetricsQL](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/ExtendedPromQL) query language, which is inspired by PromQL.
 * Supports global query view. Multiple Prometheus instances may write data into VictoriaMetrics. Later this data may be used in a single query.
 * High performance and good scalability for both [inserts](https://medium.com/@valyala/high-cardinality-tsdb-benchmarks-victoriametrics-vs-timescaledb-vs-influxdb-13e6ee64dd6b)
  and [selects](https://medium.com/@valyala/when-size-matters-benchmarking-victoriametrics-vs-timescale-and-influxdb-6035811952d4).
@@ -30,9 +40,11 @@ Cluster version is available [here](https://github.com/VictoriaMetrics/VictoriaM
 * High data compression, so [up to 70x more data points](https://medium.com/@valyala/when-size-matters-benchmarking-victoriametrics-vs-timescale-and-influxdb-6035811952d4)
  may be crammed into limited storage comparing to TimescaleDB.
 * Optimized for storage with high-latency IO and low IOPS (HDD and network storage in AWS, Google Cloud, Microsoft Azure, etc). See [graphs from these benchmarks](https://medium.com/@valyala/high-cardinality-tsdb-benchmarks-victoriametrics-vs-timescaledb-vs-influxdb-13e6ee64dd6b).
-* A single-node VictoriaMetrics may substitute moderately sized clusters built with competing solutions such as Thanos, Uber M3, Cortex, InfluxDB or TimescaleDB.
-  See [vertical scalability benchmarks](https://medium.com/@valyala/measuring-vertical-scalability-for-time-series-databases-in-google-cloud-92550d78d8ae)
-  and [comparing Thanos to VictoriaMetrics cluster](https://medium.com/@valyala/comparing-thanos-to-victoriametrics-cluster-b193bea1683).
+* A single-node VictoriaMetrics may substitute moderately sized clusters built with competing solutions such as Thanos, M3DB, Cortex, InfluxDB or TimescaleDB.
+  See [vertical scalability benchmarks](https://medium.com/@valyala/measuring-vertical-scalability-for-time-series-databases-in-google-cloud-92550d78d8ae),
+  [comparing Thanos to VictoriaMetrics cluster](https://medium.com/@valyala/comparing-thanos-to-victoriametrics-cluster-b193bea1683)
+  and [Remote Write Storage Wars](https://promcon.io/2019-munich/talks/remote-write-storage-wars/) talk
+  from [PromCon 2019](https://promcon.io/2019-munich/talks/remote-write-storage-wars/).
 * Easy operation:
  * VictoriaMetrics consists of a single [small executable](https://medium.com/@valyala/stripping-dependency-bloat-in-victoriametrics-docker-image-983fb5912b0d) without external dependencies.
  * All the configuration is done via explicit command-line flags with reasonable defaults.
@@ -43,11 +55,12 @@ Cluster version is available [here](https://github.com/VictoriaMetrics/VictoriaM
 * Storage is protected from corruption on unclean shutdown (i.e. OOM, hardware reset or `kill -9`) thanks to [the storage architecture](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282).
 * Supports metrics' ingestion and [backfilling](#backfilling) via the following protocols:
  * [Prometheus remote write API](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#remote_write)
-  * [InfluxDB line protocol](https://docs.influxdata.com/influxdb/v1.7/write_protocols/line_protocol_tutorial/)
-  * [Graphite plaintext protocol](https://graphite.readthedocs.io/en/latest/feeding-carbon.html) with [tags](https://graphite.readthedocs.io/en/latest/tags.html#carbon)
+  * [InfluxDB line protocol](#how-to-send-data-from-influxdb-compatible-agents-such-as-telegraf)
+  * [Graphite plaintext protocol](#how-to-send-data-from-graphite-compatible-agents-such-as-statsd) with [tags](https://graphite.readthedocs.io/en/latest/tags.html#carbon)
    if `-graphiteListenAddr` is set.
-  * [OpenTSDB put message](http://opentsdb.net/docs/build/html/api_telnet/put.html) if `-opentsdbListenAddr` is set.
-  * [HTTP OpenTSDB /api/put requests](http://opentsdb.net/docs/build/html/api_http/put.html) if `-opentsdbHTTPListenAddr` is set.
+  * [OpenTSDB put message](#sending-data-via-telnet-put-protocol) if `-opentsdbListenAddr` is set.
+  * [HTTP OpenTSDB /api/put requests](#sending-opentsdb-data-via-http-apiput-requests) if `-opentsdbHTTPListenAddr` is set.
+  * [/api/v1/import](#how-to-import-time-series-data)
 * Ideally works with big amounts of time series data from Kubernetes, IoT sensors, connected cars, industrial telemetry, financial data and various Enterprise workloads.
 * Has open source [cluster version](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/cluster).

@@ -66,6 +79,7 @@ Cluster version is available [here](https://github.com/VictoriaMetrics/VictoriaM
  - [How to send data from Graphite-compatible agents such as StatsD?](#how-to-send-data-from-graphite-compatible-agents-such-as-statsd)
  - [Querying Graphite data](#querying-graphite-data)
  - [How to send data from OpenTSDB-compatible agents?](#how-to-send-data-from-opentsdb-compatible-agents)
+  - [Prometheus querying API usage](#prometheus-querying-api-usage)
  - [How to build from sources](#how-to-build-from-sources)
    - [Development build](#development-build)
    - [Production build](#production-build)
@@ -74,13 +88,14 @@ Cluster version is available [here](https://github.com/VictoriaMetrics/VictoriaM
    - [Building docker images](#building-docker-images)
  - [Start with docker-compose](#start-with-docker-compose)
  - [Setting up service](#setting-up-service)
-  - [Third-party contributions](#third-party-contributions)
  - [How to work with snapshots?](#how-to-work-with-snapshots)
  - [How to delete time series?](#how-to-delete-time-series)
  - [How to export time series?](#how-to-export-time-series)
+  - [How to import time series data?](#how-to-import-time-series-data)
  - [Federation](#federation)
  - [Capacity planning](#capacity-planning)
  - [High availability](#high-availability)
+  - [Retention](#retention)
  - [Multiple retentions](#multiple-retentions)
  - [Downsampling](#downsampling)
  - [Multi-tenancy](#multi-tenancy)
@@ -96,6 +111,7 @@ Cluster version is available [here](https://github.com/VictoriaMetrics/VictoriaM
 - [Roadmap](#roadmap)
 - [Contacts](#contacts)
 - [Community and contributions](#community-and-contributions)
+- [Third-party contributions](#third-party-contributions)
 - [Reporting bugs](#reporting-bugs)
 - [Victoria Metrics Logo](#victoria-metrics-logo)
  - [Logo Usage Guidelines](#logo-usage-guidelines)
@@ -125,14 +141,13 @@ It is recommended setting up [monitoring](#monitoring) for VictoriaMetrics.

 ### Prometheus setup

-Add the following lines to Prometheus config file (it is usually located at `/etc/prometheus/prometheus.yml`):
+Prometheus must be configured with [remote_write](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#remote_write) 
+in order to send data to VictoriaMetrics. Add the following lines 
+to Prometheus config file (it is usually located at `/etc/prometheus/prometheus.yml`):

 ```yml
 remote_write:
  - url: http://<victoriametrics-addr>:8428/api/v1/write
-    queue_config:
-      max_samples_per_send: 10000
-      max_shards: 30
 ```

 Substitute `<victoriametrics-addr>` with the hostname or IP address of VictoriaMetrics.
@@ -159,6 +174,22 @@ This instructs Prometheus to add `datacenter=dc-123` label to each time series s
 The label name may be arbitrary - `datacenter` is just an example. The label value must be unique
 across Prometheus instances, so those time series may be filtered and grouped by this label.

+For highly loaded Prometheus instances (400k+ samples per second)
+the following tuning may be applied:
+```yml
+remote_write:
+  - url: http://<victoriametrics-addr>:8428/api/v1/write
+    queue_config:
+      max_samples_per_send: 10000
+      capacity: 20000
+      max_shards: 30
+```
+
+Using remote write increases memory usage for Prometheus by up to ~25%;
+the exact overhead depends on the shape of data. If you are experiencing issues with
+too high memory consumption, try lowering `max_samples_per_send`
+and `capacity` params (keep in mind that these two params are tightly connected).
+Read more about tuning remote write for Prometheus [here](https://prometheus.io/docs/practices/remote_write).

 It is recommended upgrading Prometheus to [v2.12.0](https://github.com/prometheus/prometheus/releases) or newer,
 since the previous versions may have issues with `remote_write`.
@@ -302,7 +333,7 @@ The `/api/v1/export` endpoint should return the following response:
 ### Querying Graphite data

 Data sent to VictoriaMetrics via `Graphite plaintext protocol` may be read either via
-[Prometheus querying API](https://prometheus.io/docs/prometheus/latest/querying/api/)
+[Prometheus querying API](#prometheus-querying-api-usage)
 or via [go-graphite/carbonapi](https://github.com/go-graphite/carbonapi/blob/master/cmd/carbonapi/carbonapi.example.prometheus.yaml).


@@ -382,6 +413,31 @@ The `/api/v1/export` endpoint should return the following response:
 ```


+### Prometheus querying API usage
+
+VictoriaMetrics supports the following handlers from [Prometheus querying API](https://prometheus.io/docs/prometheus/latest/querying/api/):
+
+* [/api/v1/query](https://prometheus.io/docs/prometheus/latest/querying/api/#instant-queries)
+* [/api/v1/query_range](https://prometheus.io/docs/prometheus/latest/querying/api/#range-queries)
+* [/api/v1/series](https://prometheus.io/docs/prometheus/latest/querying/api/#finding-series-by-label-matchers)
+* [/api/v1/labels](https://prometheus.io/docs/prometheus/latest/querying/api/#getting-label-names)
+* [/api/v1/label/.../values](https://prometheus.io/docs/prometheus/latest/querying/api/#querying-label-values)
+
+These handlers can be queried from Prometheus-compatible clients such as Grafana or curl.
+
+VictoriaMetrics accepts additional args for `/api/v1/labels` and `/api/v1/label/.../values` handlers.
+See [this feature request](https://github.com/prometheus/prometheus/issues/6178) for details:
+
+* Any number of [time series selectors](https://prometheus.io/docs/prometheus/latest/querying/basics/#time-series-selectors) via `match[]` query arg.
+* Optional `start` and `end` query args for limiting the time range for the selected labels or label values.
+
+Additionally VictoriaMetrics provides the following handlers:
+
+* `/api/v1/series/count` - it returns the total number of time series in the database. Note that this handler scans all the inverted index,
+  so it can be slow if the database contains tens of millions of time series.
+* `/api/v1/labels/count` - it returns a list of `label: values_count` entries. It can be used for determining labels with the maximum number of values.
+
+
 ### How to build from sources

 We recommend using either [binary releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases) or
@@ -447,11 +503,6 @@ More details may be found [here](https://github.com/VictoriaMetrics/VictoriaMetr
 Read [these instructions](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/43) on how to set up VictoriaMetrics as a service in your OS.


-### Third-party contributions
-
-* [Unofficial yum repository](https://copr.fedorainfracloud.org/coprs/antonpatsev/VictoriaMetrics/) ([source code](https://github.com/patsevanton/victoriametrics-rpm))
-
-
 ### How to work with snapshots?

 VictoriaMetrics can create [instant snapshots](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282)
@@ -491,12 +542,26 @@ the deleted time series isn't freed instantly - it is freed during subsequent me
 It is recommended verifying which metrics will be deleted with the call to `http://<victoria-metrics-addr>:8428/api/v1/series?match[]=<timeseries_selector_for_delete>`
 before actually deleting the metrics.

+The delete API is intended mainly for the following cases:
+
+- One-off deleting of accidentally written invalid (or undesired) time series.
+- One-off deleting of user data due to [GDPR](https://en.wikipedia.org/wiki/General_Data_Protection_Regulation).
+
+It isn't recommended using delete API for the following cases, since it brings non-zero overhead:
+
+- Regular cleanups for unneeded data. Just prevent writing unneeded data into VictoriaMetrics.
+- Reducing disk space usage by deleting unneeded time series. This doesn't work as expected, since the deleted
+  time series occupy disk space until the next merge operation, which may never occur.
+
+It is better using `-retentionPeriod` command-line flag for efficient pruning of old data.
+

 ### How to export time series?

 Send a request to `http://<victoriametrics-addr>:8428/api/v1/export?match[]=<timeseries_selector_for_export>`,
 where `<timeseries_selector_for_export>` may contain any [time series selector](https://prometheus.io/docs/prometheus/latest/querying/basics/#time-series-selectors)
-for metrics to export. The response would contain all the data for the selected time series in [JSON streaming format](https://en.wikipedia.org/wiki/JSON_streaming#Line-delimited_JSON).
+for metrics to export. Use `{__name__!=""}` selector for fetching all the time series.
+The response would contain all the data for the selected time series in [JSON streaming format](https://en.wikipedia.org/wiki/JSON_streaming#Line-delimited_JSON).
 Each JSON line would contain data for a single time series. An example output:

 ```
@@ -507,6 +572,52 @@ Each JSON line would contain data for a single time series. An example output:
 Optional `start` and `end` args may be added to the request in order to limit the time frame for the exported data. These args may contain either
 unix timestamp in seconds or [RFC3339](https://www.ietf.org/rfc/rfc3339.txt) values.

+Pass `Accept-Encoding: gzip` HTTP header in the request to `/api/v1/export` in order to reduce network bandwidth during exporting big amounts
+of time series data. This enables gzip compression for the exported data. Example for exporting gzipped data:
+
+```
+curl -H 'Accept-Encoding: gzip' http://localhost:8428/api/v1/export -d 'match[]={__name__!=""}' > data.jsonl.gz
+```
+
+The maximum duration for each request to `/api/v1/export` is limited by `-search.maxExportDuration` command-line flag.
+
+Exported data can be imported via POST'ing it to [/api/v1/import](#how-to-import-time-series-data).
+
+
+### How to import time series data?
+
+Time series data can be imported via any supported ingestion protocol:
+
+* [Prometheus remote_write API](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#remote_write)
+* [Influx line protocol](#how-to-send-data-from-influxdb-compatible-agents-such-as-telegraf)
+* [Graphite plaintext protocol](#how-to-send-data-from-graphite-compatible-agents-such-as-statsd)
+* [OpenTSDB telnet put protocol](#sending-data-via-telnet-put-protocol)
+* [OpenTSDB http /api/put](#sending-opentsdb-data-via-http-apiput-requests)
+* `/api/v1/import` http POST handler, which accepts data from [/api/v1/export](#how-to-export-time-series).
+
+The most efficient protocol for importing data into VictoriaMetrics is `/api/v1/import`. Example for importing data obtained via `/api/v1/export`:
+
+```
+# Export the data from <source-victoriametrics>:
+curl http://source-victoriametrics:8428/api/v1/export -d 'match[]={__name__!=""}' > exported_data.jsonl
+
+# Import the data to <destination-victoriametrics>:
+curl -X POST http://destination-victoriametrics:8428/api/v1/import -T exported_data.jsonl
+```
+
+Pass `Content-Encoding: gzip` HTTP request header to `/api/v1/import` for importing gzipped data:
+
+```
+# Export gzipped data from <source-victoriametrics>:
+curl -H 'Accept-Encoding: gzip' http://source-victoriametrics:8428/api/v1/export -d 'match[]={__name__!=""}' > exported_data.jsonl.gz
+
+# Import gzipped data to <destination-victoriametrics>:
+curl -X POST -H 'Content-Encoding: gzip' http://destination-victoriametrics:8428/api/v1/import -T exported_data.jsonl.gz
+```
+
+Each request to `/api/v1/import` can load up to a single vCPU core on VictoriaMetrics. Import speed can be improved by splitting the original file into smaller parts
+and importing them concurrently. Note that the original file must be split on newlines.
+

 ### Federation

@@ -527,7 +638,7 @@ A rough estimation of the required resources for ingestion path:
 * RAM size: less than 1KB per active time series. So, ~1GB of RAM is required for 1M active time series.
  Time series is considered active if new data points have been added to it recently or if it has been recently queried.
  The number of active time series may be obtained from `vm_cache_entries{type="storage/hour_metric_ids"}` metric
-  exproted on the `/metrics` page.
+  exported on the `/metrics` page.
  VictoriaMetrics stores various caches in RAM. Memory size for these caches may be limited by `-memory.allowedPercent` flag.

 * CPU cores: a CPU core per 300K inserted data points per second. So, ~4 CPU cores are required for processing
@@ -591,6 +702,16 @@ If you have Prometheus HA pairs with replicas `r1` and `r2` in each pair, then c
 to write data to `victoriametrics-addr-1`, while each `r2` should write data to `victoriametrics-addr-2`.


+### Retention
+
+Retention is configured with `-retentionPeriod` command-line flag. For instance, `-retentionPeriod=3` means
+that the data will be stored for 3 months and then deleted.
+Data is split in per-month subdirectories inside `<-storageDataPath>/data/small` and `<-storageDataPath>/data/big` folders.
+Directories for months outside the configured retention are deleted on the first day of new month.
+In order to keep data according to `-retentionPeriod` max disk space usage is going to be `-retentionPeriod` + 1 month.
+For example if `-retentionPeriod` is set to 1, data for January is deleted on March 1st.
+
+
 ### Multiple retentions

 Just start multiple VictoriaMetrics instances with distinct values for the following flags:
@@ -608,7 +729,7 @@ There is no downsampling support at the moment, but:
 - VictoriaMetrics has good compression for on-disk data. See [this article](https://medium.com/@valyala/victoriametrics-achieving-better-compression-for-time-series-data-than-gorilla-317bc1f95932)
  for details.

-These properties reduce the need in downsampling. We plan to implement downsampling in the future.
+These properties reduce the need for downsampling. We plan to implement downsampling in the future.
 See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/36) for details.


@@ -630,8 +751,10 @@ horizontally scalable long-term remote storage for really large Prometheus deplo

 ### Alerting

-VictoriaMetrics doesn't support rule evaluation and alerting yet, so these actions must be performed either
-on [Prometheus side](https://prometheus.io/docs/alerting/overview/) or on [Grafana side](https://grafana.com/docs/alerting/rules/).
+VictoriaMetrics doesn't support rule evaluation and alerting yet, so these actions can be performed at the following places:
+* At Prometheus - see [the corresponding docs](https://prometheus.io/docs/alerting/overview/).
+* At Promxy - see [the corresponding docs](https://github.com/jacksontj/promxy/blob/master/README.md#how-do-i-use-alertingrecording-rules-in-promxy).
+* At Grafana - see [the corresponding docs](https://grafana.com/docs/alerting/rules/).


 ### Security
@@ -651,14 +774,14 @@ For example, substitute `-graphiteListenAddr=:2003` with `-graphiteListenAddr=<i

 ### Tuning

-* There is no need in VictoriaMetrics tuning since it uses reasonable defaults for command-line flags,
+* There is no need for VictoriaMetrics tuning since it uses reasonable defaults for command-line flags,
  which are automatically adjusted for the available CPU and RAM resources.
-* There is no need in Operating System tuning since VictoriaMetrics is optimized for default OS settings.
+* There is no need for Operating System tuning since VictoriaMetrics is optimized for default OS settings.
  The only option is increasing the limit on [the number of open files in the OS](https://medium.com/@muhammadtriwibowo/set-permanently-ulimit-n-open-files-in-ubuntu-4d61064429a),
  so Prometheus instances could establish more connections to VictoriaMetrics.
 * The recommended filesystem is `ext4`, the recommended persistent storage is [persistent HDD-based disk on GCP](https://cloud.google.com/compute/docs/disks/#pdspecs),
  since it is protected from hardware failures via internal replication and it can be [resized on the fly](https://cloud.google.com/compute/docs/disks/add-persistent-disk#resize_pd).
-  If you plan storing more than 1TB of data on `ext4` partition or plan extending it to more than 16TB,
+  If you plan to store more than 1TB of data on `ext4` partition or plan to extend it to more than 16TB,
  then the following options are recommended to pass to `mkfs.ext4`:

 ```
@@ -668,9 +791,12 @@ mkfs.ext4 ... -O 64bit,huge_file,extent -T huge

 ### Monitoring

-VictoriaMetrics exports internal metrics in Prometheus format on the `/metrics` page.
-Add this page to Prometheus' scrape config in order to collect VictoriaMetrics metrics.
-There is [an official Grafana dashboard for single-node VictoriaMetrics](https://grafana.com/dashboards/10229).
+VictoriaMetrics exports internal metrics in Prometheus format at `/metrics` page.
+These metrics may be collected via Prometheus by adding the corresponding scrape config to it.
+Alternatively they can be self-scraped by setting `-selfScrapeInterval` command-line flag to a duration greater than 0.
+For example, `-selfScrapeInterval=10s` would enable self-scraping of `/metrics` page with 10 seconds interval.
+
+There are official Grafana dashboards for [single-node VictoriaMetrics](https://grafana.com/dashboards/10229) and [clustered VictoriaMetrics](https://grafana.com/grafana/dashboards/11176).

 The most interesting metrics are:

@@ -689,7 +815,7 @@ The most interesting metrics are:
 ### Troubleshooting

 * It is recommended to use default command-line flag values (i.e. don't set them explicitly) until the need
-  in tweaking these flag values arises.
+  of tweaking these flag values arises.

 * If VictoriaMetrics works slowly and eats more than a CPU core per 100K ingested data points per second,
  then it is likely you have too many active time series for the current amount of RAM.
@@ -703,13 +829,14 @@ The most interesting metrics are:
  has at least 20% of free space comparing to disk size.

 * If VictoriaMetrics doesn't work because of certain parts are corrupted due to disk errors,
-  then just remove directoreis with broken parts. This will recover VictoriaMetrics at the cost
+  then just remove directories with broken parts. This will recover VictoriaMetrics at the cost
  of data loss stored in the broken parts. In the future, `vmrecover` tool will be created
  for automatic recovering from such errors.


 ### Backfilling

+VictoriaMetrics accepts historical data in arbitrary order of time.
 Make sure that configured `-retentionPeriod` covers timestamps for the backfilled data.

 It is recommended disabling query cache with `-search.disableCache` command-line flag when writing
@@ -754,7 +881,7 @@ The collected profiles may be analyzed with [go tool pprof](https://github.com/g
 - [ ] CLI tool for data migration, re-balancing and adding/removing nodes [#103](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/103)


-The discussion happens [here](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/129). Feel free to comment any item or add own one.
+The discussion happens [here](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/129). Feel free to comment on any item or add your own one.


 ## Contacts
@@ -767,6 +894,7 @@ Contact us with any questions regarding VictoriaMetrics at [info@victoriametrics
 Feel free asking any questions regarding VictoriaMetrics:

 - [slack](http://slack.victoriametrics.com/)
+- [reddit](https://www.reddit.com/r/VictoriaMetrics/)
 - [telegram-en](https://t.me/VictoriaMetrics_en)
 - [telegram-ru](https://t.me/VictoriaMetrics_ru1)
 - [google groups](https://groups.google.com/forum/#!forum/victorametrics-users)
@@ -790,6 +918,13 @@ We are open to third-party pull requests provided they follow [KISS design princ
 Adhering `KISS` principle simplifies the resulting code and architecture, so it can be reviewed, understood and verified by many people.


+### Third-party contributions
+
+* [Unofficial yum repository](https://copr.fedorainfracloud.org/coprs/antonpatsev/VictoriaMetrics/) ([source code](https://github.com/patsevanton/victoriametrics-rpm))
+* [Prometheus -> VictoriaMetrics exporter #1](https://github.com/ryotarai/prometheus-tsdb-dump)
+* [Prometheus -> VictoriaMetrics exporter #2](https://github.com/AnchorFree/tsdb-remote-write)
+
+
 ## Reporting bugs

 Report bugs and propose new features [here](https://github.com/VictoriaMetrics/VictoriaMetrics/issues).
@@ -797,7 +932,7 @@ Report bugs and propose new features [here](https://github.com/VictoriaMetrics/V

 ## Victoria Metrics Logo

-[Zip](VM_logo.zip) contains three folders with different image orientation (main color and inverted version).
+[Zip](VM_logo.zip) contains three folders with different image orientations (main color and inverted version).

 Files included in each folder:

--- a/app/victoria-metrics/Makefile
+++ b/app/victoria-metrics/Makefile
@@ -6,9 +6,44 @@ victoria-metrics:
 victoria-metrics-prod:
 	APP_NAME=victoria-metrics $(MAKE) app-via-docker

+victoria-metrics-pure-prod:
+	APP_NAME=victoria-metrics $(MAKE) app-via-docker-pure
+
+victoria-metrics-amd64-prod:
+	APP_NAME=victoria-metrics $(MAKE) app-via-docker-amd64
+
+victoria-metrics-arm-prod:
+	APP_NAME=victoria-metrics $(MAKE) app-via-docker-arm
+
+victoria-metrics-arm64-prod:
+	APP_NAME=victoria-metrics $(MAKE) app-via-docker-arm64
+
+victoria-metrics-ppc64le-prod:
+	APP_NAME=victoria-metrics $(MAKE) app-via-docker-ppc64le
+
+victoria-metrics-386-prod:
+	APP_NAME=victoria-metrics $(MAKE) app-via-docker-386
+
 package-victoria-metrics:
-	APP_NAME=victoria-metrics \
-	$(MAKE) package-via-docker
+	APP_NAME=victoria-metrics $(MAKE) package-via-docker
+
+package-victoria-metrics-pure:
+	APP_NAME=victoria-metrics $(MAKE) package-via-docker-pure
+
+package-victoria-metrics-amd64:
+	APP_NAME=victoria-metrics $(MAKE) package-via-docker-amd64
+
+package-victoria-metrics-arm:
+	APP_NAME=victoria-metrics $(MAKE) package-via-docker-arm
+
+package-victoria-metrics-arm64:
+	APP_NAME=victoria-metrics $(MAKE) package-via-docker-arm64
+
+package-victoria-metrics-ppc64le:
+	APP_NAME=victoria-metrics $(MAKE) package-via-docker-ppc64le
+
+package-victoria-metrics-386:
+	APP_NAME=victoria-metrics $(MAKE) package-via-docker-386

 publish-victoria-metrics:
 	APP_NAME=victoria-metrics $(MAKE) publish-via-docker
@@ -20,36 +55,24 @@ run-victoria-metrics:
 	ARGS='-graphiteListenAddr=:2003 -opentsdbListenAddr=:4242 -retentionPeriod=12 -search.maxUniqueTimeseries=1000000 -search.maxQueryDuration=10m' \
 	$(MAKE) run-via-docker

+victoria-metrics-amd64:
+	CGO_ENABLED=1 GOOS=linux GOARCH=amd64 GO111MODULE=on go build -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/victoria-metrics-amd64 ./app/victoria-metrics
+
 victoria-metrics-arm:
 	CGO_ENABLED=0 GOOS=linux GOARCH=arm GO111MODULE=on go build -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/victoria-metrics-arm ./app/victoria-metrics

-victoria-metrics-arm-prod:
-	APP_NAME=victoria-metrics APP_SUFFIX='-arm' DOCKER_OPTS='--env CGO_ENABLED=0 --env GOARCH=arm' $(MAKE) app-via-docker
-
 victoria-metrics-arm64:
 	CGO_ENABLED=0 GOOS=linux GOARCH=arm64 GO111MODULE=on go build -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/victoria-metrics-arm64 ./app/victoria-metrics

-victoria-metrics-arm64-prod:
-	APP_NAME=victoria-metrics APP_SUFFIX='-arm64' DOCKER_OPTS='--env CGO_ENABLED=0 --env GOARCH=arm64' $(MAKE) app-via-docker
-
 victoria-metrics-ppc64le:
 	CGO_ENABLED=0 GOOS=linux GOARCH=ppc64le GO111MODULE=on go build -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/victoria-metrics-ppc64le ./app/victoria-metrics

-victoria-metrics-ppc64le-prod:
-	APP_NAME=victoria-metrics APP_SUFFIX='-ppc64le' DOCKER_OPTS='--env CGO_ENABLED=0 --env GOARCH=ppc64le' $(MAKE) app-via-docker
-
 victoria-metrics-386:
 	CGO_ENABLED=0 GOOS=linux GOARCH=386 GO111MODULE=on go build -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/victoria-metrics-386 ./app/victoria-metrics

-victoria-metrics-386-prod:
-	APP_NAME=victoria-metrics APP_SUFFIX='-386' DOCKER_OPTS='--env CGO_ENABLED=0 --env GOARCH=386' $(MAKE) app-via-docker
-
 victoria-metrics-pure:
 	APP_NAME=victoria-metrics $(MAKE) app-local-pure

-victoria-metrics-pure-prod:
-	APP_NAME=victoria-metrics APP_SUFFIX='-pure' DOCKER_OPTS='--env CGO_ENABLED=0' $(MAKE) app-via-docker
-
 ### Packaging as DEB - amd64
 victoria-metrics-package-deb: victoria-metrics-prod
 	./package/package_deb.sh amd64
--- a/app/victoria-metrics/deployment/Dockerfile
+++ b/app/victoria-metrics/deployment/Dockerfile
@@ -1,5 +1,8 @@
+ARG certs_image
+FROM $certs_image AS certs
 FROM scratch
-COPY --from=local/certs:1.0.3 /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ca-certificates.crt
-COPY bin/victoria-metrics-prod .
+COPY --from=certs /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ca-certificates.crt
+ARG src_binary
+COPY $src_binary ./victoria-metrics-prod
 EXPOSE 8428
 ENTRYPOINT ["/victoria-metrics-prod"]
--- a/app/victoria-metrics/main.go
+++ b/app/victoria-metrics/main.go
@@ -21,32 +21,35 @@ func main() {
 	flag.Parse()
 	buildinfo.Init()
 	logger.Init()
-	logger.Infof("starting VictoraMetrics at %q...", *httpListenAddr)
+	logger.Infof("starting VictoriaMetrics at %q...", *httpListenAddr)
 	startTime := time.Now()
 	vmstorage.Init()
 	vmselect.Init()
 	vminsert.Init()
+	startSelfScraper()

 	go httpserver.Serve(*httpListenAddr, requestHandler)
-	logger.Infof("started VictoriaMetrics in %s", time.Since(startTime))
+	logger.Infof("started VictoriaMetrics in %.3f seconds", time.Since(startTime).Seconds())

 	sig := procutil.WaitForSigterm()
 	logger.Infof("received signal %s", sig)

+	stopSelfScraper()
+
 	logger.Infof("gracefully shutting down webservice at %q", *httpListenAddr)
 	startTime = time.Now()
 	if err := httpserver.Stop(*httpListenAddr); err != nil {
 		logger.Fatalf("cannot stop the webservice: %s", err)
 	}
 	vminsert.Stop()
-	logger.Infof("successfully shut down the webservice in %s", time.Since(startTime))
+	logger.Infof("successfully shut down the webservice in %.3f seconds", time.Since(startTime).Seconds())

 	vmstorage.Stop()
 	vmselect.Stop()

 	fs.MustStopDirRemover()

-	logger.Infof("the VictoriaMetrics has been stopped in %s", time.Since(startTime))
+	logger.Infof("the VictoriaMetrics has been stopped in %.3f seconds", time.Since(startTime).Seconds())
 }

 func requestHandler(w http.ResponseWriter, r *http.Request) bool {
--- a/app/victoria-metrics/main_test.go
+++ b/app/victoria-metrics/main_test.go
@@ -1,5 +1,3 @@
-// +build integration
-
 package main

 import (
@@ -302,6 +300,9 @@ func readIn(readFor string, t *testing.T, insertTime time.Time) []test {
 	s := newSuite(t)
 	var tt []test
 	s.noError(filepath.Walk(filepath.Join(testFixturesDir, readFor), func(path string, info os.FileInfo, err error) error {
+		if err != nil {
+			return err
+		}
 		if filepath.Ext(path) != ".json" {
 			return nil
 		}
--- a/app/victoria-metrics/self_scraper.go
+++ b/app/victoria-metrics/self_scraper.go
@@ -0,0 +1,99 @@
+package main
+
+import (
+	"flag"
+	"sync"
+	"time"
+
+	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmstorage"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/prometheus"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
+)
+
+var selfScrapeInterval = flag.Duration("selfScrapeInterval", 0, "Interval for self-scraping own metrics at `/metrics` page")
+
+var selfScraperStopCh chan struct{}
+var selfScraperWG sync.WaitGroup
+
+func startSelfScraper() {
+	selfScraperStopCh = make(chan struct{})
+	selfScraperWG.Add(1)
+	go func() {
+		defer selfScraperWG.Done()
+		selfScraper(*selfScrapeInterval)
+	}()
+}
+
+func stopSelfScraper() {
+	close(selfScraperStopCh)
+	selfScraperWG.Wait()
+}
+
+func selfScraper(scrapeInterval time.Duration) {
+	if scrapeInterval <= 0 {
+		// Self-scrape is disabled.
+		return
+	}
+	logger.Infof("started self-scraping `/metrics` page with interval %.3f seconds", scrapeInterval.Seconds())
+
+	var bb bytesutil.ByteBuffer
+	var rows prometheus.Rows
+	var mrs []storage.MetricRow
+	var labels []prompb.Label
+	t := time.NewTicker(scrapeInterval)
+	var currentTimestamp int64
+	for {
+		select {
+		case <-selfScraperStopCh:
+			t.Stop()
+			logger.Infof("stopped self-scraping `/metrics` page")
+			return
+		case currentTime := <-t.C:
+			currentTimestamp = currentTime.UnixNano() / 1e6
+		}
+		bb.Reset()
+		httpserver.WritePrometheusMetrics(&bb)
+		s := bytesutil.ToUnsafeString(bb.B)
+		rows.Reset()
+		rows.Unmarshal(s)
+		mrs = mrs[:0]
+		for i := range rows.Rows {
+			r := &rows.Rows[i]
+			labels = labels[:0]
+			labels = addLabel(labels, "", r.Metric)
+			labels = addLabel(labels, "job", "victoria-metrics")
+			labels = addLabel(labels, "instance", "self")
+			for j := range r.Tags {
+				t := &r.Tags[j]
+				labels = addLabel(labels, t.Key, t.Value)
+			}
+			if len(mrs) < cap(mrs) {
+				mrs = mrs[:len(mrs)+1]
+			} else {
+				mrs = append(mrs, storage.MetricRow{})
+			}
+			mr := &mrs[len(mrs)-1]
+			mr.MetricNameRaw = storage.MarshalMetricNameRaw(mr.MetricNameRaw[:0], labels)
+			mr.Timestamp = currentTimestamp
+			mr.Value = r.Value
+		}
+		logger.Infof("writing %d rows at timestamp %d", len(mrs), currentTimestamp)
+		vmstorage.AddRows(mrs)
+	}
+}
+
+func addLabel(dst []prompb.Label, key, value string) []prompb.Label {
+	if len(dst) < cap(dst) {
+		dst = dst[:len(dst)+1]
+	} else {
+		dst = append(dst, prompb.Label{})
+	}
+	lb := &dst[len(dst)-1]
+	lb.Name = bytesutil.ToUnsafeBytes(key)
+	lb.Value = bytesutil.ToUnsafeBytes(value)
+	return dst
+}
--- a/app/victoria-metrics/test/prom_types.go
+++ b/app/victoria-metrics/test/prom_types.go
@@ -1,18 +1,18 @@
-// +build integration
-
-// Source https://github.com/prometheus/prometheus/blob/master/prompb/remote.pb.go . Code is copy pasted and cleaned up
 package test

+// Source: https://github.com/prometheus/prometheus/blob/master/prompb/remote.pb.go . The code is copy-pasted and cleaned up.
 import (
 	"encoding/binary"
 	"math"
 	"math/bits"
 )

+// WriteRequest is a write request holding a set of time series.
 type WriteRequest struct {
 	Timeseries []TimeSeries `protobuf:"bytes,1,rep,name=timeseries,proto3" json:"timeseries"`
 }

+// Size returns the size of m in bytes after marshaling.
 func (m *WriteRequest) Size() (n int) {
 	if m == nil {
 		return 0
@@ -31,6 +31,7 @@ func sovRemote(x uint64) (n int) {
 	return (bits.Len64(x|1) + 6) / 7
 }

+// Marshal marshals m.
 func (m *WriteRequest) Marshal() (dAtA []byte, err error) {
 	size := m.Size()
 	dAtA = make([]byte, size)
@@ -41,11 +42,13 @@ func (m *WriteRequest) Marshal() (dAtA []byte, err error) {
 	return dAtA[:n], nil
 }

+// MarshalTo marshals m to dAtA.
 func (m *WriteRequest) MarshalTo(dAtA []byte) (int, error) {
 	size := m.Size()
 	return m.MarshalToSizedBuffer(dAtA[:size])
 }

+// MarshalToSizedBuffer marshals m to dAtA.
 func (m *WriteRequest) MarshalToSizedBuffer(dAtA []byte) (int, error) {
 	i := len(dAtA)
 	if len(m.Timeseries) > 0 {
@@ -77,11 +80,13 @@ func encodeVarintRemote(dAtA []byte, offset int, v uint64) int {
 	return base
 }

+// Sample is time series sample.
 type Sample struct {
 	Value     float64 `protobuf:"fixed64,1,opt,name=value,proto3" json:"value,omitempty"`
 	Timestamp int64   `protobuf:"varint,2,opt,name=timestamp,proto3" json:"timestamp,omitempty"`
 }

+// Reset resets m.
 func (m *Sample) Reset() { *m = Sample{} }

 // TimeSeries represents samples and labels for a single time series.
@@ -90,21 +95,27 @@ type TimeSeries struct {
 	Samples []Sample `protobuf:"bytes,2,rep,name=samples,proto3" json:"samples"`
 }

+// Reset resets m.
 func (m *TimeSeries) Reset() { *m = TimeSeries{} }

+// Label is time series label.
 type Label struct {
 	Name  string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"`
 	Value string `protobuf:"bytes,2,opt,name=value,proto3" json:"value,omitempty"`
 }

+// Reset resets m.
 func (m *Label) Reset() { *m = Label{} }

+// Labels is a set of labels.
 type Labels struct {
 	Labels []Label `protobuf:"bytes,1,rep,name=labels,proto3" json:"labels"`
 }

+// Reset resets m.
 func (m *Labels) Reset() { *m = Labels{} }

+// Marshal marshals m.
 func (m *Sample) Marshal() (dAtA []byte, err error) {
 	size := m.Size()
 	dAtA = make([]byte, size)
@@ -115,11 +126,13 @@ func (m *Sample) Marshal() (dAtA []byte, err error) {
 	return dAtA[:n], nil
 }

+// MarshalTo marshals m to dAtA.
 func (m *Sample) MarshalTo(dAtA []byte) (int, error) {
 	size := m.Size()
 	return m.MarshalToSizedBuffer(dAtA[:size])
 }

+// MarshalToSizedBuffer marshals m to dAtA.
 func (m *Sample) MarshalToSizedBuffer(dAtA []byte) (int, error) {
 	i := len(dAtA)
 	if m.Timestamp != 0 {
@@ -136,6 +149,7 @@ func (m *Sample) MarshalToSizedBuffer(dAtA []byte) (int, error) {
 	return len(dAtA) - i, nil
 }

+// Marshal marshals m.
 func (m *TimeSeries) Marshal() (dAtA []byte, err error) {
 	size := m.Size()
 	dAtA = make([]byte, size)
@@ -146,11 +160,13 @@ func (m *TimeSeries) Marshal() (dAtA []byte, err error) {
 	return dAtA[:n], nil
 }

+// MarshalTo marshals m to dAtA.
 func (m *TimeSeries) MarshalTo(dAtA []byte) (int, error) {
 	size := m.Size()
 	return m.MarshalToSizedBuffer(dAtA[:size])
 }

+// MarshalToSizedBuffer marshals m to dAtA.
 func (m *TimeSeries) MarshalToSizedBuffer(dAtA []byte) (int, error) {
 	i := len(dAtA)
 	if len(m.Samples) > 0 {
@@ -184,6 +200,7 @@ func (m *TimeSeries) MarshalToSizedBuffer(dAtA []byte) (int, error) {
 	return len(dAtA) - i, nil
 }

+// Marshal marshals m.
 func (m *Label) Marshal() (dAtA []byte, err error) {
 	size := m.Size()
 	dAtA = make([]byte, size)
@@ -194,11 +211,13 @@ func (m *Label) Marshal() (dAtA []byte, err error) {
 	return dAtA[:n], nil
 }

+// MarshalTo marshals m to dAtA.
 func (m *Label) MarshalTo(dAtA []byte) (int, error) {
 	size := m.Size()
 	return m.MarshalToSizedBuffer(dAtA[:size])
 }

+// MarshalToSizedBuffer marshals m to dAtA.
 func (m *Label) MarshalToSizedBuffer(dAtA []byte) (int, error) {
 	i := len(dAtA)
 	_ = i
@@ -221,6 +240,7 @@ func (m *Label) MarshalToSizedBuffer(dAtA []byte) (int, error) {
 	return len(dAtA) - i, nil
 }

+// Marshal marshals m.
 func (m *Labels) Marshal() (dAtA []byte, err error) {
 	size := m.Size()
 	dAtA = make([]byte, size)
@@ -231,11 +251,13 @@ func (m *Labels) Marshal() (dAtA []byte, err error) {
 	return dAtA[:n], nil
 }

+// MarshalTo marshals m to dAtA.
 func (m *Labels) MarshalTo(dAtA []byte) (int, error) {
 	size := m.Size()
 	return m.MarshalToSizedBuffer(dAtA[:size])
 }

+// MarshalToSizedBuffer marshals m to dAtA.
 func (m *Labels) MarshalToSizedBuffer(dAtA []byte) (int, error) {
 	i := len(dAtA)
 	if len(m.Labels) > 0 {
@@ -267,6 +289,7 @@ func encodeVarintTypes(dAtA []byte, offset int, v uint64) int {
 	return base
 }

+// Size returns the size of marshaled m.
 func (m *Sample) Size() (n int) {
 	if m == nil {
 		return 0
@@ -280,6 +303,7 @@ func (m *Sample) Size() (n int) {
 	return n
 }

+// Size returns the size of marshaled m.
 func (m *TimeSeries) Size() (n int) {
 	if m == nil {
 		return 0
@@ -301,6 +325,7 @@ func (m *TimeSeries) Size() (n int) {
 	return n
 }

+// Size returns the size of marshaled m.
 func (m *Label) Size() (n int) {
 	if m == nil {
 		return 0
@@ -318,6 +343,7 @@ func (m *Label) Size() (n int) {
 	return n
 }

+// Size returns the size of marshaled m.
 func (m *Labels) Size() (n int) {
 	if m == nil {
 		return 0
--- a/app/victoria-metrics/test/prom_writter.go
+++ b/app/victoria-metrics/test/prom_writter.go
@@ -1,9 +1,8 @@
-// +build integration
-
 package test

 import "github.com/golang/snappy"

+// Compress marshals and compresses wr.
 func Compress(wr WriteRequest) ([]byte, error) {
 	data, err := wr.Marshal()
 	if err != nil {
--- a/app/victoria-metrics/testdata/graphite/max_lookback_set.json
+++ b/app/victoria-metrics/testdata/graphite/max_lookback_set.json
@@ -13,11 +13,8 @@
    "data":{"resultType":"matrix",
      "result":[{"metric":{"__name__":"max_lookback_set"},"values":[
 	      ["{TIME_S-150s}","4"],
-	      ["{TIME_S-140s}","4"],
 	      ["{TIME_S-120s}","3"],
-	      ["{TIME_S-110s}","3"],
 	      ["{TIME_S-60s}","2"],
-	      ["{TIME_S-50s}","2"],
 	      ["{TIME_S-30s}","1"],
 	      ["{TIME_S-20s}","1"]
      ]}]}}
--- a/app/victoria-metrics/testdata/graphite/max_lookback_unset.json
+++ b/app/victoria-metrics/testdata/graphite/max_lookback_unset.json
@@ -19,14 +19,12 @@
 	      ["{TIME_S-110s}","3"],
 	      ["{TIME_S-100s}","3"],
 	      ["{TIME_S-90s}","3"],
-	      ["{TIME_S-80s}","3"],
-	      ["{TIME_S-70s}","3"],
 	      ["{TIME_S-60s}","2"],
 	      ["{TIME_S-50s}","2"],
 	      ["{TIME_S-40s}","2"],
 	      ["{TIME_S-30s}","1"],
 	      ["{TIME_S-20s}","1"],
 	      ["{TIME_S-10s}","1"],
-	      ["{TIME_S}","1"]
+	      ["{TIME_S-0s}","1"]
      ]}]}}
 }
--- a/app/vmbackup/Makefile
+++ b/app/vmbackup/Makefile
@@ -6,32 +6,62 @@ vmbackup:
 vmbackup-prod:
 	APP_NAME=vmbackup $(MAKE) app-via-docker

+vmbackup-pure-prod:
+	APP_NAME=vmbackup $(MAKE) app-via-docker-pure
+
+vmbackup-amd64-prod:
+	APP_NAME=vmbackup $(MAKE) app-via-docker-amd64
+
+vmbackup-arm-prod:
+	APP_NAME=vmbackup $(MAKE) app-via-docker-arm
+
+vmbackup-arm64-prod:
+	APP_NAME=vmbackup $(MAKE) app-via-docker-arm64
+
+vmbackup-ppc64le-prod:
+	APP_NAME=vmbackup $(MAKE) app-via-docker-ppc64le
+
+vmbackup-386-prod:
+	APP_NAME=vmbackup $(MAKE) app-via-docker-386
+
 package-vmbackup:
 	APP_NAME=vmbackup $(MAKE) package-via-docker

+package-vmbackup-pure:
+	APP_NAME=vmbackup $(MAKE) package-via-docker-pure
+
+package-vmbackup-amd64:
+	APP_NAME=vmbackup $(MAKE) package-via-docker-amd64
+
+package-vmbackup-arm:
+	APP_NAME=vmbackup $(MAKE) package-via-docker-arm
+
+package-vmbackup-arm64:
+	APP_NAME=vmbackup $(MAKE) package-via-docker-arm64
+
+package-vmbackup-ppc64le:
+	APP_NAME=vmbackup $(MAKE) package-via-docker-ppc64le
+
+package-vmbackup-386:
+	APP_NAME=vmbackup $(MAKE) package-via-docker-386
+
 publish-vmbackup:
 	APP_NAME=vmbackup $(MAKE) publish-via-docker

+vmbackup-pure:
+	APP_NAME=vmbackup $(MAKE) app-local-pure
+
+vmbackup-amd64:
+	CGO_ENABLED=1 GOOS=linux GOARCH=amd64 GO111MODULE=on go build -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/vmbackup-amd64 ./app/vmbackup
+
 vmbackup-arm:
 	CGO_ENABLED=0 GOOS=linux GOARCH=arm GO111MODULE=on go build -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/vmbackup-arm ./app/vmbackup

-vmbackup-arm-prod:
-	APP_NAME=vmbackup APP_SUFFIX='-arm' DOCKER_OPTS='--env CGO_ENABLED=0 --env GOARCH=arm' $(MAKE) app-via-docker
-
 vmbackup-arm64:
 	CGO_ENABLED=0 GOOS=linux GOARCH=arm64 GO111MODULE=on go build -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/vmbackup-arm64 ./app/vmbackup

-vmbackup-arm64-prod:
-	APP_NAME=vmbackup APP_SUFFIX='-arm64' DOCKER_OPTS='--env CGO_ENABLED=0 --env GOARCH=arm64' $(MAKE) app-via-docker
+vmbackup-ppc64le:
+	CGO_ENABLED=0 GOOS=linux GOARCH=ppc64le GO111MODULE=on go build -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/vmbackup-ppc64le ./app/vmbackup

 vmbackup-386:
 	CGO_ENABLED=0 GOOS=linux GOARCH=386 GO111MODULE=on go build -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/vmbackup-386 ./app/vmbackup
-
-vmbackup-386-prod:
-	APP_NAME=vmbackup APP_SUFFIX='-386' DOCKER_OPTS='--env CGO_ENABLED=0 --env GOARCH=386' $(MAKE) app-via-docker
-
-vmbackup-pure:
-	APP_NAME=vmbackup $(MAKE) app-local-pure
-
-vmbackup-pure-prod:
-	APP_NAME=vmbackup APP_SUFFIX='-pure' DOCKER_OPTS='--env CGO_ENABLED=0' $(MAKE) app-via-docker
--- a/app/vmbackup/README.md
+++ b/app/vmbackup/README.md
@@ -6,7 +6,7 @@ Supported storage systems for backups:

 * [GCS](https://cloud.google.com/storage/). Example: `gcs://<bucket>/<path/to/backup>`
 * [S3](https://aws.amazon.com/s3/). Example: `s3://<bucket>/<path/to/backup>`
-* Any S3-compatible storage such as [MinIO](https://github.com/minio/minio). See `-customS3Endpoint` command-line flag.
+* Any S3-compatible storage such as [MinIO](https://github.com/minio/minio), [Ceph](https://docs.ceph.com/docs/mimic/radosgw/s3/) or [Swift](https://www.swiftstack.com/docs/admin/middleware/s3_middleware.html). See `-customS3Endpoint` command-line flag.
 * Local filesystem. Example: `fs://</absolute/path/to/backup>`

 Incremental backups and full backups are supported. Incremental backups are created automatically if the destination path already contains data from the previous backup.
--- a/app/vmbackup/deployment/Dockerfile
+++ b/app/vmbackup/deployment/Dockerfile
@@ -1,5 +1,7 @@
+ARG certs_image
+FROM $certs_image AS certs
 FROM scratch
-COPY --from=local/certs:1.0.3 /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ca-certificates.crt
-COPY bin/vmbackup-prod .
-EXPOSE 8428
+COPY --from=certs /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ca-certificates.crt
+ARG src_binary
+COPY $src_binary ./vmbackup-prod
 ENTRYPOINT ["/vmbackup-prod"]
--- a/app/vminsert/common/gzip_reader.go
+++ b/app/vminsert/common/gzip_reader.go
@@ -1,9 +1,10 @@
 package common

 import (
-	"compress/gzip"
 	"io"
 	"sync"
+
+	"github.com/klauspost/compress/gzip"
 )

 // GetGzipReader returns new gzip reader from the pool.
--- a/app/vminsert/common/insert_ctx.go
+++ b/app/vminsert/common/insert_ctx.go
@@ -47,7 +47,7 @@ func (ctx *InsertCtx) marshalMetricNameRaw(prefix []byte, labels []prompb.Label)
 	return metricNameRaw[:len(metricNameRaw):len(metricNameRaw)]
 }

-// WriteDataPoint writes (timestamp, value) with the given prefix and lables into ctx buffer.
+// WriteDataPoint writes (timestamp, value) with the given prefix and labels into ctx buffer.
 func (ctx *InsertCtx) WriteDataPoint(prefix []byte, labels []prompb.Label, timestamp int64, value float64) {
 	metricNameRaw := ctx.marshalMetricNameRaw(prefix, labels)
 	ctx.addRow(metricNameRaw, timestamp, value)
@@ -78,6 +78,26 @@ func (ctx *InsertCtx) addRow(metricNameRaw []byte, timestamp int64, value float6
 	mr.Value = value
 }

+// AddLabelBytes adds (name, value) label to ctx.Labels.
+//
+// name and value must exist until ctx.Labels is used.
+func (ctx *InsertCtx) AddLabelBytes(name, value []byte) {
+	labels := ctx.Labels
+	if cap(labels) > len(labels) {
+		labels = labels[:len(labels)+1]
+	} else {
+		labels = append(labels, prompb.Label{})
+	}
+	label := &labels[len(labels)-1]
+
+	// Do not copy name and value contents for performance reasons.
+	// This reduces GC overhead on the number of objects and allocations.
+	label.Name = name
+	label.Value = value
+
+	ctx.Labels = labels
+}
+
 // AddLabel adds (name, value) label to ctx.Labels.
 //
 // name and value must exist until ctx.Labels is used.
--- a/app/vminsert/common/lines_reader.go
+++ b/app/vminsert/common/lines_reader.go
@@ -20,6 +20,17 @@ const defaultBlockSize = 64 * 1024
 //
 // Returns (dstBuf, tailBuf).
 func ReadLinesBlock(r io.Reader, dstBuf, tailBuf []byte) ([]byte, []byte, error) {
+	return ReadLinesBlockExt(r, dstBuf, tailBuf, maxLineSize)
+}
+
+// ReadLinesBlockExt reads a block of lines delimited by '\n' from tailBuf and r into dstBuf.
+//
+// Trailing chars after the last newline are put into tailBuf.
+//
+// Returns (dstBuf, tailBuf).
+//
+// maxLineLen limits the maximum length of a single line.
+func ReadLinesBlockExt(r io.Reader, dstBuf, tailBuf []byte, maxLineLen int) ([]byte, []byte, error) {
 	if cap(dstBuf) < defaultBlockSize {
 		dstBuf = bytesutil.Resize(dstBuf, defaultBlockSize)
 	}
@@ -48,8 +59,8 @@ again:
 	nn := bytes.LastIndexByte(dstBuf[len(dstBuf)-n:], '\n')
 	if nn < 0 {
 		// Didn't found at least a single line.
-		if len(dstBuf) > maxLineSize {
-			return dstBuf, tailBuf, fmt.Errorf("too long line: more than %d bytes", maxLineSize)
+		if len(dstBuf) > maxLineLen {
+			return dstBuf, tailBuf, fmt.Errorf("too long line: more than %d bytes", maxLineLen)
 		}
 		if cap(dstBuf) < 2*len(dstBuf) {
 			// Increase dsbBuf capacity, so more data could be read into it.
--- a/app/vminsert/concurrencylimiter/concurrencylimiter.go
+++ b/app/vminsert/concurrencylimiter/concurrencylimiter.go
@@ -12,17 +12,14 @@ import (
 	"github.com/VictoriaMetrics/metrics"
 )

-var maxConcurrentInserts = flag.Int("maxConcurrentInserts", runtime.GOMAXPROCS(-1)*4, "The maximum number of concurrent inserts")
-
 var (
-	// ch is the channel for limiting concurrent calls to Do.
-	ch chan struct{}
-
-	// waitDuration is the amount of time to wait until at least a single
-	// concurrent Do call out of cap(ch) inserts is complete.
-	waitDuration = time.Second * 30
+	maxConcurrentInserts = flag.Int("maxConcurrentInserts", runtime.GOMAXPROCS(-1)*4, "The maximum number of concurrent inserts; see also `-insert.maxQueueDuration`")
+	maxQueueDuration     = flag.Duration("insert.maxQueueDuration", time.Minute, "The maximum duration for waiting in the queue for insert requests due to `-maxConcurrentInserts`")
 )

+// ch is the channel for limiting concurrent calls to Do.
+var ch chan struct{}
+
 // Init initializes concurrencylimiter.
 //
 // Init must be called after flag.Parse call.
@@ -43,9 +40,9 @@ func Do(f func() error) error {
 	}

 	// All the workers are busy.
-	// Sleep for up to waitDuration.
+	// Sleep for up to *maxQueueDuration.
 	concurrencyLimitReached.Inc()
-	t := timerpool.Get(waitDuration)
+	t := timerpool.Get(*maxQueueDuration)
 	select {
 	case ch <- struct{}{}:
 		timerpool.Put(t)
@@ -56,7 +53,9 @@ func Do(f func() error) error {
 		timerpool.Put(t)
 		concurrencyLimitTimeout.Inc()
 		return &httpserver.ErrorWithStatusCode{
-			Err:        fmt.Errorf("the server is overloaded with %d concurrent inserts; either increase -maxConcurrentInserts or reduce the load", cap(ch)),
+			Err: fmt.Errorf("cannot handle more than %d concurrent inserts during %s; possible solutions: "+
+				"increase `-insert.maxQueueDuration`, increase `-maxConcurrentInserts`, "+
+				"decrease `-search.maxConcurrentRequests`, increase server capacity", *maxConcurrentInserts, *maxQueueDuration),
 			StatusCode: http.StatusServiceUnavailable,
 		}
 	}
--- a/app/vminsert/graphite/request_handler.go
+++ b/app/vminsert/graphite/request_handler.go
@@ -11,6 +11,7 @@ import (
 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/common"
 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/concurrencylimiter"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/graphite"
 	"github.com/VictoriaMetrics/metrics"
 )

@@ -61,13 +62,13 @@ func (ctx *pushCtx) InsertRows() error {
 const flushTimeout = 3 * time.Second

 func (ctx *pushCtx) Read(r io.Reader) bool {
-	graphiteReadCalls.Inc()
+	readCalls.Inc()
 	if ctx.err != nil {
 		return false
 	}
 	if c, ok := r.(net.Conn); ok {
 		if err := c.SetReadDeadline(time.Now().Add(flushTimeout)); err != nil {
-			graphiteReadErrors.Inc()
+			readErrors.Inc()
 			ctx.err = fmt.Errorf("cannot set read deadline: %s", err)
 			return false
 		}
@@ -79,7 +80,7 @@ func (ctx *pushCtx) Read(r io.Reader) bool {
 			ctx.err = nil
 		} else {
 			if ctx.err != io.EOF {
-				graphiteReadErrors.Inc()
+				readErrors.Inc()
 				ctx.err = fmt.Errorf("cannot read graphite plaintext protocol data: %s", ctx.err)
 			}
 			return false
@@ -106,7 +107,7 @@ func (ctx *pushCtx) Read(r io.Reader) bool {
 }

 type pushCtx struct {
-	Rows   Rows
+	Rows   graphite.Rows
 	Common common.InsertCtx

 	reqBuf  []byte
@@ -132,8 +133,8 @@ func (ctx *pushCtx) reset() {
 }

 var (
-	graphiteReadCalls  = metrics.NewCounter(`vm_read_calls_total{name="graphite"}`)
-	graphiteReadErrors = metrics.NewCounter(`vm_read_errors_total{name="graphite"}`)
+	readCalls  = metrics.NewCounter(`vm_read_calls_total{name="graphite"}`)
+	readErrors = metrics.NewCounter(`vm_read_errors_total{name="graphite"}`)
 )

 func getPushCtx() *pushCtx {
--- a/app/vminsert/graphite/server.go
+++ b/app/vminsert/graphite/server.go
@@ -21,36 +21,62 @@ var (
 	writeErrorsUDP   = metrics.NewCounter(`vm_graphite_request_errors_total{name="write", net="udp"}`)
 )

-// Serve starts graphite server on the given addr.
-func Serve(addr string) {
+// Server accepts Graphite plaintext lines over TCP and UDP.
+type Server struct {
+	addr  string
+	lnTCP net.Listener
+	lnUDP net.PacketConn
+	wg    sync.WaitGroup
+}
+
+// MustStart starts graphite server on the given addr.
+//
+// MustStop must be called on the returned server when it is no longer needed.
+func MustStart(addr string) *Server {
 	logger.Infof("starting TCP Graphite server at %q", addr)
 	lnTCP, err := netutil.NewTCPListener("graphite", addr)
 	if err != nil {
 		logger.Fatalf("cannot start TCP Graphite server at %q: %s", addr, err)
 	}
-	listenerTCP = lnTCP

 	logger.Infof("starting UDP Graphite server at %q", addr)
 	lnUDP, err := net.ListenPacket("udp4", addr)
 	if err != nil {
 		logger.Fatalf("cannot start UDP Graphite server at %q: %s", addr, err)
 	}
-	listenerUDP = lnUDP

-	var wg sync.WaitGroup
-	wg.Add(1)
+	s := &Server{
+		addr:  addr,
+		lnTCP: lnTCP,
+		lnUDP: lnUDP,
+	}
+	s.wg.Add(1)
 	go func() {
-		defer wg.Done()
-		serveTCP(listenerTCP)
+		defer s.wg.Done()
+		serveTCP(lnTCP)
 		logger.Infof("stopped TCP Graphite server at %q", addr)
 	}()
-	wg.Add(1)
+	s.wg.Add(1)
 	go func() {
-		defer wg.Done()
-		serveUDP(listenerUDP)
+		defer s.wg.Done()
+		serveUDP(lnUDP)
 		logger.Infof("stopped UDP Graphite server at %q", addr)
 	}()
-	wg.Wait()
+	return s
+}
+
+// MustStop stops the server.
+func (s *Server) MustStop() {
+	logger.Infof("stopping TCP Graphite server at %q...", s.addr)
+	if err := s.lnTCP.Close(); err != nil {
+		logger.Errorf("cannot close TCP Graphite server: %s", err)
+	}
+	logger.Infof("stopping UDP Graphite server at %q...", s.addr)
+	if err := s.lnUDP.Close(); err != nil {
+		logger.Errorf("cannot close UDP Graphite server: %s", err)
+	}
+	s.wg.Wait()
+	logger.Infof("TCP and UDP Graphite servers at %q have been stopped", s.addr)
 }

 func serveTCP(ln net.Listener) {
@@ -59,6 +85,7 @@ func serveTCP(ln net.Listener) {
 		if err != nil {
 			if ne, ok := err.(net.Error); ok {
 				if ne.Temporary() {
+					logger.Errorf("graphite: temporary error when listening for TCP addr %q: %s", ln.Addr(), err)
 					time.Sleep(time.Second)
 					continue
 				}
@@ -97,6 +124,7 @@ func serveUDP(ln net.PacketConn) {
 					writeErrorsUDP.Inc()
 					if ne, ok := err.(net.Error); ok {
 						if ne.Temporary() {
+							logger.Errorf("graphite: temporary error when listening for UDP addr %q: %s", ln.LocalAddr(), err)
 							time.Sleep(time.Second)
 							continue
 						}
@@ -119,20 +147,3 @@ func serveUDP(ln net.PacketConn) {
 	}
 	wg.Wait()
 }
-
-var (
-	listenerTCP net.Listener
-	listenerUDP net.PacketConn
-)
-
-// Stop stops the server.
-func Stop() {
-	logger.Infof("stopping TCP Graphite server at %q...", listenerTCP.Addr())
-	if err := listenerTCP.Close(); err != nil {
-		logger.Errorf("cannot close TCP Graphite server: %s", err)
-	}
-	logger.Infof("stopping UDP Graphite server at %q...", listenerUDP.LocalAddr())
-	if err := listenerUDP.Close(); err != nil {
-		logger.Errorf("cannot close UDP Graphite server: %s", err)
-	}
-}
--- a/app/vminsert/influx/request_handler.go
+++ b/app/vminsert/influx/request_handler.go
@@ -12,6 +12,7 @@ import (
 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/common"
 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/concurrencylimiter"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/influx"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
 	"github.com/VictoriaMetrics/metrics"
 )
@@ -36,7 +37,7 @@ func InsertHandler(req *http.Request) error {
 }

 func insertHandlerInternal(req *http.Request) error {
-	influxReadCalls.Inc()
+	readCalls.Inc()

 	r := req.Body
 	if req.Header.Get("Content-Encoding") == "gzip" {
@@ -82,7 +83,7 @@ func (ctx *pushCtx) InsertRows(db string) error {
 	rows := ctx.Rows.Rows
 	rowsLen := 0
 	for i := range rows {
-		rowsLen += len(rows[i].Tags)
+		rowsLen += len(rows[i].Fields)
 	}
 	ic := &ctx.Common
 	ic.Reset(rowsLen)
@@ -132,7 +133,7 @@ func (ctx *pushCtx) Read(r io.Reader, tsMultiplier int64) bool {
 	ctx.reqBuf, ctx.tailBuf, ctx.err = common.ReadLinesBlock(r, ctx.reqBuf, ctx.tailBuf)
 	if ctx.err != nil {
 		if ctx.err != io.EOF {
-			influxReadErrors.Inc()
+			readErrors.Inc()
 			ctx.err = fmt.Errorf("cannot read influx line protocol data: %s", ctx.err)
 		}
 		return false
@@ -166,12 +167,12 @@ func (ctx *pushCtx) Read(r io.Reader, tsMultiplier int64) bool {
 }

 var (
-	influxReadCalls  = metrics.NewCounter(`vm_read_calls_total{name="influx"}`)
-	influxReadErrors = metrics.NewCounter(`vm_read_errors_total{name="influx"}`)
+	readCalls  = metrics.NewCounter(`vm_read_calls_total{name="influx"}`)
+	readErrors = metrics.NewCounter(`vm_read_errors_total{name="influx"}`)
 )

 type pushCtx struct {
-	Rows   Rows
+	Rows   influx.Rows
 	Common common.InsertCtx

 	reqBuf         []byte
--- a/app/vminsert/main.go
+++ b/app/vminsert/main.go
@@ -12,45 +12,53 @@ import (
 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/opentsdb"
 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/opentsdbhttp"
 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/prometheus"
+	"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/vmimport"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
 	"github.com/VictoriaMetrics/metrics"
 )

 var (
-	graphiteListenAddr     = flag.String("graphiteListenAddr", "", "TCP and UDP address to listen for Graphite plaintext data. Usually :2003 must be set. Doesn't work if empty")
-	opentsdbListenAddr     = flag.String("opentsdbListenAddr", "", "TCP and UDP address to listen for OpentTSDB put messages. Usually :4242 must be set. Doesn't work if empty")
+	graphiteListenAddr = flag.String("graphiteListenAddr", "", "TCP and UDP address to listen for Graphite plaintext data. Usually :2003 must be set. Doesn't work if empty")
+	opentsdbListenAddr = flag.String("opentsdbListenAddr", "", "TCP and UDP address to listen for OpentTSDB metrics. "+
+		"Telnet put messages and HTTP /api/put messages are simultaneously served on TCP port. "+
+		"Usually :4242 must be set. Doesn't work if empty")
 	opentsdbHTTPListenAddr = flag.String("opentsdbHTTPListenAddr", "", "TCP address to listen for OpentTSDB HTTP put requests. Usually :4242 must be set. Doesn't work if empty")
-	maxInsertRequestSize   = flag.Int("maxInsertRequestSize", 32*1024*1024, "The maximum size of a single insert request in bytes")
 	maxLabelsPerTimeseries = flag.Int("maxLabelsPerTimeseries", 30, "The maximum number of labels accepted per time series. Superflouos labels are dropped")
 )

+var (
+	graphiteServer     *graphite.Server
+	opentsdbServer     *opentsdb.Server
+	opentsdbhttpServer *opentsdbhttp.Server
+)
+
 // Init initializes vminsert.
 func Init() {
 	storage.SetMaxLabelsPerTimeseries(*maxLabelsPerTimeseries)

 	concurrencylimiter.Init()
 	if len(*graphiteListenAddr) > 0 {
-		go graphite.Serve(*graphiteListenAddr)
+		graphiteServer = graphite.MustStart(*graphiteListenAddr)
 	}
 	if len(*opentsdbListenAddr) > 0 {
-		go opentsdb.Serve(*opentsdbListenAddr)
+		opentsdbServer = opentsdb.MustStart(*opentsdbListenAddr)
 	}
 	if len(*opentsdbHTTPListenAddr) > 0 {
-		go opentsdbhttp.Serve(*opentsdbHTTPListenAddr, int64(*maxInsertRequestSize))
+		opentsdbhttpServer = opentsdbhttp.MustStart(*opentsdbHTTPListenAddr)
 	}
 }

 // Stop stops vminsert.
 func Stop() {
 	if len(*graphiteListenAddr) > 0 {
-		graphite.Stop()
+		graphiteServer.MustStop()
 	}
 	if len(*opentsdbListenAddr) > 0 {
-		opentsdb.Stop()
+		opentsdbServer.MustStop()
 	}
 	if len(*opentsdbHTTPListenAddr) > 0 {
-		opentsdbhttp.Stop()
+		opentsdbhttpServer.MustStop()
 	}
 }

@@ -60,13 +68,22 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
 	switch path {
 	case "/api/v1/write":
 		prometheusWriteRequests.Inc()
-		if err := prometheus.InsertHandler(r, int64(*maxInsertRequestSize)); err != nil {
+		if err := prometheus.InsertHandler(r); err != nil {
 			prometheusWriteErrors.Inc()
 			httpserver.Errorf(w, "error in %q: %s", r.URL.Path, err)
 			return true
 		}
 		w.WriteHeader(http.StatusNoContent)
 		return true
+	case "/api/v1/import":
+		vmimportRequests.Inc()
+		if err := vmimport.InsertHandler(r); err != nil {
+			vmimportErrors.Inc()
+			httpserver.Errorf(w, "error in %q: %s", r.URL.Path, err)
+			return true
+		}
+		w.WriteHeader(http.StatusNoContent)
+		return true
 	case "/write", "/api/v2/write":
 		influxWriteRequests.Inc()
 		if err := influx.InsertHandler(r); err != nil {
@@ -92,6 +109,9 @@ var (
 	prometheusWriteRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/write", protocol="prometheus"}`)
 	prometheusWriteErrors   = metrics.NewCounter(`vm_http_request_errors_total{path="/api/v1/write", protocol="prometheus"}`)

+	vmimportRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/import", protocol="vm"}`)
+	vmimportErrors   = metrics.NewCounter(`vm_http_request_errors_total{path="/api/v1/import", protocol="vm"}`)
+
 	influxWriteRequests = metrics.NewCounter(`vm_http_requests_total{path="/write", protocol="influx"}`)
 	influxWriteErrors   = metrics.NewCounter(`vm_http_request_errors_total{path="/write", protocol="influx"}`)

--- a/app/vminsert/opentsdb/listener_switch.go
+++ b/app/vminsert/opentsdb/listener_switch.go
@@ -0,0 +1,159 @@
+package opentsdb
+
+import (
+	"io"
+	"net"
+	"sync"
+	"time"
+
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
+)
+
+// listenerSwitch listens for incoming connections and multiplexes them to OpenTSDB http or telnet listeners
+// depending on the first byte in the accepted connection.
+//
+// It is expected that both listeners - http and telnet consume incoming connections as soon as possible.
+type listenerSwitch struct {
+	ln net.Listener
+	wg sync.WaitGroup
+
+	telnetConnsCh chan net.Conn
+	httpConnsCh   chan net.Conn
+
+	closeLock sync.Mutex
+	closed    bool
+	acceptErr error
+	closeErr  error
+}
+
+func newListenerSwitch(ln net.Listener) *listenerSwitch {
+	ls := &listenerSwitch{
+		ln: ln,
+	}
+	ls.telnetConnsCh = make(chan net.Conn)
+	ls.httpConnsCh = make(chan net.Conn)
+	ls.wg.Add(1)
+	go func() {
+		ls.worker()
+		close(ls.telnetConnsCh)
+		close(ls.httpConnsCh)
+		ls.wg.Done()
+	}()
+	return ls
+}
+
+func (ls *listenerSwitch) stop() error {
+	ls.closeLock.Lock()
+	if !ls.closed {
+		ls.closeErr = ls.ln.Close()
+		ls.closed = true
+	}
+	// Every call, including repeated ones, reports the result of the first ls.ln.Close().
+	err := ls.closeErr
+	ls.closeLock.Unlock()
+
+	if err == nil {
+		// Wait until worker detects the closed ls.ln and exits.
+		ls.wg.Wait()
+	}
+	return err
+}
+
+func (ls *listenerSwitch) worker() {
+	var buf [1]byte
+	for {
+		c, err := ls.ln.Accept()
+		if err != nil {
+			if ne, ok := err.(net.Error); ok && ne.Temporary() {
+				logger.Infof("listenerSwitch: temporary error at %q: %s; sleeping for a second...", ls.ln.Addr(), err)
+				time.Sleep(time.Second)
+				continue
+			}
+			ls.closeLock.Lock()
+			ls.acceptErr = err
+			ls.closeLock.Unlock()
+			return
+		}
+		if _, err := io.ReadFull(c, buf[:]); err != nil {
+			logger.Errorf("listenerSwitch: cannot read one byte from the underlying connection for %q: %s", ls.ln.Addr(), err)
+			_ = c.Close()
+			continue
+		}
+
+		// It is expected that both listeners - http and telnet consume incoming connections as soon as possible,
+		// so the below code shouldn't block for extended periods of time.
+		pc := &peekedConn{
+			Conn:      c,
+			firstChar: buf[0],
+		}
+		if buf[0] == 'p' {
+			// Assume the request starts with `put`.
+			ls.telnetConnsCh <- pc
+		} else {
+			// Assume the request starts with `POST`.
+			ls.httpConnsCh <- pc
+		}
+	}
+}
+
+type peekedConn struct {
+	net.Conn
+	firstChar     byte
+	firstCharRead bool
+}
+
+func (pc *peekedConn) Read(p []byte) (int, error) {
+	// It is assumed that the pc cannot be read from concurrent goroutines.
+	if pc.firstCharRead {
+		// Fast path - first char already read.
+		return pc.Conn.Read(p)
+	}
+
+	// Slow path - read the first char.
+	if len(p) == 0 {
+		return 0, nil
+	}
+	p[0] = pc.firstChar
+	pc.firstCharRead = true
+	n, err := pc.Conn.Read(p[1:])
+	return n + 1, err
+}
+
+func (ls *listenerSwitch) newTelnetListener() *chanListener {
+	return &chanListener{
+		ls: ls,
+		ch: ls.telnetConnsCh,
+	}
+}
+
+func (ls *listenerSwitch) newHTTPListener() *chanListener {
+	return &chanListener{
+		ls: ls,
+		ch: ls.httpConnsCh,
+	}
+}
+
+type chanListener struct {
+	ls *listenerSwitch
+	ch chan net.Conn
+}
+
+func (cl *chanListener) Accept() (net.Conn, error) {
+	c, ok := <-cl.ch
+	if ok {
+		return c, nil
+	}
+
+	cl.ls.closeLock.Lock()
+	err := cl.ls.acceptErr
+	cl.ls.closeLock.Unlock()
+	return nil, err
+}
+
+func (cl *chanListener) Close() error {
+	return cl.ls.stop()
+}
+
+func (cl *chanListener) Addr() net.Addr {
+	return cl.ls.ln.Addr()
+}
--- a/app/vminsert/opentsdb/request_handler.go
+++ b/app/vminsert/opentsdb/request_handler.go
@@ -11,6 +11,7 @@ import (
 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/common"
 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/concurrencylimiter"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/opentsdb"
 	"github.com/VictoriaMetrics/metrics"
 )

@@ -61,13 +62,13 @@ func (ctx *pushCtx) InsertRows() error {
 const flushTimeout = 3 * time.Second

 func (ctx *pushCtx) Read(r io.Reader) bool {
-	opentsdbReadCalls.Inc()
+	readCalls.Inc()
 	if ctx.err != nil {
 		return false
 	}
 	if c, ok := r.(net.Conn); ok {
 		if err := c.SetReadDeadline(time.Now().Add(flushTimeout)); err != nil {
-			opentsdbReadErrors.Inc()
+			readErrors.Inc()
 			ctx.err = fmt.Errorf("cannot set read deadline: %s", err)
 			return false
 		}
@@ -79,7 +80,7 @@ func (ctx *pushCtx) Read(r io.Reader) bool {
 			ctx.err = nil
 		} else {
 			if ctx.err != io.EOF {
-				opentsdbReadErrors.Inc()
+				readErrors.Inc()
 				ctx.err = fmt.Errorf("cannot read OpenTSDB put protocol data: %s", ctx.err)
 			}
 			return false
@@ -105,7 +106,7 @@ func (ctx *pushCtx) Read(r io.Reader) bool {
 }

 type pushCtx struct {
-	Rows   Rows
+	Rows   opentsdb.Rows
 	Common common.InsertCtx

 	reqBuf  []byte
@@ -131,8 +132,8 @@ func (ctx *pushCtx) reset() {
 }

 var (
-	opentsdbReadCalls  = metrics.NewCounter(`vm_read_calls_total{name="opentsdb"}`)
-	opentsdbReadErrors = metrics.NewCounter(`vm_read_errors_total{name="opentsdb"}`)
+	readCalls  = metrics.NewCounter(`vm_read_calls_total{name="opentsdb"}`)
+	readErrors = metrics.NewCounter(`vm_read_errors_total{name="opentsdb"}`)
 )

 func getPushCtx() *pushCtx {
--- a/app/vminsert/opentsdb/server.go
+++ b/app/vminsert/opentsdb/server.go
@@ -7,6 +7,7 @@ import (
 	"sync"
 	"time"

+	"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/opentsdbhttp"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil"
@@ -21,44 +22,91 @@ var (
 	writeErrorsUDP   = metrics.NewCounter(`vm_opentsdb_request_errors_total{name="write", net="udp"}`)
 )

-// Serve starts OpenTSDB collector on the given addr.
-func Serve(addr string) {
+// Server is a server for collecting OpenTSDB TCP and UDP metrics.
+//
+// It accepts simultaneously Telnet put requests and HTTP put requests over TCP.
+type Server struct {
+	addr       string
+	ls         *listenerSwitch
+	httpServer *opentsdbhttp.Server
+	lnUDP      net.PacketConn
+	wg         sync.WaitGroup
+}
+
+// MustStart starts OpenTSDB collector on the given addr.
+//
+// MustStop must be called on the returned server when it is no longer needed.
+func MustStart(addr string) *Server {
 	logger.Infof("starting TCP OpenTSDB collector at %q", addr)
 	lnTCP, err := netutil.NewTCPListener("opentsdb", addr)
 	if err != nil {
 		logger.Fatalf("cannot start TCP OpenTSDB collector at %q: %s", addr, err)
 	}
-	listenerTCP = lnTCP
+	ls := newListenerSwitch(lnTCP)
+	lnHTTP := ls.newHTTPListener()
+	lnTelnet := ls.newTelnetListener()
+	httpServer := opentsdbhttp.MustServe(lnHTTP)

 	logger.Infof("starting UDP OpenTSDB collector at %q", addr)
 	lnUDP, err := net.ListenPacket("udp4", addr)
 	if err != nil {
 		logger.Fatalf("cannot start UDP OpenTSDB collector at %q: %s", addr, err)
 	}
-	listenerUDP = lnUDP

-	var wg sync.WaitGroup
-	wg.Add(1)
+	s := &Server{
+		addr:       addr,
+		ls:         ls,
+		httpServer: httpServer,
+		lnUDP:      lnUDP,
+	}
+	s.wg.Add(1)
 	go func() {
-		defer wg.Done()
-		serveTCP(listenerTCP)
-		logger.Infof("stopped TCP OpenTSDB collector at %q", addr)
+		defer s.wg.Done()
+		serveTelnet(lnTelnet)
+		logger.Infof("stopped TCP telnet OpenTSDB server at %q", addr)
 	}()
-	wg.Add(1)
+	s.wg.Add(1)
 	go func() {
-		defer wg.Done()
-		serveUDP(listenerUDP)
-		logger.Infof("stopped UDP OpenTSDB collector at %q", addr)
+		defer s.wg.Done()
+		httpServer.Wait()
+		// Do not log when httpServer is stopped, since this is logged by the server itself.
 	}()
-	wg.Wait()
+	s.wg.Add(1)
+	go func() {
+		defer s.wg.Done()
+		serveUDP(lnUDP)
+		logger.Infof("stopped UDP OpenTSDB server at %q", addr)
+	}()
+	return s
 }

-func serveTCP(ln net.Listener) {
+// MustStop stops the server.
+func (s *Server) MustStop() {
+	// Stop HTTP server. Do not emit log message, since it is emitted by the httpServer.
+	s.httpServer.MustStop()
+
+	logger.Infof("stopping TCP telnet OpenTSDB server at %q...", s.addr)
+	if err := s.ls.stop(); err != nil {
+		logger.Errorf("cannot stop TCP telnet OpenTSDB server: %s", err)
+	}
+
+	logger.Infof("stopping UDP OpenTSDB server at %q...", s.addr)
+	if err := s.lnUDP.Close(); err != nil {
+		logger.Errorf("cannot stop UDP OpenTSDB server: %s", err)
+	}
+
+	// Wait until all the servers are stopped.
+	s.wg.Wait()
+	logger.Infof("TCP and UDP OpenTSDB servers at %q have been stopped", s.addr)
+}
+
+func serveTelnet(ln net.Listener) {
 	for {
 		c, err := ln.Accept()
 		if err != nil {
 			if ne, ok := err.(net.Error); ok {
 				if ne.Temporary() {
+					logger.Errorf("opentsdb: temporary error when listening for TCP addr %q: %s", ln.Addr(), err)
 					time.Sleep(time.Second)
 					continue
 				}
@@ -97,6 +145,7 @@ func serveUDP(ln net.PacketConn) {
 					writeErrorsUDP.Inc()
 					if ne, ok := err.(net.Error); ok {
 						if ne.Temporary() {
+							logger.Errorf("opentsdb: temporary error when listening for UDP addr %q: %s", ln.LocalAddr(), err)
 							time.Sleep(time.Second)
 							continue
 						}
@@ -119,20 +168,3 @@ func serveUDP(ln net.PacketConn) {
 	}
 	wg.Wait()
 }
-
-var (
-	listenerTCP net.Listener
-	listenerUDP net.PacketConn
-)
-
-// Stop stops the server.
-func Stop() {
-	logger.Infof("stopping TCP OpenTSDB server at %q...", listenerTCP.Addr())
-	if err := listenerTCP.Close(); err != nil {
-		logger.Errorf("cannot close TCP OpenTSDB server: %s", err)
-	}
-	logger.Infof("stopping UDP OpenTSDB server at %q...", listenerUDP.LocalAddr())
-	if err := listenerUDP.Close(); err != nil {
-		logger.Errorf("cannot close UDP OpenTSDB server: %s", err)
-	}
-}
--- a/app/vminsert/opentsdbhttp/request_handler.go
+++ b/app/vminsert/opentsdbhttp/request_handler.go
@@ -1,6 +1,7 @@
 package opentsdbhttp

 import (
+	"flag"
 	"fmt"
 	"io"
 	"net/http"
@@ -11,35 +12,37 @@ import (
 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/common"
 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/concurrencylimiter"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/opentsdbhttp"
 	"github.com/VictoriaMetrics/metrics"
-	"github.com/valyala/fastjson"
 )

+var maxInsertRequestSize = flag.Int("opentsdbhttp.maxInsertRequestSize", 32*1024*1024, "The maximum size of OpenTSDB HTTP put request")
+
 var (
 	rowsInserted  = metrics.NewCounter(`vm_rows_inserted_total{type="opentsdb-http"}`)
 	rowsPerInsert = metrics.NewSummary(`vm_rows_per_insert{type="opentsdb-http"}`)

-	opentsdbReadCalls       = metrics.NewCounter(`vm_read_calls_total{name="opentsdb-http"}`)
-	opentsdbReadErrors      = metrics.NewCounter(`vm_read_errors_total{name="opentsdb-http"}`)
-	opentsdbUnmarshalErrors = metrics.NewCounter(`vm_unmarshal_errors_total{name="opentsdb-http"}`)
+	readCalls       = metrics.NewCounter(`vm_read_calls_total{name="opentsdb-http"}`)
+	readErrors      = metrics.NewCounter(`vm_read_errors_total{name="opentsdb-http"}`)
+	unmarshalErrors = metrics.NewCounter(`vm_unmarshal_errors_total{name="opentsdb-http"}`)
 )

 // insertHandler processes HTTP OpenTSDB put requests.
 // See http://opentsdb.net/docs/build/html/api_http/put.html
-func insertHandler(req *http.Request, maxSize int64) error {
+func insertHandler(req *http.Request) error {
 	return concurrencylimiter.Do(func() error {
-		return insertHandlerInternal(req, maxSize)
+		return insertHandlerInternal(req)
 	})
 }

-func insertHandlerInternal(req *http.Request, maxSize int64) error {
-	opentsdbReadCalls.Inc()
+func insertHandlerInternal(req *http.Request) error {
+	readCalls.Inc()

 	r := req.Body
 	if req.Header.Get("Content-Encoding") == "gzip" {
 		zr, err := common.GetGzipReader(r)
 		if err != nil {
-			opentsdbReadErrors.Inc()
+			readErrors.Inc()
 			return fmt.Errorf("cannot read gzipped http protocol data: %s", err)
 		}
 		defer common.PutGzipReader(zr)
@@ -50,23 +53,23 @@ func insertHandlerInternal(req *http.Request, maxSize int64) error {
 	defer putPushCtx(ctx)

 	// Read the request in ctx.reqBuf
-	lr := io.LimitReader(r, maxSize+1)
+	lr := io.LimitReader(r, int64(*maxInsertRequestSize)+1)
 	reqLen, err := ctx.reqBuf.ReadFrom(lr)
 	if err != nil {
-		opentsdbReadErrors.Inc()
+		readErrors.Inc()
 		return fmt.Errorf("cannot read HTTP OpenTSDB request: %s", err)
 	}
-	if reqLen > maxSize {
-		opentsdbReadErrors.Inc()
-		return fmt.Errorf("too big HTTP OpenTSDB request; mustn't exceed %d bytes", maxSize)
+	if reqLen > int64(*maxInsertRequestSize) {
+		readErrors.Inc()
+		return fmt.Errorf("too big HTTP OpenTSDB request; mustn't exceed `-opentsdbhttp.maxInsertRequestSize=%d` bytes", *maxInsertRequestSize)
 	}

 	// Unmarshal the request to ctx.Rows
-	p := parserPool.Get()
-	defer parserPool.Put(p)
+	p := opentsdbhttp.GetParser()
+	defer opentsdbhttp.PutParser(p)
 	v, err := p.ParseBytes(ctx.reqBuf.B)
 	if err != nil {
-		opentsdbUnmarshalErrors.Inc()
+		unmarshalErrors.Inc()
 		return fmt.Errorf("cannot parse HTTP OpenTSDB json: %s", err)
 	}
 	ctx.Rows.Unmarshal(v)
@@ -110,10 +113,8 @@ func insertHandlerInternal(req *http.Request, maxSize int64) error {

 const secondMask int64 = 0x7FFFFFFF00000000

-var parserPool fastjson.ParserPool
-
 type pushCtx struct {
-	Rows   Rows
+	Rows   opentsdbhttp.Rows
 	Common common.InsertCtx

 	reqBuf bytesutil.ByteBuffer
--- a/app/vminsert/opentsdbhttp/server.go
+++ b/app/vminsert/opentsdbhttp/server.go
@@ -2,11 +2,14 @@ package opentsdbhttp

 import (
 	"context"
+	"net"
 	"net/http"
+	"sync"
 	"time"

 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil"
 	"github.com/VictoriaMetrics/metrics"
 )

@@ -15,56 +18,84 @@ var (
 	writeErrors   = metrics.NewCounter(`vm_http_request_errors_total{path="/api/put", protocol="opentsdb-http"}`)
 )

-var (
-	httpServer     *http.Server
-	httpAddr       string
-	maxRequestSize int64
-)
+// Server represents HTTP OpenTSDB server.
+type Server struct {
+	s  *http.Server
+	ln net.Listener
+	wg sync.WaitGroup
+}

-// Serve starts HTTP OpenTSDB server on the given addr.
-func Serve(addr string, maxReqSize int64) {
+// MustStart starts HTTP OpenTSDB server on the given addr.
+//
+// MustStop must be called on the returned server when it is no longer needed.
+func MustStart(addr string) *Server {
 	logger.Infof("starting HTTP OpenTSDB server at %q", addr)
-	httpAddr = addr
-	maxRequestSize = maxReqSize
-	httpServer = &http.Server{
-		Addr:         addr,
-		Handler:      http.HandlerFunc(requestHandler),
+	lnTCP, err := netutil.NewTCPListener("opentsdbhttp", addr)
+	if err != nil {
+		logger.Fatalf("cannot start HTTP OpenTSDB collector at %q: %s", addr, err)
+	}
+	return MustServe(lnTCP)
+}
+
+// MustServe serves OpenTSDB HTTP put requests from ln.
+//
+// MustStop must be called on the returned server when it is no longer needed.
+func MustServe(ln net.Listener) *Server {
+	h := newRequestHandler()
+	hs := &http.Server{
+		Handler:      h,
 		ReadTimeout:  30 * time.Second,
 		WriteTimeout: 10 * time.Second,
 	}
+	s := &Server{
+		s:  hs,
+		ln: ln,
+	}
+	s.wg.Add(1)
 	go func() {
-		err := httpServer.ListenAndServe()
+		defer s.wg.Done()
+		err := s.s.Serve(s.ln)
 		if err == http.ErrServerClosed {
 			return
 		}
 		if err != nil {
-			logger.Fatalf("error serving HTTP OpenTSDB: %s", err)
+			logger.Fatalf("error serving HTTP OpenTSDB at %q: %s", s.ln.Addr(), err)
 		}
 	}()
+	return s
 }

-// requestHandler handles HTTP OpenTSDB insert request.
-func requestHandler(w http.ResponseWriter, r *http.Request) {
-	switch r.URL.Path {
-	case "/api/put":
-		writeRequests.Inc()
-		if err := insertHandler(r, maxRequestSize); err != nil {
-			writeErrors.Inc()
-			httpserver.Errorf(w, "error in %q: %s", r.URL.Path, err)
-			return
-		}
-		w.WriteHeader(http.StatusNoContent)
-	default:
-		httpserver.Errorf(w, "unexpected path requested on HTTP OpenTSDB server: %q", r.URL.Path)
-	}
+// Wait waits until the server is stopped with MustStop.
+func (s *Server) Wait() {
+	s.wg.Wait()
 }

-// Stop stops HTTP OpenTSDB server.
-func Stop() {
-	logger.Infof("stopping HTTP OpenTSDB server at %q...", httpAddr)
+// MustStop stops HTTP OpenTSDB server.
+func (s *Server) MustStop() {
+	logger.Infof("stopping HTTP OpenTSDB server at %q...", s.ln.Addr())
 	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
 	defer cancel()
-	if err := httpServer.Shutdown(ctx); err != nil {
-		logger.Fatalf("cannot close HTTP OpenTSDB server: %s", err)
+	if err := s.s.Shutdown(ctx); err != nil {
+		logger.Fatalf("cannot close HTTP OpenTSDB server at %q: %s", s.ln.Addr(), err)
 	}
+	s.wg.Wait()
+	logger.Infof("OpenTSDB HTTP server at %q has been stopped", s.ln.Addr())
+}
+
+func newRequestHandler() http.Handler {
+	rh := func(w http.ResponseWriter, r *http.Request) {
+		switch r.URL.Path {
+		case "/api/put":
+			writeRequests.Inc()
+			if err := insertHandler(r); err != nil {
+				writeErrors.Inc()
+				httpserver.Errorf(w, "error in %q: %s", r.URL.Path, err)
+				return
+			}
+			w.WriteHeader(http.StatusNoContent)
+		default:
+			httpserver.Errorf(w, "unexpected path requested on HTTP OpenTSDB server: %q", r.URL.Path)
+		}
+	}
+	return http.HandlerFunc(rh)
 }
--- a/app/vminsert/prometheus/request_handler.go
+++ b/app/vminsert/prometheus/request_handler.go
@@ -18,16 +18,16 @@ var (
 )

 // InsertHandler processes remote write for prometheus.
-func InsertHandler(r *http.Request, maxSize int64) error {
+func InsertHandler(r *http.Request) error {
 	return concurrencylimiter.Do(func() error {
-		return insertHandlerInternal(r, maxSize)
+		return insertHandlerInternal(r)
 	})
 }

-func insertHandlerInternal(r *http.Request, maxSize int64) error {
+func insertHandlerInternal(r *http.Request) error {
 	ctx := getPushCtx()
 	defer putPushCtx(ctx)
-	if err := ctx.Read(r, maxSize); err != nil {
+	if err := ctx.Read(r); err != nil {
 		return err
 	}
 	timeseries := ctx.req.Timeseries
@@ -65,11 +65,11 @@ func (ctx *pushCtx) reset() {
 	ctx.reqBuf = ctx.reqBuf[:0]
 }

-func (ctx *pushCtx) Read(r *http.Request, maxSize int64) error {
+func (ctx *pushCtx) Read(r *http.Request) error {
 	prometheusReadCalls.Inc()

 	var err error
-	ctx.reqBuf, err = prompb.ReadSnappy(ctx.reqBuf[:0], r.Body, maxSize)
+	ctx.reqBuf, err = prompb.ReadSnappy(ctx.reqBuf[:0], r.Body)
 	if err != nil {
 		prometheusReadErrors.Inc()
 		return fmt.Errorf("cannot read prompb.WriteRequest: %s", err)
--- a/app/vminsert/vmimport/parser.go
+++ b/app/vminsert/vmimport/parser.go
@@ -0,0 +1,202 @@
+package vmimport
+
+import (
+	"fmt"
+	"strings"
+
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
+	"github.com/VictoriaMetrics/metrics"
+	"github.com/valyala/fastjson"
+)
+
+// Rows contains parsed rows from `/api/v1/import` request.
+type Rows struct {
+	Rows []Row
+
+	tu tagsUnmarshaler
+}
+
+// Reset resets rs.
+func (rs *Rows) Reset() {
+	for i := range rs.Rows {
+		rs.Rows[i].reset()
+	}
+	rs.Rows = rs.Rows[:0]
+
+	rs.tu.reset()
+}
+
+// Unmarshal unmarshals `/api/v1/import` rows from s.
+//
+// Every line in s must be a JSON object with `metric`, `values` and `timestamps` keys; invalid lines are logged and skipped.
+//
+// s must be unchanged until rs is in use.
+func (rs *Rows) Unmarshal(s string) {
+	rs.tu.reset()
+	rs.Rows = unmarshalRows(rs.Rows[:0], s, &rs.tu)
+}
+
+// Row is a single row from `/api/v1/import` request.
+type Row struct {
+	Tags       []Tag
+	Values     []float64
+	Timestamps []int64
+}
+
+func (r *Row) reset() {
+	r.Tags = nil
+	r.Values = r.Values[:0]
+	r.Timestamps = r.Timestamps[:0]
+}
+
+func (r *Row) unmarshal(s string, tu *tagsUnmarshaler) error {
+	r.reset()
+	v, err := tu.p.Parse(s)
+	if err != nil {
+		return fmt.Errorf("cannot parse json line: %s", err)
+	}
+
+	// Unmarshal tags
+	metric := v.GetObject("metric")
+	if metric == nil {
+		return fmt.Errorf("missing `metric` object")
+	}
+	tagsStart := len(tu.tagsPool)
+	if err := tu.unmarshalTags(metric); err != nil {
+		return fmt.Errorf("cannot unmarshal `metric`: %s", err)
+	}
+	tags := tu.tagsPool[tagsStart:]
+	r.Tags = tags[:len(tags):len(tags)]
+	if len(r.Tags) == 0 {
+		return fmt.Errorf("missing tags")
+	}
+
+	// Unmarshal values
+	values := v.GetArray("values")
+	if len(values) == 0 {
+		return fmt.Errorf("missing `values` array")
+	}
+	for i, v := range values {
+		f, err := v.Float64()
+		if err != nil {
+			return fmt.Errorf("cannot unmarshal value at position %d: %s", i, err)
+		}
+		r.Values = append(r.Values, f)
+	}
+
+	// Unmarshal timestamps
+	timestamps := v.GetArray("timestamps")
+	if len(timestamps) == 0 {
+		return fmt.Errorf("missing `timestamps` array")
+	}
+	for i, v := range timestamps {
+		ts, err := v.Int64()
+		if err != nil {
+			return fmt.Errorf("cannot unmarshal timestamp at position %d: %s", i, err)
+		}
+		r.Timestamps = append(r.Timestamps, ts)
+	}
+
+	if len(r.Timestamps) != len(r.Values) {
+		return fmt.Errorf("`timestamps` array size must match `values` array size; got %d; want %d", len(r.Timestamps), len(r.Values))
+	}
+	return nil
+}
+
+// Tag represents `/api/v1/import` tag.
+type Tag struct {
+	Key   []byte
+	Value []byte
+}
+
+func (tag *Tag) reset() {
+	// tag.Key and tag.Value point to tu.bytesPool, so there is no need in keeping these byte slices here.
+	tag.Key = nil
+	tag.Value = nil
+}
+
+type tagsUnmarshaler struct {
+	p         fastjson.Parser
+	tagsPool  []Tag
+	bytesPool []byte
+	err       error
+}
+
+func (tu *tagsUnmarshaler) reset() {
+	for i := range tu.tagsPool {
+		tu.tagsPool[i].reset()
+	}
+	tu.tagsPool = tu.tagsPool[:0]
+
+	tu.bytesPool = tu.bytesPool[:0]
+	tu.err = nil
+}
+
+func (tu *tagsUnmarshaler) addTag() *Tag {
+	dst := tu.tagsPool
+	if cap(dst) > len(dst) {
+		dst = dst[:len(dst)+1]
+	} else {
+		dst = append(dst, Tag{})
+	}
+	tag := &dst[len(dst)-1]
+	tu.tagsPool = dst
+	return tag
+}
+
+func (tu *tagsUnmarshaler) addBytes(b []byte) []byte {
+	bytesPoolLen := len(tu.bytesPool)
+	tu.bytesPool = append(tu.bytesPool, b...)
+	bCopy := tu.bytesPool[bytesPoolLen:]
+	return bCopy[:len(bCopy):len(bCopy)]
+}
+
+func (tu *tagsUnmarshaler) unmarshalTags(o *fastjson.Object) error {
+	tu.err = nil // tu.err collects the first error hit inside the Visit callback below
+	o.Visit(func(key []byte, v *fastjson.Value) {
+		tag := tu.addTag()
+		tag.Key = tu.addBytes(key)
+		sb, err := v.StringBytes()
+		if err != nil && tu.err == nil { // record only the first failure; later ones must not overwrite it
+			tu.err = fmt.Errorf("cannot parse value for tag %q: %s", tag.Key, err)
+		}
+		tag.Value = tu.addBytes(sb)
+	})
+	return tu.err
+}
+
+func unmarshalRows(dst []Row, s string, tu *tagsUnmarshaler) []Row {
+	for len(s) > 0 {
+		n := strings.IndexByte(s, '\n')
+		if n < 0 {
+			// The last line.
+			return unmarshalRow(dst, s, tu)
+		}
+		dst = unmarshalRow(dst, s[:n], tu)
+		s = s[n+1:]
+	}
+	return dst
+}
+
+func unmarshalRow(dst []Row, s string, tu *tagsUnmarshaler) []Row {
+	if len(s) > 0 && s[len(s)-1] == '\r' {
+		s = s[:len(s)-1]
+	}
+	if len(s) == 0 {
+		return dst
+	}
+	if cap(dst) > len(dst) {
+		dst = dst[:len(dst)+1]
+	} else {
+		dst = append(dst, Row{})
+	}
+	r := &dst[len(dst)-1]
+	if err := r.unmarshal(s, tu); err != nil {
+		dst = dst[:len(dst)-1]
+		logger.Errorf("cannot unmarshal json line %q: %s; skipping it", s, err)
+		invalidLines.Inc()
+	}
+	return dst
+}
+
+var invalidLines = metrics.NewCounter(`vm_rows_invalid_total{type="vmimport"}`)
--- a/app/vminsert/vmimport/parser_test.go
+++ b/app/vminsert/vmimport/parser_test.go
@@ -0,0 +1,229 @@
+package vmimport
+
+import (
+	"math"
+	"reflect"
+	"testing"
+)
+
+func TestRowsUnmarshalFailure(t *testing.T) {
+	f := func(s string) {
+		t.Helper()
+		var rows Rows
+		rows.Unmarshal(s)
+		if len(rows.Rows) != 0 {
+			t.Fatalf("expecting zero rows; got %d rows", len(rows.Rows))
+		}
+
+		// Try again
+		rows.Unmarshal(s)
+		if len(rows.Rows) != 0 {
+			t.Fatalf("expecting zero rows; got %d rows", len(rows.Rows))
+		}
+	}
+
+	// Invalid json line
+	f("")
+	f("\n")
+	f("foo\n")
+	f("123")
+	f("[1,3]")
+	f("{}")
+	f("[]")
+	f(`{"foo":"bar"}`)
+
+	// Invalid metric
+	f(`{"metric":123,"values":[1,2],"timestamps":[3,4]}`)
+	f(`{"metric":[123],"values":[1,2],"timestamps":[3,4]}`)
+	f(`{"metric":[],"values":[1,2],"timestamps":[3,4]}`)
+	f(`{"metric":{},"values":[1,2],"timestamps":[3,4]}`)
+	f(`{"metric":null,"values":[1,2],"timestamps":[3,4]}`)
+	f(`{"values":[1,2],"timestamps":[3,4]}`)
+
+	// Invalid values
+	f(`{"metric":{"foo":"bar"},"values":1,"timestamps":[3,4]}`)
+	f(`{"metric":{"foo":"bar"},"values":{"x":1},"timestamps":[3,4]}`)
+	f(`{"metric":{"foo":"bar"},"values":{"x":1},"timestamps":[3,4]}`)
+	f(`{"metric":{"foo":"bar"},"values":null,"timestamps":[3,4]}`)
+	f(`{"metric":{"foo":"bar"},"timestamps":[3,4]}`)
+
+	// Invalid timestamps
+	f(`{"metric":{"foo":"bar"},"values":[1,2],"timestamps":3}`)
+	f(`{"metric":{"foo":"bar"},"values":[1,2],"timestamps":false}`)
+	f(`{"metric":{"foo":"bar"},"values":[1,2],"timestamps":{}}`)
+	f(`{"metric":{"foo":"bar"},"values":[1,2]}`)
+
+	// values and timestamps count mismatch
+	f(`{"metric":{"foo":"bar"},"values":[],"timestamps":[]}`)
+	f(`{"metric":{"foo":"bar"},"values":[],"timestamps":[1]}`)
+	f(`{"metric":{"foo":"bar"},"values":[2],"timestamps":[]}`)
+	f(`{"metric":{"foo":"bar"},"values":[2],"timestamps":[3,4]}`)
+	f(`{"metric":{"foo":"bar"},"values":[2,3],"timestamps":[4]}`)
+
+	// Garbage after the line
+	f(`{"metric":{"foo":"bar"},"values":[2],"timestamps":[4]}{}`)
+}
+
+func TestRowsUnmarshalSuccess(t *testing.T) {
+	f := func(s string, rowsExpected *Rows) {
+		t.Helper()
+		var rows Rows
+		rows.Unmarshal(s)
+		if !reflect.DeepEqual(rows.Rows, rowsExpected.Rows) {
+			t.Fatalf("unexpected rows;\ngot\n%+v;\nwant\n%+v", rows.Rows, rowsExpected.Rows)
+		}
+
+		// Try unmarshaling again
+		rows.Unmarshal(s)
+		if !reflect.DeepEqual(rows.Rows, rowsExpected.Rows) {
+			t.Fatalf("unexpected rows;\ngot\n%+v;\nwant\n%+v", rows.Rows, rowsExpected.Rows)
+		}
+
+		rows.Reset()
+		if len(rows.Rows) != 0 {
+			t.Fatalf("non-empty rows after reset: %+v", rows.Rows)
+		}
+	}
+
+	// Empty line
+	f("", &Rows{})
+	f("\n\n", &Rows{})
+	f("\n\r\n", &Rows{})
+
+	// Single line with a single tag
+	f(`{"metric":{"foo":"bar"},"values":[1.23],"timestamps":[456]}`, &Rows{
+		Rows: []Row{{
+			Tags: []Tag{{
+				Key:   []byte("foo"),
+				Value: []byte("bar"),
+			}},
+			Values:     []float64{1.23},
+			Timestamps: []int64{456},
+		}},
+	})
+
+	// Inf and nan values
+	f(`{"metric":{"foo":"bar"},"values":[Inf, -Inf],"timestamps":[456, 789]}`, &Rows{
+		Rows: []Row{{
+			Tags: []Tag{{
+				Key:   []byte("foo"),
+				Value: []byte("bar"),
+			}},
+			Values:     []float64{math.Inf(1), math.Inf(-1)},
+			Timestamps: []int64{456, 789},
+		}},
+	})
+
+	// Line with multiple tags
+	f(`{"metric":{"foo":"bar","baz":"xx"},"values":[1.23, -3.21],"timestamps" : [456,789]}`, &Rows{
+		Rows: []Row{{
+			Tags: []Tag{
+				{
+					Key:   []byte("foo"),
+					Value: []byte("bar"),
+				},
+				{
+					Key:   []byte("baz"),
+					Value: []byte("xx"),
+				},
+			},
+			Values:     []float64{1.23, -3.21},
+			Timestamps: []int64{456, 789},
+		}},
+	})
+
+	// Multiple lines
+	f(`{"metric":{"foo":"bar","baz":"xx"},"values":[1.23, -3.21],"timestamps" : [456,789]}
+{"metric":{"__name__":"xx"},"values":[34],"timestamps" : [11]}
+`, &Rows{
+		Rows: []Row{
+			{
+				Tags: []Tag{
+					{
+						Key:   []byte("foo"),
+						Value: []byte("bar"),
+					},
+					{
+						Key:   []byte("baz"),
+						Value: []byte("xx"),
+					},
+				},
+				Values:     []float64{1.23, -3.21},
+				Timestamps: []int64{456, 789},
+			},
+			{
+				Tags: []Tag{
+					{
+						Key:   []byte("__name__"),
+						Value: []byte("xx"),
+					},
+				},
+				Values:     []float64{34},
+				Timestamps: []int64{11},
+			},
+		},
+	})
+
+	// Multiple lines with invalid line in the middle.
+	f(`{"metric":{"xfoo":"bar","baz":"xx"},"values":[1.232, -3.21],"timestamps" : [456,7890]}
+garbage here
+{"metric":{"__name__":"xxy"},"values":[34],"timestamps" : [111]}`, &Rows{
+		Rows: []Row{
+			{
+				Tags: []Tag{
+					{
+						Key:   []byte("xfoo"),
+						Value: []byte("bar"),
+					},
+					{
+						Key:   []byte("baz"),
+						Value: []byte("xx"),
+					},
+				},
+				Values:     []float64{1.232, -3.21},
+				Timestamps: []int64{456, 7890},
+			},
+			{
+				Tags: []Tag{
+					{
+						Key:   []byte("__name__"),
+						Value: []byte("xxy"),
+					},
+				},
+				Values:     []float64{34},
+				Timestamps: []int64{111},
+			},
+		},
+	})
+
+	// No newline after the second line.
+	f(`{"metric":{"foo":"bar","baz":"xx"},"values":[1.23, -3.21],"timestamps" : [456,789]}
+{"metric":{"__name__":"xx"},"values":[34],"timestamps" : [11]}`, &Rows{
+		Rows: []Row{
+			{
+				Tags: []Tag{
+					{
+						Key:   []byte("foo"),
+						Value: []byte("bar"),
+					},
+					{
+						Key:   []byte("baz"),
+						Value: []byte("xx"),
+					},
+				},
+				Values:     []float64{1.23, -3.21},
+				Timestamps: []int64{456, 789},
+			},
+			{
+				Tags: []Tag{
+					{
+						Key:   []byte("__name__"),
+						Value: []byte("xx"),
+					},
+				},
+				Values:     []float64{34},
+				Timestamps: []int64{11},
+			},
+		},
+	})
+}
--- a/app/vminsert/vmimport/parser_timing_test.go
+++ b/app/vminsert/vmimport/parser_timing_test.go
@@ -0,0 +1,25 @@
+package vmimport
+
+import (
+	"fmt"
+	"testing"
+)
+
+func BenchmarkRowsUnmarshal(b *testing.B) { // BenchmarkRowsUnmarshal measures Rows.Unmarshal on a fixed four-line input.
+	s := `{"metric":{"__name__":"up","job":"node_exporter","instance":"localhost:9100"},"values":[0,0,0],"timestamps":[1549891472010,1549891487724,1549891503438]}
+{"metric":{"__name__":"up","job":"prometheus","instance":"localhost:9090"},"values":[1,1,1],"timestamps":[1549891461511,1549891476511,1549891491511]}
+{"metric":{"__name__":"up","job":"node_exporter","instance":"foobar.com:9100"},"values":[0,0,0],"timestamps":[1549891472010,1549891487724,1549891503438]}
+{"metric":{"__name__":"up","job":"prometheus","instance":"xxx.yyy.zzz:9090"},"values":[1,1,1],"timestamps":[1549891461511,1549891476511,1549891491511]}
+`
+	b.SetBytes(int64(len(s))) // each iteration parses the whole of s
+	b.ReportAllocs()
+	b.RunParallel(func(pb *testing.PB) {
+		var rows Rows // declared outside the loop, so one Rows value is reused by all iterations of this goroutine
+		for pb.Next() {
+			rows.Unmarshal(s)
+			if len(rows.Rows) != 4 { // s contains four JSON lines, one row expected per line
+				panic(fmt.Errorf("unexpected number of rows parsed; got %d; want 4", len(rows.Rows)))
+			}
+		}
+	})
+}
--- a/app/vminsert/vmimport/request_handler.go
+++ b/app/vminsert/vmimport/request_handler.go
@@ -0,0 +1,160 @@
+package vmimport
+
+import (
+	"flag"
+	"fmt"
+	"io"
+	"net/http"
+	"runtime"
+	"sync"
+
+	"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/common"
+	"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/concurrencylimiter"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
+	"github.com/VictoriaMetrics/metrics"
+)
+
+var maxLineLen = flag.Int("import.maxLineLen", 100*1024*1024, "The maximum length in bytes of a single line accepted by `/api/v1/import`")
+
+var (
+	rowsInserted  = metrics.NewCounter(`vm_rows_inserted_total{type="vmimport"}`)
+	rowsPerInsert = metrics.NewSummary(`vm_rows_per_insert{type="vmimport"}`)
+)
+
+// InsertHandler processes `/api/v1/import` request by running insertHandlerInternal through concurrencylimiter.Do.
+//
+// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6
+func InsertHandler(req *http.Request) error {
+	return concurrencylimiter.Do(func() error { // whatever Do returns is passed back to the caller unchanged
+		return insertHandlerInternal(req)
+	})
+}
+
+func insertHandlerInternal(req *http.Request) error { // insertHandlerInternal reads req.Body block by block and inserts the rows parsed from every block.
+	readCalls.Inc() // incremented once per request, not once per Read
+
+	r := req.Body
+	if req.Header.Get("Content-Encoding") == "gzip" { // exact match only; any other Content-Encoding value is read as-is
+		zr, err := common.GetGzipReader(r)
+		if err != nil {
+			return fmt.Errorf("cannot read gzipped vmimport data: %s", err)
+		}
+		defer common.PutGzipReader(zr) // hands zr back when the handler returns
+		r = zr
+	}
+
+	ctx := getPushCtx()
+	defer putPushCtx(ctx) // resets ctx and stores it for reuse
+	for ctx.Read(r) { // Read returns false on io.EOF and on read errors
+		if err := ctx.InsertRows(); err != nil {
+			return err
+		}
+	}
+	return ctx.Error() // nil when reading stopped because of io.EOF
+}
+
+func (ctx *pushCtx) InsertRows() error { // InsertRows writes every (timestamp, value) pair from ctx.Rows through ctx.Common and flushes it.
+	rows := ctx.Rows.Rows
+	rowsLen := 0 // total number of values across all rows
+	for i := range rows {
+		rowsLen += len(rows[i].Values)
+	}
+	ic := &ctx.Common
+	ic.Reset(rowsLen) // NOTE(review): presumably prepares ic for rowsLen data points - confirm in common.InsertCtx
+	rowsTotal := 0
+	for i := range rows {
+		r := &rows[i]
+		ic.Labels = ic.Labels[:0] // drop the labels of the previous row, keep the capacity
+		for j := range r.Tags {
+			tag := &r.Tags[j]
+			ic.AddLabelBytes(tag.Key, tag.Value)
+		}
+		ctx.metricNameBuf = storage.MarshalMetricNameRaw(ctx.metricNameBuf[:0], ic.Labels) // the buffer is reused across rows
+		values := r.Values
+		timestamps := r.Timestamps
+		_ = timestamps[len(values)-1] // up-front bounds check; panics if values is empty or timestamps is shorter - assumes the parser rejects such rows (TODO confirm)
+		for j, value := range values {
+			timestamp := timestamps[j]
+			ic.WriteDataPoint(ctx.metricNameBuf, nil, timestamp, value)
+		}
+		rowsTotal += len(values)
+	}
+	rowsInserted.Add(rowsTotal) // metrics are updated before FlushBufs, i.e. even when the flush fails
+	rowsPerInsert.Update(float64(rowsTotal))
+	return ic.FlushBufs()
+}
+
+func (ctx *pushCtx) Read(r io.Reader) bool { // Read loads the next block of lines from r and parses it into ctx.Rows; false means the caller must stop and check Error.
+	if ctx.err != nil { // a previous call already failed or reached io.EOF
+		return false
+	}
+	ctx.reqBuf, ctx.tailBuf, ctx.err = common.ReadLinesBlockExt(r, ctx.reqBuf, ctx.tailBuf, *maxLineLen) // maxLineLen comes from the -import.maxLineLen flag
+	if ctx.err != nil {
+		if ctx.err != io.EOF { // io.EOF is stored unwrapped so that Error can recognize it
+			readErrors.Inc()
+			ctx.err = fmt.Errorf("cannot read vmimport data: %s", ctx.err)
+		}
+		return false
+	}
+	ctx.Rows.Unmarshal(bytesutil.ToUnsafeString(ctx.reqBuf)) // NOTE(review): ToUnsafeString presumably aliases reqBuf without copying, so Rows would stay valid only until the next Read - confirm
+	return true
+}
+
+var (
+	readCalls  = metrics.NewCounter(`vm_read_calls_total{name="vmimport"}`)
+	readErrors = metrics.NewCounter(`vm_read_errors_total{name="vmimport"}`)
+)
+
+type pushCtx struct { // pushCtx is the reusable per-request state of the vmimport insert handler.
+	Rows   Rows // rows parsed by the last successful Read
+	Common common.InsertCtx // used by InsertRows for writing data points
+
+	reqBuf        []byte // block of lines returned by the last ReadLinesBlockExt call
+	tailBuf       []byte // second buffer passed to and returned from ReadLinesBlockExt on every Read
+	metricNameBuf []byte // scratch buffer for the marshaled metric name of the current row
+
+	err error // error that stopped Read, including io.EOF
+}
+
+func (ctx *pushCtx) Error() error { // Error returns the error that stopped Read; io.EOF is reported as nil.
+	if ctx.err == io.EOF { // reaching the end of the input is the normal way to finish
+		return nil
+	}
+	return ctx.err
+}
+
+func (ctx *pushCtx) reset() { // reset clears ctx for reuse while keeping the capacity of its byte buffers.
+	ctx.Rows.Reset()
+	ctx.Common.Reset(0)
+
+	ctx.reqBuf = ctx.reqBuf[:0] // truncate to zero length, keep the backing array
+	ctx.tailBuf = ctx.tailBuf[:0]
+	ctx.metricNameBuf = ctx.metricNameBuf[:0]
+
+	ctx.err = nil // otherwise Read would return false immediately on the next use
+}
+
+func getPushCtx() *pushCtx { // getPushCtx returns a pushCtx taken from pushCtxPoolCh, else from pushCtxPool, else a newly allocated one.
+	select {
+	case ctx := <-pushCtxPoolCh:
+		return ctx
+	default: // the channel is empty; do not block
+		if v := pushCtxPool.Get(); v != nil {
+			return v.(*pushCtx)
+		}
+		return &pushCtx{}
+	}
+}
+
+func putPushCtx(ctx *pushCtx) { // putPushCtx resets ctx and stores it for reuse by getPushCtx; ctx must not be used after the call.
+	ctx.reset()
+	select {
+	case pushCtxPoolCh <- ctx:
+	default: // the channel is full; fall back to the sync.Pool
+		pushCtxPool.Put(ctx)
+	}
+}
+
+var pushCtxPool sync.Pool // fallback storage used when pushCtxPoolCh is empty (get) or full (put)
+var pushCtxPoolCh = make(chan *pushCtx, runtime.GOMAXPROCS(-1)) // buffered channel; capacity is the GOMAXPROCS value at package initialization
--- a/app/vmrestore/Makefile
+++ b/app/vmrestore/Makefile
@@ -6,32 +6,62 @@ vmrestore:
 vmrestore-prod:
 	APP_NAME=vmrestore $(MAKE) app-via-docker

+vmrestore-pure-prod:
+	APP_NAME=vmrestore $(MAKE) app-via-docker-pure
+
+vmrestore-amd64-prod:
+	APP_NAME=vmrestore $(MAKE) app-via-docker-amd64
+
+vmrestore-arm-prod:
+	APP_NAME=vmrestore $(MAKE) app-via-docker-arm
+
+vmrestore-arm64-prod:
+	APP_NAME=vmrestore $(MAKE) app-via-docker-arm64
+
+vmrestore-ppc64le-prod:
+	APP_NAME=vmrestore $(MAKE) app-via-docker-ppc64le
+
+vmrestore-386-prod:
+	APP_NAME=vmrestore $(MAKE) app-via-docker-386
+
 package-vmrestore:
 	APP_NAME=vmrestore $(MAKE) package-via-docker

+package-vmrestore-pure:
+	APP_NAME=vmrestore $(MAKE) package-via-docker-pure
+
+package-vmrestore-amd64:
+	APP_NAME=vmrestore $(MAKE) package-via-docker-amd64
+
+package-vmrestore-arm:
+	APP_NAME=vmrestore $(MAKE) package-via-docker-arm
+
+package-vmrestore-arm64:
+	APP_NAME=vmrestore $(MAKE) package-via-docker-arm64
+
+package-vmrestore-ppc64le:
+	APP_NAME=vmrestore $(MAKE) package-via-docker-ppc64le
+
+package-vmrestore-386:
+	APP_NAME=vmrestore $(MAKE) package-via-docker-386
+
 publish-vmrestore:
 	APP_NAME=vmrestore $(MAKE) publish-via-docker

+vmrestore-pure:
+	APP_NAME=vmrestore $(MAKE) app-local-pure
+
+vmrestore-amd64:
+	CGO_ENABLED=1 GOOS=linux GOARCH=amd64 GO111MODULE=on go build -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/vmrestore-amd64 ./app/vmrestore
+
 vmrestore-arm:
 	CGO_ENABLED=0 GOOS=linux GOARCH=arm GO111MODULE=on go build -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/vmrestore-arm ./app/vmrestore

-vmrestore-arm-prod:
-	APP_NAME=vmrestore APP_SUFFIX='-arm' DOCKER_OPTS='--env CGO_ENABLED=0 --env GOARCH=arm' $(MAKE) app-via-docker
-
 vmrestore-arm64:
 	CGO_ENABLED=0 GOOS=linux GOARCH=arm64 GO111MODULE=on go build -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/vmrestore-arm64 ./app/vmrestore

-vmrestore-arm64-prod:
-	APP_NAME=vmrestore APP_SUFFIX='-arm64' DOCKER_OPTS='--env CGO_ENABLED=0 --env GOARCH=arm64' $(MAKE) app-via-docker
+vmrestore-ppc64le:
+	CGO_ENABLED=0 GOOS=linux GOARCH=ppc64le GO111MODULE=on go build -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/vmrestore-ppc64le ./app/vmrestore

 vmrestore-386:
 	CGO_ENABLED=0 GOOS=linux GOARCH=386 GO111MODULE=on go build -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/vmrestore-386 ./app/vmrestore
-
-vmrestore-386-prod:
-	APP_NAME=vmrestore APP_SUFFIX='-386' DOCKER_OPTS='--env CGO_ENABLED=0 --env GOARCH=386' $(MAKE) app-via-docker
-
-vmrestore-pure:
-	APP_NAME=vmrestore $(MAKE) app-local-pure
-
-vmrestore-pure-prod:
-	APP_NAME=vmrestore APP_SUFFIX='-pure' DOCKER_OPTS='--env CGO_ENABLED=0' $(MAKE) app-via-docker
--- a/app/vmrestore/deployment/Dockerfile
+++ b/app/vmrestore/deployment/Dockerfile
@@ -1,5 +1,7 @@
+ARG certs_image
+FROM $certs_image AS certs
 FROM scratch
-COPY --from=local/certs:1.0.3 /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ca-certificates.crt
-COPY bin/vmrestore-prod .
-EXPOSE 8428
+COPY --from=certs /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ca-certificates.crt
+ARG src_binary
+COPY $src_binary ./vmrestore-prod
 ENTRYPOINT ["/vmrestore-prod"]
--- a/app/vmrestore/main.go
+++ b/app/vmrestore/main.go
@@ -16,8 +16,9 @@ var (
 		"Example: gcs://bucket/path/to/backup/dir, s3://bucket/path/to/backup/dir or fs:///path/to/local/backup/dir")
 	storageDataPath = flag.String("storageDataPath", "victoria-metrics-data", "Destination path where backup must be restored. "+
 		"VictoriaMetrics must be stopped when restoring from backup. -storageDataPath dir can be non-empty. In this case only missing data is downloaded from backup")
-	concurrency       = flag.Int("concurrency", 10, "The number of concurrent workers. Higher concurrency may reduce restore duration")
-	maxBytesPerSecond = flag.Int("maxBytesPerSecond", 0, "The maximum download speed. There is no limit if it is set to 0")
+	concurrency             = flag.Int("concurrency", 10, "The number of concurrent workers. Higher concurrency may reduce restore duration")
+	maxBytesPerSecond       = flag.Int("maxBytesPerSecond", 0, "The maximum download speed. There is no limit if it is set to 0")
+	skipBackupCompleteCheck = flag.Bool("skipBackupCompleteCheck", false, "Whether to skip checking for `backup complete` file in `-src`. This may be useful for restoring from old backups, which were created without `backup complete` file")
 )

 func main() {
@@ -34,9 +35,10 @@ func main() {
 		logger.Fatalf("%s", err)
 	}
 	a := &actions.Restore{
-		Concurrency: *concurrency,
-		Src:         srcFS,
-		Dst:         dstFS,
+		Concurrency:             *concurrency,
+		Src:                     srcFS,
+		Dst:                     dstFS,
+		SkipBackupCompleteCheck: *skipBackupCompleteCheck,
 	}
 	if err := a.Run(); err != nil {
 		logger.Fatalf("cannot restore from backup: %s", err)
--- a/app/vmselect/main.go
+++ b/app/vmselect/main.go
@@ -21,10 +21,25 @@ import (

 var (
 	deleteAuthKey         = flag.String("deleteAuthKey", "", "authKey for metrics' deletion via /api/v1/admin/tsdb/delete_series")
-	maxConcurrentRequests = flag.Int("search.maxConcurrentRequests", runtime.GOMAXPROCS(-1)*2, "The maximum number of concurrent search requests. It shouldn't exceed 2*vCPUs for better performance. See also -search.maxQueueDuration")
-	maxQueueDuration      = flag.Duration("search.maxQueueDuration", 10*time.Second, "The maximum time the request waits for execution when -search.maxConcurrentRequests limit is reached")
+	maxConcurrentRequests = flag.Int("search.maxConcurrentRequests", getDefaultMaxConcurrentRequests(), "The maximum number of concurrent search requests. "+
+		"It shouldn't be high, since a single request can saturate all the CPU cores. See also `-search.maxQueueDuration`")
+	maxQueueDuration = flag.Duration("search.maxQueueDuration", 10*time.Second, "The maximum time the request waits for execution when `-search.maxConcurrentRequests` limit is reached")
 )

+func getDefaultMaxConcurrentRequests() int { // getDefaultMaxConcurrentRequests derives the default for -search.maxConcurrentRequests from GOMAXPROCS.
+	n := runtime.GOMAXPROCS(-1) // an argument below 1 only queries the current setting
+	if n <= 4 {
+		n *= 2 // NOTE(review): yields 8 for GOMAXPROCS=4 but 5 for GOMAXPROCS=5 - confirm that this step is intended
+	}
+	if n > 16 {
+		// A single request can saturate all the CPU cores, so there is no sense
+		// in allowing higher number of concurrent requests - they will just contend
+		// for unavailable CPU time.
+		n = 16
+	}
+	return n
+}
+
 // Init initializes vmselect
 func Init() {
 	tmpDirPath := *vmstorage.DataPath + "/tmp"
@@ -72,7 +87,9 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
 			timerpool.Put(t)
 			concurrencyLimitTimeout.Inc()
 			err := &httpserver.ErrorWithStatusCode{
-				Err:        fmt.Errorf("cannot handle more than %d concurrent requests", cap(concurrencyCh)),
+				Err: fmt.Errorf("cannot handle more than %d concurrent search requests during %s; possible solutions: "+
+					"increase `-search.maxQueueDuration`, increase `-search.maxConcurrentRequests`, increase server capacity",
+					*maxConcurrentRequests, *maxQueueDuration),
 				StatusCode: http.StatusServiceUnavailable,
 			}
 			httpserver.Errorf(w, "%s", err)
@@ -167,6 +184,18 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
 			return true
 		}
 		return true
+	case "/api/v1/rules":
+		// Return dumb placeholder
+		rulesRequests.Inc()
+		w.Header().Set("Content-Type", "application/json")
+		fmt.Fprintf(w, "%s", `{"status":"success","data":{"groups":[]}}`)
+		return true
+	case "/api/v1/alerts":
+		// Return dumb placeholder
+		alertsRequests.Inc()
+		w.Header().Set("Content-Type", "application/json")
+		fmt.Fprintf(w, "%s", `{"status":"success","data":{"alerts":[]}}`)
+		return true
 	case "/api/v1/admin/tsdb/delete_series":
 		deleteRequests.Inc()
 		authKey := r.FormValue("authKey")
@@ -187,7 +216,7 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
 }

 func sendPrometheusError(w http.ResponseWriter, r *http.Request, err error) {
-	logger.Errorf("error in %q: %s", r.URL.Path, err)
+	logger.Errorf("error in %q: %s", r.RequestURI, err)

 	w.Header().Set("Content-Type", "application/json")
 	statusCode := http.StatusUnprocessableEntity
@@ -228,4 +257,7 @@ var (

 	federateRequests = metrics.NewCounter(`vm_http_requests_total{path="/federate"}`)
 	federateErrors   = metrics.NewCounter(`vm_http_request_errors_total{path="/federate"}`)
+
+	rulesRequests  = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/rules"}`)
+	alertsRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/alerts"}`)
 )
--- a/app/vmselect/netstorage/fadvise_darwin.go
+++ b/app/vmselect/netstorage/fadvise_darwin.go
@@ -1,9 +0,0 @@
-package netstorage
-
-import (
-	"os"
-)
-
-func mustFadviseSequentialRead(f *os.File) {
-	// Do nothing :)
-}
--- a/app/vmselect/netstorage/fadvise_freebsd.go
+++ b/app/vmselect/netstorage/fadvise_freebsd.go
@@ -1,15 +0,0 @@
-package netstorage
-
-import (
-	"os"
-
-	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
-	"golang.org/x/sys/unix"
-)
-
-func mustFadviseSequentialRead(f *os.File) {
-	fd := int(f.Fd())
-	if err := unix.Fadvise(int(fd), 0, 0, unix.FADV_SEQUENTIAL|unix.FADV_WILLNEED); err != nil {
-		logger.Panicf("FATAL: error returned from unix.Fadvise(SEQUENTIAL|WILLNEED): %s", err)
-	}
-}
--- a/app/vmselect/netstorage/fadvise_linux.go
+++ b/app/vmselect/netstorage/fadvise_linux.go
@@ -1,15 +0,0 @@
-package netstorage
-
-import (
-	"os"
-
-	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
-	"golang.org/x/sys/unix"
-)
-
-func mustFadviseSequentialRead(f *os.File) {
-	fd := int(f.Fd())
-	if err := unix.Fadvise(int(fd), 0, 0, unix.FADV_SEQUENTIAL|unix.FADV_WILLNEED); err != nil {
-		logger.Panicf("FATAL: error returned from unix.Fadvise(SEQUENTIAL|WILLNEED): %s", err)
-	}
-}
--- a/app/vmselect/netstorage/netstorage.go
+++ b/app/vmselect/netstorage/netstorage.go
@@ -103,7 +103,7 @@ func (rss *Results) RunParallel(f func(rs *Result, workerID uint)) error {
 			rowsProcessed := 0
 			for pts := range workCh {
 				if time.Until(rss.deadline.Deadline) < 0 {
-					err = fmt.Errorf("timeout exceeded during query execution: %s", rss.deadline.Timeout)
+					err = fmt.Errorf("timeout exceeded during query execution: %s", rss.deadline.String())
 					break
 				}
 				if err = pts.Unpack(rss.tbf, rs, rss.tr, rss.fetchData, maxWorkersCount); err != nil {
@@ -432,13 +432,10 @@ func GetLabelEntries(deadline Deadline) ([]storage.TagEntry, error) {
 	// Sort labelEntries by the number of label values in each entry.
 	sort.Slice(labelEntries, func(i, j int) bool {
 		a, b := labelEntries[i].Values, labelEntries[j].Values
-		if len(a) < len(b) {
-			return true
+		if len(a) != len(b) {
+			return len(a) > len(b)
 		}
-		if len(a) > len(b) {
-			return false
-		}
-		return labelEntries[i].Key < labelEntries[j].Key
+		return labelEntries[i].Key > labelEntries[j].Key
 	})

 	return labelEntries, nil
@@ -502,7 +499,7 @@ func ProcessSearchQuery(sq *storage.SearchQuery, fetchData bool, deadline Deadli
 		}
 		if time.Until(deadline.Deadline) < 0 {
 			putTmpBlocksFile(tbf)
-			return nil, fmt.Errorf("timeout exceeded while fetching data block #%d from storage: %s", blocksRead, deadline.Timeout)
+			return nil, fmt.Errorf("timeout exceeded while fetching data block #%d from storage: %s", blocksRead, deadline.String())
 		}
 		metricName := sr.MetricBlock.MetricName
 		m[string(metricName)] = append(m[string(metricName)], addr)
@@ -578,13 +575,24 @@ func setupTfss(tagFilterss [][]storage.TagFilter) ([]*storage.TagFilters, error)
 // Deadline contains deadline with the corresponding timeout for pretty error messages.
 type Deadline struct {
 	Deadline time.Time
-	Timeout  time.Duration
+
+	timeout  time.Duration
+	flagHint string
 }

 // NewDeadline returns deadline for the given timeout.
-func NewDeadline(timeout time.Duration) Deadline {
+//
+// flagHint must contain a hint for the command-line flag, which could be used
+// in order to increase timeout.
+func NewDeadline(timeout time.Duration, flagHint string) Deadline {
 	return Deadline{
 		Deadline: time.Now().Add(timeout),
-		Timeout:  timeout,
+		timeout:  timeout,
+		flagHint: flagHint,
 	}
 }
+
+// String returns the timeout of d in seconds with millisecond precision, followed by a hint naming the command-line flag that adjusts it.
+func (d *Deadline) String() string {
+	return fmt.Sprintf("%.3f seconds; the timeout can be adjusted with `%s` command-line flag", d.timeout.Seconds(), d.flagHint)
+}
--- a/app/vmselect/netstorage/tmp_blocks_file.go
+++ b/app/vmselect/netstorage/tmp_blocks_file.go
@@ -36,6 +36,9 @@ func maxInmemoryTmpBlocksFile() int {
 	if maxLen < 64*1024 {
 		return 64 * 1024
 	}
+	if maxLen > 4*1024*1024 {
+		return 4 * 1024 * 1024
+	}
 	return maxLen
 }

@@ -128,7 +131,7 @@ func (tbf *tmpBlocksFile) Finalize() error {
 	// Hint the OS that the file is read almost sequentiallly.
 	// This should reduce the number of disk seeks, which is important
 	// for HDDs.
-	mustFadviseSequentialRead(tbf.f)
+	fs.MustFadviseSequentialRead(tbf.f, true)
 	return nil
 }

--- a/app/vmselect/prometheus/prometheus.go
+++ b/app/vmselect/prometheus/prometheus.go
@@ -15,6 +15,7 @@ import (
 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/netstorage"
 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/promql"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/metricsql"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
 	"github.com/VictoriaMetrics/metrics"
 	"github.com/valyala/quicktemplate"
@@ -23,9 +24,10 @@ import (
 var (
 	latencyOffset = flag.Duration("search.latencyOffset", time.Second*30, "The time when data points become visible in query results after the colection. "+
 		"Too small value can result in incomplete last points for query results")
-	maxQueryDuration = flag.Duration("search.maxQueryDuration", time.Second*30, "The maximum time for search query execution")
-	maxQueryLen      = flag.Int("search.maxQueryLen", 16*1024, "The maximum search query length in bytes")
-	maxLookback      = flag.Duration("search.maxLookback", 0, "Synonim to `-search.lookback-delta` from Prometheus. "+
+	maxExportDuration = flag.Duration("search.maxExportDuration", time.Hour*24*30, "The maximum duration for `/api/v1/export` call")
+	maxQueryDuration  = flag.Duration("search.maxQueryDuration", time.Second*30, "The maximum duration for search query execution")
+	maxQueryLen       = flag.Int("search.maxQueryLen", 16*1024, "The maximum search query length in bytes")
+	maxLookback       = flag.Duration("search.maxLookback", 0, "Synonim to `-search.lookback-delta` from Prometheus. "+
 		"The value is dynamically detected from interval between time series datapoints if not set. It can be overridden on per-query basis via `max_lookback` arg")
 )

@@ -58,7 +60,7 @@ func FederateHandler(w http.ResponseWriter, r *http.Request) error {
 	if err != nil {
 		return err
 	}
-	deadline := getDeadline(r)
+	deadline := getDeadlineForQuery(r)
 	if start >= end {
 		start = end - defaultStep
 	}
@@ -129,12 +131,12 @@ func ExportHandler(w http.ResponseWriter, r *http.Request) error {
 		return err
 	}
 	format := r.FormValue("format")
-	deadline := getDeadline(r)
+	deadline := getDeadlineForExport(r)
 	if start >= end {
 		end = start + defaultStep
 	}
 	if err := exportHandler(w, matches, start, end, format, deadline); err != nil {
-		return err
+		return fmt.Errorf("error when exporting data for queries=%q on the time range (start=%d, end=%d): %s", matches, start, end, err)
 	}
 	exportDuration.UpdateDuration(startTime)
 	return nil
@@ -235,7 +237,7 @@ var deleteDuration = metrics.NewSummary(`vm_request_duration_seconds{path="/api/
 // See https://prometheus.io/docs/prometheus/latest/querying/api/#querying-label-values
 func LabelValuesHandler(labelName string, w http.ResponseWriter, r *http.Request) error {
 	startTime := time.Now()
-	deadline := getDeadline(r)
+	deadline := getDeadlineForQuery(r)

 	if err := r.ParseForm(); err != nil {
 		return fmt.Errorf("cannot parse form values: %s", err)
@@ -285,6 +287,13 @@ func labelValuesWithMatches(labelName string, matches []string, start, end int64
 	if err != nil {
 		return nil, err
 	}
+	for i, tfs := range tagFilterss {
+		// Add `labelName!=''` tag filter in order to filter out series without the labelName.
+		tagFilterss[i] = append(tfs, storage.TagFilter{
+			Key:        []byte(labelName),
+			IsNegative: true,
+		})
+	}
 	if start >= end {
 		end = start + defaultStep
 	}
@@ -326,7 +335,7 @@ var labelValuesDuration = metrics.NewSummary(`vm_request_duration_seconds{path="
 // LabelsCountHandler processes /api/v1/labels/count request.
 func LabelsCountHandler(w http.ResponseWriter, r *http.Request) error {
 	startTime := time.Now()
-	deadline := getDeadline(r)
+	deadline := getDeadlineForQuery(r)
 	labelEntries, err := netstorage.GetLabelEntries(deadline)
 	if err != nil {
 		return fmt.Errorf(`cannot obtain label entries: %s`, err)
@@ -345,10 +354,38 @@ var labelsCountDuration = metrics.NewSummary(`vm_request_duration_seconds{path="
 // See https://prometheus.io/docs/prometheus/latest/querying/api/#getting-label-names
 func LabelsHandler(w http.ResponseWriter, r *http.Request) error {
 	startTime := time.Now()
-	deadline := getDeadline(r)
-	labels, err := netstorage.GetLabels(deadline)
-	if err != nil {
-		return fmt.Errorf("cannot obtain labels: %s", err)
+	deadline := getDeadlineForQuery(r)
+
+	if err := r.ParseForm(); err != nil {
+		return fmt.Errorf("cannot parse form values: %s", err)
+	}
+	var labels []string
+	if len(r.Form["match[]"]) == 0 && len(r.Form["start"]) == 0 && len(r.Form["end"]) == 0 {
+		var err error
+		labels, err = netstorage.GetLabels(deadline)
+		if err != nil {
+			return fmt.Errorf("cannot obtain labels: %s", err)
+		}
+	} else {
+		// Extended functionality that allows filtering by label filters and time range
+		// i.e. /api/v1/labels?match[]=foobar{baz="abc"}&start=...&end=...
+		matches := r.Form["match[]"]
+		if len(matches) == 0 {
+			matches = []string{"{__name__!=''}"}
+		}
+		ct := currentTime()
+		end, err := getTime(r, "end", ct)
+		if err != nil {
+			return err
+		}
+		start, err := getTime(r, "start", end-defaultStep)
+		if err != nil {
+			return err
+		}
+		labels, err = labelsWithMatches(matches, start, end, deadline)
+		if err != nil {
+			return fmt.Errorf("cannot obtain labels for match[]=%q, start=%d, end=%d: %s", matches, start, end, err)
+		}
 	}

 	w.Header().Set("Content-Type", "application/json")
@@ -357,12 +394,57 @@ func LabelsHandler(w http.ResponseWriter, r *http.Request) error {
 	return nil
 }

+func labelsWithMatches(matches []string, start, end int64, deadline netstorage.Deadline) ([]string, error) { // labelsWithMatches returns the sorted label names of series selected by matches between start and end.
+	if len(matches) == 0 { // callers must pass at least one match expression
+		logger.Panicf("BUG: matches must be non-empty")
+	}
+	tagFilterss, err := getTagFilterssFromMatches(matches)
+	if err != nil {
+		return nil, err
+	}
+	if start >= end { // empty or inverted range: extend it forward by defaultStep
+		end = start + defaultStep
+	}
+	sq := &storage.SearchQuery{
+		MinTimestamp: start,
+		MaxTimestamp: end,
+		TagFilterss:  tagFilterss,
+	}
+	rss, err := netstorage.ProcessSearchQuery(sq, false, deadline) // false is the fetchData argument
+	if err != nil {
+		return nil, fmt.Errorf("cannot fetch data for %q: %s", sq, err)
+	}
+
+	m := make(map[string]struct{}) // set of label names seen so far
+	var mLock sync.Mutex // guards m; the callback below takes a workerID, so it is presumably invoked from several goroutines
+	err = rss.RunParallel(func(rs *netstorage.Result, workerID uint) {
+		mLock.Lock()
+		tags := rs.MetricName.Tags
+		for i := range tags {
+			t := &tags[i]
+			m[string(t.Key)] = struct{}{}
+		}
+		m["__name__"] = struct{}{} // added explicitly; NOTE(review): presumably the metric name is not part of MetricName.Tags - confirm
+		mLock.Unlock()
+	})
+	if err != nil {
+		return nil, fmt.Errorf("error when data fetching: %s", err)
+	}
+
+	labels := make([]string, 0, len(m))
+	for label := range m {
+		labels = append(labels, label)
+	}
+	sort.Strings(labels) // map iteration order is unspecified, so sort for a stable result
+	return labels, nil
+}
+
 var labelsDuration = metrics.NewSummary(`vm_request_duration_seconds{path="/api/v1/labels"}`)

 // SeriesCountHandler processes /api/v1/series/count request.
 func SeriesCountHandler(w http.ResponseWriter, r *http.Request) error {
 	startTime := time.Now()
-	deadline := getDeadline(r)
+	deadline := getDeadlineForQuery(r)
 	n, err := netstorage.GetSeriesCount(deadline)
 	if err != nil {
 		return fmt.Errorf("cannot obtain series count: %s", err)
@@ -402,7 +484,7 @@ func SeriesHandler(w http.ResponseWriter, r *http.Request) error {
 	if err != nil {
 		return err
 	}
-	deadline := getDeadline(r)
+	deadline := getDeadlineForQuery(r)

 	tagFilterss, err := getTagFilterssFromMatches(matches)
 	if err != nil {
@@ -471,14 +553,14 @@ func QueryHandler(w http.ResponseWriter, r *http.Request) error {
 	if err != nil {
 		return err
 	}
-	deadline := getDeadline(r)
+	deadline := getDeadlineForQuery(r)
 	lookbackDelta, err := getMaxLookback(r)
 	if err != nil {
 		return err
 	}

 	if len(query) > *maxQueryLen {
-		return fmt.Errorf(`too long query; got %d bytes; mustn't exceed %d bytes`, len(query), *maxQueryLen)
+		return fmt.Errorf("too long query; got %d bytes; mustn't exceed `-search.maxQueryLen=%d` bytes", len(query), *maxQueryLen)
 	}
 	if !getBool(r, "nocache") && ct-start < queryOffset {
 		// Adjust start time only if `nocache` arg isn't set.
@@ -486,27 +568,44 @@ func QueryHandler(w http.ResponseWriter, r *http.Request) error {
 		start = ct - queryOffset
 	}
 	if childQuery, windowStr, offsetStr := promql.IsMetricSelectorWithRollup(query); childQuery != "" {
-		var window int64
-		if len(windowStr) > 0 {
-			var err error
-			window, err = promql.DurationValue(windowStr, step)
-			if err != nil {
-				return err
-			}
+		window, err := parsePositiveDuration(windowStr, step)
+		if err != nil {
+			return fmt.Errorf("cannot parse window: %s", err)
 		}
-		var offset int64
-		if len(offsetStr) > 0 {
-			var err error
-			offset, err = promql.DurationValue(offsetStr, step)
-			if err != nil {
-				return err
-			}
+		offset, err := parseDuration(offsetStr, step)
+		if err != nil {
+			return fmt.Errorf("cannot parse offset: %s", err)
 		}
 		start -= offset
 		end := start
 		start = end - window
 		if err := exportHandler(w, []string{childQuery}, start, end, "promapi", deadline); err != nil {
-			return err
+			return fmt.Errorf("error when exporting data for query=%q on the time range (start=%d, end=%d): %s", childQuery, start, end, err)
+		}
+		queryDuration.UpdateDuration(startTime)
+		return nil
+	}
+	if childQuery, windowStr, stepStr, offsetStr := promql.IsRollup(query); childQuery != "" {
+		newStep, err := parsePositiveDuration(stepStr, step)
+		if err != nil {
+			return fmt.Errorf("cannot parse step: %s", err)
+		}
+		if newStep > 0 {
+			step = newStep
+		}
+		window, err := parsePositiveDuration(windowStr, step)
+		if err != nil {
+			return fmt.Errorf("cannot parse window: %s", err)
+		}
+		offset, err := parseDuration(offsetStr, step)
+		if err != nil {
+			return fmt.Errorf("cannot parse offset: %s", err)
+		}
+		start -= offset
+		end := start
+		start = end - window
+		if err := queryRangeHandler(w, childQuery, start, end, step, r, ct); err != nil {
+			return fmt.Errorf("error when executing query=%q on the time range (start=%d, end=%d, step=%d): %s", childQuery, start, end, step, err)
 		}
 		queryDuration.UpdateDuration(startTime)
 		return nil
@@ -521,7 +620,7 @@ func QueryHandler(w http.ResponseWriter, r *http.Request) error {
 	}
 	result, err := promql.Exec(&ec, query, true)
 	if err != nil {
-		return fmt.Errorf("cannot execute %q: %s", query, err)
+		return fmt.Errorf("error when executing query=%q for (time=%d, step=%d): %s", query, start, step, err)
 	}

 	w.Header().Set("Content-Type", "application/json")
@@ -532,6 +631,20 @@ func QueryHandler(w http.ResponseWriter, r *http.Request) error {

 var queryDuration = metrics.NewSummary(`vm_request_duration_seconds{path="/api/v1/query"}`)

+func parseDuration(s string, step int64) (int64, error) { // parseDuration returns 0 for an empty s; otherwise it returns the result of metricsql.DurationValue(s, step).
+	if len(s) == 0 { // an empty string means "not set"
+		return 0, nil
+	}
+	return metricsql.DurationValue(s, step)
+}
+
+func parsePositiveDuration(s string, step int64) (int64, error) { // parsePositiveDuration returns 0 for an empty s; otherwise it returns the result of metricsql.PositiveDurationValue(s, step).
+	if len(s) == 0 { // an empty string means "not set"
+		return 0, nil
+	}
+	return metricsql.PositiveDurationValue(s, step)
+}
+
 // QueryRangeHandler processes /api/v1/query_range request.
 //
 // See https://prometheus.io/docs/prometheus/latest/querying/api/#range-queries
@@ -555,7 +668,15 @@ func QueryRangeHandler(w http.ResponseWriter, r *http.Request) error {
 	if err != nil {
 		return err
 	}
-	deadline := getDeadline(r)
+	if err := queryRangeHandler(w, query, start, end, step, r, ct); err != nil {
+		return fmt.Errorf("error when executing query=%q on the time range (start=%d, end=%d, step=%d): %s", query, start, end, step, err)
+	}
+	queryRangeDuration.UpdateDuration(startTime)
+	return nil
+}
+
+func queryRangeHandler(w http.ResponseWriter, query string, start, end, step int64, r *http.Request, ct int64) error {
+	deadline := getDeadlineForQuery(r)
 	mayCache := !getBool(r, "nocache")
 	lookbackDelta, err := getMaxLookback(r)
 	if err != nil {
@@ -564,7 +685,7 @@ func QueryRangeHandler(w http.ResponseWriter, r *http.Request) error {

 	// Validate input args.
 	if len(query) > *maxQueryLen {
-		return fmt.Errorf(`too long query; got %d bytes; mustn't exceed %d bytes`, len(query), *maxQueryLen)
+		return fmt.Errorf("too long query; got %d bytes; mustn't exceed `-search.maxQueryLen=%d` bytes", len(query), *maxQueryLen)
 	}
 	if start > end {
 		end = start + defaultStep
@@ -586,7 +707,7 @@ func QueryRangeHandler(w http.ResponseWriter, r *http.Request) error {
 	}
 	result, err := promql.Exec(&ec, query, false)
 	if err != nil {
-		return fmt.Errorf("cannot execute %q: %s", query, err)
+		return fmt.Errorf("cannot execute query: %s", err)
 	}
 	queryOffset := getLatencyOffsetMilliseconds()
 	if ct-end < queryOffset {
@@ -599,7 +720,6 @@ func QueryRangeHandler(w http.ResponseWriter, r *http.Request) error {

 	w.Header().Set("Content-Type", "application/json")
 	WriteQueryRangeResponse(w, result)
-	queryRangeDuration.UpdateDuration(startTime)
 	return nil
 }

@@ -748,17 +868,26 @@ func getMaxLookback(r *http.Request) (int64, error) {
 	return getDuration(r, "max_lookback", d)
 }

-func getDeadline(r *http.Request) netstorage.Deadline {
+func getDeadlineForQuery(r *http.Request) netstorage.Deadline { // getDeadlineForQuery returns the deadline for a query request, using -search.maxQueryDuration as the maximum duration.
+	dMax := int64(maxQueryDuration.Seconds() * 1e3) // flag value converted to milliseconds
+	return getDeadlineWithMaxDuration(r, dMax, "-search.maxQueryDuration")
+}
+
+func getDeadlineForExport(r *http.Request) netstorage.Deadline { // getDeadlineForExport returns the deadline for an export request, using -search.maxExportDuration as the maximum duration.
+	dMax := int64(maxExportDuration.Seconds() * 1e3) // flag value converted to milliseconds
+	return getDeadlineWithMaxDuration(r, dMax, "-search.maxExportDuration")
+}
+
+func getDeadlineWithMaxDuration(r *http.Request, dMax int64, flagHint string) netstorage.Deadline {
 	d, err := getDuration(r, "timeout", 0)
 	if err != nil {
 		d = 0
 	}
-	dMax := int64(maxQueryDuration.Seconds() * 1e3)
 	if d <= 0 || d > dMax {
 		d = dMax
 	}
 	timeout := time.Duration(d) * time.Millisecond
-	return netstorage.NewDeadline(timeout)
+	return netstorage.NewDeadline(timeout, flagHint)
 }

 func getBool(r *http.Request, argKey string) bool {
--- a/app/vmselect/promql/aggr.go
+++ b/app/vmselect/promql/aggr.go
@@ -8,8 +8,10 @@ import (
 	"strings"

 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/metricsql"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
 	"github.com/VictoriaMetrics/metrics"
+	"github.com/valyala/histogram"
 )

 var aggrFuncs = map[string]aggrFunc{
@@ -26,20 +28,28 @@ var aggrFuncs = map[string]aggrFunc{
 	"topk":         newAggrFuncTopK(false),
 	"quantile":     aggrFuncQuantile,

-	// Extended PromQL funcs
-	"median":    aggrFuncMedian,
-	"limitk":    aggrFuncLimitK,
-	"distinct":  newAggrFunc(aggrFuncDistinct),
-	"sum2":      newAggrFunc(aggrFuncSum2),
-	"geomean":   newAggrFunc(aggrFuncGeomean),
-	"histogram": newAggrFunc(aggrFuncHistogram),
+	// PromQL extension funcs
+	"median":         aggrFuncMedian,
+	"limitk":         aggrFuncLimitK,
+	"distinct":       newAggrFunc(aggrFuncDistinct),
+	"sum2":           newAggrFunc(aggrFuncSum2),
+	"geomean":        newAggrFunc(aggrFuncGeomean),
+	"histogram":      newAggrFunc(aggrFuncHistogram),
+	"topk_min":       newAggrFuncRangeTopK(minValue, false),
+	"topk_max":       newAggrFuncRangeTopK(maxValue, false),
+	"topk_avg":       newAggrFuncRangeTopK(avgValue, false),
+	"topk_median":    newAggrFuncRangeTopK(medianValue, false),
+	"bottomk_min":    newAggrFuncRangeTopK(minValue, true),
+	"bottomk_max":    newAggrFuncRangeTopK(maxValue, true),
+	"bottomk_avg":    newAggrFuncRangeTopK(avgValue, true),
+	"bottomk_median": newAggrFuncRangeTopK(medianValue, true),
 }

 type aggrFunc func(afa *aggrFuncArg) ([]*timeseries, error)

 type aggrFuncArg struct {
 	args [][]*timeseries
-	ae   *aggrFuncExpr
+	ae   *metricsql.AggrFuncExpr
 	ec   *EvalConfig
 }

@@ -48,20 +58,6 @@ func getAggrFunc(s string) aggrFunc {
 	return aggrFuncs[s]
 }

-func isAggrFunc(s string) bool {
-	return getAggrFunc(s) != nil
-}
-
-func isAggrFuncModifier(s string) bool {
-	s = strings.ToLower(s)
-	switch s {
-	case "by", "without":
-		return true
-	default:
-		return false
-	}
-}
-
 func newAggrFunc(afe func(tss []*timeseries) []*timeseries) aggrFunc {
 	return func(afa *aggrFuncArg) ([]*timeseries, error) {
 		args := afa.args
@@ -72,7 +68,7 @@ func newAggrFunc(afe func(tss []*timeseries) []*timeseries) aggrFunc {
 	}
 }

-func removeGroupTags(metricName *storage.MetricName, modifier *modifierExpr) {
+func removeGroupTags(metricName *storage.MetricName, modifier *metricsql.ModifierExpr) {
 	groupOp := strings.ToLower(modifier.Op)
 	switch groupOp {
 	case "", "by":
@@ -84,7 +80,7 @@ func removeGroupTags(metricName *storage.MetricName, modifier *modifierExpr) {
 	}
 }

-func aggrFuncExt(afe func(tss []*timeseries) []*timeseries, argOrig []*timeseries, modifier *modifierExpr, keepOriginal bool) ([]*timeseries, error) {
+func aggrFuncExt(afe func(tss []*timeseries) []*timeseries, argOrig []*timeseries, modifier *metricsql.ModifierExpr, keepOriginal bool) ([]*timeseries, error) {
 	arg := copyTimeseriesMetricNames(argOrig)

 	// Perform grouping.
@@ -459,37 +455,138 @@ func newAggrFuncTopK(isReverse bool) aggrFunc {
 			return nil, err
 		}
 		afe := func(tss []*timeseries) []*timeseries {
-			rvs := tss
-			for n := range rvs[0].Values {
-				sort.Slice(rvs, func(i, j int) bool {
-					a := rvs[i].Values[n]
-					b := rvs[j].Values[n]
-					cmp := lessWithNaNs(a, b)
+			for n := range tss[0].Values {
+				sort.Slice(tss, func(i, j int) bool {
+					a := tss[i].Values[n]
+					b := tss[j].Values[n]
 					if isReverse {
-						cmp = !cmp
+						a, b = b, a
 					}
-					return cmp
+					return lessWithNaNs(a, b)
 				})
-				if math.IsNaN(ks[n]) {
-					ks[n] = 0
-				}
-				k := int(ks[n])
-				if k < 0 {
-					k = 0
-				}
-				if k > len(rvs) {
-					k = len(rvs)
-				}
-				for _, ts := range rvs[:len(rvs)-k] {
-					ts.Values[n] = nan
-				}
+				fillNaNsAtIdx(n, ks[n], tss)
 			}
-			return removeNaNs(rvs)
+			return removeNaNs(tss)
 		}
 		return aggrFuncExt(afe, args[1], &afa.ae.Modifier, true)
 	}
 }

+type tsWithValue struct {
+	ts    *timeseries // series to be ordered
+	value float64     // value computed from ts that it is ordered by
+}
+
+// newAggrFuncRangeTopK returns an aggrFunc that orders the series of each group by
+// f applied to all values of a series, then, for every point i, keeps only the last
+// ks[i] series of that order (see fillNaNsAtIdx). isReverse inverts the order.
+func newAggrFuncRangeTopK(f func(values []float64) float64, isReverse bool) aggrFunc {
+	return func(afa *aggrFuncArg) ([]*timeseries, error) {
+		if err := expectTransformArgsNum(afa.args, 2); err != nil {
+			return nil, err
+		}
+		ks, err := getScalar(afa.args[0], 0)
+		if err != nil {
+			return nil, err
+		}
+		less := func(a, b float64) bool {
+			if isReverse {
+				return lessWithNaNs(b, a)
+			}
+			return lessWithNaNs(a, b)
+		}
+		afe := func(tss []*timeseries) []*timeseries {
+			ranked := make([]tsWithValue, 0, len(tss))
+			for _, ts := range tss {
+				ranked = append(ranked, tsWithValue{ts: ts, value: f(ts.Values)})
+			}
+			sort.Slice(ranked, func(i, j int) bool {
+				return less(ranked[i].value, ranked[j].value)
+			})
+			// Write the ranked order back into tss.
+			for i := range ranked {
+				tss[i] = ranked[i].ts
+			}
+			for idx, k := range ks {
+				fillNaNsAtIdx(idx, k, tss)
+			}
+			return removeNaNs(tss)
+		}
+		return aggrFuncExt(afe, afa.args[1], &afa.ae.Modifier, true)
+	}
+}
+
+// fillNaNsAtIdx keeps the last k series of tss at point idx and sets
+// Values[idx] to nan for every series before them.
+func fillNaNsAtIdx(idx int, k float64, tss []*timeseries) {
+	// Clamp k while it is still a float: int(k) is implementation-specific for Inf and huge k.
+	keep := 0
+	switch {
+	case k >= float64(len(tss)):
+		keep = len(tss)
+	case k > 0:
+		keep = int(k)
+	}
+	for _, ts := range tss[:len(tss)-keep] {
+		ts.Values[idx] = nan
+	}
+}
+
+// minValue returns the smallest non-NaN item in values, or nan if there is none.
+// NaN items are skipped (as in avgValue) so a leading NaN cannot become the result.
+func minValue(values []float64) float64 {
+	smallest := nan
+	for _, v := range values {
+		// The first non-NaN item replaces the initial nan unconditionally.
+		if !math.IsNaN(v) && (math.IsNaN(smallest) || v < smallest) {
+			smallest = v
+		}
+	}
+	return smallest
+}
+
+// maxValue returns the largest non-NaN item in values, or nan if there is none.
+// NaN items are skipped (as in avgValue) so a leading NaN cannot become the result.
+func maxValue(values []float64) float64 {
+	largest := nan
+	for _, v := range values {
+		// The first non-NaN item replaces the initial nan unconditionally.
+		if !math.IsNaN(v) && (math.IsNaN(largest) || v > largest) {
+			largest = v
+		}
+	}
+	return largest
+}
+
+// avgValue returns the mean of the non-NaN items in values, or nan if there are none.
+func avgValue(values []float64) float64 {
+	var total float64
+	n := 0
+	for _, v := range values {
+		if !math.IsNaN(v) {
+			total += v
+			n++
+		}
+	}
+	if n == 0 {
+		return nan
+	}
+	return total / float64(n)
+}
+
+// medianValue returns the 0.5 quantile of the non-NaN items in values.
+func medianValue(values []float64) float64 {
+	h := histogram.GetFast()
+	// Hand h back via PutFast after the quantile has been read.
+	defer histogram.PutFast(h)
+	for _, v := range values {
+		if !math.IsNaN(v) {
+			h.Update(v)
+		}
+	}
+	return h.Quantile(0.5)
+}
+
 func aggrFuncLimitK(afa *aggrFuncArg) ([]*timeseries, error) {
 	args := afa.args
 	if err := expectTransformArgsNum(args, 2); err != nil {
--- a/app/vmselect/promql/aggr_incremental.go
+++ b/app/vmselect/promql/aggr_incremental.go
@@ -4,10 +4,12 @@ import (
 	"math"
 	"strings"
 	"sync"
+
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/metricsql"
 )

 // callbacks for optimized incremental calculations for aggregate functions
-// over rollups over metricExpr.
+// over rollups over metricsql.MetricExpr.
 //
 // These calculations save RAM for aggregates over big number of time series.
 var incrementalAggrFuncCallbacksMap = map[string]*incrementalAggrFuncCallbacks{
@@ -49,7 +51,7 @@ var incrementalAggrFuncCallbacksMap = map[string]*incrementalAggrFuncCallbacks{
 }

 type incrementalAggrFuncContext struct {
-	ae *aggrFuncExpr
+	ae *metricsql.AggrFuncExpr

 	mLock sync.Mutex
 	m     map[uint]map[string]*incrementalAggrContext
@@ -57,7 +59,7 @@ type incrementalAggrFuncContext struct {
 	callbacks *incrementalAggrFuncCallbacks
 }

-func newIncrementalAggrFuncContext(ae *aggrFuncExpr, callbacks *incrementalAggrFuncCallbacks) *incrementalAggrFuncContext {
+func newIncrementalAggrFuncContext(ae *metricsql.AggrFuncExpr, callbacks *incrementalAggrFuncCallbacks) *incrementalAggrFuncContext {
 	return &incrementalAggrFuncContext{
 		ae:        ae,
 		m:         make(map[uint]map[string]*incrementalAggrContext),
--- a/app/vmselect/promql/aggr_incremental_test.go
+++ b/app/vmselect/promql/aggr_incremental_test.go
@@ -7,6 +7,8 @@ import (
 	"runtime"
 	"sync"
 	"testing"
+
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/metricsql"
 )

 func TestIncrementalAggr(t *testing.T) {
@@ -42,7 +44,7 @@ func TestIncrementalAggr(t *testing.T) {
 	f := func(name string, valuesExpected []float64) {
 		t.Helper()
 		callbacks := getIncrementalAggrFuncCallbacks(name)
-		ae := &aggrFuncExpr{
+		ae := &metricsql.AggrFuncExpr{
 			Name: name,
 		}
 		tssExpected := []*timeseries{{
--- a/app/vmselect/promql/binary_op.go
+++ b/app/vmselect/promql/binary_op.go
@@ -6,24 +6,26 @@ import (
 	"strings"

 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/metricsql"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/metricsql/binaryop"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
 )

 var binaryOpFuncs = map[string]binaryOpFunc{
-	"+": newBinaryOpArithFunc(binaryOpPlus),
-	"-": newBinaryOpArithFunc(binaryOpMinus),
-	"*": newBinaryOpArithFunc(binaryOpMul),
-	"/": newBinaryOpArithFunc(binaryOpDiv),
-	"%": newBinaryOpArithFunc(binaryOpMod),
-	"^": newBinaryOpArithFunc(binaryOpPow),
+	"+": newBinaryOpArithFunc(binaryop.Plus),
+	"-": newBinaryOpArithFunc(binaryop.Minus),
+	"*": newBinaryOpArithFunc(binaryop.Mul),
+	"/": newBinaryOpArithFunc(binaryop.Div),
+	"%": newBinaryOpArithFunc(binaryop.Mod),
+	"^": newBinaryOpArithFunc(binaryop.Pow),

 	// cmp ops
-	"==": newBinaryOpCmpFunc(binaryOpEq),
-	"!=": newBinaryOpCmpFunc(binaryOpNeq),
-	">":  newBinaryOpCmpFunc(binaryOpGt),
-	"<":  newBinaryOpCmpFunc(binaryOpLt),
-	">=": newBinaryOpCmpFunc(binaryOpGte),
-	"<=": newBinaryOpCmpFunc(binaryOpLte),
+	"==": newBinaryOpCmpFunc(binaryop.Eq),
+	"!=": newBinaryOpCmpFunc(binaryop.Neq),
+	">":  newBinaryOpCmpFunc(binaryop.Gt),
+	"<":  newBinaryOpCmpFunc(binaryop.Lt),
+	">=": newBinaryOpCmpFunc(binaryop.Gte),
+	"<=": newBinaryOpCmpFunc(binaryop.Lte),

 	// logical set ops
 	"and":    binaryOpAnd,
@@ -31,38 +33,9 @@ var binaryOpFuncs = map[string]binaryOpFunc{
 	"unless": binaryOpUnless,

 	// New op
-	"if":      newBinaryOpArithFunc(binaryOpIf),
-	"ifnot":   newBinaryOpArithFunc(binaryOpIfnot),
-	"default": newBinaryOpArithFunc(binaryOpDefault),
-}
-
-var binaryOpPriorities = map[string]int{
-	"default": -1,
-
-	"if":    0,
-	"ifnot": 0,
-
-	// See https://prometheus.io/docs/prometheus/latest/querying/operators/#binary-operator-precedence
-	"or": 1,
-
-	"and":    2,
-	"unless": 2,
-
-	"==": 3,
-	"!=": 3,
-	"<":  3,
-	">":  3,
-	"<=": 3,
-	">=": 3,
-
-	"+": 4,
-	"-": 4,
-
-	"*": 5,
-	"/": 5,
-	"%": 5,
-
-	"^": 6,
+	"if":      newBinaryOpArithFunc(binaryop.If),
+	"ifnot":   newBinaryOpArithFunc(binaryop.Ifnot),
+	"default": newBinaryOpArithFunc(binaryop.Default),
 }

 func getBinaryOpFunc(op string) binaryOpFunc {
@@ -70,144 +43,8 @@ func getBinaryOpFunc(op string) binaryOpFunc {
 	return binaryOpFuncs[op]
 }

-func isBinaryOp(op string) bool {
-	return getBinaryOpFunc(op) != nil
-}
-
-func binaryOpPriority(op string) int {
-	op = strings.ToLower(op)
-	return binaryOpPriorities[op]
-}
-
-func scanBinaryOpPrefix(s string) int {
-	n := 0
-	for op := range binaryOpFuncs {
-		if len(s) < len(op) {
-			continue
-		}
-		ss := strings.ToLower(s[:len(op)])
-		if ss == op && len(op) > n {
-			n = len(op)
-		}
-	}
-	return n
-}
-
-func isRightAssociativeBinaryOp(op string) bool {
-	// See https://prometheus.io/docs/prometheus/latest/querying/operators/#binary-operator-precedence
-	return op == "^"
-}
-
-func isBinaryOpGroupModifier(s string) bool {
-	s = strings.ToLower(s)
-	switch s {
-	// See https://prometheus.io/docs/prometheus/latest/querying/operators/#vector-matching
-	case "on", "ignoring":
-		return true
-	default:
-		return false
-	}
-}
-
-func isBinaryOpJoinModifier(s string) bool {
-	s = strings.ToLower(s)
-	switch s {
-	case "group_left", "group_right":
-		return true
-	default:
-		return false
-	}
-}
-
-func isBinaryOpBoolModifier(s string) bool {
-	s = strings.ToLower(s)
-	return s == "bool"
-}
-
-func isBinaryOpCmp(op string) bool {
-	switch op {
-	case "==", "!=", ">", "<", ">=", "<=":
-		return true
-	default:
-		return false
-	}
-}
-
-func isBinaryOpLogicalSet(op string) bool {
-	op = strings.ToLower(op)
-	switch op {
-	case "and", "or", "unless":
-		return true
-	default:
-		return false
-	}
-}
-
-func binaryOpConstants(op string, left, right float64, isBool bool) float64 {
-	if isBinaryOpCmp(op) {
-		evalCmp := func(cf func(left, right float64) bool) float64 {
-			if isBool {
-				if cf(left, right) {
-					return 1
-				}
-				return 0
-			}
-			if cf(left, right) {
-				return left
-			}
-			return nan
-		}
-		switch op {
-		case "==":
-			left = evalCmp(binaryOpEq)
-		case "!=":
-			left = evalCmp(binaryOpNeq)
-		case ">":
-			left = evalCmp(binaryOpGt)
-		case "<":
-			left = evalCmp(binaryOpLt)
-		case ">=":
-			left = evalCmp(binaryOpGte)
-		case "<=":
-			left = evalCmp(binaryOpLte)
-		default:
-			logger.Panicf("BUG: unexpected comparison binaryOp: %q", op)
-		}
-	} else {
-		switch op {
-		case "+":
-			left = binaryOpPlus(left, right)
-		case "-":
-			left = binaryOpMinus(left, right)
-		case "*":
-			left = binaryOpMul(left, right)
-		case "/":
-			left = binaryOpDiv(left, right)
-		case "%":
-			left = binaryOpMod(left, right)
-		case "^":
-			left = binaryOpPow(left, right)
-		case "and":
-			// Nothing to do
-		case "or":
-			// Nothing to do
-		case "unless":
-			left = nan
-		case "default":
-			left = binaryOpDefault(left, right)
-		case "if":
-			left = binaryOpIf(left, right)
-		case "ifnot":
-			left = binaryOpIfnot(left, right)
-		default:
-			logger.Panicf("BUG: unexpected non-comparison binaryOp: %q", op)
-		}
-	}
-	return left
-}
-
 type binaryOpFuncArg struct {
-	be    *binaryOpExpr
+	be    *metricsql.BinaryOpExpr
 	left  []*timeseries
 	right []*timeseries
 }
@@ -267,7 +104,7 @@ func newBinaryOpFunc(bf func(left, right float64, isBool bool) float64) binaryOp
 	}
 }

-func adjustBinaryOpTags(be *binaryOpExpr, left, right []*timeseries) ([]*timeseries, []*timeseries, []*timeseries, error) {
+func adjustBinaryOpTags(be *metricsql.BinaryOpExpr, left, right []*timeseries) ([]*timeseries, []*timeseries, []*timeseries, error) {
 	if len(be.GroupModifier.Op) == 0 && len(be.JoinModifier.Op) == 0 {
 		if isScalar(left) {
 			// Fast path: `scalar op vector`
@@ -348,7 +185,7 @@ func adjustBinaryOpTags(be *binaryOpExpr, left, right []*timeseries) ([]*timeser
 	return rvsLeft, rvsRight, dst, nil
 }

-func ensureSingleTimeseries(side string, be *binaryOpExpr, tss []*timeseries) error {
+func ensureSingleTimeseries(side string, be *metricsql.BinaryOpExpr, tss []*timeseries) error {
 	if len(tss) == 0 {
 		logger.Panicf("BUG: tss must contain at least one value")
 	}
@@ -362,7 +199,7 @@ func ensureSingleTimeseries(side string, be *binaryOpExpr, tss []*timeseries) er
 	return nil
 }

-func groupJoin(singleTimeseriesSide string, be *binaryOpExpr, rvsLeft, rvsRight, tssLeft, tssRight []*timeseries) ([]*timeseries, []*timeseries, error) {
+func groupJoin(singleTimeseriesSide string, be *metricsql.BinaryOpExpr, rvsLeft, rvsRight, tssLeft, tssRight []*timeseries) ([]*timeseries, []*timeseries, error) {
 	joinTags := be.JoinModifier.Args
 	var m map[string]*timeseries
 	for _, tsLeft := range tssLeft {
@@ -432,8 +269,8 @@ func mergeNonOverlappingTimeseries(dst, src *timeseries) bool {
 	return true
 }

-func resetMetricGroupIfRequired(be *binaryOpExpr, ts *timeseries) {
-	if isBinaryOpCmp(be.Op) && !be.Bool {
+func resetMetricGroupIfRequired(be *metricsql.BinaryOpExpr, ts *timeseries) {
+	if metricsql.IsBinaryOpCmp(be.Op) && !be.Bool {
 		// Do not reset MetricGroup for non-boolean `compare` binary ops like Prometheus does.
 		return
 	}
@@ -445,90 +282,6 @@ func resetMetricGroupIfRequired(be *binaryOpExpr, ts *timeseries) {
 	ts.MetricName.ResetMetricGroup()
 }

-func binaryOpPlus(left, right float64) float64 {
-	return left + right
-}
-
-func binaryOpMinus(left, right float64) float64 {
-	return left - right
-}
-
-func binaryOpMul(left, right float64) float64 {
-	return left * right
-}
-
-func binaryOpDiv(left, right float64) float64 {
-	return left / right
-}
-
-func binaryOpMod(left, right float64) float64 {
-	return math.Mod(left, right)
-}
-
-func binaryOpPow(left, right float64) float64 {
-	return math.Pow(left, right)
-}
-
-func binaryOpDefault(left, right float64) float64 {
-	if math.IsNaN(left) {
-		return right
-	}
-	return left
-}
-
-func binaryOpIf(left, right float64) float64 {
-	if math.IsNaN(right) {
-		return nan
-	}
-	return left
-}
-
-func binaryOpIfnot(left, right float64) float64 {
-	if math.IsNaN(right) {
-		return left
-	}
-	return nan
-}
-
-func binaryOpEq(left, right float64) bool {
-	// Special handling for nan == nan.
-	// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/150 .
-	if math.IsNaN(left) {
-		return math.IsNaN(right)
-	}
-
-	return left == right
-}
-
-func binaryOpNeq(left, right float64) bool {
-	// Special handling for comparison with nan.
-	// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/150 .
-	if math.IsNaN(left) {
-		return !math.IsNaN(right)
-	}
-	if math.IsNaN(right) {
-		return true
-	}
-
-	return left != right
-}
-
-func binaryOpGt(left, right float64) bool {
-	return left > right
-}
-
-func binaryOpLt(left, right float64) bool {
-	return left < right
-}
-
-func binaryOpGte(left, right float64) bool {
-	return left >= right
-}
-
-func binaryOpLte(left, right float64) bool {
-	return left <= right
-}
-
 func binaryOpAnd(bfa *binaryOpFuncArg) ([]*timeseries, error) {
 	mLeft, mRight := createTimeseriesMapByTagSet(bfa.be, bfa.left, bfa.right)
 	var rvs []*timeseries
@@ -565,7 +318,7 @@ func binaryOpUnless(bfa *binaryOpFuncArg) ([]*timeseries, error) {
 	return rvs, nil
 }

-func createTimeseriesMapByTagSet(be *binaryOpExpr, left, right []*timeseries) (map[string][]*timeseries, map[string][]*timeseries) {
+func createTimeseriesMapByTagSet(be *metricsql.BinaryOpExpr, left, right []*timeseries) (map[string][]*timeseries, map[string][]*timeseries) {
 	groupTags := be.GroupModifier.Args
 	groupOp := strings.ToLower(be.GroupModifier.Op)
 	if len(groupOp) == 0 {
--- a/app/vmselect/promql/eval.go
+++ b/app/vmselect/promql/eval.go
@@ -11,6 +11,7 @@ import (
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/memory"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/metricsql"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
 	"github.com/VictoriaMetrics/metrics"
 )
@@ -57,6 +58,14 @@ func AdjustStartEnd(start, end, step int64) (int64, int64) {
 	if adjust > 0 {
 		end += step - adjust
 	}
+
+	// Make sure that the new number of points is the same as the initial number of points.
+	newPoints := (end-start)/step + 1
+	for newPoints > points {
+		end -= step
+		newPoints--
+	}
+
 	return start, end
 }

@@ -144,25 +153,25 @@ func getTimestamps(start, end, step int64) []int64 {
 	return timestamps
 }

-func evalExpr(ec *EvalConfig, e expr) ([]*timeseries, error) {
-	if me, ok := e.(*metricExpr); ok {
-		re := &rollupExpr{
+func evalExpr(ec *EvalConfig, e metricsql.Expr) ([]*timeseries, error) {
+	if me, ok := e.(*metricsql.MetricExpr); ok {
+		re := &metricsql.RollupExpr{
 			Expr: me,
 		}
-		rv, err := evalRollupFunc(ec, "default_rollup", rollupDefault, re, nil)
+		rv, err := evalRollupFunc(ec, "default_rollup", rollupDefault, e, re, nil)
 		if err != nil {
 			return nil, fmt.Errorf(`cannot evaluate %q: %s`, me.AppendString(nil), err)
 		}
 		return rv, nil
 	}
-	if re, ok := e.(*rollupExpr); ok {
-		rv, err := evalRollupFunc(ec, "default_rollup", rollupDefault, re, nil)
+	if re, ok := e.(*metricsql.RollupExpr); ok {
+		rv, err := evalRollupFunc(ec, "default_rollup", rollupDefault, e, re, nil)
 		if err != nil {
 			return nil, fmt.Errorf(`cannot evaluate %q: %s`, re.AppendString(nil), err)
 		}
 		return rv, nil
 	}
-	if fe, ok := e.(*funcExpr); ok {
+	if fe, ok := e.(*metricsql.FuncExpr); ok {
 		nrf := getRollupFunc(fe.Name)
 		if nrf == nil {
 			args, err := evalExprs(ec, fe.Args)
@@ -192,17 +201,17 @@ func evalExpr(ec *EvalConfig, e expr) ([]*timeseries, error) {
 		if err != nil {
 			return nil, err
 		}
-		rv, err := evalRollupFunc(ec, fe.Name, rf, re, nil)
+		rv, err := evalRollupFunc(ec, fe.Name, rf, e, re, nil)
 		if err != nil {
 			return nil, fmt.Errorf(`cannot evaluate %q: %s`, fe.AppendString(nil), err)
 		}
 		return rv, nil
 	}
-	if ae, ok := e.(*aggrFuncExpr); ok {
+	if ae, ok := e.(*metricsql.AggrFuncExpr); ok {
 		if callbacks := getIncrementalAggrFuncCallbacks(ae.Name); callbacks != nil {
 			fe, nrf := tryGetArgRollupFuncWithMetricExpr(ae)
 			if fe != nil {
-				// There is an optimized path for calculating aggrFuncExpr over rollupFunc over metricExpr.
+				// There is an optimized path for calculating metricsql.AggrFuncExpr over rollupFunc over metricsql.MetricExpr.
 				// The optimized path saves RAM for aggregates over big number of time series.
 				args, re, err := evalRollupFuncArgs(ec, fe)
 				if err != nil {
@@ -213,7 +222,7 @@ func evalExpr(ec *EvalConfig, e expr) ([]*timeseries, error) {
 					return nil, err
 				}
 				iafc := newIncrementalAggrFuncContext(ae, callbacks)
-				return evalRollupFunc(ec, fe.Name, rf, re, iafc)
+				return evalRollupFunc(ec, fe.Name, rf, e, re, iafc)
 			}
 		}
 		args, err := evalExprs(ec, ae.Args)
@@ -235,7 +244,7 @@ func evalExpr(ec *EvalConfig, e expr) ([]*timeseries, error) {
 		}
 		return rv, nil
 	}
-	if be, ok := e.(*binaryOpExpr); ok {
+	if be, ok := e.(*metricsql.BinaryOpExpr); ok {
 		left, err := evalExpr(ec, be.Left)
 		if err != nil {
 			return nil, err
@@ -259,18 +268,18 @@ func evalExpr(ec *EvalConfig, e expr) ([]*timeseries, error) {
 		}
 		return rv, nil
 	}
-	if ne, ok := e.(*numberExpr); ok {
+	if ne, ok := e.(*metricsql.NumberExpr); ok {
 		rv := evalNumber(ec, ne.N)
 		return rv, nil
 	}
-	if se, ok := e.(*stringExpr); ok {
+	if se, ok := e.(*metricsql.StringExpr); ok {
 		rv := evalString(ec, se.S)
 		return rv, nil
 	}
 	return nil, fmt.Errorf("unexpected expression %q", e.AppendString(nil))
 }

-func tryGetArgRollupFuncWithMetricExpr(ae *aggrFuncExpr) (*funcExpr, newRollupFunc) {
+func tryGetArgRollupFuncWithMetricExpr(ae *metricsql.AggrFuncExpr) (*metricsql.FuncExpr, newRollupFunc) {
 	if len(ae.Args) != 1 {
 		return nil, nil
 	}
@@ -281,31 +290,31 @@ func tryGetArgRollupFuncWithMetricExpr(ae *aggrFuncExpr) (*funcExpr, newRollupFu
 	// - rollupFunc(metricExpr)
 	// - rollupFunc(metricExpr[d])

-	if me, ok := e.(*metricExpr); ok {
+	if me, ok := e.(*metricsql.MetricExpr); ok {
 		// e = metricExpr
 		if me.IsEmpty() {
 			return nil, nil
 		}
-		fe := &funcExpr{
+		fe := &metricsql.FuncExpr{
 			Name: "default_rollup",
-			Args: []expr{me},
+			Args: []metricsql.Expr{me},
 		}
 		nrf := getRollupFunc(fe.Name)
 		return fe, nrf
 	}
-	if re, ok := e.(*rollupExpr); ok {
-		if me, ok := re.Expr.(*metricExpr); !ok || me.IsEmpty() || re.ForSubquery() {
+	if re, ok := e.(*metricsql.RollupExpr); ok {
+		if me, ok := re.Expr.(*metricsql.MetricExpr); !ok || me.IsEmpty() || re.ForSubquery() {
 			return nil, nil
 		}
 		// e = metricExpr[d]
-		fe := &funcExpr{
+		fe := &metricsql.FuncExpr{
 			Name: "default_rollup",
-			Args: []expr{re},
+			Args: []metricsql.Expr{re},
 		}
 		nrf := getRollupFunc(fe.Name)
 		return fe, nrf
 	}
-	fe, ok := e.(*funcExpr)
+	fe, ok := e.(*metricsql.FuncExpr)
 	if !ok {
 		return nil, nil
 	}
@@ -314,19 +323,23 @@ func tryGetArgRollupFuncWithMetricExpr(ae *aggrFuncExpr) (*funcExpr, newRollupFu
 		return nil, nil
 	}
 	rollupArgIdx := getRollupArgIdx(fe.Name)
+	if rollupArgIdx >= len(fe.Args) {
+		// Incorrect number of args for rollup func.
+		return nil, nil
+	}
 	arg := fe.Args[rollupArgIdx]
-	if me, ok := arg.(*metricExpr); ok {
+	if me, ok := arg.(*metricsql.MetricExpr); ok {
 		if me.IsEmpty() {
 			return nil, nil
 		}
 		// e = rollupFunc(metricExpr)
-		return &funcExpr{
+		return &metricsql.FuncExpr{
 			Name: fe.Name,
-			Args: []expr{me},
+			Args: []metricsql.Expr{me},
 		}, nrf
 	}
-	if re, ok := arg.(*rollupExpr); ok {
-		if me, ok := re.Expr.(*metricExpr); !ok || me.IsEmpty() || re.ForSubquery() {
+	if re, ok := arg.(*metricsql.RollupExpr); ok {
+		if me, ok := re.Expr.(*metricsql.MetricExpr); !ok || me.IsEmpty() || re.ForSubquery() {
 			return nil, nil
 		}
 		// e = rollupFunc(metricExpr[d])
@@ -335,7 +348,7 @@ func tryGetArgRollupFuncWithMetricExpr(ae *aggrFuncExpr) (*funcExpr, newRollupFu
 	return nil, nil
 }

-func evalExprs(ec *EvalConfig, es []expr) ([][]*timeseries, error) {
+func evalExprs(ec *EvalConfig, es []metricsql.Expr) ([][]*timeseries, error) {
 	var rvs [][]*timeseries
 	for _, e := range es {
 		rv, err := evalExpr(ec, e)
@@ -347,9 +360,12 @@ func evalExprs(ec *EvalConfig, es []expr) ([][]*timeseries, error) {
 	return rvs, nil
 }

-func evalRollupFuncArgs(ec *EvalConfig, fe *funcExpr) ([]interface{}, *rollupExpr, error) {
-	var re *rollupExpr
+func evalRollupFuncArgs(ec *EvalConfig, fe *metricsql.FuncExpr) ([]interface{}, *metricsql.RollupExpr, error) {
+	var re *metricsql.RollupExpr
 	rollupArgIdx := getRollupArgIdx(fe.Name)
+	if len(fe.Args) <= rollupArgIdx {
+		return nil, nil, fmt.Errorf("expecting at least %d args to %q; got %d args; expr: %q", rollupArgIdx+1, fe.Name, len(fe.Args), fe.AppendString(nil))
+	}
 	args := make([]interface{}, len(fe.Args))
 	for i, arg := range fe.Args {
 		if i == rollupArgIdx {
@@ -366,11 +382,11 @@ func evalRollupFuncArgs(ec *EvalConfig, fe *funcExpr) ([]interface{}, *rollupExp
 	return args, re, nil
 }

-func getRollupExprArg(arg expr) *rollupExpr {
-	re, ok := arg.(*rollupExpr)
+func getRollupExprArg(arg metricsql.Expr) *metricsql.RollupExpr {
+	re, ok := arg.(*metricsql.RollupExpr)
 	if !ok {
-		// Wrap non-rollup arg into rollupExpr.
-		return &rollupExpr{
+		// Wrap non-rollup arg into metricsql.RollupExpr.
+		return &metricsql.RollupExpr{
 			Expr: arg,
 		}
 	}
@@ -378,45 +394,50 @@ func getRollupExprArg(arg expr) *rollupExpr {
 		// Return standard rollup if it doesn't contain subquery.
 		return re
 	}
-	me, ok := re.Expr.(*metricExpr)
+	me, ok := re.Expr.(*metricsql.MetricExpr)
 	if !ok {
 		// arg contains subquery.
 		return re
 	}
 	// Convert me[w:step] -> default_rollup(me)[w:step]
 	reNew := *re
-	reNew.Expr = &funcExpr{
+	reNew.Expr = &metricsql.FuncExpr{
 		Name: "default_rollup",
-		Args: []expr{
-			&rollupExpr{Expr: me},
+		Args: []metricsql.Expr{
+			&metricsql.RollupExpr{Expr: me},
 		},
 	}
 	return &reNew
 }

-func evalRollupFunc(ec *EvalConfig, name string, rf rollupFunc, re *rollupExpr, iafc *incrementalAggrFuncContext) ([]*timeseries, error) {
+func evalRollupFunc(ec *EvalConfig, name string, rf rollupFunc, expr metricsql.Expr, re *metricsql.RollupExpr, iafc *incrementalAggrFuncContext) ([]*timeseries, error) {
 	ecNew := ec
 	var offset int64
 	if len(re.Offset) > 0 {
 		var err error
-		offset, err = DurationValue(re.Offset, ec.Step)
+		offset, err = metricsql.DurationValue(re.Offset, ec.Step)
 		if err != nil {
 			return nil, err
 		}
 		ecNew = newEvalConfig(ec)
 		ecNew.Start -= offset
 		ecNew.End -= offset
-		ecNew.Start, ecNew.End = AdjustStartEnd(ecNew.Start, ecNew.End, ecNew.Step)
+		if ecNew.MayCache {
+			start, end := AdjustStartEnd(ecNew.Start, ecNew.End, ecNew.Step)
+			offset += ecNew.Start - start
+			ecNew.Start = start
+			ecNew.End = end
+		}
 	}
 	var rvs []*timeseries
 	var err error
-	if me, ok := re.Expr.(*metricExpr); ok {
-		rvs, err = evalRollupFuncWithMetricExpr(ecNew, name, rf, me, iafc, re.Window)
+	if me, ok := re.Expr.(*metricsql.MetricExpr); ok {
+		rvs, err = evalRollupFuncWithMetricExpr(ecNew, name, rf, expr, me, iafc, re.Window)
 	} else {
 		if iafc != nil {
 			logger.Panicf("BUG: iafc must be nil for rollup %q over subquery %q", name, re.AppendString(nil))
 		}
-		rvs, err = evalRollupFuncWithSubquery(ecNew, name, rf, re)
+		rvs, err = evalRollupFuncWithSubquery(ecNew, name, rf, expr, re)
 	}
 	if err != nil {
 		return nil, err
@@ -435,12 +456,12 @@ func evalRollupFunc(ec *EvalConfig, name string, rf rollupFunc, re *rollupExpr,
 	return rvs, nil
 }

-func evalRollupFuncWithSubquery(ec *EvalConfig, name string, rf rollupFunc, re *rollupExpr) ([]*timeseries, error) {
-	// Do not use rollupResultCacheV here, since it works only with metricExpr.
+func evalRollupFuncWithSubquery(ec *EvalConfig, name string, rf rollupFunc, expr metricsql.Expr, re *metricsql.RollupExpr) ([]*timeseries, error) {
+	// TODO: determine whether to use rollupResultCacheV here.
 	var step int64
 	if len(re.Step) > 0 {
 		var err error
-		step, err = DurationValue(re.Step, ec.Step)
+		step, err = metricsql.PositiveDurationValue(re.Step, ec.Step)
 		if err != nil {
 			return nil, err
 		}
@@ -450,7 +471,7 @@ func evalRollupFuncWithSubquery(ec *EvalConfig, name string, rf rollupFunc, re *
 	var window int64
 	if len(re.Window) > 0 {
 		var err error
-		window, err = DurationValue(re.Window, ec.Step)
+		window, err = metricsql.PositiveDurationValue(re.Window, ec.Step)
 		if err != nil {
 			return nil, err
 		}
@@ -467,9 +488,19 @@ func evalRollupFuncWithSubquery(ec *EvalConfig, name string, rf rollupFunc, re *
 	if err != nil {
 		return nil, err
 	}
+	if len(tssSQ) == 0 {
+		if name == "absent_over_time" {
+			tss := evalNumber(ec, 1)
+			return tss, nil
+		}
+		return nil, nil
+	}

 	sharedTimestamps := getTimestamps(ec.Start, ec.End, ec.Step)
-	preFunc, rcs := getRollupConfigs(name, rf, ec.Start, ec.End, ec.Step, window, ec.LookbackDelta, sharedTimestamps)
+	preFunc, rcs, err := getRollupConfigs(name, rf, expr, ec.Start, ec.End, ec.Step, window, ec.LookbackDelta, sharedTimestamps)
+	if err != nil {
+		return nil, err
+	}
 	tss := make([]*timeseries, 0, len(tssSQ)*len(rcs))
 	var tssLock sync.Mutex
 	removeMetricGroup := !rollupFuncsKeepMetricGroup[name]
@@ -477,6 +508,13 @@ func evalRollupFuncWithSubquery(ec *EvalConfig, name string, rf rollupFunc, re *
 		values, timestamps = removeNanValues(values[:0], timestamps[:0], tsSQ.Values, tsSQ.Timestamps)
 		preFunc(values, timestamps)
 		for _, rc := range rcs {
+			if tsm := newTimeseriesMap(name, sharedTimestamps, &tsSQ.MetricName); tsm != nil {
+				rc.DoTimeseriesMap(tsm, values, timestamps)
+				tssLock.Lock()
+				tss = tsm.AppendTimeseriesTo(tss)
+				tssLock.Unlock()
+				continue
+			}
 			var ts timeseries
 			doRollupForTimeseries(rc, &ts, &tsSQ.MetricName, values, timestamps, sharedTimestamps, removeMetricGroup)
 			tssLock.Lock()
@@ -544,21 +582,22 @@ var (
 	rollupResultCacheMiss        = metrics.NewCounter(`vm_rollup_result_cache_miss_total`)
 )

-func evalRollupFuncWithMetricExpr(ec *EvalConfig, name string, rf rollupFunc, me *metricExpr, iafc *incrementalAggrFuncContext, windowStr string) ([]*timeseries, error) {
+func evalRollupFuncWithMetricExpr(ec *EvalConfig, name string, rf rollupFunc,
+	expr metricsql.Expr, me *metricsql.MetricExpr, iafc *incrementalAggrFuncContext, windowStr string) ([]*timeseries, error) {
 	if me.IsEmpty() {
 		return evalNumber(ec, nan), nil
 	}
 	var window int64
 	if len(windowStr) > 0 {
 		var err error
-		window, err = DurationValue(windowStr, ec.Step)
+		window, err = metricsql.PositiveDurationValue(windowStr, ec.Step)
 		if err != nil {
 			return nil, err
 		}
 	}

 	// Search for partial results in cache.
-	tssCached, start := rollupResultCacheV.Get(name, ec, me, iafc, window)
+	tssCached, start := rollupResultCacheV.Get(ec, expr, window)
 	if start > ec.End {
 		// The result is fully cached.
 		rollupResultCacheFullHits.Inc()
@@ -570,11 +609,20 @@ func evalRollupFuncWithMetricExpr(ec *EvalConfig, name string, rf rollupFunc, me
 		rollupResultCacheMiss.Inc()
 	}

+	// Obtain rollup configs before fetching data from db,
+	// so type errors can be caught earlier.
+	sharedTimestamps := getTimestamps(start, ec.End, ec.Step)
+	preFunc, rcs, err := getRollupConfigs(name, rf, expr, start, ec.End, ec.Step, window, ec.LookbackDelta, sharedTimestamps)
+	if err != nil {
+		return nil, err
+	}
+
 	// Fetch the remaining part of the result.
+	tfs := toTagFilters(me.LabelFilters)
 	sq := &storage.SearchQuery{
 		MinTimestamp: start - window - maxSilenceInterval,
 		MaxTimestamp: ec.End + ec.Step,
-		TagFilterss:  [][]storage.TagFilter{me.TagFilters},
+		TagFilterss:  [][]storage.TagFilter{tfs},
 	}
 	rss, err := netstorage.ProcessSearchQuery(sq, true, ec.Deadline)
 	if err != nil {
@@ -583,14 +631,16 @@ func evalRollupFuncWithMetricExpr(ec *EvalConfig, name string, rf rollupFunc, me
 	rssLen := rss.Len()
 	if rssLen == 0 {
 		rss.Cancel()
+		var tss []*timeseries
+		if name == "absent_over_time" {
+			tss = getAbsentTimeseries(ec, me)
+		}
 		// Add missing points until ec.End.
 		// Do not cache the result, since missing points
 		// may be backfilled in the future.
-		tss := mergeTimeseries(tssCached, nil, start, ec)
+		tss = mergeTimeseries(tssCached, tss, start, ec)
 		return tss, nil
 	}
-	sharedTimestamps := getTimestamps(start, ec.End, ec.Step)
-	preFunc, rcs := getRollupConfigs(name, rf, start, ec.End, ec.Step, window, ec.LookbackDelta, sharedTimestamps)

 	// Verify timeseries fit available memory after the rollup.
 	// Take into account points from tssCached.
@@ -602,8 +652,8 @@ func evalRollupFuncWithMetricExpr(ec *EvalConfig, name string, rf rollupFunc, me
 		if iafc.ae.Modifier.Op != "" {
 			// Increase the number of timeseries for non-empty group list: `aggr() by (something)`,
 			// since each group can have own set of time series in memory.
-			// Estimate the number of such groups is lower than 100 :)
-			timeseriesLen *= 100
+			// Assume the number of such groups is lower than 1000 :)
+			timeseriesLen *= 1000
 		}
 	}
 	rollupPoints := mulNoOverflow(pointsPerTimeseries, int64(timeseriesLen*len(rcs)))
@@ -622,16 +672,15 @@ func evalRollupFuncWithMetricExpr(ec *EvalConfig, name string, rf rollupFunc, me
 	removeMetricGroup := !rollupFuncsKeepMetricGroup[name]
 	var tss []*timeseries
 	if iafc != nil {
-		tss, err = evalRollupWithIncrementalAggregate(iafc, rss, rcs, preFunc, sharedTimestamps, removeMetricGroup)
+		tss, err = evalRollupWithIncrementalAggregate(name, iafc, rss, rcs, preFunc, sharedTimestamps, removeMetricGroup)
 	} else {
-		tss, err = evalRollupNoIncrementalAggregate(rss, rcs, preFunc, sharedTimestamps, removeMetricGroup)
+		tss, err = evalRollupNoIncrementalAggregate(name, rss, rcs, preFunc, sharedTimestamps, removeMetricGroup)
 	}
 	if err != nil {
 		return nil, err
 	}
 	tss = mergeTimeseries(tssCached, tss, start, ec)
-	rollupResultCacheV.Put(name, ec, me, iafc, window, tss)
-
+	rollupResultCacheV.Put(ec, expr, window, tss)
 	return tss, nil
 }

@@ -647,13 +696,20 @@ func getRollupMemoryLimiter() *memoryLimiter {
 	return &rollupMemoryLimiter
 }

-func evalRollupWithIncrementalAggregate(iafc *incrementalAggrFuncContext, rss *netstorage.Results, rcs []*rollupConfig,
+func evalRollupWithIncrementalAggregate(name string, iafc *incrementalAggrFuncContext, rss *netstorage.Results, rcs []*rollupConfig,
 	preFunc func(values []float64, timestamps []int64), sharedTimestamps []int64, removeMetricGroup bool) ([]*timeseries, error) {
 	err := rss.RunParallel(func(rs *netstorage.Result, workerID uint) {
 		preFunc(rs.Values, rs.Timestamps)
 		ts := getTimeseries()
 		defer putTimeseries(ts)
 		for _, rc := range rcs {
+			if tsm := newTimeseriesMap(name, sharedTimestamps, &rs.MetricName); tsm != nil {
+				rc.DoTimeseriesMap(tsm, rs.Values, rs.Timestamps)
+				for _, ts := range tsm.m {
+					iafc.updateTimeseries(ts, workerID)
+				}
+				continue
+			}
 			ts.Reset()
 			doRollupForTimeseries(rc, ts, &rs.MetricName, rs.Values, rs.Timestamps, sharedTimestamps, removeMetricGroup)
 			iafc.updateTimeseries(ts, workerID)
@@ -670,13 +726,20 @@ func evalRollupWithIncrementalAggregate(iafc *incrementalAggrFuncContext, rss *n
 	return tss, nil
 }

-func evalRollupNoIncrementalAggregate(rss *netstorage.Results, rcs []*rollupConfig,
+func evalRollupNoIncrementalAggregate(name string, rss *netstorage.Results, rcs []*rollupConfig,
 	preFunc func(values []float64, timestamps []int64), sharedTimestamps []int64, removeMetricGroup bool) ([]*timeseries, error) {
 	tss := make([]*timeseries, 0, rss.Len()*len(rcs))
 	var tssLock sync.Mutex
 	err := rss.RunParallel(func(rs *netstorage.Result, workerID uint) {
 		preFunc(rs.Values, rs.Timestamps)
 		for _, rc := range rcs {
+			if tsm := newTimeseriesMap(name, sharedTimestamps, &rs.MetricName); tsm != nil {
+				rc.DoTimeseriesMap(tsm, rs.Values, rs.Timestamps)
+				tssLock.Lock()
+				tss = tsm.AppendTimeseriesTo(tss)
+				tssLock.Unlock()
+				continue
+			}
 			var ts timeseries
 			doRollupForTimeseries(rc, &ts, &rs.MetricName, rs.Values, rs.Timestamps, sharedTimestamps, removeMetricGroup)
 			tssLock.Lock()
@@ -704,62 +767,6 @@ func doRollupForTimeseries(rc *rollupConfig, tsDst *timeseries, mnSrc *storage.M
 	tsDst.denyReuse = true
 }

-func getRollupConfigs(name string, rf rollupFunc, start, end, step, window int64, lookbackDelta int64, sharedTimestamps []int64) (
-	func(values []float64, timestamps []int64), []*rollupConfig) {
-	preFunc := func(values []float64, timestamps []int64) {}
-	if rollupFuncsRemoveCounterResets[name] {
-		preFunc = func(values []float64, timestamps []int64) {
-			removeCounterResets(values)
-		}
-	}
-	newRollupConfig := func(rf rollupFunc, tagValue string) *rollupConfig {
-		return &rollupConfig{
-			TagValue:        tagValue,
-			Func:            rf,
-			Start:           start,
-			End:             end,
-			Step:            step,
-			Window:          window,
-			MayAdjustWindow: rollupFuncsMayAdjustWindow[name],
-			LookbackDelta:   lookbackDelta,
-			Timestamps:      sharedTimestamps,
-		}
-	}
-	appendRollupConfigs := func(dst []*rollupConfig) []*rollupConfig {
-		dst = append(dst, newRollupConfig(rollupMin, "min"))
-		dst = append(dst, newRollupConfig(rollupMax, "max"))
-		dst = append(dst, newRollupConfig(rollupAvg, "avg"))
-		return dst
-	}
-	var rcs []*rollupConfig
-	switch name {
-	case "rollup":
-		rcs = appendRollupConfigs(rcs)
-	case "rollup_rate", "rollup_deriv":
-		preFuncPrev := preFunc
-		preFunc = func(values []float64, timestamps []int64) {
-			preFuncPrev(values, timestamps)
-			derivValues(values, timestamps)
-		}
-		rcs = appendRollupConfigs(rcs)
-	case "rollup_increase", "rollup_delta":
-		preFuncPrev := preFunc
-		preFunc = func(values []float64, timestamps []int64) {
-			preFuncPrev(values, timestamps)
-			deltaValues(values)
-		}
-		rcs = appendRollupConfigs(rcs)
-	case "rollup_candlestick":
-		rcs = append(rcs, newRollupConfig(rollupFirst, "open"))
-		rcs = append(rcs, newRollupConfig(rollupLast, "close"))
-		rcs = append(rcs, newRollupConfig(rollupMin, "low"))
-		rcs = append(rcs, newRollupConfig(rollupMax, "high"))
-	default:
-		rcs = append(rcs, newRollupConfig(rf, ""))
-	}
-	return preFunc, rcs
-}
-
 var bbPool bytesutil.ByteBufferPool

 func evalNumber(ec *EvalConfig, n float64) []*timeseries {
@@ -798,3 +805,23 @@ func mulNoOverflow(a, b int64) int64 {
 	}
 	return a * b
 }
+
+func toTagFilters(lfs []metricsql.LabelFilter) []storage.TagFilter {
+	tfs := make([]storage.TagFilter, len(lfs))
+	for i := range lfs {
+		toTagFilter(&tfs[i], &lfs[i])
+	}
+	return tfs
+}
+
+func toTagFilter(dst *storage.TagFilter, src *metricsql.LabelFilter) {
+	if src.Label != "__name__" {
+		dst.Key = []byte(src.Label)
+	} else {
+		// The __name__ filter must have a nil key; this is required for storage.Search.
+		dst.Key = nil
+	}
+	dst.Value = []byte(src.Value)
+	dst.IsRegexp = src.IsRegexp
+	dst.IsNegative = src.IsNegative
+}
--- a/app/vmselect/promql/exec.go
+++ b/app/vmselect/promql/exec.go
@@ -11,6 +11,7 @@ import (

 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/netstorage"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/metricsql"
 	"github.com/VictoriaMetrics/metrics"
 )

@@ -18,17 +19,6 @@ var logSlowQueryDuration = flag.Duration("search.logSlowQueryDuration", 5*time.S

 var slowQueries = metrics.NewCounter(`vm_slow_queries_total`)

-// ExpandWithExprs expands WITH expressions inside q and returns the resulting
-// PromQL without WITH expressions.
-func ExpandWithExprs(q string) (string, error) {
-	e, err := parsePromQLWithCache(q)
-	if err != nil {
-		return "", err
-	}
-	buf := e.AppendString(nil)
-	return string(buf), nil
-}
-
 // Exec executes q for the given ec.
 func Exec(ec *EvalConfig, q string, isFirstPointOnly bool) ([]netstorage.Result, error) {
 	if *logSlowQueryDuration > 0 {
@@ -36,8 +26,8 @@ func Exec(ec *EvalConfig, q string, isFirstPointOnly bool) ([]netstorage.Result,
 		defer func() {
 			d := time.Since(startTime)
 			if d >= *logSlowQueryDuration {
-				logger.Infof("slow query according to -search.logSlowQueryDuration=%s: duration=%s, start=%d, end=%d, step=%d, query=%q",
-					*logSlowQueryDuration, d, ec.Start/1000, ec.End/1000, ec.Step/1000, q)
+				logger.Infof("slow query according to -search.logSlowQueryDuration=%s: duration=%.3f seconds, start=%d, end=%d, step=%d, query=%q",
+					*logSlowQueryDuration, d.Seconds(), ec.Start/1000, ec.End/1000, ec.Step/1000, q)
 				slowQueries.Inc()
 			}
 		}()
@@ -85,12 +75,12 @@ func Exec(ec *EvalConfig, q string, isFirstPointOnly bool) ([]netstorage.Result,
 	return result, err
 }

-func maySortResults(e expr, tss []*timeseries) bool {
+func maySortResults(e metricsql.Expr, tss []*timeseries) bool {
 	if len(tss) > 100 {
 		// There is no sense in sorting a lot of results
 		return false
 	}
-	fe, ok := e.(*funcExpr)
+	fe, ok := e.(*metricsql.FuncExpr)
 	if !ok {
 		return true
 	}
@@ -154,10 +144,10 @@ func removeNaNs(tss []*timeseries) []*timeseries {
 	return rvs
 }

-func parsePromQLWithCache(q string) (expr, error) {
+func parsePromQLWithCache(q string) (metricsql.Expr, error) {
 	pcv := parseCacheV.Get(q)
 	if pcv == nil {
-		e, err := parsePromQL(q)
+		e, err := metricsql.Parse(q)
 		pcv = &parseCacheValue{
 			e:   e,
 			err: err,
@@ -189,7 +179,7 @@ var parseCacheV = func() *parseCache {
 const parseCacheMaxLen = 10e3

 type parseCacheValue struct {
-	e   expr
+	e   metricsql.Expr
 	err error
 }

--- a/app/vmselect/promql/exec_test.go
+++ b/app/vmselect/promql/exec_test.go
--- a/app/vmselect/promql/parser.go
+++ b/app/vmselect/promql/parser.go
--- a/app/vmselect/promql/parser_test.go
+++ b/app/vmselect/promql/parser_test.go
@@ -47,672 +47,3 @@ func TestParseMetricSelectorError(t *testing.T) {
 	f(`foo[5m]`)
 	f(`foo offset 5m`)
 }
-
-func TestParsePromQLSuccess(t *testing.T) {
-	another := func(s string, sExpected string) {
-		t.Helper()
-
-		e, err := parsePromQL(s)
-		if err != nil {
-			t.Fatalf("unexpected error when parsing %q: %s", s, err)
-		}
-		res := e.AppendString(nil)
-		if string(res) != sExpected {
-			t.Fatalf("unexpected string constructed;\ngot\n%q\nwant\n%q", res, sExpected)
-		}
-	}
-	same := func(s string) {
-		t.Helper()
-		another(s, s)
-	}
-
-	// metricExpr
-	same(`{}`)
-	same(`{}[5m]`)
-	same(`{}[5m:]`)
-	same(`{}[:]`)
-	another(`{}[: ]`, `{}[:]`)
-	same(`{}[:3s]`)
-	another(`{}[: 3s ]`, `{}[:3s]`)
-	same(`{}[5m:3s]`)
-	another(`{}[ 5m : 3s ]`, `{}[5m:3s]`)
-	same(`{} offset 5m`)
-	same(`{}[5m] offset 10y`)
-	same(`{}[5.3m:3.4s] offset 10y`)
-	same(`{}[:3.4s] offset 10y`)
-	same(`{Foo="bAR"}`)
-	same(`{foo="bar"}`)
-	same(`{foo="bar"}[5m]`)
-	same(`{foo="bar"}[5m:]`)
-	same(`{foo="bar"}[5m:3s]`)
-	same(`{foo="bar"} offset 10y`)
-	same(`{foo="bar"}[5m] offset 10y`)
-	same(`{foo="bar"}[5m:3s] offset 10y`)
-	another(`{foo="bar"}[5m] oFFSEt 10y`, `{foo="bar"}[5m] offset 10y`)
-	same("METRIC")
-	same("metric")
-	same("m_e:tri44:_c123")
-	another("-metric", "0 - metric")
-	same(`metric offset 10h`)
-	same("metric[5m]")
-	same("metric[5m:3s]")
-	same("metric[5m] offset 10h")
-	same("metric[5m:3s] offset 10h")
-	same("metric[5i:3i] offset 10i")
-	same(`metric{foo="bar"}`)
-	same(`metric{foo="bar"} offset 10h`)
-	same(`metric{foo!="bar"}[2d]`)
-	same(`metric{foo="bar"}[2d] offset 10h`)
-	same(`metric{foo="bar", b="sdfsdf"}[2d:3h] offset 10h`)
-	another(`  metric  {  foo  = "bar"  }  [  2d ]   offset   10h  `, `metric{foo="bar"}[2d] offset 10h`)
-	// metric name matching keywords
-	same("rate")
-	same("RATE")
-	same("by")
-	same("BY")
-	same("bool")
-	same("BOOL")
-	same("unless")
-	same("UNLESS")
-	same("Ignoring")
-	same("with")
-	same("WITH")
-	same("With")
-	same("alias")
-	same(`alias{foo="bar"}`)
-	same(`aLIas{alias="aa"}`)
-	another(`al\ias`, `alias`)
-	// identifiers with with escape chars
-	same(`foo\ bar`)
-	same(`foo\-bar\{{baz\+bar="aa"}`)
-	another(`\x2E\x2ef\oo{b\xEF\ar="aa"}`, `\x2e.foo{b\xefar="aa"}`)
-	// Duplicate filters
-	same(`foo{__name__="bar"}`)
-	same(`foo{a="b", a="c", __name__="aaa", b="d"}`)
-	// Metric filters ending with comma
-	another(`m{foo="bar",}`, `m{foo="bar"}`)
-	// String concat in tag value
-	another(`m{foo="bar" + "baz"}`, `m{foo="barbaz"}`)
-
-	// Valid regexp
-	same(`foo{bar=~"x"}`)
-	same(`foo{bar=~"^x"}`)
-	same(`foo{bar=~"^x$"}`)
-	same(`foo{bar=~"^(a[bc]|d)$"}`)
-	same(`foo{bar!~"x"}`)
-	same(`foo{bar!~"^x"}`)
-	same(`foo{bar!~"^x$"}`)
-	same(`foo{bar!~"^(a[bc]|d)$"}`)
-
-	// stringExpr
-	same(`""`)
-	same(`"\n\t\r 12:{}[]()44"`)
-	another(`''`, `""`)
-	another("``", `""`)
-	another("   `foo\"b'ar`  ", "\"foo\\\"b'ar\"")
-	another(`  'foo\'bar"BAZ'  `, `"foo'bar\"BAZ"`)
-	// string concat
-	another(`"foo"+'bar'`, `"foobar"`)
-
-	// numberExpr
-	same(`1`)
-	same(`1.23`)
-	same(`0.23`)
-	same(`1.2e+45`)
-	same(`1.2e-45`)
-	same(`-1`)
-	same(`-1.23`)
-	same(`-0.23`)
-	same(`-1.2e+45`)
-	same(`-1.2e-45`)
-	same(`-1.2e-45`)
-	another(`12.5E34`, `1.25e+35`)
-	another(`-.2`, `-0.2`)
-	another(`-.2E-2`, `-0.002`)
-	same(`NaN`)
-	another(`nan`, `NaN`)
-	another(`NAN`, `NaN`)
-	another(`nAN`, `NaN`)
-	another(`Inf`, `+Inf`)
-	another(`INF`, `+Inf`)
-	another(`inf`, `+Inf`)
-	another(`+Inf`, `+Inf`)
-	another(`-Inf`, `-Inf`)
-	another(`-inF`, `-Inf`)
-
-	// binaryOpExpr
-	another(`nan == nan`, `NaN`)
-	another(`nan ==bool nan`, `1`)
-	another(`nan !=bool nan`, `0`)
-	another(`nan !=bool 2`, `1`)
-	another(`2 !=bool nan`, `1`)
-	another(`nan >bool nan`, `0`)
-	another(`nan <bool nan`, `0`)
-	another(`1 ==bool nan`, `0`)
-	another(`NaN !=bool 1`, `1`)
-	another(`inf >=bool 2`, `1`)
-	another(`-1 >bool -inf`, `1`)
-	another(`-1 <bool -inf`, `0`)
-	another(`nan + 2 *3 * inf`, `NaN`)
-	another(`INF - Inf`, `NaN`)
-	another(`Inf + inf`, `+Inf`)
-	another(`1/0`, `+Inf`)
-	another(`0/0`, `NaN`)
-	another(`-m`, `0 - m`)
-	same(`m + ignoring () n[5m]`)
-	another(`M + IGNORING () N[5m]`, `M + ignoring () N[5m]`)
-	same(`m + on (foo) n[5m]`)
-	another(`m + ON (Foo) n[5m]`, `m + on (Foo) n[5m]`)
-	same(`m + ignoring (a, b) n[5m]`)
-	another(`1 or 2`, `1`)
-	another(`1 and 2`, `1`)
-	another(`1 unless 2`, `NaN`)
-	another(`1 default 2`, `1`)
-	another(`1 default NaN`, `1`)
-	another(`NaN default 2`, `2`)
-	another(`1 > 2`, `NaN`)
-	another(`1 > bool 2`, `0`)
-	another(`3 >= 2`, `3`)
-	another(`3 <= bool 2`, `0`)
-	another(`1 + -2 - 3`, `-4`)
-	another(`1 / 0 + 2`, `+Inf`)
-	another(`2 + -1 / 0`, `-Inf`)
-	another(`-1 ^ 0.5`, `NaN`)
-	another(`512.5 - (1 + 3) * (2 ^ 2) ^ 3`, `256.5`)
-	another(`1 == bool 1 != bool 24 < bool 4 > bool -1`, `1`)
-	another(`1 == bOOl 1 != BOOL 24 < Bool 4 > booL -1`, `1`)
-	another(`m1+on(foo)group_left m2`, `m1 + on (foo) group_left () m2`)
-	another(`M1+ON(FOO)GROUP_left M2`, `M1 + on (FOO) group_left () M2`)
-	same(`m1 + on (foo) group_right () m2`)
-	same(`m1 + on (foo, bar) group_right (x, y) m2`)
-	another(`m1 + on (foo, bar,) group_right (x, y,) m2`, `m1 + on (foo, bar) group_right (x, y) m2`)
-	same(`m1 == bool on (foo, bar) group_right (x, y) m2`)
-	another(`5 - 1 + 3 * 2 ^ 2 ^ 3 - 2  OR Metric {Bar= "Baz", aaa!="bb",cc=~"dd" ,zz !~"ff" } `,
-		`770 or Metric{Bar="Baz", aaa!="bb", cc=~"dd", zz!~"ff"}`)
-	same(`"foo" + bar()`)
-	same(`"foo" + bar{x="y"}`)
-	same(`("foo"[3s] + bar{x="y"})[5m:3s] offset 10s`)
-	same(`("foo"[3s] + bar{x="y"})[5i:3i] offset 10i`)
-	same(`bar + "foo" offset 3s`)
-	same(`bar + "foo" offset 3i`)
-	another(`1+2 if 2>3`, `NaN`)
-	another(`1+4 if 2<3`, `5`)
-	another(`2+6 default 3 if 2>3`, `8`)
-	another(`2+6 if 2>3 default NaN`, `NaN`)
-	another(`42 if 3>2 if 2+2<5`, `42`)
-	another(`42 if 3>2 if 2+2>=5`, `NaN`)
-	another(`1+2 ifnot 2>3`, `3`)
-	another(`1+4 ifnot 2<3`, `NaN`)
-	another(`2+6 default 3 ifnot 2>3`, `8`)
-	another(`2+6 ifnot 2>3 default NaN`, `8`)
-	another(`42 if 3>2 ifnot 2+2<5`, `NaN`)
-	another(`42 if 3>2 ifnot 2+2>=5`, `42`)
-
-	// parensExpr
-	another(`(-foo + ((bar) / (baz))) + ((23))`, `((0 - foo) + (bar / baz)) + 23`)
-	another(`(FOO + ((Bar) / (baZ))) + ((23))`, `(FOO + (Bar / baZ)) + 23`)
-	same(`(foo, bar)`)
-	another(`((foo, bar),(baz))`, `((foo, bar), baz)`)
-	same(`(foo, (bar, baz), ((x, y), (z, y), xx))`)
-	another(`1+(foo, bar,)`, `1 + (foo, bar)`)
-	another(`((foo(bar,baz)), (1+(2)+(3,4)+()))`, `(foo(bar, baz), (3 + (3, 4)) + ())`)
-	same(`()`)
-
-	// funcExpr
-	same(`f()`)
-	another(`f(x,)`, `f(x)`)
-	another(`-f()-Ff()`, `(0 - f()) - Ff()`)
-	same(`F()`)
-	another(`+F()`, `F()`)
-	another(`++F()`, `F()`)
-	another(`--F()`, `0 - (0 - F())`)
-	same(`f(http_server_request)`)
-	same(`f(http_server_request)[4s:5m] offset 10m`)
-	same(`f(http_server_request)[4i:5i] offset 10i`)
-	same(`F(HttpServerRequest)`)
-	same(`f(job, foo)`)
-	same(`F(Job, Foo)`)
-	another(` FOO (bar) + f  (  m  (  ),ff(1 + (  2.5)) ,M[5m ]  , "ff"  )`, `FOO(bar) + f(m(), ff(3.5), M[5m], "ff")`)
-	// funcName matching keywords
-	same(`by(2)`)
-	same(`BY(2)`)
-	same(`or(2)`)
-	same(`OR(2)`)
-	same(`bool(2)`)
-	same(`BOOL(2)`)
-	same(`rate(rate(m))`)
-	same(`rate(rate(m[5m]))`)
-	same(`rate(rate(m[5m])[1h:])`)
-	same(`rate(rate(m[5m])[1h:3s])`)
-	// funcName with escape chars
-	same(`foo\(ba\-r()`)
-
-	// aggrFuncExpr
-	same(`sum(http_server_request) by ()`)
-	same(`sum(http_server_request) by (job)`)
-	same(`sum(http_server_request) without (job, foo)`)
-	another(`sum(x,y,) without (a,b,)`, `sum(x, y) without (a, b)`)
-	another(`sum by () (xx)`, `sum(xx) by ()`)
-	another(`sum by (s) (xx)[5s]`, `(sum(xx) by (s))[5s]`)
-	another(`SUM BY (ZZ, aa) (XX)`, `sum(XX) by (ZZ, aa)`)
-	another(`sum without (a, b) (xx,2+2)`, `sum(xx, 4) without (a, b)`)
-	another(`Sum WIthout (a, B) (XX,2+2)`, `sum(XX, 4) without (a, B)`)
-	same(`sum(a) or sum(b)`)
-	same(`sum(a) by () or sum(b) without (x, y)`)
-	same(`sum(a) + sum(b)`)
-	same(`sum(x) * (1 + sum(a))`)
-
-	// All the above
-	another(`Sum(Ff(M) * M{X=""}[5m] Offset 7m - 123, 35) BY (X, y) * F2("Test")`,
-		`sum((Ff(M) * M{X=""}[5m] offset 7m) - 123, 35) by (X, y) * F2("Test")`)
-	another(`# comment
-		Sum(Ff(M) * M{X=""}[5m] Offset 7m - 123, 35) BY (X, y) # yet another comment
-		* F2("Test")`,
-		`sum((Ff(M) * M{X=""}[5m] offset 7m) - 123, 35) by (X, y) * F2("Test")`)
-
-	// withExpr
-	another(`with () x`, `x`)
-	another(`with (x=1,) x`, `1`)
-	another(`with (x = m offset 5h) x + x`, `m offset 5h + m offset 5h`)
-	another(`with (x = m offset 5i) x + x`, `m offset 5i + m offset 5i`)
-	another(`with (foo = bar{x="x"}) 1`, `1`)
-	another(`with (foo = bar{x="x"}) "x"`, `"x"`)
-	another(`with (f="x") f`, `"x"`)
-	another(`with (foo = bar{x="x"}) x{x="y"}`, `x{x="y"}`)
-	another(`with (foo = bar{x="x"}) 1+1`, `2`)
-	another(`with (foo = bar{x="x"}) f()`, `f()`)
-	another(`with (foo = bar{x="x"}) sum(x)`, `sum(x)`)
-	another(`with (foo = bar{x="x"}) baz{foo="bar"}`, `baz{foo="bar"}`)
-	another(`with (foo = bar) baz`, `baz`)
-	another(`with (foo = bar) foo + foo{a="b"}`, `bar + bar{a="b"}`)
-	another(`with (foo = bar, bar=baz + f()) test`, `test`)
-	another(`with (ct={job="test"}) a{ct} + ct() + f({ct="x"})`, `(a{job="test"} + {job="test"}) + f({ct="x"})`)
-	another(`with (ct={job="test", i="bar"}) ct + {ct, x="d"} + foo{ct, ct} + ctx(1)`,
-		`(({job="test", i="bar"} + {job="test", i="bar", x="d"}) + foo{job="test", i="bar"}) + ctx(1)`)
-	another(`with (foo = bar) {__name__=~"foo"}`, `{__name__=~"foo"}`)
-	another(`with (foo = bar) foo{__name__="foo"}`, `bar`)
-	another(`with (foo = bar) {__name__="foo", x="y"}`, `bar{x="y"}`)
-	another(`with (foo(bar) = {__name__!="bar"}) foo(x)`, `{__name__!="bar"}`)
-	another(`with (foo(bar) = bar{__name__="bar"}) foo(x)`, `x`)
-	another(`with (foo\-bar(baz) = baz + baz) foo\-bar((x,y))`, `(x, y) + (x, y)`)
-	another(`with (foo\-bar(baz) = baz + baz) foo\-bar(x*y)`, `(x * y) + (x * y)`)
-	another(`with (foo\-bar(baz) = baz + baz) foo\-bar(x\*y)`, `x\*y + x\*y`)
-	another(`with (foo\-bar(b\ az) = b\ az + b\ az) foo\-bar(x\*y)`, `x\*y + x\*y`)
-	// override ttf to something new.
-	another(`with (ttf = a) ttf + b`, `a + b`)
-	// override ttf to ru
-	another(`with (ttf = ru(m, n)) ttf`, `(clamp_min(n - clamp_min(m, 0), 0) / clamp_min(n, 0)) * 100`)
-
-	// Verify withExpr recursion and forward reference
-	another(`with (x = x+y, y = x+x) y ^ 2`, `((x + y) + (x + y)) ^ 2`)
-	another(`with (f1(x)=f2(x), f2(x)=f1(x)^2) f1(foobar)`, `f2(foobar)`)
-	another(`with (f1(x)=f2(x), f2(x)=f1(x)^2) f2(foobar)`, `f2(foobar) ^ 2`)
-
-	// Verify withExpr funcs
-	another(`with (x() = y+1) x`, `y + 1`)
-	another(`with (x(foo) = foo+1) x(a)`, `a + 1`)
-	another(`with (x(a, b) = a + b) x(foo, bar)`, `foo + bar`)
-	another(`with (x(a, b) = a + b) x(foo, x(1, 2))`, `foo + 3`)
-	another(`with (x(a) = sum(a) by (b)) x(xx) / x(y)`, `sum(xx) by (b) / sum(y) by (b)`)
-	another(`with (f(a,f,x)=ff(x,f,a)) f(f(x,y,z),1,2)`, `ff(2, 1, ff(z, y, x))`)
-	another(`with (f(x)=1+f(x)) f(foo{bar="baz"})`, `1 + f(foo{bar="baz"})`)
-	another(`with (a=foo, y=bar, f(a)= a+a+y) f(x)`, `(x + x) + bar`)
-	another(`with (f(a, b) = m{a, b}) f({a="x", b="y"}, {c="d"})`, `m{a="x", b="y", c="d"}`)
-	another(`with (xx={a="x"}, f(a, b) = m{a, b}) f({xx, b="y"}, {c="d"})`, `m{a="x", b="y", c="d"}`)
-	another(`with (x() = {b="c"}) foo{x}`, `foo{b="c"}`)
-	another(`with (f(x)=x{foo="bar"} offset 5m) f(m offset 10m)`, `(m{foo="bar"} offset 10m) offset 5m`)
-	another(`with (f(x)=x{foo="bar",bas="a"}[5m]) f(m[10m] offset 3s)`, `(m{foo="bar", bas="a"}[10m] offset 3s)[5m]`)
-	another(`with (f(x)=x{foo="bar"}[5m] offset 10m) f(m{x="y"})`, `m{x="y", foo="bar"}[5m] offset 10m`)
-	another(`with (f(x)=x{foo="bar"}[5m] offset 10m) f({x="y", foo="bar", foo="bar"})`, `{x="y", foo="bar"}[5m] offset 10m`)
-	another(`with (f(m, x)=m{x}[5m] offset 10m) f(foo, {})`, `foo[5m] offset 10m`)
-	another(`with (f(m, x)=m{x, bar="baz"}[5m] offset 10m) f(foo, {})`, `foo{bar="baz"}[5m] offset 10m`)
-	another(`with (f(x)=x[5m] offset 3s) f(foo[3m]+bar)`, `(foo[3m] + bar)[5m] offset 3s`)
-	another(`with (f(x)=x[5m:3s] oFFsEt 1.5m) f(sum(s) by (a,b))`, `(sum(s) by (a, b))[5m:3s] offset 1.5m`)
-	another(`with (x="a", y=x) y+"bc"`, `"abc"`)
-	another(`with (x="a", y="b"+x) "we"+y+"z"+f()`, `"webaz" + f()`)
-	another(`with (f(x) = m{foo=x+"y", bar="y"+x, baz=x} + x) f("qwe")`, `m{foo="qwey", bar="yqwe", baz="qwe"} + "qwe"`)
-	another(`with (f(a)=a) f`, `f`)
-	another(`with (f\q(a)=a) f\q`, `fq`)
-
-	// Verify withExpr for aggr func modifiers
-	another(`with (f(x) = x, y = sum(m) by (f)) y`, `sum(m) by (f)`)
-	another(`with (f(x) = sum(m) by (x)) f(foo)`, `sum(m) by (foo)`)
-	another(`with (f(x) = sum(m) by (x)) f((foo, bar, foo))`, `sum(m) by (foo, bar)`)
-	another(`with (f(x) = sum(m) without (x,y)) f((a, b))`, `sum(m) without (a, b, y)`)
-	another(`with (f(x) = sum(m) without (y,x)) f((a, y))`, `sum(m) without (y, a)`)
-	another(`with (f(x,y) = a + on (x,y) group_left (y,bar) b) f(foo,())`, `a + on (foo) group_left (bar) b`)
-	another(`with (f(x,y) = a + on (x,y) group_left (y,bar) b) f((foo),())`, `a + on (foo) group_left (bar) b`)
-	another(`with (f(x,y) = a + on (x,y) group_left (y,bar) b) f((foo,xx),())`, `a + on (foo, xx) group_left (bar) b`)
-
-	// Verify nested with exprs
-	another(`with (f(x) = (with(x=y) x) + x) f(z)`, `y + z`)
-	another(`with (x=foo) f(a, with (y=x) y)`, `f(a, foo)`)
-	another(`with (x=foo) a * x + (with (y=x) y) / y`, `(a * foo) + (foo / y)`)
-	another(`with (x = with (y = foo) y + x) x/x`, `(foo + x) / (foo + x)`)
-	another(`with (
-		x = {foo="bar"},
-		q = m{x, y="1"},
-		f(x) =
-			with (
-				z(y) = x + y * q
-			)
-			z(foo) / f(x)
-	)
-	f(a)`, `(a + (foo * m{foo="bar", y="1"})) / f(a)`)
-
-	// complex withExpr
-	another(`WITH (
-		treshold = (0.9),
-		commonFilters = {job="cacher", instance=~"1.2.3.4"},
-		hits = rate(cache{type="hit", commonFilters}[5m]),
-		miss = rate(cache{type="miss", commonFilters}[5m]),
-		sumByInstance(arg) = sum(arg) by (instance),
-		hitRatio = sumByInstance(hits) / sumByInstance(hits + miss)
-	)
-	hitRatio < treshold`,
-		`(sum(rate(cache{type="hit", job="cacher", instance=~"1.2.3.4"}[5m])) by (instance) / sum(rate(cache{type="hit", job="cacher", instance=~"1.2.3.4"}[5m]) + rate(cache{type="miss", job="cacher", instance=~"1.2.3.4"}[5m])) by (instance)) < 0.9`)
-	another(`WITH (
-		x2(x) = x^2,
-		f(x, y) = x2(x) + x*y + x2(y)
-	)
-	f(a, 3)
-	`, `((a ^ 2) + (a * 3)) + 9`)
-	another(`WITH (
-		x2(x) = x^2,
-		f(x, y) = x2(x) + x*y + x2(y)
-	)
-	f(2, 3)
-	`, `19`)
-	another(`WITH (
-		commonFilters = {instance="foo"},
-		timeToFuckup(currv, maxv) = (maxv - currv) / rate(currv)
-	)
-	timeToFuckup(diskUsage{commonFilters}, maxDiskSize{commonFilters})`,
-		`(maxDiskSize{instance="foo"} - diskUsage{instance="foo"}) / rate(diskUsage{instance="foo"})`)
-	another(`WITH (
-	       commonFilters = {job="foo", instance="bar"},
-	       sumRate(m, cf) = sum(rate(m{cf})) by (job, instance),
-	       hitRate(hits, misses) = sumRate(hits, commonFilters) / (sumRate(hits, commonFilters) + sumRate(misses, commonFilters))
-	   )
-	   hitRate(cacheHits, cacheMisses)`,
-		`sum(rate(cacheHits{job="foo", instance="bar"})) by (job, instance) / (sum(rate(cacheHits{job="foo", instance="bar"})) by (job, instance) + sum(rate(cacheMisses{job="foo", instance="bar"})) by (job, instance))`)
-	another(`with(y=123,z=5) union(with(y=3,f(x)=x*y) f(2) + f(3), with(x=5,y=2) x*y*z)`, `union(15, 50)`)
-}
-
-func TestParsePromQLError(t *testing.T) {
-	f := func(s string) {
-		t.Helper()
-
-		e, err := parsePromQL(s)
-		if err == nil {
-			t.Fatalf("expecting non-nil error when parsing %q", s)
-		}
-		if e != nil {
-			t.Fatalf("expecting nil expr when parsing %q", s)
-		}
-	}
-
-	// an empty string
-	f("")
-	f("  \t\b\r\n  ")
-
-	// invalid metricExpr
-	f(`{__name__="ff"} offset 55`)
-	f(`{__name__="ff"} offset -5m`)
-	f(`foo[55]`)
-	f(`m[-5m]`)
-	f(`{`)
-	f(`foo{`)
-	f(`foo{bar`)
-	f(`foo{bar=`)
-	f(`foo{bar="baz"`)
-	f(`foo{bar="baz",  `)
-	f(`foo{123="23"}`)
-	f(`foo{foo}`)
-	f(`foo{,}`)
-	f(`foo{,foo="bar"}`)
-	f(`foo{foo=}`)
-	f(`foo{foo="ba}`)
-	f(`foo{"foo"="bar"}`)
-	f(`foo{$`)
-	f(`foo{a $`)
-	f(`foo{a="b",$`)
-	f(`foo{a="b"}$`)
-	f(`[`)
-	f(`[]`)
-	f(`f[5m]$`)
-	f(`[5m]`)
-	f(`[5m] offset 4h`)
-	f(`m[5m] offset $`)
-	f(`m[5m] offset 5h $`)
-	f(`m[]`)
-	f(`m[-5m]`)
-	f(`m[5m:`)
-	f(`m[5m:-`)
-	f(`m[5m:-1`)
-	f(`m[5m:-1]`)
-	f(`m[:`)
-	f(`m[:-`)
-	f(`m[:1]`)
-	f(`m[:-1m]`)
-	f(`m[5]`)
-	f(`m[[5m]]`)
-	f(`m[foo]`)
-	f(`m["ff"]`)
-	f(`m[10m`)
-	f(`m[123`)
-	f(`m["ff`)
-	f(`m[(f`)
-	f(`fd}`)
-	f(`]`)
-	f(`m $`)
-	f(`m{,}`)
-	f(`m{x=y}`)
-	f(`m{x=y/5}`)
-	f(`m{x=y+5}`)
-
-	// Invalid regexp
-	f(`foo{bar=~"x["}`)
-	f(`foo{bar=~"x("}`)
-	f(`foo{bar=~"x)"}`)
-	f(`foo{bar!~"x["}`)
-	f(`foo{bar!~"x("}`)
-	f(`foo{bar!~"x)"}`)
-
-	// invalid stringExpr
-	f(`'`)
-	f(`"`)
-	f("`")
-	f(`"foo`)
-	f(`'foo`)
-	f("`foo")
-	f(`"foo\"bar`)
-	f(`'foo\'bar`)
-	f("`foo\\`bar")
-	f(`"" $`)
-	f(`"foo" +`)
-	f(`n{"foo" + m`)
-
-	// invalid numberExpr
-	f(`12.`)
-	f(`1.2e`)
-	f(`23e-`)
-	f(`23E+`)
-	f(`.`)
-	f(`-12.`)
-	f(`-1.2e`)
-	f(`-23e-`)
-	f(`-23E+`)
-	f(`-.`)
-	f(`-1$$`)
-	f(`-$$`)
-	f(`+$$`)
-	f(`23 $$`)
-
-	// invalid binaryOpExpr
-	f(`+`)
-	f(`1 +`)
-	f(`1 + 2.`)
-	f(`3 unless`)
-	f(`23 + on (foo)`)
-	f(`m + on (,) m`)
-	f(`3 * ignoring`)
-	f(`m * on (`)
-	f(`m * on (foo`)
-	f(`m * on (foo,`)
-	f(`m * on (foo,)`)
-	f(`m * on (,foo)`)
-	f(`m * on (,)`)
-	f(`m == bool (bar) baz`)
-	f(`m == bool () baz`)
-	f(`m * by (baz) n`)
-	f(`m + bool group_left m2`)
-	f(`m + on () group_left (`)
-	f(`m + on () group_left (,`)
-	f(`m + on () group_left (,foo`)
-	f(`m + on () group_left (foo,)`)
-	f(`m + on () group_left (,foo)`)
-	f(`m + on () group_left (foo)`)
-	f(`m + on () group_right (foo) (m`)
-	f(`m or ignoring () group_left () n`)
-	f(`1 + bool 2`)
-	f(`m % bool n`)
-	f(`m * bool baz`)
-	f(`M * BOoL BaZ`)
-	f(`foo unless ignoring (bar) group_left xxx`)
-	f(`foo or bool bar`)
-	f(`foo == bool $$`)
-	f(`"foo" + bar`)
-
-	// invalid parensExpr
-	f(`(`)
-	f(`($`)
-	f(`(+`)
-	f(`(1`)
-	f(`(m+`)
-	f(`1)`)
-	f(`(,)`)
-	f(`(1)$`)
-
-	// invalid funcExpr
-	f(`f $`)
-	f(`f($)`)
-	f(`f[`)
-	f(`f()$`)
-	f(`f(`)
-	f(`f(foo`)
-	f(`f(f,`)
-	f(`f(,`)
-	f(`f(,)`)
-	f(`f(,foo)`)
-	f(`f(,foo`)
-	f(`f(foo,$`)
-	f(`f() by (a)`)
-	f(`f without (x) (y)`)
-	f(`f() foo (a)`)
-	f(`f bar (x) (b)`)
-	f(`f bar (x)`)
-
-	// invalid aggrFuncExpr
-	f(`sum(`)
-	f(`sum $`)
-	f(`sum [`)
-	f(`sum($)`)
-	f(`sum()$`)
-	f(`sum(foo) ba`)
-	f(`sum(foo) ba()`)
-	f(`sum(foo) by`)
-	f(`sum(foo) without x`)
-	f(`sum(foo) aaa`)
-	f(`sum(foo) aaa x`)
-	f(`sum() by $`)
-	f(`sum() by (`)
-	f(`sum() by ($`)
-	f(`sum() by (a`)
-	f(`sum() by (a $`)
-	f(`sum() by (a ]`)
-	f(`sum() by (a)$`)
-	f(`sum() by (,`)
-	f(`sum() by (a,$`)
-	f(`sum() by (,)`)
-	f(`sum() by (,a`)
-	f(`sum() by (,a)`)
-	f(`sum() on (b)`)
-	f(`sum() bool`)
-	f(`sum() group_left`)
-	f(`sum() group_right(x)`)
-	f(`sum ba`)
-	f(`sum ba ()`)
-	f(`sum by (`)
-	f(`sum by (a`)
-	f(`sum by (,`)
-	f(`sum by (,)`)
-	f(`sum by (,a`)
-	f(`sum by (,a)`)
-	f(`sum by (a)`)
-	f(`sum by (a) (`)
-	f(`sum by (a) [`)
-	f(`sum by (a) {`)
-	f(`sum by (a) (b`)
-	f(`sum by (a) (b,`)
-	f(`sum by (a) (,)`)
-	f(`avg by (a) (,b)`)
-	f(`sum by (x) (y) by (z)`)
-	f(`sum(m) by (1)`)
-
-	// invalid withExpr
-	f(`with $`)
-	f(`with a`)
-	f(`with a=b c`)
-	f(`with (`)
-	f(`with (x=b)$`)
-	f(`with ($`)
-	f(`with (foo`)
-	f(`with (foo $`)
-	f(`with (x y`)
-	f(`with (x =`)
-	f(`with (x = $`)
-	f(`with (x= y`)
-	f(`with (x= y $`)
-	f(`with (x= y)`)
-	f(`with (x=(`)
-	f(`with (x=[)`)
-	f(`with (x=() x)`)
-	f(`with ($$)`)
-	f(`with (x $$`)
-	f(`with (x = $$)`)
-	f(`with (x = foo) bar{x}`)
-	f(`with (x = {foo="bar"}[5m]) bar{x}`)
-	f(`with (x = {foo="bar"} offset 5m) bar{x}`)
-	f(`with (x = a, x = b) c`)
-	f(`with (x(a, a) = b) c`)
-	f(`with (x=m{f="x"}) foo{x}`)
-	f(`with (sum = x) y`)
-	f(`with (rate(a) = b) c`)
-	f(`with (clamp_min=x) y`)
-	f(`with (f()`)
-	f(`with (a=b c=d) e`)
-	f(`with (f(x)=x^2) m{x}`)
-	f(`with (f(x)=ff()) m{x}`)
-	f(`with (f(x`)
-	f(`with (x=m) a{x} + b`)
-	f(`with (x=m) b + a{x}`)
-	f(`with (x=m) f(b, a{x})`)
-	f(`with (x=m) sum(a{x})`)
-	f(`with (x=m) (a{x})`)
-	f(`with (f(a)=a) f(1, 2)`)
-	f(`with (f(x)=x{foo="bar"}) f(1)`)
-	f(`with (f(x)=x{foo="bar"}) f(m + n)`)
-	f(`with (f = with`)
-	f(`with (,)`)
-	f(`with (1) 2`)
-	f(`with (f(1)=2) 3`)
-	f(`with (f(,)=x) x`)
-	f(`with (x(a) = {b="c"}) foo{x}`)
-	f(`with (f(x) = m{foo=xx}) f("qwe")`)
-	f(`a + with(f(x)=x) f(1,2)`)
-	f(`with (f(x) = sum(m) by (x)) f({foo="bar"})`)
-	f(`with (f(x) = sum(m) by (x)) f((xx(), {foo="bar"}))`)
-	f(`with (f(x) = m + on (x) n) f(xx())`)
-	f(`with (f(x) = m + on (a) group_right (x) n) f(xx())`)
-}
--- a/app/vmselect/promql/rollup.go
+++ b/app/vmselect/promql/rollup.go
@@ -8,12 +8,13 @@ import (

 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/decimal"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/metricsql"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
+	"github.com/VictoriaMetrics/metrics"
 	"github.com/valyala/histogram"
 )

 var rollupFuncs = map[string]newRollupFunc{
-	"default_rollup": newRollupFuncOneArg(rollupDefault), // default rollup func
-
 	// Standard rollup funcs from PromQL.
 	// See funcs accepting range-vector on https://prometheus.io/docs/prometheus/latest/querying/functions/ .
 	"changes":            newRollupFuncOneArg(rollupChanges),
@@ -35,38 +36,98 @@ var rollupFuncs = map[string]newRollupFunc{
 	"quantile_over_time": newRollupQuantile,
 	"stddev_over_time":   newRollupFuncOneArg(rollupStddev),
 	"stdvar_over_time":   newRollupFuncOneArg(rollupStdvar),
+	"absent_over_time":   newRollupFuncOneArg(rollupAbsent),

 	// Additional rollup funcs.
-	"sum2_over_time":      newRollupFuncOneArg(rollupSum2),
-	"geomean_over_time":   newRollupFuncOneArg(rollupGeomean),
-	"first_over_time":     newRollupFuncOneArg(rollupFirst),
-	"last_over_time":      newRollupFuncOneArg(rollupLast),
-	"distinct_over_time":  newRollupFuncOneArg(rollupDistinct),
-	"increases_over_time": newRollupFuncOneArg(rollupIncreases),
-	"decreases_over_time": newRollupFuncOneArg(rollupDecreases),
-	"integrate":           newRollupFuncOneArg(rollupIntegrate),
-	"ideriv":              newRollupFuncOneArg(rollupIderiv),
-	"lifetime":            newRollupFuncOneArg(rollupLifetime),
-	"lag":                 newRollupFuncOneArg(rollupLag),
-	"scrape_interval":     newRollupFuncOneArg(rollupScrapeInterval),
-	"rollup":              newRollupFuncOneArg(rollupFake),
-	"rollup_rate":         newRollupFuncOneArg(rollupFake), // + rollupFuncsRemoveCounterResets
-	"rollup_deriv":        newRollupFuncOneArg(rollupFake),
-	"rollup_delta":        newRollupFuncOneArg(rollupFake),
-	"rollup_increase":     newRollupFuncOneArg(rollupFake), // + rollupFuncsRemoveCounterResets
-	"rollup_candlestick":  newRollupFuncOneArg(rollupFake),
+	"default_rollup":        newRollupFuncOneArg(rollupDefault), // default rollup func
+	"range_over_time":       newRollupFuncOneArg(rollupRange),
+	"sum2_over_time":        newRollupFuncOneArg(rollupSum2),
+	"geomean_over_time":     newRollupFuncOneArg(rollupGeomean),
+	"first_over_time":       newRollupFuncOneArg(rollupFirst),
+	"last_over_time":        newRollupFuncOneArg(rollupLast),
+	"distinct_over_time":    newRollupFuncOneArg(rollupDistinct),
+	"increases_over_time":   newRollupFuncOneArg(rollupIncreases),
+	"decreases_over_time":   newRollupFuncOneArg(rollupDecreases),
+	"integrate":             newRollupFuncOneArg(rollupIntegrate),
+	"ideriv":                newRollupFuncOneArg(rollupIderiv),
+	"lifetime":              newRollupFuncOneArg(rollupLifetime),
+	"lag":                   newRollupFuncOneArg(rollupLag),
+	"scrape_interval":       newRollupFuncOneArg(rollupScrapeInterval),
+	"tmin_over_time":        newRollupFuncOneArg(rollupTmin),
+	"tmax_over_time":        newRollupFuncOneArg(rollupTmax),
+	"share_le_over_time":    newRollupShareLE,
+	"share_gt_over_time":    newRollupShareGT,
+	"histogram_over_time":   newRollupFuncOneArg(rollupHistogram),
+	"rollup":                newRollupFuncOneArg(rollupFake),
+	"rollup_rate":           newRollupFuncOneArg(rollupFake), // + rollupFuncsRemoveCounterResets
+	"rollup_deriv":          newRollupFuncOneArg(rollupFake),
+	"rollup_delta":          newRollupFuncOneArg(rollupFake),
+	"rollup_increase":       newRollupFuncOneArg(rollupFake), // + rollupFuncsRemoveCounterResets
+	"rollup_candlestick":    newRollupFuncOneArg(rollupFake),
+	"aggr_over_time":        newRollupFuncTwoArgs(rollupFake),
+	"hoeffding_bound_upper": newRollupHoeffdingBoundUpper,
+	"hoeffding_bound_lower": newRollupHoeffdingBoundLower,
 }

-var rollupFuncsMayAdjustWindow = map[string]bool{
-	"default_rollup":  true,
-	"first_over_time": true,
-	"last_over_time":  true,
-	"deriv":           true,
-	"deriv_fast":      true,
-	"irate":           true,
-	"rate":            true,
-	"lifetime":        true,
-	"scrape_interval": true,
+// rollupAggrFuncs are functions that can be passed to `aggr_over_time()`
+var rollupAggrFuncs = map[string]rollupFunc{
+	// Standard rollup funcs from PromQL.
+	"changes":          rollupChanges,
+	"delta":            rollupDelta,
+	"deriv":            rollupDerivSlow,
+	"deriv_fast":       rollupDerivFast,
+	"idelta":           rollupIdelta,
+	"increase":         rollupIncrease,  // + rollupFuncsRemoveCounterResets
+	"irate":            rollupIderiv,    // + rollupFuncsRemoveCounterResets
+	"rate":             rollupDerivFast, // + rollupFuncsRemoveCounterResets
+	"resets":           rollupResets,
+	"avg_over_time":    rollupAvg,
+	"min_over_time":    rollupMin,
+	"max_over_time":    rollupMax,
+	"sum_over_time":    rollupSum,
+	"count_over_time":  rollupCount,
+	"stddev_over_time": rollupStddev,
+	"stdvar_over_time": rollupStdvar,
+	"absent_over_time": rollupAbsent,
+
+	// Additional rollup funcs.
+	"range_over_time":     rollupRange,
+	"sum2_over_time":      rollupSum2,
+	"geomean_over_time":   rollupGeomean,
+	"first_over_time":     rollupFirst,
+	"last_over_time":      rollupLast,
+	"distinct_over_time":  rollupDistinct,
+	"increases_over_time": rollupIncreases,
+	"decreases_over_time": rollupDecreases,
+	"integrate":           rollupIntegrate,
+	"ideriv":              rollupIderiv,
+	"lifetime":            rollupLifetime,
+	"lag":                 rollupLag,
+	"scrape_interval":     rollupScrapeInterval,
+	"tmin_over_time":      rollupTmin,
+	"tmax_over_time":      rollupTmax,
+}
+
+var rollupFuncsCannotAdjustWindow = map[string]bool{
+	"changes":             true,
+	"delta":               true,
+	"holt_winters":        true,
+	"idelta":              true,
+	"increase":            true,
+	"predict_linear":      true,
+	"resets":              true,
+	"sum_over_time":       true,
+	"count_over_time":     true,
+	"quantile_over_time":  true,
+	"stddev_over_time":    true,
+	"stdvar_over_time":    true,
+	"absent_over_time":    true,
+	"sum2_over_time":      true,
+	"geomean_over_time":   true,
+	"distinct_over_time":  true,
+	"increases_over_time": true,
+	"decreases_over_time": true,
+	"integrate":           true,
 }

 var rollupFuncsRemoveCounterResets = map[string]bool{
@@ -78,13 +139,64 @@ var rollupFuncsRemoveCounterResets = map[string]bool{
 }

 var rollupFuncsKeepMetricGroup = map[string]bool{
-	"default_rollup":     true,
-	"avg_over_time":      true,
-	"min_over_time":      true,
-	"max_over_time":      true,
-	"quantile_over_time": true,
-	"rollup":             true,
-	"geomean_over_time":  true,
+	"default_rollup":        true,
+	"avg_over_time":         true,
+	"min_over_time":         true,
+	"max_over_time":         true,
+	"quantile_over_time":    true,
+	"rollup":                true,
+	"geomean_over_time":     true,
+	"hoeffding_bound_lower": true,
+	"hoeffding_bound_upper": true,
+}
+
+func getRollupAggrFuncNames(expr metricsql.Expr) ([]string, error) {
+	afe, ok := expr.(*metricsql.AggrFuncExpr)
+	if ok {
+		// This is for incremental aggregate function case:
+		//
+		//     sum(aggr_over_time(...))
+		//
+		// See aggr_incremental.go for details.
+		expr = afe.Args[0]
+	}
+	fe, ok := expr.(*metricsql.FuncExpr)
+	if !ok {
+		logger.Panicf("BUG: unexpected expression; want metricsql.FuncExpr; got %T; value: %s", expr, expr.AppendString(nil))
+	}
+	if fe.Name != "aggr_over_time" {
+		logger.Panicf("BUG: unexpected function name: %q; want `aggr_over_time`", fe.Name)
+	}
+	if len(fe.Args) != 2 {
+		return nil, fmt.Errorf("unexpected number of args to aggr_over_time(); got %d; want %d", len(fe.Args), 2)
+	}
+	arg := fe.Args[0]
+	var aggrFuncNames []string
+	if se, ok := arg.(*metricsql.StringExpr); ok {
+		aggrFuncNames = append(aggrFuncNames, se.S)
+	} else {
+		fe, ok := arg.(*metricsql.FuncExpr)
+		if !ok || fe.Name != "" {
+			return nil, fmt.Errorf("%s cannot be passed to aggr_over_time(); expecting quoted aggregate function name or a list of quoted aggregate function names",
+				arg.AppendString(nil))
+		}
+		for _, e := range fe.Args {
+			se, ok := e.(*metricsql.StringExpr)
+			if !ok {
+				return nil, fmt.Errorf("%s cannot be passed here; expecting quoted aggregate function name", e.AppendString(nil))
+			}
+			aggrFuncNames = append(aggrFuncNames, se.S)
+		}
+	}
+	if len(aggrFuncNames) == 0 {
+		return nil, fmt.Errorf("aggr_over_time() must contain at least a single aggregate function name")
+	}
+	for _, s := range aggrFuncNames {
+		if rollupAggrFuncs[s] == nil {
+			return nil, fmt.Errorf("%q cannot be used in `aggr_over_time` function; expecting quoted aggregate function name", s)
+		}
+	}
+	return aggrFuncNames, nil
 }

 func getRollupArgIdx(funcName string) int {
@@ -92,10 +204,84 @@ func getRollupArgIdx(funcName string) int {
 	if rollupFuncs[funcName] == nil {
 		logger.Panicf("BUG: getRollupArgIdx is called for non-rollup func %q", funcName)
 	}
-	if funcName == "quantile_over_time" {
+	switch funcName {
+	case "quantile_over_time", "aggr_over_time",
+		"hoeffding_bound_lower", "hoeffding_bound_upper":
 		return 1
+	default:
+		return 0
 	}
-	return 0
+}
+
+func getRollupConfigs(name string, rf rollupFunc, expr metricsql.Expr, start, end, step, window int64, lookbackDelta int64, sharedTimestamps []int64) (
+	func(values []float64, timestamps []int64), []*rollupConfig, error) {
+	preFunc := func(values []float64, timestamps []int64) {}
+	if rollupFuncsRemoveCounterResets[name] {
+		preFunc = func(values []float64, timestamps []int64) {
+			removeCounterResets(values)
+		}
+	}
+	newRollupConfig := func(rf rollupFunc, tagValue string) *rollupConfig {
+		return &rollupConfig{
+			TagValue:        tagValue,
+			Func:            rf,
+			Start:           start,
+			End:             end,
+			Step:            step,
+			Window:          window,
+			MayAdjustWindow: !rollupFuncsCannotAdjustWindow[name],
+			LookbackDelta:   lookbackDelta,
+			Timestamps:      sharedTimestamps,
+		}
+	}
+	appendRollupConfigs := func(dst []*rollupConfig) []*rollupConfig {
+		dst = append(dst, newRollupConfig(rollupMin, "min"))
+		dst = append(dst, newRollupConfig(rollupMax, "max"))
+		dst = append(dst, newRollupConfig(rollupAvg, "avg"))
+		return dst
+	}
+	var rcs []*rollupConfig
+	switch name {
+	case "rollup":
+		rcs = appendRollupConfigs(rcs)
+	case "rollup_rate", "rollup_deriv":
+		preFuncPrev := preFunc
+		preFunc = func(values []float64, timestamps []int64) {
+			preFuncPrev(values, timestamps)
+			derivValues(values, timestamps)
+		}
+		rcs = appendRollupConfigs(rcs)
+	case "rollup_increase", "rollup_delta":
+		preFuncPrev := preFunc
+		preFunc = func(values []float64, timestamps []int64) {
+			preFuncPrev(values, timestamps)
+			deltaValues(values)
+		}
+		rcs = appendRollupConfigs(rcs)
+	case "rollup_candlestick":
+		rcs = append(rcs, newRollupConfig(rollupFirst, "open"))
+		rcs = append(rcs, newRollupConfig(rollupLast, "close"))
+		rcs = append(rcs, newRollupConfig(rollupMin, "low"))
+		rcs = append(rcs, newRollupConfig(rollupMax, "high"))
+	case "aggr_over_time":
+		aggrFuncNames, err := getRollupAggrFuncNames(expr)
+		if err != nil {
+			return nil, nil, fmt.Errorf("invalid args to %s: %s", expr.AppendString(nil), err)
+		}
+		for _, aggrFuncName := range aggrFuncNames {
+			if rollupFuncsRemoveCounterResets[aggrFuncName] {
+				// There is no need to save the previous preFunc, since it is either empty or the same.
+				preFunc = func(values []float64, timestamps []int64) {
+					removeCounterResets(values)
+				}
+			}
+			rf := rollupAggrFuncs[aggrFuncName]
+			rcs = append(rcs, newRollupConfig(rf, aggrFuncName))
+		}
+	default:
+		rcs = append(rcs, newRollupConfig(rf, ""))
+	}
+	return preFunc, rcs, nil
 }

 func getRollupFunc(funcName string) newRollupFunc {
@@ -103,10 +289,6 @@ func getRollupFunc(funcName string) newRollupFunc {
 	return rollupFuncs[funcName]
 }

-func isRollupFunc(funcName string) bool {
-	return getRollupFunc(funcName) != nil
-}
-
 type rollupFuncArg struct {
 	prevValue     float64
 	prevTimestamp int64
@@ -116,7 +298,12 @@ type rollupFuncArg struct {
 	currTimestamp int64
 	idx           int
 	step          int64
+
+	// Real previous value even if it is located too far from the current window.
+	// It matches prevValue if prevValue is not nan.
 	realPrevValue float64
+
+	tsm *timeseriesMap
 }

 func (rfa *rollupFuncArg) reset() {
@@ -128,6 +315,7 @@ func (rfa *rollupFuncArg) reset() {
 	rfa.idx = 0
 	rfa.step = 0
 	rfa.realPrevValue = nan
+	rfa.tsm = nil
 }

 // rollupFunc must return rollup value for the given rfa.
@@ -166,6 +354,54 @@ var (
 // The maximum interval without previous rows.
 const maxSilenceInterval = 5 * 60 * 1000

+type timeseriesMap struct {
+	origin    *timeseries
+	labelName string
+	h         metrics.Histogram
+	m         map[string]*timeseries
+}
+
+func newTimeseriesMap(funcName string, sharedTimestamps []int64, mnSrc *storage.MetricName) *timeseriesMap {
+	if funcName != "histogram_over_time" {
+		return nil
+	}
+
+	values := make([]float64, len(sharedTimestamps))
+	for i := range values {
+		values[i] = nan
+	}
+	var origin timeseries
+	origin.MetricName.CopyFrom(mnSrc)
+	origin.MetricName.ResetMetricGroup()
+	origin.Timestamps = sharedTimestamps
+	origin.Values = values
+	return &timeseriesMap{
+		origin:    &origin,
+		labelName: "vmrange",
+		m:         make(map[string]*timeseries),
+	}
+}
+
+func (tsm *timeseriesMap) AppendTimeseriesTo(dst []*timeseries) []*timeseries {
+	for _, ts := range tsm.m {
+		dst = append(dst, ts)
+	}
+	return dst
+}
+
+func (tsm *timeseriesMap) GetOrCreateTimeseries(labelValue string) *timeseries {
+	ts := tsm.m[labelValue]
+	if ts != nil {
+		return ts
+	}
+	ts = &timeseries{}
+	ts.CopyFromShallowTimestamps(tsm.origin)
+	ts.MetricName.RemoveTag(tsm.labelName)
+	ts.MetricName.AddTag(tsm.labelName, labelValue)
+	tsm.m[labelValue] = ts
+	return ts
+}
+
 // Do calculates rollups for the given timestamps and values, appends
 // them to dstValues and returns results.
 //
@@ -173,8 +409,19 @@ const maxSilenceInterval = 5 * 60 * 1000
 //
 // timestamps must cover time range [rc.Start - rc.Window - maxSilenceInterval ... rc.End + rc.Step].
 //
-// Cannot be called from concurrent goroutines.
+// Do cannot be called from concurrent goroutines.
 func (rc *rollupConfig) Do(dstValues []float64, values []float64, timestamps []int64) []float64 {
+	return rc.doInternal(dstValues, nil, values, timestamps)
+}
+
+// DoTimeseriesMap calculates rollups for the given timestamps and values and puts them into tsm.
+func (rc *rollupConfig) DoTimeseriesMap(tsm *timeseriesMap, values []float64, timestamps []int64) {
+	ts := getTimeseries()
+	ts.Values = rc.doInternal(ts.Values[:0], tsm, values, timestamps)
+	putTimeseries(ts)
+}
+
+func (rc *rollupConfig) doInternal(dstValues []float64, tsm *timeseriesMap, values []float64, timestamps []int64) []float64 {
 	// Sanity checks.
 	if rc.Step <= 0 {
 		logger.Panicf("BUG: Step must be bigger than 0; got %d", rc.Step)
@@ -192,7 +439,8 @@ func (rc *rollupConfig) Do(dstValues []float64, values []float64, timestamps []i
 	// Extend dstValues in order to remove mallocs below.
 	dstValues = decimal.ExtendFloat64sCapacity(dstValues, len(rc.Timestamps))

-	maxPrevInterval := getMaxPrevInterval(timestamps)
+	scrapeInterval := getScrapeInterval(timestamps)
+	maxPrevInterval := getMaxPrevInterval(scrapeInterval)
 	if rc.LookbackDelta > 0 && maxPrevInterval > rc.LookbackDelta {
 		maxPrevInterval = rc.LookbackDelta
 	}
@@ -207,6 +455,7 @@ func (rc *rollupConfig) Do(dstValues []float64, values []float64, timestamps []i
 	rfa.idx = 0
 	rfa.step = rc.Step
 	rfa.realPrevValue = nan
+	rfa.tsm = tsm

 	i := 0
 	j := 0
@@ -296,7 +545,7 @@ func binarySearchInt64(a []int64, v int64) uint {
 	return i
 }

-func getMaxPrevInterval(timestamps []int64) int64 {
+func getScrapeInterval(timestamps []int64) int64 {
 	if len(timestamps) < 2 {
 		return int64(maxSilenceInterval)
 	}
@@ -312,30 +561,34 @@ func getMaxPrevInterval(timestamps []int64) int64 {
 		h.Update(float64(ts - tsPrev))
 		tsPrev = ts
 	}
-	d := int64(h.Quantile(0.6))
+	scrapeInterval := int64(h.Quantile(0.6))
 	histogram.PutFast(h)
-	if d <= 0 {
+	if scrapeInterval <= 0 {
 		return int64(maxSilenceInterval)
 	}
-	// Increase d more for smaller scrape intervals in order to hide possible gaps
+	return scrapeInterval
+}
+
+func getMaxPrevInterval(scrapeInterval int64) int64 {
+	// Increase scrapeInterval more for smaller scrape intervals in order to hide possible gaps
 	// when high jitter is present.
 	// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/139 .
-	if d <= 2*1000 {
-		return d + 4*d
+	if scrapeInterval <= 2*1000 {
+		return scrapeInterval + 4*scrapeInterval
 	}
-	if d <= 4*1000 {
-		return d + 2*d
+	if scrapeInterval <= 4*1000 {
+		return scrapeInterval + 2*scrapeInterval
 	}
-	if d <= 8*1000 {
-		return d + d
+	if scrapeInterval <= 8*1000 {
+		return scrapeInterval + scrapeInterval
 	}
-	if d <= 16*1000 {
-		return d + d/2
+	if scrapeInterval <= 16*1000 {
+		return scrapeInterval + scrapeInterval/2
 	}
-	if d <= 32*1000 {
-		return d + d/4
+	if scrapeInterval <= 32*1000 {
+		return scrapeInterval + scrapeInterval/4
 	}
-	return d + d/8
+	return scrapeInterval + scrapeInterval/8
 }

 func removeCounterResets(values []float64) {
@@ -414,6 +667,15 @@ func newRollupFuncOneArg(rf rollupFunc) newRollupFunc {
 	}
 }

+func newRollupFuncTwoArgs(rf rollupFunc) newRollupFunc {
+	return func(args []interface{}) (rollupFunc, error) {
+		if err := expectRollupArgsNum(args, 2); err != nil {
+			return nil, err
+		}
+		return rf, nil
+	}
+}
+
 func newRollupHoltWinters(args []interface{}) (rollupFunc, error) {
 	if err := expectRollupArgsNum(args, 3); err != nil {
 		return nil, err
@@ -522,6 +784,116 @@ func linearRegression(rfa *rollupFuncArg) (float64, float64) {
 	return v, k
 }

+func newRollupShareLE(args []interface{}) (rollupFunc, error) {
+	return newRollupShareFilter(args, countFilterLE)
+}
+
+func countFilterLE(values []float64, le float64) int {
+	n := 0
+	for _, v := range values {
+		if v <= le {
+			n++
+		}
+	}
+	return n
+}
+
+func newRollupShareGT(args []interface{}) (rollupFunc, error) {
+	return newRollupShareFilter(args, countFilterGT)
+}
+
+func countFilterGT(values []float64, gt float64) int {
+	n := 0
+	for _, v := range values {
+		if v > gt {
+			n++
+		}
+	}
+	return n
+}
+
+func newRollupShareFilter(args []interface{}, countFilter func(values []float64, limit float64) int) (rollupFunc, error) {
+	if err := expectRollupArgsNum(args, 2); err != nil {
+		return nil, err
+	}
+	limits, err := getScalar(args[1], 1)
+	if err != nil {
+		return nil, err
+	}
+	rf := func(rfa *rollupFuncArg) float64 {
+		// There is no need in handling NaNs here, since they must be cleaned up
+		// before calling rollup funcs.
+		values := rfa.values
+		if len(values) == 0 {
+			return nan
+		}
+		limit := limits[rfa.idx]
+		n := countFilter(values, limit)
+		return float64(n) / float64(len(values))
+	}
+	return rf, nil
+}
+
+func newRollupHoeffdingBoundLower(args []interface{}) (rollupFunc, error) {
+	if err := expectRollupArgsNum(args, 2); err != nil {
+		return nil, err
+	}
+	phis, err := getScalar(args[0], 0)
+	if err != nil {
+		return nil, err
+	}
+	rf := func(rfa *rollupFuncArg) float64 {
+		bound, avg := rollupHoeffdingBoundInternal(rfa, phis)
+		return avg - bound
+	}
+	return rf, nil
+}
+
+func newRollupHoeffdingBoundUpper(args []interface{}) (rollupFunc, error) {
+	if err := expectRollupArgsNum(args, 2); err != nil {
+		return nil, err
+	}
+	phis, err := getScalar(args[0], 0)
+	if err != nil {
+		return nil, err
+	}
+	rf := func(rfa *rollupFuncArg) float64 {
+		bound, avg := rollupHoeffdingBoundInternal(rfa, phis)
+		return avg + bound
+	}
+	return rf, nil
+}
+
+func rollupHoeffdingBoundInternal(rfa *rollupFuncArg, phis []float64) (float64, float64) {
+	// There is no need in handling NaNs here, since they must be cleaned up
+	// before calling rollup funcs.
+	values := rfa.values
+	if len(values) == 0 {
+		return nan, nan
+	}
+	if len(values) == 1 {
+		return 0, values[0]
+	}
+	vMax := rollupMax(rfa)
+	vMin := rollupMin(rfa)
+	vAvg := rollupAvg(rfa)
+	vRange := vMax - vMin
+	if vRange <= 0 {
+		return 0, vAvg
+	}
+	phi := phis[rfa.idx]
+	if phi >= 1 {
+		return inf, vAvg
+	}
+	if phi <= 0 {
+		return 0, vAvg
+	}
+	// See https://en.wikipedia.org/wiki/Hoeffding%27s_inequality
+	// and https://www.youtube.com/watch?v=6UwcqiNsZ8U&feature=youtu.be&t=1237
+	bound := vRange * math.Sqrt(math.Log(1/(1-phi))/(2*float64(len(values))))
+	return bound, vAvg
+}
+
 func newRollupQuantile(args []interface{}) (rollupFunc, error) {
 	if err := expectRollupArgsNum(args, 2); err != nil {
 		return nil, err
@@ -553,6 +925,21 @@ func newRollupQuantile(args []interface{}) (rollupFunc, error) {
 	return rf, nil
 }

+func rollupHistogram(rfa *rollupFuncArg) float64 {
+	values := rfa.values
+	tsm := rfa.tsm
+	tsm.h.Reset()
+	for _, v := range values {
+		tsm.h.Update(v)
+	}
+	idx := rfa.idx
+	tsm.h.VisitNonZeroBuckets(func(vmrange string, count uint64) {
+		ts := tsm.GetOrCreateTimeseries(vmrange)
+		ts.Values[idx] = float64(count)
+	})
+	return nan
+}
+
 func rollupAvg(rfa *rollupFuncArg) float64 {
 	// Do not use `Rapid calculation methods` at https://en.wikipedia.org/wiki/Standard_deviation,
 	// since it is slower and has no significant benefits in precision.
@@ -561,7 +948,10 @@ func rollupAvg(rfa *rollupFuncArg) float64 {
 	// before calling rollup funcs.
 	values := rfa.values
 	if len(values) == 0 {
-		return rfa.prevValue
+		// Do not take into account rfa.prevValue, since it may lead
+		// to inconsistent results compared to Prometheus on broken time series
+		// with irregular data points.
+		return nan
 	}
 	var sum float64
 	for _, v := range values {
@@ -573,14 +963,14 @@ func rollupAvg(rfa *rollupFuncArg) float64 {
 func rollupMin(rfa *rollupFuncArg) float64 {
 	// There is no need in handling NaNs here, since they must be cleaned up
 	// before calling rollup funcs.
-	minValue := rfa.prevValue
 	values := rfa.values
-	if math.IsNaN(minValue) {
-		if len(values) == 0 {
-			return nan
-		}
-		minValue = values[0]
+	if len(values) == 0 {
+		// Do not take into account rfa.prevValue, since it may lead
+		// to inconsistent results compared to Prometheus on broken time series
+		// with irregular data points.
+		return nan
 	}
+	minValue := values[0]
 	for _, v := range values {
 		if v < minValue {
 			minValue = v
@@ -592,14 +982,14 @@ func rollupMin(rfa *rollupFuncArg) float64 {
 func rollupMax(rfa *rollupFuncArg) float64 {
 	// There is no need in handling NaNs here, since they must be cleaned up
 	// before calling rollup funcs.
-	maxValue := rfa.prevValue
 	values := rfa.values
-	if math.IsNaN(maxValue) {
-		if len(values) == 0 {
-			return nan
-		}
-		maxValue = values[0]
+	if len(values) == 0 {
+		// Do not take into account rfa.prevValue, since it may lead
+		// to inconsistent results compared to Prometheus on broken time series
+		// with irregular data points.
+		return nan
 	}
+	maxValue := values[0]
 	for _, v := range values {
 		if v > maxValue {
 			maxValue = v
@@ -608,6 +998,44 @@ func rollupMax(rfa *rollupFuncArg) float64 {
 	return maxValue
 }

+func rollupTmin(rfa *rollupFuncArg) float64 {
+	// There is no need in handling NaNs here, since they must be cleaned up
+	// before calling rollup funcs.
+	values := rfa.values
+	timestamps := rfa.timestamps
+	if len(values) == 0 {
+		return nan
+	}
+	minValue := values[0]
+	minTimestamp := timestamps[0]
+	for i, v := range values {
+		if v < minValue {
+			minValue = v
+			minTimestamp = timestamps[i]
+		}
+	}
+	return float64(minTimestamp) * 1e-3
+}
+
+func rollupTmax(rfa *rollupFuncArg) float64 {
+	// There is no need in handling NaNs here, since they must be cleaned up
+	// before calling rollup funcs.
+	values := rfa.values
+	timestamps := rfa.timestamps
+	if len(values) == 0 {
+		return nan
+	}
+	maxValue := values[0]
+	maxTimestamp := timestamps[0]
+	for i, v := range values {
+		if v > maxValue {
+			maxValue = v
+			maxTimestamp = timestamps[i]
+		}
+	}
+	return float64(maxTimestamp) * 1e-3
+}
+
 func rollupSum(rfa *rollupFuncArg) float64 {
 	// There is no need in handling NaNs here, since they must be cleaned up
 	// before calling rollup funcs.
@@ -625,6 +1053,12 @@ func rollupSum(rfa *rollupFuncArg) float64 {
 	return sum
 }

+func rollupRange(rfa *rollupFuncArg) float64 {
+	max := rollupMax(rfa)
+	min := rollupMin(rfa)
+	return max - min
+}
+
 func rollupSum2(rfa *rollupFuncArg) float64 {
 	// There is no need in handling NaNs here, since they must be cleaned up
 	// before calling rollup funcs.
@@ -653,6 +1087,13 @@ func rollupGeomean(rfa *rollupFuncArg) float64 {
 	return math.Pow(p, 1/float64(len(values)))
 }

+func rollupAbsent(rfa *rollupFuncArg) float64 {
+	if len(rfa.values) == 0 {
+		return 1
+	}
+	return nan
+}
+
 func rollupCount(rfa *rollupFuncArg) float64 {
 	// There is no need in handling NaNs here, since they must be cleaned up
 	// before calling rollup funcs.
@@ -716,16 +1157,19 @@ func rollupDeltaInternal(rfa *rollupFuncArg, canUseRealPrevValue bool) float64 {
 		if len(values) == 0 {
 			return nan
 		}
-		if len(values) == 1 {
+		// Assume that the previous non-existing value was 0
+		// only if the first value is quite small.
+		// This should prevent improper increase() results for os-level counters
+		// such as cpu time or bytes sent over the network interface.
+		// These counters may start long ago before the first value appears in the db.
+		if values[0] < 1e6 {
+			prevValue = 0
 			if canUseRealPrevValue && !math.IsNaN(rfa.realPrevValue) {
-				// Fix against removeCounterResets.
-				return values[0] - rfa.realPrevValue
+				prevValue = rfa.realPrevValue
 			}
-			// Assume that the previous non-existing value was 0.
-			return values[0]
+		} else {
+			prevValue = values[0]
 		}
-		prevValue = values[0]
-		values = values[1:]
 	}
 	if len(values) == 0 {
 		// Assume that the value didn't change on the given interval.
@@ -773,16 +1217,25 @@ func rollupDerivFast(rfa *rollupFuncArg) float64 {
 	prevValue := rfa.prevValue
 	prevTimestamp := rfa.prevTimestamp
 	if math.IsNaN(prevValue) {
-		if len(values) < 2 {
-			// It is impossible to calculate derivative on 0 or 1 values.
+		if len(values) == 0 {
+			return nan
+		}
+		if len(values) == 1 {
+			// It is impossible to determine the duration during which the value changed
+			// from 0 to the current value.
+			// The following attempts didn't work well:
+			// - using scrape interval as the duration. It fails on Prometheus restarts when it
+			//   skips scraping for the counter. This results in too high rate() value for the first point
+			//   after Prometheus restarts.
+			// - using window or step as the duration. It results in too small rate() values for the first
+			//   points of time series.
+			//
+			// So just return nan
 			return nan
 		}
 		prevValue = values[0]
 		prevTimestamp = timestamps[0]
-		values = values[1:]
-		timestamps = timestamps[1:]
-	}
-	if len(values) == 0 {
+	} else if len(values) == 0 {
 		// Assume that the value didn't change on the given interval.
 		return 0
 	}
@@ -799,8 +1252,20 @@ func rollupIderiv(rfa *rollupFuncArg) float64 {
 	values := rfa.values
 	timestamps := rfa.timestamps
 	if len(values) < 2 {
-		if len(values) == 0 || math.IsNaN(rfa.prevValue) {
-			// It is impossible to calculate derivative on 0 or 1 values.
+		if len(values) == 0 {
+			return nan
+		}
+		if math.IsNaN(rfa.prevValue) {
+			// It is impossible to determine the duration during which the value changed
+			// from 0 to the current value.
+			// The following attempts didn't work well:
+			// - using scrape interval as the duration. It fails on Prometheus restarts when it
+			//   skips scraping for the counter. This results in too high rate() value for the first point
+			//   after Prometheus restarts.
+			// - using window or step as the duration. It results in too small rate() values for the first
+			//   points of time series.
+			//
+			// So just return nan
 			return nan
 		}
 		return (values[0] - rfa.prevValue) / (float64(timestamps[0]-rfa.prevTimestamp) * 1e-3)
@@ -955,16 +1420,13 @@ func rollupResets(rfa *rollupFuncArg) float64 {
 }

 func rollupFirst(rfa *rollupFuncArg) float64 {
-	// See https://prometheus.io/docs/prometheus/latest/querying/basics/#staleness
-	v := rfa.prevValue
-	if !math.IsNaN(v) {
-		return v
-	}
-
 	// There is no need in handling NaNs here, since they must be cleaned up
 	// before calling rollup funcs.
 	values := rfa.values
 	if len(values) == 0 {
+		// Do not take into account rfa.prevValue, since it may lead
+		// to inconsistent results compared to Prometheus on broken time series
+		// with irregular data points.
 		return nan
 	}
 	return values[0]
@@ -977,7 +1439,10 @@ func rollupLast(rfa *rollupFuncArg) float64 {
 	// before calling rollup funcs.
 	values := rfa.values
 	if len(values) == 0 {
-		return rfa.prevValue
+		// Do not take into account rfa.prevValue, since it may lead
+		// to inconsistent results compared to Prometheus on broken time series
+		// with irregular data points.
+		return nan
 	}
 	return values[len(values)-1]
 }
--- a/app/vmselect/promql/rollup_result_cache.go
+++ b/app/vmselect/promql/rollup_result_cache.go
@@ -12,6 +12,7 @@ import (
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/memory"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/metricsql"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/workingsetcache"
 	"github.com/VictoriaMetrics/fastcache"
 	"github.com/VictoriaMetrics/metrics"
@@ -73,8 +74,8 @@ func InitRollupResultCache(cachePath string) {
 		return stats
 	}
 	if len(rollupResultCachePath) > 0 {
-		logger.Infof("loaded rollupResult cache from %q in %s; entriesCount: %d, sizeBytes: %d",
-			rollupResultCachePath, time.Since(startTime), fcs().EntriesCount, fcs().BytesSize)
+		logger.Infof("loaded rollupResult cache from %q in %.3f seconds; entriesCount: %d, sizeBytes: %d",
+			rollupResultCachePath, time.Since(startTime).Seconds(), fcs().EntriesCount, fcs().BytesSize)
 	}

 	metrics.NewGauge(`vm_cache_entries{type="promql/rollupResult"}`, func() float64 {
@@ -112,8 +113,8 @@ func StopRollupResultCache() {
 	rollupResultCacheV.c.UpdateStats(&fcs)
 	rollupResultCacheV.c.Stop()
 	rollupResultCacheV.c = nil
-	logger.Infof("saved rollupResult cache to %q in %s; entriesCount: %d, sizeBytes: %d",
-		rollupResultCachePath, time.Since(startTime), fcs.EntriesCount, fcs.BytesSize)
+	logger.Infof("saved rollupResult cache to %q in %.3f seconds; entriesCount: %d, sizeBytes: %d",
+		rollupResultCachePath, time.Since(startTime).Seconds(), fcs.EntriesCount, fcs.BytesSize)
 }

 type rollupResultCache struct {
@@ -128,7 +129,7 @@ func ResetRollupResultCache() {
 	rollupResultCacheV.c.Reset()
 }

-func (rrc *rollupResultCache) Get(funcName string, ec *EvalConfig, me *metricExpr, iafc *incrementalAggrFuncContext, window int64) (tss []*timeseries, newStart int64) {
+func (rrc *rollupResultCache) Get(ec *EvalConfig, expr metricsql.Expr, window int64) (tss []*timeseries, newStart int64) {
 	if *disableCache || !ec.mayCache() {
 		return nil, ec.Start
 	}
@@ -137,7 +138,7 @@ func (rrc *rollupResultCache) Get(funcName string, ec *EvalConfig, me *metricExp
 	bb := bbPool.Get()
 	defer bbPool.Put(bb)

-	bb.B = marshalRollupResultCacheKey(bb.B[:0], funcName, me, iafc, window, ec.Step)
+	bb.B = marshalRollupResultCacheKey(bb.B[:0], expr, window, ec.Step)
 	metainfoBuf := rrc.c.Get(nil, bb.B)
 	if len(metainfoBuf) == 0 {
 		return nil, ec.Start
@@ -157,7 +158,7 @@ func (rrc *rollupResultCache) Get(funcName string, ec *EvalConfig, me *metricExp
 	if len(compressedResultBuf.B) == 0 {
 		mi.RemoveKey(key)
 		metainfoBuf = mi.Marshal(metainfoBuf[:0])
-		bb.B = marshalRollupResultCacheKey(bb.B[:0], funcName, me, iafc, window, ec.Step)
+		bb.B = marshalRollupResultCacheKey(bb.B[:0], expr, window, ec.Step)
 		rrc.c.Set(bb.B, metainfoBuf)
 		return nil, ec.Start
 	}
@@ -209,7 +210,7 @@ func (rrc *rollupResultCache) Get(funcName string, ec *EvalConfig, me *metricExp

 var resultBufPool bytesutil.ByteBufferPool

-func (rrc *rollupResultCache) Put(funcName string, ec *EvalConfig, me *metricExpr, iafc *incrementalAggrFuncContext, window int64, tss []*timeseries) {
+func (rrc *rollupResultCache) Put(ec *EvalConfig, expr metricsql.Expr, window int64, tss []*timeseries) {
 	if *disableCache || len(tss) == 0 || !ec.mayCache() {
 		return
 	}
@@ -260,7 +261,7 @@ func (rrc *rollupResultCache) Put(funcName string, ec *EvalConfig, me *metricExp
 	bb.B = key.Marshal(bb.B[:0])
 	rrc.c.SetBig(bb.B, compressedResultBuf.B)

-	bb.B = marshalRollupResultCacheKey(bb.B[:0], funcName, me, iafc, window, ec.Step)
+	bb.B = marshalRollupResultCacheKey(bb.B[:0], expr, window, ec.Step)
 	metainfoBuf := rrc.c.Get(nil, bb.B)
 	var mi rollupResultCacheMetainfo
 	if len(metainfoBuf) > 0 {
@@ -288,23 +289,13 @@ var (
 var tooBigRollupResults = metrics.NewCounter("vm_too_big_rollup_results_total")

 // Increment this value every time the format of the cache changes.
-const rollupResultCacheVersion = 6
+const rollupResultCacheVersion = 7

-func marshalRollupResultCacheKey(dst []byte, funcName string, me *metricExpr, iafc *incrementalAggrFuncContext, window, step int64) []byte {
+func marshalRollupResultCacheKey(dst []byte, expr metricsql.Expr, window, step int64) []byte {
 	dst = append(dst, rollupResultCacheVersion)
-	if iafc == nil {
-		dst = append(dst, 0)
-	} else {
-		dst = append(dst, 1)
-		dst = iafc.ae.AppendString(dst)
-	}
-	dst = encoding.MarshalUint64(dst, uint64(len(funcName)))
-	dst = append(dst, funcName...)
 	dst = encoding.MarshalInt64(dst, window)
 	dst = encoding.MarshalInt64(dst, step)
-	for i := range me.TagFilters {
-		dst = me.TagFilters[i].Marshal(dst)
-	}
+	dst = expr.AppendString(dst)
 	return dst
 }

--- a/app/vmselect/promql/rollup_result_cache_test.go
+++ b/app/vmselect/promql/rollup_result_cache_test.go
@@ -3,12 +3,12 @@ package promql
 import (
 	"testing"

+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/metricsql"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
 )

 func TestRollupResultCache(t *testing.T) {
 	ResetRollupResultCache()
-	funcName := "foo"
 	window := int64(456)
 	ec := &EvalConfig{
 		Start: 1000,
@@ -17,21 +17,24 @@ func TestRollupResultCache(t *testing.T) {

 		MayCache: true,
 	}
-	me := &metricExpr{
-		TagFilters: []storage.TagFilter{{
-			Key:   []byte("aaa"),
-			Value: []byte("xxx"),
+	me := &metricsql.MetricExpr{
+		LabelFilters: []metricsql.LabelFilter{{
+			Label: "aaa",
+			Value: "xxx",
 		}},
 	}
-	iafc := &incrementalAggrFuncContext{
-		ae: &aggrFuncExpr{
-			Name: "foobar",
-		},
+	fe := &metricsql.FuncExpr{
+		Name: "foo",
+		Args: []metricsql.Expr{me},
+	}
+	ae := &metricsql.AggrFuncExpr{
+		Name: "foobar",
+		Args: []metricsql.Expr{fe},
 	}

 	// Try obtaining an empty value.
 	t.Run("empty", func(t *testing.T) {
-		tss, newStart := rollupResultCacheV.Get(funcName, ec, me, nil, window)
+		tss, newStart := rollupResultCacheV.Get(ec, fe, window)
 		if newStart != ec.Start {
 			t.Fatalf("unexpected newStart; got %d; want %d", newStart, ec.Start)
 		}
@@ -41,7 +44,7 @@ func TestRollupResultCache(t *testing.T) {
 	})

 	// Store timeseries overlapping with start
-	t.Run("start-overlap-no-iafc", func(t *testing.T) {
+	t.Run("start-overlap-no-ae", func(t *testing.T) {
 		ResetRollupResultCache()
 		tss := []*timeseries{
 			{
@@ -49,8 +52,8 @@ func TestRollupResultCache(t *testing.T) {
 				Values:     []float64{0, 1, 2},
 			},
 		}
-		rollupResultCacheV.Put(funcName, ec, me, nil, window, tss)
-		tss, newStart := rollupResultCacheV.Get(funcName, ec, me, nil, window)
+		rollupResultCacheV.Put(ec, fe, window, tss)
+		tss, newStart := rollupResultCacheV.Get(ec, fe, window)
 		if newStart != 1400 {
 			t.Fatalf("unexpected newStart; got %d; want %d", newStart, 1400)
 		}
@@ -62,7 +65,7 @@ func TestRollupResultCache(t *testing.T) {
 		}
 		testTimeseriesEqual(t, tss, tssExpected)
 	})
-	t.Run("start-overlap-with-iafc", func(t *testing.T) {
+	t.Run("start-overlap-with-ae", func(t *testing.T) {
 		ResetRollupResultCache()
 		tss := []*timeseries{
 			{
@@ -70,8 +73,8 @@ func TestRollupResultCache(t *testing.T) {
 				Values:     []float64{0, 1, 2},
 			},
 		}
-		rollupResultCacheV.Put(funcName, ec, me, iafc, window, tss)
-		tss, newStart := rollupResultCacheV.Get(funcName, ec, me, iafc, window)
+		rollupResultCacheV.Put(ec, ae, window, tss)
+		tss, newStart := rollupResultCacheV.Get(ec, ae, window)
 		if newStart != 1400 {
 			t.Fatalf("unexpected newStart; got %d; want %d", newStart, 1400)
 		}
@@ -93,8 +96,8 @@ func TestRollupResultCache(t *testing.T) {
 				Values:     []float64{333, 0, 1, 2},
 			},
 		}
-		rollupResultCacheV.Put(funcName, ec, me, nil, window, tss)
-		tss, newStart := rollupResultCacheV.Get(funcName, ec, me, nil, window)
+		rollupResultCacheV.Put(ec, fe, window, tss)
+		tss, newStart := rollupResultCacheV.Get(ec, fe, window)
 		if newStart != 1000 {
 			t.Fatalf("unexpected newStart; got %d; want %d", newStart, 1000)
 		}
@@ -112,8 +115,8 @@ func TestRollupResultCache(t *testing.T) {
 				Values:     []float64{0, 1, 2},
 			},
 		}
-		rollupResultCacheV.Put(funcName, ec, me, nil, window, tss)
-		tss, newStart := rollupResultCacheV.Get(funcName, ec, me, nil, window)
+		rollupResultCacheV.Put(ec, fe, window, tss)
+		tss, newStart := rollupResultCacheV.Get(ec, fe, window)
 		if newStart != 1000 {
 			t.Fatalf("unexpected newStart; got %d; want %d", newStart, 1000)
 		}
@@ -131,8 +134,8 @@ func TestRollupResultCache(t *testing.T) {
 				Values:     []float64{0, 1, 2},
 			},
 		}
-		rollupResultCacheV.Put(funcName, ec, me, nil, window, tss)
-		tss, newStart := rollupResultCacheV.Get(funcName, ec, me, nil, window)
+		rollupResultCacheV.Put(ec, fe, window, tss)
+		tss, newStart := rollupResultCacheV.Get(ec, fe, window)
 		if newStart != 1000 {
 			t.Fatalf("unexpected newStart; got %d; want %d", newStart, 1000)
 		}
@@ -150,8 +153,8 @@ func TestRollupResultCache(t *testing.T) {
 				Values:     []float64{0, 1, 2},
 			},
 		}
-		rollupResultCacheV.Put(funcName, ec, me, nil, window, tss)
-		tss, newStart := rollupResultCacheV.Get(funcName, ec, me, nil, window)
+		rollupResultCacheV.Put(ec, fe, window, tss)
+		tss, newStart := rollupResultCacheV.Get(ec, fe, window)
 		if newStart != 1000 {
 			t.Fatalf("unexpected newStart; got %d; want %d", newStart, 1000)
 		}
@@ -169,8 +172,8 @@ func TestRollupResultCache(t *testing.T) {
 				Values:     []float64{0, 1, 2, 3, 4, 5, 6, 7},
 			},
 		}
-		rollupResultCacheV.Put(funcName, ec, me, nil, window, tss)
-		tss, newStart := rollupResultCacheV.Get(funcName, ec, me, nil, window)
+		rollupResultCacheV.Put(ec, fe, window, tss)
+		tss, newStart := rollupResultCacheV.Get(ec, fe, window)
 		if newStart != 2200 {
 			t.Fatalf("unexpected newStart; got %d; want %d", newStart, 2200)
 		}
@@ -192,8 +195,8 @@ func TestRollupResultCache(t *testing.T) {
 				Values:     []float64{1, 2, 3, 4, 5, 6},
 			},
 		}
-		rollupResultCacheV.Put(funcName, ec, me, nil, window, tss)
-		tss, newStart := rollupResultCacheV.Get(funcName, ec, me, nil, window)
+		rollupResultCacheV.Put(ec, fe, window, tss)
+		tss, newStart := rollupResultCacheV.Get(ec, fe, window)
 		if newStart != 2200 {
 			t.Fatalf("unexpected newStart; got %d; want %d", newStart, 2200)
 		}
@@ -217,8 +220,8 @@ func TestRollupResultCache(t *testing.T) {
 			}
 			tss = append(tss, ts)
 		}
-		rollupResultCacheV.Put(funcName, ec, me, nil, window, tss)
-		tssResult, newStart := rollupResultCacheV.Get(funcName, ec, me, nil, window)
+		rollupResultCacheV.Put(ec, fe, window, tss)
+		tssResult, newStart := rollupResultCacheV.Get(ec, fe, window)
 		if newStart != 2200 {
 			t.Fatalf("unexpected newStart; got %d; want %d", newStart, 2200)
 		}
@@ -246,10 +249,10 @@ func TestRollupResultCache(t *testing.T) {
 				Values:     []float64{0, 1, 2},
 			},
 		}
-		rollupResultCacheV.Put(funcName, ec, me, nil, window, tss1)
-		rollupResultCacheV.Put(funcName, ec, me, nil, window, tss2)
-		rollupResultCacheV.Put(funcName, ec, me, nil, window, tss3)
-		tss, newStart := rollupResultCacheV.Get(funcName, ec, me, nil, window)
+		rollupResultCacheV.Put(ec, fe, window, tss1)
+		rollupResultCacheV.Put(ec, fe, window, tss2)
+		rollupResultCacheV.Put(ec, fe, window, tss3)
+		tss, newStart := rollupResultCacheV.Get(ec, fe, window)
 		if newStart != 1400 {
 			t.Fatalf("unexpected newStart; got %d; want %d", newStart, 1400)
 		}
--- a/app/vmselect/promql/rollup_test.go
+++ b/app/vmselect/promql/rollup_test.go
@@ -3,6 +3,8 @@ package promql
 import (
 	"math"
 	"testing"
+
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/metricsql"
 )

 var (
@@ -57,7 +59,7 @@ func TestRollupIderivDuplicateTimestamps(t *testing.T) {
 	}
 	n = rollupIderiv(rfa)
 	if n != 500 {
-		t.Fatalf("unexpected value; got %v; want %v", n, 0.5)
+		t.Fatalf("unexpected value; got %v; want %v", n, 500)
 	}

 	rfa = &rollupFuncArg{
@@ -157,7 +159,7 @@ func TestDerivValues(t *testing.T) {
 	testRowsEqual(t, values, timestamps, valuesExpected, timestamps)
 }

-func testRollupFunc(t *testing.T, funcName string, args []interface{}, meExpected *metricExpr, vExpected float64) {
+func testRollupFunc(t *testing.T, funcName string, args []interface{}, meExpected *metricsql.MetricExpr, vExpected float64) {
 	t.Helper()
 	nrf := getRollupFunc(funcName)
 	if nrf == nil {
@@ -190,6 +192,52 @@ func testRollupFunc(t *testing.T, funcName string, args []interface{}, meExpecte
 	}
 }

+func TestRollupShareLEOverTime(t *testing.T) {
+	f := func(le, vExpected float64) {
+		t.Helper()
+		les := []*timeseries{{
+			Values:     []float64{le},
+			Timestamps: []int64{123},
+		}}
+		var me metricsql.MetricExpr
+		args := []interface{}{&metricsql.RollupExpr{Expr: &me}, les}
+		testRollupFunc(t, "share_le_over_time", args, &me, vExpected)
+	}
+
+	f(-123, 0)
+	f(0, 0)
+	f(10, 0)
+	f(12, 0.08333333333333333)
+	f(30, 0.16666666666666666)
+	f(50, 0.75)
+	f(100, 0.9166666666666666)
+	f(123, 1)
+	f(1000, 1)
+}
+
+func TestRollupShareGTOverTime(t *testing.T) {
+	f := func(gt, vExpected float64) {
+		t.Helper()
+		gts := []*timeseries{{
+			Values:     []float64{gt},
+			Timestamps: []int64{123},
+		}}
+		var me metricsql.MetricExpr
+		args := []interface{}{&metricsql.RollupExpr{Expr: &me}, gts}
+		testRollupFunc(t, "share_gt_over_time", args, &me, vExpected)
+	}
+
+	f(-123, 1)
+	f(0, 1)
+	f(10, 1)
+	f(12, 0.9166666666666666)
+	f(30, 0.8333333333333334)
+	f(50, 0.25)
+	f(100, 0.08333333333333333)
+	f(123, 0)
+	f(1000, 0)
+}
+
 func TestRollupQuantileOverTime(t *testing.T) {
 	f := func(phi, vExpected float64) {
 		t.Helper()
@@ -197,8 +245,8 @@ func TestRollupQuantileOverTime(t *testing.T) {
 			Values:     []float64{phi},
 			Timestamps: []int64{123},
 		}}
-		var me metricExpr
-		args := []interface{}{phis, &rollupExpr{Expr: &me}}
+		var me metricsql.MetricExpr
+		args := []interface{}{phis, &metricsql.RollupExpr{Expr: &me}}
 		testRollupFunc(t, "quantile_over_time", args, &me, vExpected)
 	}

@@ -219,8 +267,8 @@ func TestRollupPredictLinear(t *testing.T) {
 			Values:     []float64{sec},
 			Timestamps: []int64{123},
 		}}
-		var me metricExpr
-		args := []interface{}{&rollupExpr{Expr: &me}, secs}
+		var me metricsql.MetricExpr
+		args := []interface{}{&metricsql.RollupExpr{Expr: &me}, secs}
 		testRollupFunc(t, "predict_linear", args, &me, vExpected)
 	}

@@ -241,8 +289,8 @@ func TestRollupHoltWinters(t *testing.T) {
 			Values:     []float64{tf},
 			Timestamps: []int64{123},
 		}}
-		var me metricExpr
-		args := []interface{}{&rollupExpr{Expr: &me}, sfs, tfs}
+		var me metricsql.MetricExpr
+		args := []interface{}{&metricsql.RollupExpr{Expr: &me}, sfs, tfs}
 		testRollupFunc(t, "holt_winters", args, &me, vExpected)
 	}

@@ -262,27 +310,72 @@ func TestRollupHoltWinters(t *testing.T) {
 	f(0.9, 0.9, 33.99637566941818)
 }

+func TestRollupHoeffdingBoundLower(t *testing.T) {
+	f := func(phi, vExpected float64) {
+		t.Helper()
+		phis := []*timeseries{{
+			Values:     []float64{phi},
+			Timestamps: []int64{123},
+		}}
+		var me metricsql.MetricExpr
+		args := []interface{}{phis, &metricsql.RollupExpr{Expr: &me}}
+		testRollupFunc(t, "hoeffding_bound_lower", args, &me, vExpected)
+	}
+
+	f(0.5, 28.21949401521037)
+	f(-1, 47.083333333333336)
+	f(0, 47.083333333333336)
+	f(1, -inf)
+	f(2, -inf)
+	f(0.1, 39.72878000047643)
+	f(0.9, 12.701803086472331)
+}
+
+func TestRollupHoeffdingBoundUpper(t *testing.T) {
+	f := func(phi, vExpected float64) {
+		t.Helper()
+		phis := []*timeseries{{
+			Values:     []float64{phi},
+			Timestamps: []int64{123},
+		}}
+		var me metricsql.MetricExpr
+		args := []interface{}{phis, &metricsql.RollupExpr{Expr: &me}}
+		testRollupFunc(t, "hoeffding_bound_upper", args, &me, vExpected)
+	}
+
+	f(0.5, 65.9471726514563)
+	f(-1, 47.083333333333336)
+	f(0, 47.083333333333336)
+	f(1, inf)
+	f(2, inf)
+	f(0.1, 54.43788666619024)
+	f(0.9, 81.46486358019433)
+}
+
 func TestRollupNewRollupFuncSuccess(t *testing.T) {
 	f := func(funcName string, vExpected float64) {
 		t.Helper()
-		var me metricExpr
-		args := []interface{}{&rollupExpr{Expr: &me}}
+		var me metricsql.MetricExpr
+		args := []interface{}{&metricsql.RollupExpr{Expr: &me}}
 		testRollupFunc(t, funcName, args, &me, vExpected)
 	}

 	f("default_rollup", 34)
 	f("changes", 11)
-	f("delta", -89)
+	f("delta", 34)
 	f("deriv", -266.85860231406065)
 	f("deriv_fast", -712)
 	f("idelta", 0)
-	f("increase", 275)
+	f("increase", 398)
 	f("irate", 0)
 	f("rate", 2200)
 	f("resets", 5)
+	f("range_over_time", 111)
 	f("avg_over_time", 47.083333333333336)
 	f("min_over_time", 12)
 	f("max_over_time", 123)
+	f("tmin_over_time", 0.08)
+	f("tmax_over_time", 0.005)
 	f("sum_over_time", 565)
 	f("sum2_over_time", 37951)
 	f("geomean_over_time", 39.33466603189148)
@@ -327,7 +420,7 @@ func TestRollupNewRollupFuncError(t *testing.T) {
 		Values:     []float64{321},
 		Timestamps: []int64{123},
 	}}
-	me := &metricExpr{}
+	me := &metricsql.MetricExpr{}
 	f("holt_winters", []interface{}{123, 123, 321})
 	f("holt_winters", []interface{}{me, 123, 321})
 	f("holt_winters", []interface{}{me, scalarTs, 321})
@@ -409,7 +502,7 @@ func TestRollupNoWindowPartialPoints(t *testing.T) {
 		}
 		rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
 		values := rc.Do(nil, testValues, testTimestamps)
-		valuesExpected := []float64{nan, 123, 123, 123, 34, 34}
+		valuesExpected := []float64{nan, 123, nan, 34, nan, 44}
 		timestampsExpected := []int64{0, 5, 10, 15, 20, 25}
 		testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
 	})
@@ -423,7 +516,7 @@ func TestRollupNoWindowPartialPoints(t *testing.T) {
 		}
 		rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
 		values := rc.Do(nil, testValues, testTimestamps)
-		valuesExpected := []float64{12, 44, 34, nan}
+		valuesExpected := []float64{44, 32, 34, nan}
 		timestampsExpected := []int64{100, 120, 140, 160}
 		testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
 	})
@@ -437,7 +530,7 @@ func TestRollupNoWindowPartialPoints(t *testing.T) {
 		}
 		rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
 		values := rc.Do(nil, testValues, testTimestamps)
-		valuesExpected := []float64{nan, nan, 123, 54, 44}
+		valuesExpected := []float64{nan, nan, 123, 34, 32}
 		timestampsExpected := []int64{-50, 0, 50, 100, 150}
 		testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
 	})
@@ -499,7 +592,7 @@ func TestRollupFuncsLookbackDelta(t *testing.T) {
 		}
 		rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
 		values := rc.Do(nil, testValues, testTimestamps)
-		valuesExpected := []float64{99, 12, 44, nan, 32, 34, nan}
+		valuesExpected := []float64{99, nan, 44, nan, 32, 34, nan}
 		timestampsExpected := []int64{80, 90, 100, 110, 120, 130, 140}
 		testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
 	})
@@ -513,7 +606,7 @@ func TestRollupFuncsLookbackDelta(t *testing.T) {
 		}
 		rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
 		values := rc.Do(nil, testValues, testTimestamps)
-		valuesExpected := []float64{99, 12, 44, 44, 32, 34, nan}
+		valuesExpected := []float64{99, nan, 44, nan, 32, 34, nan}
 		timestampsExpected := []int64{80, 90, 100, 110, 120, 130, 140}
 		testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
 	})
@@ -527,7 +620,7 @@ func TestRollupFuncsLookbackDelta(t *testing.T) {
 		}
 		rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
 		values := rc.Do(nil, testValues, testTimestamps)
-		valuesExpected := []float64{34, 12, 12, 44, 44, 34, nan}
+		valuesExpected := []float64{99, nan, 44, nan, 32, 34, nan}
 		timestampsExpected := []int64{80, 90, 100, 110, 120, 130, 140}
 		testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
 	})
@@ -544,7 +637,7 @@ func TestRollupFuncsNoWindow(t *testing.T) {
 		}
 		rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
 		values := rc.Do(nil, testValues, testTimestamps)
-		valuesExpected := []float64{nan, 123, 21, 12, 34}
+		valuesExpected := []float64{nan, 123, 54, 44, 34}
 		timestampsExpected := []int64{0, 40, 80, 120, 160}
 		testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
 	})
@@ -572,7 +665,7 @@ func TestRollupFuncsNoWindow(t *testing.T) {
 		}
 		rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
 		values := rc.Do(nil, testValues, testTimestamps)
-		valuesExpected := []float64{nan, 21, 12, 12, 34}
+		valuesExpected := []float64{nan, 21, 12, 32, 34}
 		timestampsExpected := []int64{0, 40, 80, 120, 160}
 		testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
 	})
@@ -614,7 +707,7 @@ func TestRollupFuncsNoWindow(t *testing.T) {
 		}
 		rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
 		values := rc.Do(nil, testValues, testTimestamps)
-		valuesExpected := []float64{nan, -102, -9, 22, 0}
+		valuesExpected := []float64{nan, nan, -9, 22, 0}
 		timestampsExpected := []int64{0, 40, 80, 120, 160}
 		testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
 	})
@@ -772,6 +865,20 @@ func TestRollupFuncsNoWindow(t *testing.T) {
 		timestampsExpected := []int64{0, 40, 80, 120, 160}
 		testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
 	})
+	t.Run("deriv_fast", func(t *testing.T) {
+		rc := rollupConfig{
+			Func:   rollupDerivFast,
+			Start:  0,
+			End:    20,
+			Step:   4,
+			Window: 0,
+		}
+		rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
+		values := rc.Do(nil, testValues, testTimestamps)
+		valuesExpected := []float64{nan, nan, nan, 0, -8900, 0}
+		timestampsExpected := []int64{0, 4, 8, 12, 16, 20}
+		testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
+	})
 	t.Run("ideriv", func(t *testing.T) {
 		rc := rollupConfig{
 			Func:   rollupIderiv,
--- a/app/vmselect/promql/transform.go
+++ b/app/vmselect/promql/transform.go
@@ -12,6 +12,7 @@ import (

 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/decimal"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/metricsql"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
 	"github.com/valyala/histogram"
 )
@@ -64,6 +65,8 @@ var transformFuncs = map[string]transformFunc{
 	"label_move":         transformLabelMove,
 	"label_transform":    transformLabelTransform,
 	"label_value":        transformLabelValue,
+	"label_match":        transformLabelMatch,
+	"label_mismatch":     transformLabelMismatch,
 	"union":              transformUnion,
 	"":                   transformUnion, // empty func is a synonim to union
 	"keep_last_value":    transformKeepLastValue,
@@ -92,6 +95,7 @@ var transformFuncs = map[string]transformFunc{
 	"asin":               newTransformFuncOneArg(transformAsin),
 	"acos":               newTransformFuncOneArg(transformAcos),
 	"prometheus_buckets": transformPrometheusBuckets,
+	"histogram_share":    transformHistogramShare,
 }

 func getTransformFunc(s string) transformFunc {
@@ -99,13 +103,9 @@ func getTransformFunc(s string) transformFunc {
 	return transformFuncs[s]
 }

-func isTransformFunc(s string) bool {
-	return getTransformFunc(s) != nil
-}
-
 type transformFuncArg struct {
 	ec   *EvalConfig
-	fe   *funcExpr
+	fe   *metricsql.FuncExpr
 	args [][]*timeseries
 }

@@ -126,7 +126,7 @@ func newTransformFuncOneArg(tf func(v float64) float64) transformFunc {
 	}
 }

-func doTransformValues(arg []*timeseries, tf func(values []float64), fe *funcExpr) ([]*timeseries, error) {
+func doTransformValues(arg []*timeseries, tf func(values []float64), fe *metricsql.FuncExpr) ([]*timeseries, error) {
 	name := strings.ToLower(fe.Name)
 	keepMetricGroup := transformFuncsKeepMetricGroup[name]
 	for _, ts := range arg {
@@ -149,28 +149,10 @@ func transformAbsent(tfa *transformFuncArg) ([]*timeseries, error) {
 		return nil, err
 	}
 	arg := args[0]
-
 	if len(arg) == 0 {
-		// Copy tags from arg
-		rvs := evalNumber(tfa.ec, 1)
-		rv := rvs[0]
-		me, ok := tfa.fe.Args[0].(*metricExpr)
-		if !ok {
-			return rvs, nil
-		}
-		for i := range me.TagFilters {
-			tf := &me.TagFilters[i]
-			if len(tf.Key) == 0 {
-				continue
-			}
-			if tf.IsRegexp || tf.IsNegative {
-				continue
-			}
-			rv.MetricName.AddTagBytes(tf.Key, tf.Value)
-		}
+		rvs := getAbsentTimeseries(tfa.ec, tfa.fe.Args[0])
 		return rvs, nil
 	}
-
 	for _, ts := range arg {
 		ts.MetricName.ResetMetricGroup()
 		for i, v := range ts.Values {
@@ -185,6 +167,28 @@ func transformAbsent(tfa *transformFuncArg) ([]*timeseries, error) {
 	return arg, nil
 }

+func getAbsentTimeseries(ec *EvalConfig, arg metricsql.Expr) []*timeseries {
+	// Copy tags from arg
+	rvs := evalNumber(ec, 1)
+	rv := rvs[0]
+	me, ok := arg.(*metricsql.MetricExpr)
+	if !ok {
+		return rvs
+	}
+	tfs := toTagFilters(me.LabelFilters)
+	for i := range tfs {
+		tf := &tfs[i]
+		if len(tf.Key) == 0 {
+			continue
+		}
+		if tf.IsRegexp || tf.IsNegative {
+			continue
+		}
+		rv.MetricName.AddTagBytes(tf.Key, tf.Value)
+	}
+	return rvs
+}
+
 func transformCeil(v float64) float64 {
 	return math.Ceil(v)
 }
@@ -359,6 +363,7 @@ func vmrangeBucketsToLE(tss []*timeseries) []*timeseries {
 			ts := xs.ts
 			if isZeroTS(ts) {
 				// Skip time series with zeros. They are substituted by xssNew below.
+				xsPrev = xs
 				continue
 			}
 			if xs.start != xsPrev.end {
@@ -398,25 +403,236 @@ func vmrangeBucketsToLE(tss []*timeseries) []*timeseries {
 	return rvs
 }

-func transformHistogramQuantile(tfa *transformFuncArg) ([]*timeseries, error) {
+func transformHistogramShare(tfa *transformFuncArg) ([]*timeseries, error) {
 	args := tfa.args
-	if err := expectTransformArgsNum(args, 2); err != nil {
-		return nil, err
+	if len(args) < 2 || len(args) > 3 {
+		return nil, fmt.Errorf("unexpected number of args; got %d; want 2...3", len(args))
 	}
-	phis, err := getScalar(args[0], 0)
+	les, err := getScalar(args[0], 0)
 	if err != nil {
-		return nil, err
+		return nil, fmt.Errorf("cannot parse le: %s", err)
 	}

 	// Convert buckets with `vmrange` labels to buckets with `le` labels.
 	tss := vmrangeBucketsToLE(args[1])

-	// Group metrics by all tags excluding "le"
-	type x struct {
-		le float64
-		ts *timeseries
+	// Parse boundsLabel. See https://github.com/prometheus/prometheus/issues/5706 for details.
+	var boundsLabel string
+	if len(args) > 2 {
+		s, err := getString(args[2], 2)
+		if err != nil {
+			return nil, fmt.Errorf("cannot parse boundsLabel (arg #3): %s", err)
+		}
+		boundsLabel = s
 	}
-	m := make(map[string][]x)
+
+	// Group metrics by all tags excluding "le"
+	m := groupLeTimeseries(tss)
+
+	// Calculate share for les
+
+	share := func(i int, les []float64, xss []leTimeseries) (q, lower, upper float64) {
+		leReq := les[i]
+		if math.IsNaN(leReq) || len(xss) == 0 {
+			return nan, nan, nan
+		}
+		fixBrokenBuckets(i, xss)
+		if leReq < 0 {
+			return 0, 0, 0
+		}
+		if math.IsInf(leReq, 1) {
+			return 1, 1, 1
+		}
+		var vPrev, lePrev float64
+		for _, xs := range xss {
+			v := xs.ts.Values[i]
+			le := xs.le
+			if leReq >= le {
+				vPrev = v
+				lePrev = le
+				continue
+			}
+			// precondition: lePrev <= leReq < le
+			vLast := xss[len(xss)-1].ts.Values[i]
+			lower = vPrev / vLast
+			if math.IsInf(le, 1) {
+				return lower, lower, 1
+			}
+			if lePrev == leReq {
+				return lower, lower, lower
+			}
+			upper = v / vLast
+			q = lower + (v-vPrev)/vLast*(leReq-lePrev)/(le-lePrev)
+			return q, lower, upper
+		}
+		// precondition: leReq > leLast
+		return 1, 1, 1
+	}
+	rvs := make([]*timeseries, 0, len(m))
+	for _, xss := range m {
+		sort.Slice(xss, func(i, j int) bool {
+			return xss[i].le < xss[j].le
+		})
+		dst := xss[0].ts
+		var tsLower, tsUpper *timeseries
+		if len(boundsLabel) > 0 {
+			tsLower = &timeseries{}
+			tsLower.CopyFromShallowTimestamps(dst)
+			tsLower.MetricName.RemoveTag(boundsLabel)
+			tsLower.MetricName.AddTag(boundsLabel, "lower")
+			tsUpper = &timeseries{}
+			tsUpper.CopyFromShallowTimestamps(dst)
+			tsUpper.MetricName.RemoveTag(boundsLabel)
+			tsUpper.MetricName.AddTag(boundsLabel, "upper")
+		}
+		for i := range dst.Values {
+			q, lower, upper := share(i, les, xss)
+			dst.Values[i] = q
+			if len(boundsLabel) > 0 {
+				tsLower.Values[i] = lower
+				tsUpper.Values[i] = upper
+			}
+		}
+		rvs = append(rvs, dst)
+		if len(boundsLabel) > 0 {
+			rvs = append(rvs, tsLower)
+			rvs = append(rvs, tsUpper)
+		}
+	}
+	return rvs, nil
+}
+
+func transformHistogramQuantile(tfa *transformFuncArg) ([]*timeseries, error) {
+	args := tfa.args
+	if len(args) < 2 || len(args) > 3 {
+		return nil, fmt.Errorf("unexpected number of args; got %d; want 2...3", len(args))
+	}
+	phis, err := getScalar(args[0], 0)
+	if err != nil {
+		return nil, fmt.Errorf("cannot parse phi: %s", err)
+	}
+
+	// Convert buckets with `vmrange` labels to buckets with `le` labels.
+	tss := vmrangeBucketsToLE(args[1])
+
+	// Parse boundsLabel. See https://github.com/prometheus/prometheus/issues/5706 for details.
+	var boundsLabel string
+	if len(args) > 2 {
+		s, err := getString(args[2], 2)
+		if err != nil {
+			return nil, fmt.Errorf("cannot parse boundsLabel (arg #3): %s", err)
+		}
+		boundsLabel = s
+	}
+
+	// Group metrics by all tags excluding "le"
+	m := groupLeTimeseries(tss)
+
+	// Calculate quantile for each group in m
+
+	lastNonInf := func(i int, xss []leTimeseries) float64 {
+		for len(xss) > 0 {
+			xsLast := xss[len(xss)-1]
+			v := xsLast.ts.Values[i]
+			if v == 0 {
+				return nan
+			}
+			if !math.IsInf(xsLast.le, 0) {
+				return xsLast.le
+			}
+			xss = xss[:len(xss)-1]
+		}
+		return nan
+	}
+	quantile := func(i int, phis []float64, xss []leTimeseries) (q, lower, upper float64) {
+		phi := phis[i]
+		if math.IsNaN(phi) {
+			return nan, nan, nan
+		}
+		fixBrokenBuckets(i, xss)
+		vLast := float64(0)
+		if len(xss) > 0 {
+			vLast = xss[len(xss)-1].ts.Values[i]
+		}
+		if vLast == 0 {
+			return nan, nan, nan
+		}
+		if phi < 0 {
+			return -inf, -inf, xss[0].ts.Values[i]
+		}
+		if phi > 1 {
+			return inf, vLast, inf
+		}
+		vReq := vLast * phi
+		vPrev := float64(0)
+		lePrev := float64(0)
+		for _, xs := range xss {
+			v := xs.ts.Values[i]
+			le := xs.le
+			if v <= 0 {
+				// Skip zero buckets.
+				lePrev = le
+				continue
+			}
+			if v < vReq {
+				vPrev = v
+				lePrev = le
+				continue
+			}
+			if math.IsInf(le, 0) {
+				vv := lastNonInf(i, xss)
+				return vv, vv, inf
+			}
+			if v == vPrev {
+				return lePrev, lePrev, v
+			}
+			vv := lePrev + (le-lePrev)*(vReq-vPrev)/(v-vPrev)
+			return vv, lePrev, le
+		}
+		vv := lastNonInf(i, xss)
+		return vv, vv, inf
+	}
+	rvs := make([]*timeseries, 0, len(m))
+	for _, xss := range m {
+		sort.Slice(xss, func(i, j int) bool {
+			return xss[i].le < xss[j].le
+		})
+		dst := xss[0].ts
+		var tsLower, tsUpper *timeseries
+		if len(boundsLabel) > 0 {
+			tsLower = &timeseries{}
+			tsLower.CopyFromShallowTimestamps(dst)
+			tsLower.MetricName.RemoveTag(boundsLabel)
+			tsLower.MetricName.AddTag(boundsLabel, "lower")
+			tsUpper = &timeseries{}
+			tsUpper.CopyFromShallowTimestamps(dst)
+			tsUpper.MetricName.RemoveTag(boundsLabel)
+			tsUpper.MetricName.AddTag(boundsLabel, "upper")
+		}
+		for i := range dst.Values {
+			v, lower, upper := quantile(i, phis, xss)
+			dst.Values[i] = v
+			if len(boundsLabel) > 0 {
+				tsLower.Values[i] = lower
+				tsUpper.Values[i] = upper
+			}
+		}
+		rvs = append(rvs, dst)
+		if len(boundsLabel) > 0 {
+			rvs = append(rvs, tsLower)
+			rvs = append(rvs, tsUpper)
+		}
+	}
+	return rvs, nil
+}
+
+type leTimeseries struct {
+	le float64
+	ts *timeseries
+}
+
+func groupLeTimeseries(tss []*timeseries) map[string][]leTimeseries {
+	m := make(map[string][]leTimeseries)
 	bb := bbPool.Get()
 	for _, ts := range tss {
 		tagValue := ts.MetricName.GetTagValue("le")
@@ -430,106 +646,28 @@ func transformHistogramQuantile(tfa *transformFuncArg) ([]*timeseries, error) {
 		ts.MetricName.ResetMetricGroup()
 		ts.MetricName.RemoveTag("le")
 		bb.B = marshalMetricTagsSorted(bb.B[:0], &ts.MetricName)
-		m[string(bb.B)] = append(m[string(bb.B)], x{
+		m[string(bb.B)] = append(m[string(bb.B)], leTimeseries{
 			le: le,
 			ts: ts,
 		})
 	}
 	bbPool.Put(bb)
+	return m
+}

-	// Calculate quantile for each group in m
-
-	lastNonInf := func(i int, xss []x) float64 {
-		for len(xss) > 0 {
-			xsLast := xss[len(xss)-1]
-			v := xsLast.ts.Values[i]
-			if v == 0 {
-				return nan
-			}
-			if !math.IsNaN(v) && !math.IsInf(xsLast.le, 0) {
-				return xsLast.le
-			}
-			xss = xss[:len(xss)-1]
+func fixBrokenBuckets(i int, xss []leTimeseries) {
+	// Fix broken buckets.
+	// They are already sorted by le, so their values must be in ascending order,
+	// since the next bucket includes all the previous buckets.
+	vPrev := float64(0)
+	for _, xs := range xss {
+		v := xs.ts.Values[i]
+		if v < vPrev || math.IsNaN(v) {
+			xs.ts.Values[i] = vPrev
+		} else {
+			vPrev = v
 		}
-		return nan
 	}
-	quantile := func(i int, phis []float64, xss []x) float64 {
-		phi := phis[i]
-		if math.IsNaN(phi) {
-			return nan
-		}
-		// Fix broken buckets.
-		// They are already sorted by le, so their values must be in ascending order,
-		// since the next bucket value includes all the previous buckets.
-		vPrev := float64(0)
-		for _, xs := range xss {
-			v := xs.ts.Values[i]
-			if v < vPrev {
-				xs.ts.Values[i] = vPrev
-			} else if !math.IsNaN(v) {
-				vPrev = v
-			}
-		}
-		vLast := nan
-		for len(xss) > 0 {
-			vLast = xss[len(xss)-1].ts.Values[i]
-			if !math.IsNaN(vLast) {
-				break
-			}
-			xss = xss[:len(xss)-1]
-		}
-		if vLast == 0 || math.IsNaN(vLast) {
-			return nan
-		}
-		if phi < 0 {
-			return -inf
-		}
-		if phi > 1 {
-			return inf
-		}
-		vReq := vLast * phi
-		vPrev = 0
-		lePrev := float64(0)
-		for _, xs := range xss {
-			v := xs.ts.Values[i]
-			if math.IsNaN(v) {
-				// Skip NaNs - they may appear if the selected time range
-				// contains multiple different bucket sets.
-				continue
-			}
-			le := xs.le
-			if v <= 0 {
-				// Skip zero buckets.
-				lePrev = le
-				continue
-			}
-			if v < vReq {
-				vPrev = v
-				lePrev = le
-				continue
-			}
-			if math.IsInf(le, 0) {
-				return lastNonInf(i, xss)
-			}
-			if v == vPrev {
-				return lePrev
-			}
-			return lePrev + (le-lePrev)*(vReq-vPrev)/(v-vPrev)
-		}
-		return lastNonInf(i, xss)
-	}
-	rvs := make([]*timeseries, 0, len(m))
-	for _, xss := range m {
-		sort.Slice(xss, func(i, j int) bool {
-			return xss[i].le < xss[j].le
-		})
-		dst := xss[0].ts
-		for i := range dst.Values {
-			dst.Values[i] = quantile(i, phis, xss)
-		}
-		rvs = append(rvs, dst)
-	}
-	return rvs, nil
 }

 func transformHour(t time.Time) int {
@@ -990,7 +1128,7 @@ func transformLabelTransform(tfa *transformFuncArg) ([]*timeseries, error) {
 		return nil, err
 	}

-	r, err := compileRegexp(regex)
+	r, err := metricsql.CompileRegexp(regex)
 	if err != nil {
 		return nil, fmt.Errorf(`cannot compile regex %q: %s`, regex, err)
 	}
@@ -1019,7 +1157,7 @@ func transformLabelReplace(tfa *transformFuncArg) ([]*timeseries, error) {
 		return nil, err
 	}

-	r, err := compileRegexpAnchored(regex)
+	r, err := metricsql.CompileRegexpAnchored(regex)
 	if err != nil {
 		return nil, fmt.Errorf(`cannot compile regex %q: %s`, regex, err)
 	}
@@ -1068,6 +1206,62 @@ func transformLabelValue(tfa *transformFuncArg) ([]*timeseries, error) {
 	return rvs, nil
 }

+func transformLabelMatch(tfa *transformFuncArg) ([]*timeseries, error) {
+	args := tfa.args
+	if err := expectTransformArgsNum(args, 3); err != nil {
+		return nil, err
+	}
+	labelName, err := getString(args[1], 1)
+	if err != nil {
+		return nil, fmt.Errorf("cannot get label name: %s", err)
+	}
+	labelRe, err := getString(args[2], 2)
+	if err != nil {
+		return nil, fmt.Errorf("cannot get regexp: %s", err)
+	}
+	r, err := metricsql.CompileRegexpAnchored(labelRe)
+	if err != nil {
+		return nil, fmt.Errorf(`cannot compile regexp %q: %s`, labelRe, err)
+	}
+	tss := args[0]
+	rvs := tss[:0]
+	for _, ts := range tss {
+		labelValue := ts.MetricName.GetTagValue(labelName)
+		if r.Match(labelValue) {
+			rvs = append(rvs, ts)
+		}
+	}
+	return rvs, nil
+}
+
+func transformLabelMismatch(tfa *transformFuncArg) ([]*timeseries, error) {
+	args := tfa.args
+	if err := expectTransformArgsNum(args, 3); err != nil {
+		return nil, err
+	}
+	labelName, err := getString(args[1], 1)
+	if err != nil {
+		return nil, fmt.Errorf("cannot get label name: %s", err)
+	}
+	labelRe, err := getString(args[2], 2)
+	if err != nil {
+		return nil, fmt.Errorf("cannot get regexp: %s", err)
+	}
+	r, err := metricsql.CompileRegexpAnchored(labelRe)
+	if err != nil {
+		return nil, fmt.Errorf(`cannot compile regexp %q: %s`, labelRe, err)
+	}
+	tss := args[0]
+	rvs := tss[:0]
+	for _, ts := range tss {
+		labelValue := ts.MetricName.GetTagValue(labelName)
+		if !r.Match(labelValue) {
+			rvs = append(rvs, ts)
+		}
+	}
+	return rvs, nil
+}
+
 func transformLn(v float64) float64 {
 	return math.Log(v)
 }
@@ -1130,7 +1324,7 @@ func transformScalar(tfa *transformFuncArg) ([]*timeseries, error) {

 	// Verify whether the arg is a string.
 	// Then try converting the string to number.
-	if se, ok := tfa.fe.Args[0].(*stringExpr); ok {
+	if se, ok := tfa.fe.Args[0].(*metricsql.StringExpr); ok {
 		n, err := strconv.ParseFloat(se.S, 64)
 		if err != nil {
 			n = nan
--- a/app/vmstorage/main.go
+++ b/app/vmstorage/main.go
@@ -62,8 +62,8 @@ func InitWithoutMetrics() {
 	blocksCount := tm.SmallBlocksCount + tm.BigBlocksCount
 	rowsCount := tm.SmallRowsCount + tm.BigRowsCount
 	sizeBytes := tm.SmallSizeBytes + tm.BigSizeBytes
-	logger.Infof("successfully opened storage %q in %s; partsCount: %d; blocksCount: %d; rowsCount: %d; sizeBytes: %d",
-		*DataPath, time.Since(startTime), partsCount, blocksCount, rowsCount, sizeBytes)
+	logger.Infof("successfully opened storage %q in %.3f seconds; partsCount: %d; blocksCount: %d; rowsCount: %d; sizeBytes: %d",
+		*DataPath, time.Since(startTime).Seconds(), partsCount, blocksCount, rowsCount, sizeBytes)
 }

 // Storage is a storage.
@@ -133,7 +133,7 @@ func Stop() {
 	startTime := time.Now()
 	WG.WaitAndBlock()
 	Storage.MustClose()
-	logger.Infof("successfully closed the storage in %s", time.Since(startTime))
+	logger.Infof("successfully closed the storage in %.3f seconds", time.Since(startTime).Seconds())

 	logger.Infof("the storage has been stopped")
 }
--- a/dashboards/victoriametrics.json
+++ b/dashboards/victoriametrics.json
@@ -14,7 +14,7 @@
      "type": "grafana",
      "id": "grafana",
      "name": "Grafana",
-      "version": "6.4.4"
+      "version": "6.5.0"
    },
    {
      "type": "panel",
@@ -60,12 +60,12 @@
      }
    ]
  },
-  "description": "Overview for single node VictoriaMetrics v1.29.0 or higher",
+  "description": "Overview for single node VictoriaMetrics v1.30.3 or higher",
  "editable": true,
  "gnetId": 10229,
  "graphTooltip": 0,
  "id": null,
-  "iteration": 1573509727687,
+  "iteration": 1575825261972,
  "links": [
    {
      "icon": "doc",
@@ -499,6 +499,7 @@
        "x": 0,
        "y": 11
      },
+      "hiddenSeries": false,
      "id": 12,
      "legend": {
        "alignAsTable": true,
@@ -592,6 +593,7 @@
        "x": 12,
        "y": 11
      },
+      "hiddenSeries": false,
      "id": 22,
      "legend": {
        "alignAsTable": true,
@@ -685,6 +687,7 @@
        "x": 0,
        "y": 19
      },
+      "hiddenSeries": false,
      "id": 51,
      "legend": {
        "avg": false,
@@ -781,6 +784,7 @@
        "x": 12,
        "y": 19
      },
+      "hiddenSeries": false,
      "id": 33,
      "legend": {
        "avg": false,
@@ -885,6 +889,7 @@
        "x": 0,
        "y": 27
      },
+      "hiddenSeries": false,
      "id": 66,
      "legend": {
        "avg": false,
@@ -972,6 +977,7 @@
        "x": 12,
        "y": 27
      },
+      "hiddenSeries": false,
      "id": 35,
      "legend": {
        "alignAsTable": true,
@@ -1065,6 +1071,7 @@
        "x": 0,
        "y": 35
      },
+      "hiddenSeries": false,
      "id": 60,
      "legend": {
        "avg": false,
@@ -1157,6 +1164,7 @@
        "x": 12,
        "y": 35
      },
+      "hiddenSeries": false,
      "id": 59,
      "legend": {
        "alignAsTable": true,
@@ -1266,6 +1274,7 @@
        "x": 0,
        "y": 43
      },
+      "hiddenSeries": false,
      "id": 37,
      "legend": {
        "avg": false,
@@ -1358,6 +1367,7 @@
        "x": 12,
        "y": 43
      },
+      "hiddenSeries": false,
      "id": 49,
      "legend": {
        "avg": false,
@@ -1464,6 +1474,7 @@
        "x": 0,
        "y": 52
      },
+      "hiddenSeries": false,
      "id": 10,
      "legend": {
        "alignAsTable": true,
@@ -1563,6 +1574,7 @@
        "x": 12,
        "y": 52
      },
+      "hiddenSeries": false,
      "id": 34,
      "legend": {
        "avg": false,
@@ -1668,6 +1680,7 @@
        "x": 0,
        "y": 60
      },
+      "hiddenSeries": false,
      "id": 30,
      "legend": {
        "avg": false,
@@ -1758,6 +1771,7 @@
        "x": 12,
        "y": 60
      },
+      "hiddenSeries": false,
      "id": 36,
      "legend": {
        "avg": false,
@@ -1848,6 +1862,7 @@
        "x": 0,
        "y": 68
      },
+      "hiddenSeries": false,
      "id": 53,
      "legend": {
        "avg": false,
@@ -1938,6 +1953,7 @@
        "x": 12,
        "y": 68
      },
+      "hiddenSeries": false,
      "id": 55,
      "legend": {
        "avg": false,
@@ -2027,6 +2043,7 @@
        "x": 0,
        "y": 76
      },
+      "hiddenSeries": false,
      "id": 62,
      "legend": {
        "avg": false,
@@ -2115,6 +2132,7 @@
        "x": 12,
        "y": 76
      },
+      "hiddenSeries": false,
      "id": 64,
      "legend": {
        "avg": false,
@@ -2203,6 +2221,7 @@
        "x": 0,
        "y": 84
      },
+      "hiddenSeries": false,
      "id": 58,
      "legend": {
        "avg": false,
@@ -2280,6 +2299,99 @@
        "alignLevel": null
      }
    },
+    {
+      "aliasColors": {},
+      "bars": false,
+      "dashLength": 10,
+      "dashes": false,
+      "datasource": "${DS_PROMETHEUS}",
+      "description": "Shows the rate of logging the messages by their level. Unexpected spike in rate is a good reason to check logs.",
+      "fill": 1,
+      "fillGradient": 0,
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 12,
+        "y": 84
+      },
+      "hiddenSeries": false,
+      "id": 67,
+      "legend": {
+        "avg": false,
+        "current": false,
+        "max": false,
+        "min": false,
+        "show": true,
+        "total": false,
+        "values": false
+      },
+      "lines": true,
+      "linewidth": 1,
+      "links": [],
+      "nullPointMode": "null",
+      "options": {
+        "dataLinks": []
+      },
+      "percentage": false,
+      "pointradius": 2,
+      "points": false,
+      "renderer": "flot",
+      "seriesOverrides": [],
+      "spaceLength": 10,
+      "stack": false,
+      "steppedLine": false,
+      "targets": [
+        {
+          "expr": "sum(rate(vm_log_messages_total{job=\"$job\"}[5m])) by (level) ",
+          "format": "time_series",
+          "hide": false,
+          "intervalFactor": 1,
+          "legendFormat": "{{level}}",
+          "refId": "A"
+        }
+      ],
+      "thresholds": [],
+      "timeFrom": null,
+      "timeRegions": [],
+      "timeShift": null,
+      "title": "Logging rate",
+      "tooltip": {
+        "shared": true,
+        "sort": 0,
+        "value_type": "individual"
+      },
+      "type": "graph",
+      "xaxis": {
+        "buckets": null,
+        "mode": "time",
+        "name": null,
+        "show": true,
+        "values": []
+      },
+      "yaxes": [
+        {
+          "decimals": null,
+          "format": "short",
+          "label": null,
+          "logBase": 1,
+          "max": null,
+          "min": null,
+          "show": true
+        },
+        {
+          "format": "short",
+          "label": null,
+          "logBase": 1,
+          "max": null,
+          "min": null,
+          "show": true
+        }
+      ],
+      "yaxis": {
+        "align": false,
+        "alignLevel": null
+      }
+    },
    {
      "collapsed": false,
      "datasource": "${DS_PROMETHEUS}",
@@ -2309,6 +2421,7 @@
        "x": 0,
        "y": 93
      },
+      "hiddenSeries": false,
      "id": 44,
      "legend": {
        "avg": false,
@@ -2415,6 +2528,7 @@
        "x": 12,
        "y": 93
      },
+      "hiddenSeries": false,
      "id": 57,
      "legend": {
        "avg": false,
@@ -2504,6 +2618,7 @@
        "x": 0,
        "y": 101
      },
+      "hiddenSeries": false,
      "id": 47,
      "legend": {
        "avg": false,
@@ -2594,6 +2709,7 @@
        "x": 12,
        "y": 101
      },
+      "hiddenSeries": false,
      "id": 42,
      "legend": {
        "avg": false,
@@ -2683,6 +2799,7 @@
        "x": 0,
        "y": 109
      },
+      "hiddenSeries": false,
      "id": 48,
      "legend": {
        "avg": false,
@@ -2761,7 +2878,7 @@
    }
  ],
  "refresh": "30s",
-  "schemaVersion": 20,
+  "schemaVersion": 21,
  "style": "dark",
  "tags": [],
  "templating": {
@@ -2844,5 +2961,5 @@
  "timezone": "",
  "title": "VictoriaMetrics",
  "uid": "wNf0q_kZk",
-  "version": 4
+  "version": 1
 }
--- a/deployment/docker/Makefile
+++ b/deployment/docker/Makefile
@@ -1,13 +1,15 @@
-DOCKER_NAMESPACE := victoriametrics
-BUILDER_IMAGE := local/builder:go1.13.4
+# All these commands must run from repository root.
+
+DOCKER_NAMESPACE := docker.io/victoriametrics
+BUILDER_IMAGE := local/builder:go1.13.6
 CERTS_IMAGE := local/certs:1.0.3

 package-certs:
-	(docker image ls --format '{{.Repository}}:{{.Tag}}' | grep -q '$(CERTS_IMAGE)') \
+	(docker image ls --format '{{.Repository}}:{{.Tag}}' | grep -q '$(CERTS_IMAGE)$$') \
 		|| docker build -t $(CERTS_IMAGE) deployment/docker/certs

 package-builder:
-	(docker image ls --format '{{.Repository}}:{{.Tag}}' | grep -q '$(BUILDER_IMAGE)') \
+	(docker image ls --format '{{.Repository}}:{{.Tag}}' | grep -q '$(BUILDER_IMAGE)$$') \
 		|| docker build -t $(BUILDER_IMAGE) deployment/docker/builder

 app-via-docker: package-certs package-builder
@@ -25,21 +27,118 @@ app-via-docker: package-certs package-builder
 			-o bin/$(APP_NAME)$(APP_SUFFIX)-prod $(PKG_PREFIX)/app/$(APP_NAME)

 package-via-docker:
-	(docker image ls --format '{{.Repository}}:{{.Tag}}' | grep -q '$(DOCKER_NAMESPACE)/$(APP_NAME):$(PKG_TAG)$(RACE)') || (\
+	(docker image ls --format '{{.Repository}}:{{.Tag}}' | grep -q '$(DOCKER_NAMESPACE)/$(APP_NAME):$(PKG_TAG)$(APP_SUFFIX)$(RACE)$$') || (\
 		$(MAKE) app-via-docker && \
-		docker build -t $(DOCKER_NAMESPACE)/$(APP_NAME):$(PKG_TAG)$(RACE) -f app/$(APP_NAME)/deployment/Dockerfile .)
+		docker build \
+			--build-arg src_binary=$(APP_NAME)$(APP_SUFFIX)-prod \
+			--build-arg certs_image=$(CERTS_IMAGE) \
+			-t $(DOCKER_NAMESPACE)/$(APP_NAME):$(PKG_TAG)$(APP_SUFFIX)$(RACE) \
+			-f app/$(APP_NAME)/deployment/Dockerfile bin)

-publish-via-docker: package-via-docker
-	docker push $(DOCKER_NAMESPACE)/$(APP_NAME):$(PKG_TAG)$(RACE)
-	docker tag $(DOCKER_NAMESPACE)/$(APP_NAME):$(PKG_TAG)$(RACE) $(DOCKER_NAMESPACE)/$(APP_NAME):latest
-	docker push $(DOCKER_NAMESPACE)/$(APP_NAME):latest
+package-manifest: \
+		package-via-docker-amd64 \
+		package-via-docker-arm \
+		package-via-docker-arm64 \
+		package-via-docker-ppc64le \
+		package-via-docker-386
+	$(MAKE) package-manifest-internal
+
+package-manifest-internal:
+	docker push $(DOCKER_NAMESPACE)/$(APP_NAME):$(PKG_TAG)-amd64$(RACE)
+	docker push $(DOCKER_NAMESPACE)/$(APP_NAME):$(PKG_TAG)-arm$(RACE)
+	docker push $(DOCKER_NAMESPACE)/$(APP_NAME):$(PKG_TAG)-arm64$(RACE)
+	docker push $(DOCKER_NAMESPACE)/$(APP_NAME):$(PKG_TAG)-ppc64le$(RACE)
+	docker push $(DOCKER_NAMESPACE)/$(APP_NAME):$(PKG_TAG)-386$(RACE)
+	DOCKER_CLI_EXPERIMENTAL=enabled docker manifest create --amend $(DOCKER_NAMESPACE)/$(APP_NAME):$(PKG_TAG)$(RACE) \
+				$(DOCKER_NAMESPACE)/$(APP_NAME):$(PKG_TAG)-amd64$(RACE) \
+				$(DOCKER_NAMESPACE)/$(APP_NAME):$(PKG_TAG)-arm$(RACE) \
+				$(DOCKER_NAMESPACE)/$(APP_NAME):$(PKG_TAG)-arm64$(RACE) \
+				$(DOCKER_NAMESPACE)/$(APP_NAME):$(PKG_TAG)-ppc64le$(RACE) \
+				$(DOCKER_NAMESPACE)/$(APP_NAME):$(PKG_TAG)-386$(RACE)
+	GOARCH=amd64 $(MAKE) package-manifest-annotate-goarch
+	GOARCH=arm $(MAKE) package-manifest-annotate-goarch
+	GOARCH=arm64 $(MAKE) package-manifest-annotate-goarch
+	GOARCH=ppc64le $(MAKE) package-manifest-annotate-goarch
+	GOARCH=386 $(MAKE) package-manifest-annotate-goarch
+
+package-manifest-annotate-goarch:
+	DOCKER_CLI_EXPERIMENTAL=enabled docker manifest annotate $(DOCKER_NAMESPACE)/$(APP_NAME):$(PKG_TAG)$(RACE) \
+				$(DOCKER_NAMESPACE)/$(APP_NAME):$(PKG_TAG)-$(GOARCH)$(RACE) --os linux --arch $(GOARCH)
+
+publish-via-docker: package-manifest
+	docker tag $(DOCKER_NAMESPACE)/$(APP_NAME):$(PKG_TAG)-amd64$(RACE) $(DOCKER_NAMESPACE)/$(APP_NAME):latest-amd64$(RACE)
+	docker tag $(DOCKER_NAMESPACE)/$(APP_NAME):$(PKG_TAG)-arm$(RACE) $(DOCKER_NAMESPACE)/$(APP_NAME):latest-arm$(RACE)
+	docker tag $(DOCKER_NAMESPACE)/$(APP_NAME):$(PKG_TAG)-arm64$(RACE) $(DOCKER_NAMESPACE)/$(APP_NAME):latest-arm64$(RACE)
+	docker tag $(DOCKER_NAMESPACE)/$(APP_NAME):$(PKG_TAG)-ppc64le$(RACE) $(DOCKER_NAMESPACE)/$(APP_NAME):latest-ppc64le$(RACE)
+	docker tag $(DOCKER_NAMESPACE)/$(APP_NAME):$(PKG_TAG)-386$(RACE) $(DOCKER_NAMESPACE)/$(APP_NAME):latest-386$(RACE)
+	PKG_TAG=latest $(MAKE) package-manifest-internal
+	DOCKER_CLI_EXPERIMENTAL=enabled docker manifest push --purge $(DOCKER_NAMESPACE)/$(APP_NAME):$(PKG_TAG)$(RACE)
+	DOCKER_CLI_EXPERIMENTAL=enabled docker manifest push --purge $(DOCKER_NAMESPACE)/$(APP_NAME):latest$(RACE)

 run-via-docker: package-via-docker
 	docker run -it --rm \
 		--user $(shell id -u):$(shell id -g) \
 		--net host \
 		$(DOCKER_OPTS) \
-		$(DOCKER_NAMESPACE)/$(APP_NAME):$(PKG_TAG)$(RACE) $(ARGS)
+		$(DOCKER_NAMESPACE)/$(APP_NAME):$(PKG_TAG)$(APP_SUFFIX)$(RACE) $(ARGS)
+
+app-via-docker-goarch:
+	APP_SUFFIX='-$(GOARCH)' \
+	DOCKER_OPTS='--env CGO_ENABLED=$(CGO_ENABLED) --env GOOS=linux --env GOARCH=$(GOARCH)' \
+	$(MAKE) app-via-docker
+
+app-via-docker-goarch-cgo:
+	CGO_ENABLED=1 $(MAKE) app-via-docker-goarch
+
+app-via-docker-goarch-nocgo:
+	CGO_ENABLED=0 $(MAKE) app-via-docker-goarch
+
+app-via-docker-pure:
+	APP_SUFFIX='-pure' DOCKER_OPTS='--env CGO_ENABLED=0' $(MAKE) app-via-docker
+
+app-via-docker-amd64:
+	GOARCH=amd64 $(MAKE) app-via-docker-goarch-cgo
+
+app-via-docker-arm:
+	GOARCH=arm $(MAKE) app-via-docker-goarch-nocgo
+
+app-via-docker-arm64:
+	GOARCH=arm64 $(MAKE) app-via-docker-goarch-nocgo
+
+app-via-docker-ppc64le:
+	GOARCH=ppc64le $(MAKE) app-via-docker-goarch-nocgo
+
+app-via-docker-386:
+	GOARCH=386 $(MAKE) app-via-docker-goarch-nocgo
+
+package-via-docker-goarch:
+	APP_SUFFIX='-$(GOARCH)' \
+	DOCKER_OPTS='--env CGO_ENABLED=$(CGO_ENABLED) --env GOOS=linux --env GOARCH=$(GOARCH)' \
+	$(MAKE) package-via-docker
+
+package-via-docker-goarch-cgo:
+	CGO_ENABLED=1 $(MAKE) package-via-docker-goarch
+
+package-via-docker-goarch-nocgo:
+	CGO_ENABLED=0 $(MAKE) package-via-docker-goarch
+
+package-via-docker-pure:
+	APP_SUFFIX='-pure' DOCKER_OPTS='--env CGO_ENABLED=0' $(MAKE) package-via-docker
+
+package-via-docker-amd64:
+	GOARCH=amd64 $(MAKE) package-via-docker-goarch-cgo
+
+package-via-docker-arm:
+	GOARCH=arm $(MAKE) package-via-docker-goarch-nocgo
+
+package-via-docker-arm64:
+	GOARCH=arm64 $(MAKE) package-via-docker-goarch-nocgo
+
+package-via-docker-ppc64le:
+	GOARCH=ppc64le $(MAKE) package-via-docker-goarch-nocgo
+
+package-via-docker-386:
+	GOARCH=386 $(MAKE) package-via-docker-goarch-nocgo

 remove-docker-images:
 	docker image ls --format '{{.Repository}}\t{{.ID}}' | grep $(DOCKER_NAMESPACE)/ | grep -v /builder | awk '{print $$2}' | xargs docker image rm -f
--- a/deployment/docker/builder/Dockerfile
+++ b/deployment/docker/builder/Dockerfile
@@ -1,2 +1,2 @@
-FROM golang:1.13.4
+FROM golang:1.13.6
 STOPSIGNAL SIGINT
--- a/deployment/docker/docker-compose.yml
+++ b/deployment/docker/docker-compose.yml
@@ -2,7 +2,7 @@ version: '3.5'
 services:
  prometheus:
    container_name: prometheus
-    image: prom/prometheus:v2.14.0
+    image: prom/prometheus:v2.15.2
    depends_on:
      - "victoriametrics"
    ports:
@@ -35,7 +35,7 @@ services:
    restart: always
  grafana:
    container_name: grafana
-    image: grafana/grafana:6.5.0
+    image: grafana/grafana:6.5.2
    entrypoint: >
      /bin/sh -c "
      cd /var/lib/grafana &&
--- a/docs/Articles.md
+++ b/docs/Articles.md
@@ -1,3 +1,5 @@
+# Articles
+
 * [Open-sourcing VictoriaMetrics](https://medium.com/@valyala/open-sourcing-victoriametrics-f31e34485c2b)
 * [How we created VictoriaMetrics](https://medium.com/devopslinks/victoriametrics-creating-the-best-remote-storage-for-prometheus-5d92d66787ac)
 * [VictoriaMetrics vs TimescaleDB vs InfluxDB benchmarks on 40K unique time series](https://medium.com/@valyala/when-size-matters-benchmarking-victoriametrics-vs-timescale-and-influxdb-6035811952d4)
@@ -17,3 +19,5 @@
 * [Speeding up backups for big time series databases](https://medium.com/@valyala/speeding-up-backups-for-big-time-series-databases-533c1a927883)
 * [Evaluation performance and correctness: VictoriaMetrics response](https://medium.com/@valyala/evaluating-performance-and-correctness-victoriametrics-response-e27315627e87)
 * [Improving histogram usability for Prometheus and Grafana](https://medium.com/@valyala/improving-histogram-usability-for-prometheus-and-grafana-bc7e5df0e350)
+* [Prometheus storage: tech terms for humans](https://medium.com/@valyala/prometheus-storage-technical-terms-for-humans-4ab4de6c3d48)
+* [Billy: how VictoriaMetrics deals with more than 500 billion rows](https://medium.com/@valyala/billy-how-victoriametrics-deals-with-more-than-500-billion-rows-e82ff8f725da)
--- a/docs/CaseStudies.md
+++ b/docs/CaseStudies.md
@@ -0,0 +1,84 @@
+## Case studies and talks
+
+Below are approved public case studies and talks from VictoriaMetrics users. Join our [community Slack channel](http://slack.victoriametrics.com/)
+and feel free to ask for references, reviews and additional case studies from real VictoriaMetrics users there.
+
+### Adidas
+
+See [slides](https://promcon.io/2019-munich/slides/remote-write-storage-wars.pdf) and [video](https://youtu.be/OsH6gPdxR4s)
+from [Remote Write Storage Wars](https://promcon.io/2019-munich/talks/remote-write-storage-wars/) talk at [PromCon 2019](https://promcon.io/2019-munich/).
+VictoriaMetrics is compared to Thanos, Cortex and M3DB in the talk.
+
+
+### COLOPL
+
+[COLOPL](http://www.colopl.co.jp/en/) is a Japanese game development company. It started using VictoriaMetrics
+after evaluating the following remote storage solutions for Prometheus:
+
+* Cortex
+* Thanos
+* M3DB
+* VictoriaMetrics
+
+See [slides](https://speakerdeck.com/inletorder/monitoring-platform-with-victoria-metrics) and [video](https://www.youtube.com/watch?v=hUpHIluxw80)
+from `Large-scale, super-load system monitoring platform built with VictoriaMetrics` talk at [Prometheus Meetup Tokyo #3](https://prometheus.connpass.com/event/157721/).
+
+
+### Wix.com
+
+[Wix.com](https://en.wikipedia.org/wiki/Wix.com) is the leading web development platform.
+
+> We needed to redesign metric infrastructure from the ground up after the move to Kubernetes. A few approaches/designs have been tried before the one that works great has been chosen: Prometheus instance in every datacenter with 2 hours retention for local storage and remote write into [HA pair of single-node VictoriaMetrics instances](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/README.md#high-availability).
+
+Numbers:
+
+* The number of active time series per VictoriaMetrics instance is 20M.
+* The total number of time series per VictoriaMetrics instance is 400M+.
+* Ingestion rate per VictoriaMetrics instance is 800K data points per second.
+* The average time series churn rate is ~3M per day.
+* The average query rate is ~1K per minute (mostly alert queries).
+* Query duration: median is ~70ms, 99th percentile is ~2sec.
+* Retention: 6 months.
+
+> Alternatives that we’ve played with before choosing VictoriaMetrics are: federated Prometheus, Cortex, IronDB and Thanos.
+> Points that were critical to us when we were choosing a central tsdb, in order of importance:
+
+* At least 3 month worth of history.
+* Raw data, no aggregation, no sampling.
+* High query speed.
+* Clean fail state for HA (multi-node clusters may return partial data resulting in false alerts).
+* Enough head room/scaling capacity for future growth, up to 100M active time series.
+* Ability to split DB replicas per workload. Alert queries go to one replica, user queries go to another (speed for users, effective cache).
+
+> Optimizing for those points and our specific workload VictoriaMetrics proved to be the best option. As an icing on a cake we’ve got [PromQL extensions](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/ExtendedPromQL) - `default 0` and `histogram` are my favorite ones, for example. What we specially like is having a lot of tsdb params easily available via config options, that makes tsdb easy to tune for specific use case. Also worth noting is a great community in [Slack channel](http://slack.victoriametrics.com/) and of course maintainer support.
+
+Alex Ulstein, Head of Monitoring, Wix.com
+
+
+### Wedos.com
+
+> [Wedos](https://www.wedos.com/) is the Biggest Czech Hosting. We have our own private data center, that holds only our servers and technologies. The second data center, where the servers will be cooled in an oil bath, is being built. We started using [cluster VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/cluster/README.md) to store Prometheus metrics from all our infrastructure after receiving positive references from our friends who successfully use VictoriaMetrics.
+
+Numbers:
+
+* The number of active time series: 5M.
+* Ingestion rate: 170K data points per second.
+* Query duration: median is ~2ms, 99th percentile is ~50ms.
+
+> We like configuration simplicity and zero maintenance for VictoriaMetrics - once installed and forgot about it. It works out of the box without any issues.
+
+
+### Dreamteam
+
+[Dreamteam](https://dreamteam.gg/) successfully uses single-node VictoriaMetrics in multiple environments.
+
+Numbers:
+
+* Active time series: from 350K to 725K.
+* Total number of time series: from 100M to 320M.
+* Total number of datapoints: from 120 billion to 155 billion.
+* Retention: 3 months.
+
+VictoriaMetrics in production environment runs on 2 M5 EC2 instances in "HA" mode, managed by Terraform and Ansible TF module.
+2 Prometheus instances are writing to both VMs, with 2 [Promxy](https://github.com/jacksontj/promxy) replicas
+as load balancer for reads.
--- a/docs/Cluster-VictoriaMetrics.md
+++ b/docs/Cluster-VictoriaMetrics.md
@@ -1,9 +1,9 @@
-# Cluster version of VictoriaMetrics
+# Cluster version

 VictoriaMetrics is fast, cost-effective and scalable time series database. It can be used as a long-term remote storage for Prometheus.

 It is recommended using [single-node version](https://github.com/VictoriaMetrics/VictoriaMetrics) instead of cluster version
-for ingestion rates lower than 10 million of data points per second.
+for ingestion rates lower than a million data points per second.
 Single-node version [scales perfectly](https://medium.com/@valyala/measuring-vertical-scalability-for-time-series-databases-in-google-cloud-92550d78d8ae)
 with the number of CPU cores, RAM and available storage space.
 Single-node version is easier to configure and operate comparing to cluster version, so think twice before sticking to cluster version.
@@ -27,6 +27,9 @@ VictoriaMetrics cluster consists of the following services:
 - `vmselect` - performs incoming queries using the data from `vmstorage`

 Each service may scale independently and may run on the most suitable hardware.
+`vmstorage` nodes don't know about each other, don't communicate with each other and don't share any data.
+This is [shared nothing architecture](https://en.wikipedia.org/wiki/Shared-nothing_architecture).
+It increases cluster availability, simplifies cluster maintenance and cluster scaling.

 <img src="https://docs.google.com/drawings/d/e/2PACX-1vTvk2raU9kFgZ84oF-OKolrGwHaePhHRsZEcfQ1I_EC5AB_XPWwB392XshxPramLJ8E4bqptTnFn5LL/pub?w=1104&amp;h=746">

@@ -130,6 +133,8 @@ with [the official Grafana dashboard for VictoriaMetrics cluster](https://grafan
  - `<suffix>` may have the following values:
     - `prometheus` - for inserting data with [Prometheus remote write API](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#remote_write)
     - `influx/write` or `influx/api/v2/write` - for inserting data with [Influx line protocol](https://docs.influxdata.com/influxdb/v1.7/write_protocols/line_protocol_tutorial/)
+     - `opentsdb/api/put` - for accepting [OpenTSDB HTTP /api/put requests](http://opentsdb.net/docs/build/html/api_http/put.html).
+     - `prometheus/api/v1/import` - for importing data obtained via `api/v1/export` on `vmselect` (see below).

 * URLs for querying: `http://<vmselect>:8481/select/<accountID>/prometheus/<suffix>`, where:
  - `<accountID>` is an arbitrary number identifying data namespace for the query (aka tenant)
--- a/docs/ExtendedPromQL.md
+++ b/docs/ExtendedPromQL.md
@@ -1,16 +1,36 @@
-VictoriaMetrics supports [standard PromQL](https://prometheus.io/docs/prometheus/latest/querying/basics/)
-including [subqueries](https://prometheus.io/blog/2019/01/28/subquery-support/).
-Additionally it supports useful extensions mentioned below.
-Try these extensions on [an editable Grafana dashboard](http://play-grafana.victoriametrics.com:3000/d/4ome8yJmz/node-exporter-on-victoriametrics-demo).
+# MetricsQL
+
+VictoriaMetrics implements MetricsQL - query language inspired by [PromQL](https://prometheus.io/docs/prometheus/latest/querying/basics/).
+It is backwards compatible with PromQL, so Grafana dashboards backed by Prometheus datasource should work the same after switching from Prometheus to VictoriaMetrics.
+[Standalone MetricsQL package](https://godoc.org/github.com/VictoriaMetrics/VictoriaMetrics/lib/metricsql) can be used for parsing MetricsQL in external apps.
+
+The following functionality is implemented differently in MetricsQL compared to PromQL in order to improve user experience:
+* MetricsQL takes into account the previous point before the window in square brackets for range functions such as `rate` and `increase`.
+  It also doesn't extrapolate range function results. This addresses [this issue from Prometheus](https://github.com/prometheus/prometheus/issues/3746).
+* MetricsQL returns the expected non-empty responses for requests with `step` values smaller than scrape interval. This addresses [this issue from Grafana](https://github.com/grafana/grafana/issues/11451).
+* MetricsQL treats `scalar` type the same as `instant vector` without labels, since subtle difference between these types usually confuses users.
+  See [the corresponding Prometheus docs](https://prometheus.io/docs/prometheus/latest/querying/basics/#expression-language-data-types) for details.
+
+Other PromQL functionality should work the same in MetricsQL. [File an issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues)
+if you notice discrepancies between PromQL and MetricsQL results other than mentioned above.
+
+MetricsQL provides additional functionality mentioned below, which is aimed towards solving practical cases.
+Feel free to [file a feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues) if you think MetricsQL misses certain useful functionality.
+
+*Note that the functionality mentioned below doesn't work in PromQL, so it is impossible to switch back to Prometheus after you start using it.*
+
+This functionality can be tried at [an editable Grafana dashboard](http://play-grafana.victoriametrics.com:3000/d/4ome8yJmz/node-exporter-on-victoriametrics-demo).

 - [`WITH` templates](https://play.victoriametrics.com/promql/expand-with-exprs). This feature simplifies writing and managing complex queries. Go to [`WITH` templates playground](https://victoriametrics.com/promql/expand-with-exprs) and try it.
 - Metric names and metric labels may contain escaped chars. For instance, `foo\-bar{baz\=aa="b"}` is valid expression. It returns time series with name `foo-bar` containing label `baz=aa` with value `b`. Additionally, `\xXX` escape sequence is supported, where `XX` is hexadecimal representation of escaped char.
 - `offset`, range duration and step value for range vector may refer to the current step aka `$__interval` value from Grafana.
  For instance, `rate(metric[10i] offset 5i)` would return per-second rate over a range covering 10 previous steps with the offset of 5 steps.
+- `offset` may be put anywhere in the query. For instance, `sum(foo) offset 24h`.
+- `offset` may be negative. For example, `q offset -1h`.
 - `default` binary operator. `q1 default q2` substitutes `NaN` values from `q1` with the corresponding values from `q2`.
+- `histogram_quantile` accepts optional third arg - `boundsLabel`. In this case it returns `lower` and `upper` bounds for the estimated percentile. See [this issue for details](https://github.com/prometheus/prometheus/issues/5706).
 - `if` binary operator. `q1 if q2` removes values from `q1` for `NaN` values from `q2`.
 - `ifnot` binary operator. `q1 ifnot q2` removes values from `q1` for non-`NaN` values from `q2`.
- `offset` may be put anywere in the query. For instance, `sum(foo) offset 24h`.
 - Trailing commas on all the lists are allowed - label filters, function args and with expressions. For instance, the following queries are valid: `m{foo="bar",}`, `f(a, b,)`, `WITH (x=y,) x`. This simplifies maintenance of multi-line queries.
 - String literals may be concatenated. This is useful with `WITH` templates: `WITH (commonPrefix="long_metric_prefix_") {__name__=commonPrefix+"suffix1"} / {__name__=commonPrefix+"suffix2"}`.
 - Range duration in functions such as [rate](https://prometheus.io/docs/prometheus/latest/querying/functions/#rate()) may be omitted. VictoriaMetrics automatically selects range duration depending on the current step used for building the graph. For instance, the following query is valid in VictoriaMetrics: `rate(node_network_receive_bytes_total)`.
@@ -32,6 +52,7 @@ Try these extensions on [an editable Grafana dashboard](http://play-grafana.vict
  - `label_move(q, src_label1, dst_label1, ... src_labelN, dst_labelN)` for moving label values from `src_*` to `dst_*`.
  - `label_transform(q, label, regexp, replacement)` for replacing all the `regexp` occurences with `replacement` in the `label` values from `q`.
  - `label_value(q, label)` - returns numeric values for the given `label` from `q`.
+- `label_match(q, label, regexp)` and `label_mismatch(q, label, regexp)` for filtering time series with labels matching (or not matching) the given regexps.
 - `step()` function for returning the step in seconds used in the query.
 - `start()` and `end()` functions for returning the start and end timestamps of the `[start ... end]` range used in the query.
 - `integrate(m[d])` for returning integral over the given duration `d` for the given metric `m`.
@@ -45,6 +66,7 @@ Try these extensions on [an editable Grafana dashboard](http://play-grafana.vict
 - `lifetime(q[d])` - returns lifetime of `q` over `d` in seconds. It is expected that `d` exceeds the lifetime of `q`.
 - `scrape_interval(q[d])` - returns the average interval in seconds between data points of `q` over `d` aka `scrape interval`.
 - Trigonometric functions - `sin(q)`, `cos(q)`, `asin(q)`, `acos(q)` and `pi()`.
+- `range_over_time(m[d])` - returns value range for `m` over `d` time window, i.e. `max_over_time(m[d])-min_over_time(m[d])`.
 - `median_over_time(m[d])` - calculates median values for `m` over `d` time window. Shorthand to `quantile_over_time(0.5, m[d])`.
 - `median(q)` - median aggregate. Shorthand to `quantile(0.5, q)`.
 - `limitk(k, q)` - limits the number of time series returned from `q` to `k`.
@@ -58,4 +80,30 @@ Try these extensions on [an editable Grafana dashboard](http://play-grafana.vict
 - `rand()`, `rand_normal()` and `rand_exponential()` functions - for generating pseudo-random series with even, normal and exponential distribution.
 - `increases_over_time(m[d])` and `decreases_over_time(m[d])` - returns the number of `m` increases or decreases over the given duration `d`.
 - `prometheus_buckets(q)` - converts [VictoriaMetrics histogram](https://godoc.org/github.com/VictoriaMetrics/metrics#Histogram) buckets to Prometheus buckets with `le` labels.
- `histogram(q)` - calculates aggregate histogram over `q` time series for each point on the graph.
+- `histogram(q)` - calculates aggregate histogram over `q` time series for each point on the graph. See [this article](https://medium.com/@valyala/improving-histogram-usability-for-prometheus-and-grafana-bc7e5df0e350) for more details.
+- `histogram_over_time(m[d])` - calculates [VictoriaMetrics histogram](https://godoc.org/github.com/VictoriaMetrics/metrics#Histogram) for `m` over `d`.
+  For example, the following query calculates median temperature by country over the last 24 hours:
+  `histogram_quantile(0.5, sum(histogram_over_time(temperature[24h])) by (vmbucket, country))`.
+- `histogram_share(le, buckets)` - returns share (in the range 0..1) for `buckets`. Useful for calculating SLI and SLO.
+  For instance, the following query returns the share of requests which are performed under 1.5 seconds: `histogram_share(1.5, sum(request_duration_seconds_bucket) by (le))`.
+- `topk_*` and `bottomk_*` aggregate functions, which return up to K time series. Note that the standard `topk` function may return more than K time series -
+   see [this article](https://www.robustperception.io/graph-top-n-time-series-in-grafana) for details.
+   - `topk_min(k, q)` - returns top K time series with the max minimums on the given time range
+   - `topk_max(k, q)` - returns top K time series with the max maximums on the given time range
+   - `topk_avg(k, q)` - returns top K time series with the max averages on the given time range
+   - `topk_median(k, q)` - returns top K time series with the max medians on the given time range
+   - `bottomk_min(k, q)` - returns bottom K time series with the min minimums on the given time range
+   - `bottomk_max(k, q)` - returns bottom K time series with the min maximums on the given time range
+   - `bottomk_avg(k, q)` - returns bottom K time series with the min averages on the given time range
+   - `bottomk_median(k, q)` - returns bottom K time series with the min medians on the given time range
+- `share_le_over_time(m[d], le)` - returns share (in the range 0..1) of values in `m` over `d`, which are smaller or equal to `le`. Useful for calculating SLI and SLO.
+  Example: `share_le_over_time(memory_usage_bytes[24h], 100*1024*1024)` returns the share of time series values for the last 24 hours when memory usage was below or equal to 100MB.
+- `share_gt_over_time(m[d], gt)` - returns share (in the range 0..1) of values in `m` over `d`, which are bigger than `gt`. Useful for calculating SLI and SLO.
+  Example: `share_gt_over_time(up[24h], 0)` - returns service availability for the last 24 hours.
+- `tmin_over_time(m[d])` - returns timestamp for the minimum value for `m` over `d` time range.
+- `tmax_over_time(m[d])` - returns timestamp for the maximum value for `m` over `d` time range.
+- `aggr_over_time(("aggr_func1", "aggr_func2", ...), m[d])` - simultaneously calculates all the listed `aggr_func*` for `m` over `d` time range.
+  `aggr_func*` can contain any functions that accept range vector. For instance, `aggr_over_time(("min_over_time", "max_over_time", "rate"), m[d])`
+  would calculate `min_over_time`, `max_over_time` and `rate` for `m[d]`.
+- `hoeffding_bound_upper(phi, m[d])` and `hoeffding_bound_lower(phi, m[d])` - return upper and lower [Hoeffding bounds](https://en.wikipedia.org/wiki/Hoeffding%27s_inequality)
+  for the given `phi` in the range `[0..1]`.
--- a/docs/FAQ.md
+++ b/docs/FAQ.md
@@ -1,3 +1,5 @@
+# FAQ
+
 ### What is the main purpose of VictoriaMetrics?

 To provide the best long-term [remote storage](https://prometheus.io/docs/operating/integrations/#remote-endpoints-and-storage) solution for [Prometheus](https://prometheus.io/).
@@ -14,7 +16,7 @@ To provide the best long-term [remote storage](https://prometheus.io/docs/operat
 * High data compression, so [up to 70x more data points](https://medium.com/@valyala/when-size-matters-benchmarking-victoriametrics-vs-timescale-and-influxdb-6035811952d4)
  may be crammed into a limited storage comparing to TimescaleDB.
 * Optimized for storage with high-latency IO and low iops (HDD and network storage in AWS, Google Cloud, Microsoft Azure, etc). See [graphs from these benchmarks](https://medium.com/@valyala/high-cardinality-tsdb-benchmarks-victoriametrics-vs-timescaledb-vs-influxdb-13e6ee64dd6b).
-* A single-node VictoriaMetrics may substitute moderately sized clusters built with competing solutions such as Thanos, Uber M3, Cortex, InfluxDB or TimescaleDB.
+* A single-node VictoriaMetrics may substitute moderately sized clusters built with competing solutions such as Thanos, M3DB, Cortex, InfluxDB or TimescaleDB.
  See [vertical scalability benchmarks](https://medium.com/@valyala/measuring-vertical-scalability-for-time-series-databases-in-google-cloud-92550d78d8ae)
  and [comparing Thanos to VictoriaMetrics](https://medium.com/@valyala/comparing-thanos-to-victoriametrics-cluster-b193bea1683).
 * Easy operation:
@@ -52,14 +54,56 @@ Yes. Prometheus continues writing data to local storage after enabling remote st
 and new data is available for querying via Prometheus as usual.


-### How does VictoriaMetrics compare to other clustered TSDBs on top of Prometheus such as [M3 from Uber](https://eng.uber.com/m3/), [Thanos](https://github.com/improbable-eng/thanos), [Cortex](https://github.com/cortexproject/cortex), etc.?
+### How does VictoriaMetrics compare to other clustered TSDBs on top of Prometheus such as [M3 from Uber](https://eng.uber.com/m3/), [Thanos](https://github.com/thanos-io/thanos), [Cortex](https://github.com/cortexproject/cortex), etc.?

-VictoriaMetrics is simpler, faster, more cost-effective and it provides [useful extensions for PromQL](ExtendedPromQL). The simplicity is twofold:
- It is simpler to configure and operate. There is no need in configuring third-party [sidecars](https://github.com/improbable-eng/thanos/blob/master/docs/components/sidecar.md)
-  or fighting with [gossip protocol](https://github.com/improbable-eng/thanos/blob/master/docs/proposals/completed/201809_gossip-removal.md).
- VictoriaMetrics has simpler architecture, which means less bugs and more useful features in a long run comparing to competing TSDBs.
+VictoriaMetrics is simpler, faster, more cost-effective and it provides [MetricsQL with useful extensions for PromQL](ExtendedPromQL). The simplicity is twofold:
+- It is simpler to configure and operate. There is no need to configure third-party [sidecars](https://github.com/thanos-io/thanos/blob/master/docs/components/sidecar.md)
+  or fight with the [gossip protocol](https://github.com/thanos-io/thanos/blob/master/docs/proposals/completed/201809_gossip-removal.md).
+- VictoriaMetrics has a simpler architecture, which means fewer bugs and more useful features in the long run compared to competing TSDBs.

-See [comparing Thanos to VictoriaMetrics cluster](https://medium.com/@valyala/comparing-thanos-to-victoriametrics-cluster-b193bea1683).
+See [comparing Thanos to VictoriaMetrics cluster](https://medium.com/@valyala/comparing-thanos-to-victoriametrics-cluster-b193bea1683)
+and [Remote Write Storage Wars](https://promcon.io/2019-munich/talks/remote-write-storage-wars/) talk from [PromCon 2019](https://promcon.io/2019-munich/talks/remote-write-storage-wars/).
+
+VictoriaMetrics also [uses less RAM than Thanos components](https://github.com/thanos-io/thanos/issues/448).
+
+
+### What is the difference between VictoriaMetrics and [Cortex](https://github.com/cortexproject/cortex)?
+
+VictoriaMetrics is similar to Cortex in the following aspects:
+- Both systems accept data from Prometheus via standard [remote_write API](https://prometheus.io/docs/practices/remote_write/),
+  i.e. there is no need in running sidecars unlike in [Thanos](https://github.com/thanos-io/thanos) case.
+- Both systems support multi-tenancy out of the box. See [the corresponding docs for VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/cluster/README.md#url-format).
+
+The main differences between Cortex and VictoriaMetrics:
+- Cortex re-uses Prometheus source code, while VictoriaMetrics is written from scratch.
+- Cortex provides [Ruler](https://github.com/cortexproject/cortex/blob/master/docs/architecture.md#ruler) and [Alertmanager](https://github.com/cortexproject/cortex/blob/master/docs/architecture.md#alertmanager) components,
+  which are currently missing in VictoriaMetrics. However, these components can be substituted by [Promxy](https://github.com/jacksontj/promxy#how-do-i-use-alertingrecording-rules-in-promxy).
+- Cortex heavily relies on third-party services such as Consul, Memcache, DynamoDB, BigTable, Cassandra, etc.
+  This may increase operational complexity and reduce system reliability comparing to VictoriaMetrics' case,
+  which doesn't use any external services. Compare [Cortex Architecture](https://github.com/cortexproject/cortex/blob/master/docs/architecture.md)
+  to [VictoriaMetrics architecture](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/cluster/README.md#architecture-overview).
+- VictoriaMetrics provides [production-ready single-node solution](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/README.md),
+  which is much easier to setup and operate than Cortex cluster.
+- Cortex may lose up to 12 hours of recent data on Ingester failure - see [the corresponding docs](https://github.com/cortexproject/cortex/blob/master/docs/architecture.md#ingesters-failure-and-data-loss).
+  VictoriaMetrics may lose only a few seconds of recent data, which isn't synced to persistent storage yet.
+  See [this article for details](https://medium.com/@valyala/wal-usage-looks-broken-in-modern-time-series-databases-b62a627ab704).
+- Cortex is usually slower and requires more CPU and RAM than VictoriaMetrics. See [this talk from Adidas at PromCon 2019](https://promcon.io/2019-munich/talks/remote-write-storage-wars/).
+
+
+### What is the difference between VictoriaMetrics and [Thanos](https://github.com/thanos-io/thanos)?
+
+- Thanos re-uses Prometheus source code, while VictoriaMetrics is written from scratch.
+- Thanos provides [Ruler component](https://github.com/thanos-io/thanos/blob/master/docs/components/rule.md),
+  while VictoriaMetrics relies on [Promxy for alerting and recording rules](https://github.com/jacksontj/promxy#how-do-i-use-alertingrecording-rules-in-promxy).
+- VictoriaMetrics accepts data via [standard remote_write API for Prometheus](https://prometheus.io/docs/practices/remote_write/),
+  while Thanos uses non-standard [Sidecar](https://github.com/thanos-io/thanos/blob/master/docs/components/sidecar.md), which must run alongside each Prometheus instance.
+- Thanos Sidecar requires disabling data compaction in Prometheus, which may hurt Prometheus performance and increase RAM usage.
+- Thanos stores data on object storage (Amazon S3 or Google GCS), while VictoriaMetrics stores data on block storage (GCP persistent disks, Amazon EBS or bare metal HDD).
+- Thanos may lose up to 2 hours of recent data, which wasn't uploaded yet to object storage. VictoriaMetrics may lose only a few seconds of recent data,
+  which isn't synced to persistent storage yet. See [this article for details](https://medium.com/@valyala/wal-usage-looks-broken-in-modern-time-series-databases-b62a627ab704).
+- Thanos may be harder to setup and operate comparing to VictoriaMetrics, since it has more moving parts, which can be connected with less reliable networks.
+  See [this article for details](https://medium.com/faun/comparing-thanos-to-victoriametrics-cluster-b193bea1683).
+- Thanos is usually slower and requires more CPU and RAM than VictoriaMetrics. See [this talk from Adidas at PromCon 2019](https://promcon.io/2019-munich/talks/remote-write-storage-wars/).


 ### How does VictoriaMetrics compare to [InfluxDB](https://www.influxdata.com/time-series-platform/influxdb/)?
@@ -74,12 +118,13 @@ TimescaleDB insists on using SQL as a query language. While SQL is more powerful
 Additionally, VictoriaMetrics requires [up to 70x less storage space comparing to TimescaleDB](https://medium.com/@valyala/when-size-matters-benchmarking-victoriametrics-vs-timescale-and-influxdb-6035811952d4) for storing the same amount of time series data.


-### Does VictoriaMetrics use Prometheus technologies like other clustered TSDBs built on top of Prometheus such as [M3 from Uber](https://eng.uber.com/m3/), [Thanos](https://github.com/improbable-eng/thanos), [Cortex](https://github.com/cortexproject/cortex)?
+### Does VictoriaMetrics use Prometheus technologies like other clustered TSDBs built on top of Prometheus such as [M3 from Uber](https://eng.uber.com/m3/), [Thanos](https://github.com/thanos-io/thanos), [Cortex](https://github.com/cortexproject/cortex)?

 No. VictoriaMetrics core is written in Go from scratch by [fasthttp](https://github.com/valyala/fasthttp) [author](https://github.com/valyala).
 The architecture is [optimized for storing and querying large amounts of time series data with high cardinality](https://medium.com/devopslinks/victoriametrics-creating-the-best-remote-storage-for-prometheus-5d92d66787ac). VictoriaMetrics storage uses [certain ideas from ClickHouse](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282). Special thanks to [Alexey Milovidov](https://github.com/alexey-milovidov).


+
 ### Are there performance comparisons with other solutions?

 Yes:
@@ -113,7 +158,7 @@ This is slow and expensive.
 Prometheus remote read API isn't intended for querying foreign data aka `global query view`. See [this issue](https://github.com/prometheus/prometheus/issues/4456) for details.

 So just query VictoriaMetrics directly via [Prometheus Querying API](https://prometheus.io/docs/prometheus/latest/querying/api/)
-or via [Prometheus datasoruce in Grafana](http://docs.grafana.org/features/datasources/prometheus/).
+or via [Prometheus datasource in Grafana](http://docs.grafana.org/features/datasources/prometheus/).


 ### Does VictoriaMetrics deduplicate data from Prometheus instances scraping the same targets (aka `HA pairs`)?
@@ -131,8 +176,8 @@ The deduplication for Prometheus HA pair may be easily implemented on top of Vic
 ### Where is the source code of VictoriaMetrics?

 Source code for the following versions is available in the following places:
-* [Single-node version](https://github.com/VictoriaMetrics/VictoriaMetrics).
-* [Cluster version](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/cluster).
+* [Single-node version](https://github.com/VictoriaMetrics/VictoriaMetrics)
+* [Cluster version](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/cluster)


 ### Does VictoriaMetrics fit for data from IoT sensors and industrial sensors?
@@ -145,7 +190,11 @@ and scales horizontally to multiple nodes.

 ### Where can I ask questions about VictoriaMetrics?

-See [VictoriaMetrics-users group](https://groups.google.com/forum/#!forum/victorametrics-users).
+Questions about VictoriaMetrics can be asked via the following channels:
+
+- [Slack channel](http://slack.victoriametrics.com/)
+- [Telegram channel](https://t.me/VictoriaMetrics_en)
+- [Google group](https://groups.google.com/forum/#!forum/victorametrics-users)


 ### Where can I file bugs and feature requests regarding VictoriaMetrics?
--- a/docs/Home.md
+++ b/docs/Home.md
@@ -1,11 +1,13 @@
-# VictoriaMetrics docs
+# Docs

 * [Quick start](Quick-Start)
 * [`WITH` templates playground](https://play.victoriametrics.com/promql/expand-with-exprs)
 * [Grafana playground](http://play-grafana.victoriametrics.com:3000/d/4ome8yJmz/node-exporter-on-victoriametrics-demo)
-* [Extended PromQL](ExtendedPromQL)
+* [MetricsQL](ExtendedPromQL)
 * [Single-node version](Single-server-VictoriaMetrics)
 * [FAQ](FAQ)
 * [Cluster version](Cluster-VictoriaMetrics)
 * [Articles](Articles)
-
+* [Case Studies](CaseStudies)
+* [vmbackup](vmbackup)
+* [vmrestore](vmrestore)
--- a/docs/Release-Guide.md
+++ b/docs/Release-Guide.md
@@ -1,4 +1,6 @@
-# Release version and Docker images
+Release process guidance
+
+## Release version and Docker images

 1. Create release tag with `git tag v1.xx.y`.
 2. Run `make release` for creating `*.tar.gz` release archive with the corresponding `_checksums.txt` inside `bin` directory.
@@ -36,4 +38,7 @@ In that case, don't need to bump the helm chart version
 All changes from `docs` folder and `.md` extension automatically push to Wiki

 **_Note_**: no vice versa, direct changes on Wiki will be overitten after any changes in `docs/*.md` 
-     
+
+## Github pages
+
+All changes in `README.md`, `docs` folder and `.md` extension automatically push to Wiki
--- a/docs/Single-server-VictoriaMetrics.md
+++ b/docs/Single-server-VictoriaMetrics.md
@@ -1,17 +1,26 @@
-# Single-node VictoriaMetrics
+## VictoriaMetrics

 VictoriaMetrics is fast, cost-effective and scalable time-series database. It can be used as long-term remote storage for Prometheus.
 It is available in [binary releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases),
 [docker images](https://hub.docker.com/r/victoriametrics/victoria-metrics/) and
-in [source code](https://github.com/VictoriaMetrics/VictoriaMetrics).
+in [source code](https://github.com/VictoriaMetrics/VictoriaMetrics). Just download VictoriaMetrics and see [how to start it](#how-to-start-victoriametrics).

 Cluster version is available [here](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/cluster).


+## Case studies and talks
+
+* [Adidas](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/CaseStudies#adidas)
+* [COLOPL](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/CaseStudies#colopl)
+* [Wix.com](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/CaseStudies#wixcom)
+* [Wedos.com](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/CaseStudies#wedoscom)
+* [Dreamteam](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/CaseStudies#dreamteam)
+
+
 ## Prominent features

 * Supports [Prometheus querying API](https://prometheus.io/docs/prometheus/latest/querying/api/), so it can be used as Prometheus drop-in replacement in Grafana.
-  Additionally, VictoriaMetrics extends PromQL with opt-in [useful features](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/ExtendedPromQL).
+  VictoriaMetrics implements [MetricsQL](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/ExtendedPromQL) query language, which is inspired by PromQL.
 * Supports global query view. Multiple Prometheus instances may write data into VictoriaMetrics. Later this data may be used in a single query.
 * High performance and good scalability for both [inserts](https://medium.com/@valyala/high-cardinality-tsdb-benchmarks-victoriametrics-vs-timescaledb-vs-influxdb-13e6ee64dd6b)
  and [selects](https://medium.com/@valyala/when-size-matters-benchmarking-victoriametrics-vs-timescale-and-influxdb-6035811952d4).
@@ -21,9 +30,11 @@ Cluster version is available [here](https://github.com/VictoriaMetrics/VictoriaM
 * High data compression, so [up to 70x more data points](https://medium.com/@valyala/when-size-matters-benchmarking-victoriametrics-vs-timescale-and-influxdb-6035811952d4)
  may be crammed into limited storage comparing to TimescaleDB.
 * Optimized for storage with high-latency IO and low IOPS (HDD and network storage in AWS, Google Cloud, Microsoft Azure, etc). See [graphs from these benchmarks](https://medium.com/@valyala/high-cardinality-tsdb-benchmarks-victoriametrics-vs-timescaledb-vs-influxdb-13e6ee64dd6b).
-* A single-node VictoriaMetrics may substitute moderately sized clusters built with competing solutions such as Thanos, Uber M3, Cortex, InfluxDB or TimescaleDB.
-  See [vertical scalability benchmarks](https://medium.com/@valyala/measuring-vertical-scalability-for-time-series-databases-in-google-cloud-92550d78d8ae)
-  and [comparing Thanos to VictoriaMetrics cluster](https://medium.com/@valyala/comparing-thanos-to-victoriametrics-cluster-b193bea1683).
+* A single-node VictoriaMetrics may substitute moderately sized clusters built with competing solutions such as Thanos, M3DB, Cortex, InfluxDB or TimescaleDB.
+  See [vertical scalability benchmarks](https://medium.com/@valyala/measuring-vertical-scalability-for-time-series-databases-in-google-cloud-92550d78d8ae),
+  [comparing Thanos to VictoriaMetrics cluster](https://medium.com/@valyala/comparing-thanos-to-victoriametrics-cluster-b193bea1683)
+  and [Remote Write Storage Wars](https://promcon.io/2019-munich/talks/remote-write-storage-wars/) talk
+  from [PromCon 2019](https://promcon.io/2019-munich/talks/remote-write-storage-wars/).
 * Easy operation:
  * VictoriaMetrics consists of a single [small executable](https://medium.com/@valyala/stripping-dependency-bloat-in-victoriametrics-docker-image-983fb5912b0d) without external dependencies.
  * All the configuration is done via explicit command-line flags with reasonable defaults.
@@ -34,11 +45,12 @@ Cluster version is available [here](https://github.com/VictoriaMetrics/VictoriaM
 * Storage is protected from corruption on unclean shutdown (i.e. OOM, hardware reset or `kill -9`) thanks to [the storage architecture](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282).
 * Supports metrics' ingestion and [backfilling](#backfilling) via the following protocols:
  * [Prometheus remote write API](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#remote_write)
-  * [InfluxDB line protocol](https://docs.influxdata.com/influxdb/v1.7/write_protocols/line_protocol_tutorial/)
-  * [Graphite plaintext protocol](https://graphite.readthedocs.io/en/latest/feeding-carbon.html) with [tags](https://graphite.readthedocs.io/en/latest/tags.html#carbon)
+  * [InfluxDB line protocol](#how-to-send-data-from-influxdb-compatible-agents-such-as-telegraf)
+  * [Graphite plaintext protocol](#how-to-send-data-from-graphite-compatible-agents-such-as-statsd) with [tags](https://graphite.readthedocs.io/en/latest/tags.html#carbon)
    if `-graphiteListenAddr` is set.
-  * [OpenTSDB put message](http://opentsdb.net/docs/build/html/api_telnet/put.html) if `-opentsdbListenAddr` is set.
-  * [HTTP OpenTSDB /api/put requests](http://opentsdb.net/docs/build/html/api_http/put.html) if `-opentsdbHTTPListenAddr` is set.
+  * [OpenTSDB put message](#sending-data-via-telnet-put-protocol) if `-opentsdbListenAddr` is set.
+  * [HTTP OpenTSDB /api/put requests](#sending-opentsdb-data-via-http-apiput-requests) if `-opentsdbHTTPListenAddr` is set.
+  * [/api/v1/import](#how-to-import-time-series-data)
 * Ideally works with big amounts of time series data from Kubernetes, IoT sensors, connected cars, industrial telemetry, financial data and various Enterprise workloads.
 * Has open source [cluster version](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/cluster).

@@ -57,6 +69,7 @@ Cluster version is available [here](https://github.com/VictoriaMetrics/VictoriaM
  - [How to send data from Graphite-compatible agents such as StatsD?](#how-to-send-data-from-graphite-compatible-agents-such-as-statsd)
  - [Querying Graphite data](#querying-graphite-data)
  - [How to send data from OpenTSDB-compatible agents?](#how-to-send-data-from-opentsdb-compatible-agents)
+  - [Prometheus querying API usage](#prometheus-querying-api-usage)
  - [How to build from sources](#how-to-build-from-sources)
    - [Development build](#development-build)
    - [Production build](#production-build)
@@ -65,13 +78,14 @@ Cluster version is available [here](https://github.com/VictoriaMetrics/VictoriaM
    - [Building docker images](#building-docker-images)
  - [Start with docker-compose](#start-with-docker-compose)
  - [Setting up service](#setting-up-service)
-  - [Third-party contributions](#third-party-contributions)
  - [How to work with snapshots?](#how-to-work-with-snapshots)
  - [How to delete time series?](#how-to-delete-time-series)
  - [How to export time series?](#how-to-export-time-series)
+  - [How to import time series data?](#how-to-import-time-series-data)
  - [Federation](#federation)
  - [Capacity planning](#capacity-planning)
  - [High availability](#high-availability)
+  - [Retention](#retention)
  - [Multiple retentions](#multiple-retentions)
  - [Downsampling](#downsampling)
  - [Multi-tenancy](#multi-tenancy)
@@ -87,6 +101,7 @@ Cluster version is available [here](https://github.com/VictoriaMetrics/VictoriaM
 - [Roadmap](#roadmap)
 - [Contacts](#contacts)
 - [Community and contributions](#community-and-contributions)
+- [Third-party contributions](#third-party-contributions)
 - [Reporting bugs](#reporting-bugs)
 - [Victoria Metrics Logo](#victoria-metrics-logo)
  - [Logo Usage Guidelines](#logo-usage-guidelines)
@@ -116,14 +131,13 @@ It is recommended setting up [monitoring](#monitoring) for VictoriaMetrics.

 ### Prometheus setup

-Add the following lines to Prometheus config file (it is usually located at `/etc/prometheus/prometheus.yml`):
+Prometheus must be configured with [remote_write](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#remote_write) 
+in order to send data to VictoriaMetrics. Add the following lines 
+to Prometheus config file (it is usually located at `/etc/prometheus/prometheus.yml`):

 ```yml
 remote_write:
  - url: http://<victoriametrics-addr>:8428/api/v1/write
-    queue_config:
-      max_samples_per_send: 10000
-      max_shards: 30
 ```

 Substitute `<victoriametrics-addr>` with the hostname or IP address of VictoriaMetrics.
@@ -150,6 +164,22 @@ This instructs Prometheus to add `datacenter=dc-123` label to each time series s
 The label name may be arbitrary - `datacenter` is just an example. The label value must be unique
 across Prometheus instances, so those time series may be filtered and grouped by this label.

+For highly loaded Prometheus instances (400k+ samples per second)
+the following tuning may be applied:
+```
+remote_write:
+  - url: http://<victoriametrics-addr>:8428/api/v1/write
+    queue_config:
+      max_samples_per_send: 10000
+      capacity: 20000
+      max_shards: 30
+```
+
+Using remote write increases memory usage for Prometheus by up to ~25%,
+depending on the shape of data. If you are experiencing issues with
+too high memory consumption, try lowering `max_samples_per_send`
+and `capacity` params (keep in mind that these two params are tightly connected).
+Read more about tuning remote write for Prometheus [here](https://prometheus.io/docs/practices/remote_write).

 It is recommended upgrading Prometheus to [v2.12.0](https://github.com/prometheus/prometheus/releases) or newer,
 since the previous versions may have issues with `remote_write`.
@@ -293,7 +323,7 @@ The `/api/v1/export` endpoint should return the following response:
 ### Querying Graphite data

 Data sent to VictoriaMetrics via `Graphite plaintext protocol` may be read either via
-[Prometheus querying API](https://prometheus.io/docs/prometheus/latest/querying/api/)
+[Prometheus querying API](#prometheus-querying-api-usage)
 or via [go-graphite/carbonapi](https://github.com/go-graphite/carbonapi/blob/master/cmd/carbonapi/carbonapi.example.prometheus.yaml).


@@ -373,6 +403,31 @@ The `/api/v1/export` endpoint should return the following response:
 ```


+### Prometheus querying API usage
+
+VictoriaMetrics supports the following handlers from [Prometheus querying API](https://prometheus.io/docs/prometheus/latest/querying/api/):
+
+* [/api/v1/query](https://prometheus.io/docs/prometheus/latest/querying/api/#instant-queries)
+* [/api/v1/query_range](https://prometheus.io/docs/prometheus/latest/querying/api/#range-queries)
+* [/api/v1/series](https://prometheus.io/docs/prometheus/latest/querying/api/#finding-series-by-label-matchers)
+* [/api/v1/labels](https://prometheus.io/docs/prometheus/latest/querying/api/#getting-label-names)
+* [/api/v1/label/.../values](https://prometheus.io/docs/prometheus/latest/querying/api/#querying-label-values)
+
+These handlers can be queried from Prometheus-compatible clients such as Grafana or curl.
+
+VictoriaMetrics accepts additional args for `/api/v1/labels` and `/api/v1/label/.../values` handlers.
+See [this feature request](https://github.com/prometheus/prometheus/issues/6178) for details:
+
+* Any number of [time series selectors](https://prometheus.io/docs/prometheus/latest/querying/basics/#time-series-selectors) via `match[]` query arg.
+* Optional `start` and `end` query args for limiting the time range for the selected labels or label values.
+
+Additionally VictoriaMetrics provides the following handlers:
+
+* `/api/v1/series/count` - it returns the total number of time series in the database. Note that this handler scans the entire inverted index,
+  so it can be slow if the database contains tens of millions of time series.
+* `/api/v1/labels/count` - it returns a list of `label: values_count` entries. It can be used for determining labels with the maximum number of values.
+
+
 ### How to build from sources

 We recommend using either [binary releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases) or
@@ -438,11 +493,6 @@ More details may be found [here](https://github.com/VictoriaMetrics/VictoriaMetr
 Read [these instructions](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/43) on how to set up VictoriaMetrics as a service in your OS.


-### Third-party contributions
-
-* [Unofficial yum repository](https://copr.fedorainfracloud.org/coprs/antonpatsev/VictoriaMetrics/) ([source code](https://github.com/patsevanton/victoriametrics-rpm))
-
-
 ### How to work with snapshots?

 VictoriaMetrics can create [instant snapshots](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282)
@@ -482,12 +532,26 @@ the deleted time series isn't freed instantly - it is freed during subsequent me
 It is recommended verifying which metrics will be deleted with the call to `http://<victoria-metrics-addr>:8428/api/v1/series?match[]=<timeseries_selector_for_delete>`
 before actually deleting the metrics.

+The delete API is intended mainly for the following cases:
+
+- One-off deleting of accidentally written invalid (or undesired) time series.
+- One-off deleting of user data due to [GDPR](https://en.wikipedia.org/wiki/General_Data_Protection_Regulation).
+
+It isn't recommended using delete API for the following cases, since it brings non-zero overhead:
+
+- Regular cleanups for unneeded data. Just prevent writing unneeded data into VictoriaMetrics.
+- Reducing disk space usage by deleting unneeded time series. This doesn't work as expected, since the deleted
+  time series occupy disk space until the next merge operation, which may never occur.
+
+It is better to use `-retentionPeriod` command-line flag for efficient pruning of old data.
+

 ### How to export time series?

 Send a request to `http://<victoriametrics-addr>:8428/api/v1/export?match[]=<timeseries_selector_for_export>`,
 where `<timeseries_selector_for_export>` may contain any [time series selector](https://prometheus.io/docs/prometheus/latest/querying/basics/#time-series-selectors)
-for metrics to export. The response would contain all the data for the selected time series in [JSON streaming format](https://en.wikipedia.org/wiki/JSON_streaming#Line-delimited_JSON).
+for metrics to export. Use `{__name__!=""}` selector for fetching all the time series.
+The response would contain all the data for the selected time series in [JSON streaming format](https://en.wikipedia.org/wiki/JSON_streaming#Line-delimited_JSON).
 Each JSON line would contain data for a single time series. An example output:

 ```
@@ -498,6 +562,52 @@ Each JSON line would contain data for a single time series. An example output:
 Optional `start` and `end` args may be added to the request in order to limit the time frame for the exported data. These args may contain either
 unix timestamp in seconds or [RFC3339](https://www.ietf.org/rfc/rfc3339.txt) values.

+Pass `Accept-Encoding: gzip` HTTP header in the request to `/api/v1/export` in order to reduce network bandwidth during exporting big amounts
+of time series data. This enables gzip compression for the exported data. Example for exporting gzipped data:
+
+```
+curl -H 'Accept-Encoding: gzip' http://localhost:8428/api/v1/export -d 'match[]={__name__!=""}' > data.jsonl.gz
+```
+
+The maximum duration for each request to `/api/v1/export` is limited by `-search.maxExportDuration` command-line flag.
+
+Exported data can be imported via POST'ing it to [/api/v1/import](#how-to-import-time-series-data).
+
+
+### How to import time series data?
+
+Time series data can be imported via any supported ingestion protocol:
+
+* [Prometheus remote_write API](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#remote_write)
+* [Influx line protocol](#how-to-send-data-from-influxdb-compatible-agents-such-as-telegraf)
+* [Graphite plaintext protocol](#how-to-send-data-from-graphite-compatible-agents-such-as-statsd)
+* [OpenTSDB telnet put protocol](#sending-data-via-telnet-put-protocol)
+* [OpenTSDB http /api/put](#sending-opentsdb-data-via-http-apiput-requests)
+* `/api/v1/import` http POST handler, which accepts data from [/api/v1/export](#how-to-export-time-series).
+
+The most efficient protocol for importing data into VictoriaMetrics is `/api/v1/import`. Example for importing data obtained via `/api/v1/export`:
+
+```
+# Export the data from <source-victoriametrics>:
+curl http://source-victoriametrics:8428/api/v1/export -d 'match={__name__!=""}' > exported_data.jsonl
+
+# Import the data to <destination-victoriametrics>:
+curl -X POST http://destination-victoriametrics:8428/api/v1/import -T exported_data.jsonl
+```
+
+Pass `Content-Encoding: gzip` HTTP request header to `/api/v1/import` for importing gzipped data:
+
+```
+# Export gzipped data from <source-victoriametrics>:
+curl -H 'Accept-Encoding: gzip' http://source-victoriametrics:8428/api/v1/export -d 'match={__name__!=""}' > exported_data.jsonl.gz
+
+# Import gzipped data to <destination-victoriametrics>:
+curl -X POST -H 'Content-Encoding: gzip' http://destination-victoriametrics:8428/api/v1/import -T exported_data.jsonl.gz
+```
+
+Each request to `/api/v1/import` can load up to a single vCPU core on VictoriaMetrics. Import speed can be improved by splitting the original file into smaller parts
+and importing them concurrently. Note that the original file must be split on newlines.
+

 ### Federation

@@ -518,7 +628,7 @@ A rough estimation of the required resources for ingestion path:
 * RAM size: less than 1KB per active time series. So, ~1GB of RAM is required for 1M active time series.
  Time series is considered active if new data points have been added to it recently or if it has been recently queried.
  The number of active time series may be obtained from `vm_cache_entries{type="storage/hour_metric_ids"}` metric
-  exproted on the `/metrics` page.
+  exported on the `/metrics` page.
  VictoriaMetrics stores various caches in RAM. Memory size for these caches may be limited by `-memory.allowedPercent` flag.

 * CPU cores: a CPU core per 300K inserted data points per second. So, ~4 CPU cores are required for processing
@@ -582,6 +692,16 @@ If you have Prometheus HA pairs with replicas `r1` and `r2` in each pair, then c
 to write data to `victoriametrics-addr-1`, while each `r2` should write data to `victoriametrics-addr-2`.


+### Retention
+
+Retention is configured with `-retentionPeriod` command-line flag. For instance, `-retentionPeriod=3` means
+that the data will be stored for 3 months and then deleted.
+Data is split in per-month subdirectories inside `<-storageDataPath>/data/small` and `<-storageDataPath>/data/big` folders.
+Directories for months outside the configured retention are deleted on the first day of new month.
+In order to keep data according to `-retentionPeriod` max disk space usage is going to be `-retentionPeriod` + 1 month.
+For example if `-retentionPeriod` is set to 1, data for January is deleted on March 1st.
+
+
 ### Multiple retentions

 Just start multiple VictoriaMetrics instances with distinct values for the following flags:
@@ -599,7 +719,7 @@ There is no downsampling support at the moment, but:
 - VictoriaMetrics has good compression for on-disk data. See [this article](https://medium.com/@valyala/victoriametrics-achieving-better-compression-for-time-series-data-than-gorilla-317bc1f95932)
  for details.

-These properties reduce the need in downsampling. We plan to implement downsampling in the future.
+These properties reduce the need for downsampling. We plan to implement downsampling in the future.
 See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/36) for details.


@@ -621,8 +741,10 @@ horizontally scalable long-term remote storage for really large Prometheus deplo

 ### Alerting

-VictoriaMetrics doesn't support rule evaluation and alerting yet, so these actions must be performed either
-on [Prometheus side](https://prometheus.io/docs/alerting/overview/) or on [Grafana side](https://grafana.com/docs/alerting/rules/).
+VictoriaMetrics doesn't support rule evaluation and alerting yet, so these actions can be performed at the following places:
+* At Prometheus - see [the corresponding docs](https://prometheus.io/docs/alerting/overview/).
+* At Promxy - see [the corresponding docs](https://github.com/jacksontj/promxy/blob/master/README.md#how-do-i-use-alertingrecording-rules-in-promxy).
+* At Grafana - see [the corresponding docs](https://grafana.com/docs/alerting/rules/).


 ### Security
@@ -642,14 +764,14 @@ For example, substitute `-graphiteListenAddr=:2003` with `-graphiteListenAddr=<i

 ### Tuning

-* There is no need in VictoriaMetrics tuning since it uses reasonable defaults for command-line flags,
+* There is no need for VictoriaMetrics tuning since it uses reasonable defaults for command-line flags,
  which are automatically adjusted for the available CPU and RAM resources.
-* There is no need in Operating System tuning since VictoriaMetrics is optimized for default OS settings.
+* There is no need for Operating System tuning since VictoriaMetrics is optimized for default OS settings.
  The only option is increasing the limit on [the number of open files in the OS](https://medium.com/@muhammadtriwibowo/set-permanently-ulimit-n-open-files-in-ubuntu-4d61064429a),
  so Prometheus instances could establish more connections to VictoriaMetrics.
 * The recommended filesystem is `ext4`, the recommended persistent storage is [persistent HDD-based disk on GCP](https://cloud.google.com/compute/docs/disks/#pdspecs),
  since it is protected from hardware failures via internal replication and it can be [resized on the fly](https://cloud.google.com/compute/docs/disks/add-persistent-disk#resize_pd).
-  If you plan storing more than 1TB of data on `ext4` partition or plan extending it to more than 16TB,
+  If you plan to store more than 1TB of data on `ext4` partition or plan extending it to more than 16TB,
  then the following options are recommended to pass to `mkfs.ext4`:

 ```
@@ -659,9 +781,12 @@ mkfs.ext4 ... -O 64bit,huge_file,extent -T huge

 ### Monitoring

-VictoriaMetrics exports internal metrics in Prometheus format on the `/metrics` page.
-Add this page to Prometheus' scrape config in order to collect VictoriaMetrics metrics.
-There is [an official Grafana dashboard for single-node VictoriaMetrics](https://grafana.com/dashboards/10229).
+VictoriaMetrics exports internal metrics in Prometheus format at `/metrics` page.
+These metrics may be collected via Prometheus by adding the corresponding scrape config to it.
+Alternatively they can be self-scraped by setting `-selfScrapeInterval` command-line flag to duration greater than 0.
+For example, `-selfScrapeInterval=10s` would enable self-scraping of `/metrics` page with 10 seconds interval.
+
+There are official Grafana dashboards for [single-node VictoriaMetrics](https://grafana.com/dashboards/10229) and [clustered VictoriaMetrics](https://grafana.com/grafana/dashboards/11176).

 The most interesting metrics are:

@@ -680,7 +805,7 @@ The most interesting metrics are:
 ### Troubleshooting

 * It is recommended to use default command-line flag values (i.e. don't set them explicitly) until the need
-  in tweaking these flag values arises.
+  of tweaking these flag values arises.

 * If VictoriaMetrics works slowly and eats more than a CPU core per 100K ingested data points per second,
  then it is likely you have too many active time series for the current amount of RAM.
@@ -694,13 +819,14 @@ The most interesting metrics are:
  has at least 20% of free space comparing to disk size.

 * If VictoriaMetrics doesn't work because of certain parts are corrupted due to disk errors,
-  then just remove directoreis with broken parts. This will recover VictoriaMetrics at the cost
+  then just remove directories with broken parts. This will recover VictoriaMetrics at the cost
  of data loss stored in the broken parts. In the future, `vmrecover` tool will be created
  for automatic recovering from such errors.


 ### Backfilling

+VictoriaMetrics accepts historical data in arbitrary order of time.
 Make sure that configured `-retentionPeriod` covers timestamps for the backfilled data.

 It is recommended disabling query cache with `-search.disableCache` command-line flag when writing
@@ -745,7 +871,7 @@ The collected profiles may be analyzed with [go tool pprof](https://github.com/g
 - [ ] CLI tool for data migration, re-balancing and adding/removing nodes [#103](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/103)


-The discussion happens [here](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/129). Feel free to comment any item or add own one.
+The discussion happens [here](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/129). Feel free to comment on any item or add your own one.


 ## Contacts
@@ -758,6 +884,7 @@ Contact us with any questions regarding VictoriaMetrics at [info@victoriametrics
 Feel free asking any questions regarding VictoriaMetrics:

 - [slack](http://slack.victoriametrics.com/)
+- [reddit](https://www.reddit.com/r/VictoriaMetrics/)
 - [telegram-en](https://t.me/VictoriaMetrics_en)
 - [telegram-ru](https://t.me/VictoriaMetrics_ru1)
 - [google groups](https://groups.google.com/forum/#!forum/victorametrics-users)
@@ -781,6 +908,13 @@ We are open to third-party pull requests provided they follow [KISS design princ
 Adhering `KISS` principle simplifies the resulting code and architecture, so it can be reviewed, understood and verified by many people.


+### Third-party contributions
+
+* [Unofficial yum repository](https://copr.fedorainfracloud.org/coprs/antonpatsev/VictoriaMetrics/) ([source code](https://github.com/patsevanton/victoriametrics-rpm))
+* [Prometheus -> VictoriaMetrics exporter #1](https://github.com/ryotarai/prometheus-tsdb-dump)
+* [Prometheus -> VictoriaMetrics exporter #2](https://github.com/AnchorFree/tsdb-remote-write)
+
+
 ## Reporting bugs

 Report bugs and propose new features [here](https://github.com/VictoriaMetrics/VictoriaMetrics/issues).
@@ -788,7 +922,7 @@ Report bugs and propose new features [here](https://github.com/VictoriaMetrics/V

 ## Victoria Metrics Logo

-[Zip](VM_logo.zip) contains three folders with different image orientation (main color and inverted version).
+[Zip](VM_logo.zip) contains three folders with different image orientations (main color and inverted version).

 Files included in each folder:

--- a/docs/vmbackup.md
+++ b/docs/vmbackup.md
@@ -0,0 +1,181 @@
+## vmbackup
+
+`vmbackup` creates VictoriaMetrics data backups from [instant snapshots](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/README.md#how-to-work-with-snapshots).
+
+Supported storage systems for backups:
+
+* [GCS](https://cloud.google.com/storage/). Example: `gcs://<bucket>/<path/to/backup>`
+* [S3](https://aws.amazon.com/s3/). Example: `s3://<bucket>/<path/to/backup>`
+* Any S3-compatible storage such as [MinIO](https://github.com/minio/minio), [Ceph](https://docs.ceph.com/docs/mimic/radosgw/s3/) or [Swift](https://www.swiftstack.com/docs/admin/middleware/s3_middleware.html). See `-customS3Endpoint` command-line flag.
+* Local filesystem. Example: `fs://</absolute/path/to/backup>`
+
+Incremental backups and full backups are supported. Incremental backups are created automatically if the destination path already contains data from the previous backup.
+Full backups can be sped up with `-origin` pointing to already existing backup on the same remote storage. In this case `vmbackup` makes server-side copy for the shared
+data between the existing backup and new backup. This saves time and costs on data transfer.
+
+Backup process can be interrupted at any time. It is automatically resumed from the interruption point when restarting `vmbackup` with the same args.
+
+Backed up data can be restored with [vmrestore](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmrestore/README.md).
+
+See [this article](https://medium.com/@valyala/speeding-up-backups-for-big-time-series-databases-533c1a927883) for more details.
+
+
+### Use cases
+
+#### Regular backups
+
+Regular backup can be performed with the following command:
+
+```
+vmbackup -storageDataPath=</path/to/victoria-metrics-data> -snapshotName=<local-snapshot> -dst=gcs://<bucket>/<path/to/new/backup>
+```
+
+* `</path/to/victoria-metrics-data>` - path to VictoriaMetrics data pointed by `-storageDataPath` command-line flag in single-node VictoriaMetrics or in cluster `vmstorage`.
+  There is no need to stop VictoriaMetrics for creating backups, since they are performed from immutable [instant snapshots](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/README.md#how-to-work-with-snapshots).
+* `<local-snapshot>` is the snapshot to backup. See [how to create instant snapshots](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/README.md#how-to-work-with-snapshots).
+* `<bucket>` is already existing name for [GCS bucket](https://cloud.google.com/storage/docs/creating-buckets).
+* `<path/to/new/backup>` is the destination path where new backup will be placed.
+
+
+#### Regular backups with server-side copy from existing backup
+
+If the destination GCS bucket already contains the previous backup at `-origin` path, then new backup can be sped up
+with the following command:
+
+```
+vmbackup -storageDataPath=</path/to/victoria-metrics-data> -snapshotName=<local-snapshot> -dst=gcs://<bucket>/<path/to/new/backup> -origin=gcs://<bucket>/<path/to/existing/backup>
+```
+
+This saves time and network bandwidth costs by performing server-side copy for the shared data from the `-origin` to `-dst`.
+
+
+#### Incremental backups
+
+Incremental backups are performed if `-dst` points to already existing backup. In this case only new data is uploaded to remote storage.
+This saves time and network bandwidth costs when working with big backups:
+
+```
+vmbackup -storageDataPath=</path/to/victoria-metrics-data> -snapshotName=<local-snapshot> -dst=gcs://<bucket>/<path/to/existing/backup>
+```
+
+
+#### Smart backups
+
+Smart backups mean storing full daily backups into `YYYYMMDD` folders and creating incremental hourly backup into `latest` folder:
+
+* Run the following command every hour:
+
+```
+vmbackup -snapshotName=<latest-snapshot> -dst=gcs://<bucket>/latest
+```
+
+Where `<latest-snapshot>` is the latest [snapshot](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/README.md#how-to-work-with-snapshots).
+The command will upload only changed data to `gcs://<bucket>/latest`.
+
+* Run the following command once a day:
+
+```
+vmbackup -snapshotName=<daily-snapshot> -dst=gcs://<bucket>/<YYYYMMDD> -origin=gcs://<bucket>/latest
+```
+
+Where `<daily-snapshot>` is the snapshot for the last day `<YYYYMMDD>`.
+
+
+This approach saves network bandwidth costs on hourly backups (since they are incremental) and allows recovering data from either the last hour (`latest` backup)
+or from any day (`YYYYMMDD` backups). Note that hourly backup shouldn't run when creating daily backup.
+
+Do not forget removing old snapshots and backups when they are no longer needed for saving storage costs.
+
+
+### How does it work?
+
+The backup algorithm is the following:
+
+1. Collect information about files in the `-snapshotName`, in the `-dst` and in the `-origin`.
+2. Determine files in `-dst`, which are missing in `-snapshotName`, and delete them. These are usually small files, which are already merged into bigger files in the snapshot.
+3. Determine files from `-snapshotName`, which are missing in `-dst`. These are usually small new files and bigger merged files.
+4. Determine files from step 3, which exist in the `-origin`, and perform server-side copy of these files from `-origin` to `-dst`.
+   These are usually the biggest and the oldest files, which are shared between backups.
+5. Upload the remaining files from step 3 from `-snapshotName` to `-dst`.
+
+The algorithm splits source files into 100MB chunks in the backup. Each chunk is stored as a separate file in the backup.
+Such splitting minimizes the amounts of data to re-transfer after temporary errors.
+
+`vmbackup` relies on [instant snapshot](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282) properties:
+
+- All the files in the snapshot are immutable.
+- Old files are periodically merged into new files.
+- Smaller files have higher probability to be merged.
+- Consecutive snapshots share many identical files.
+
+These properties allow performing fast and cheap incremental backups and server-side copying from `-origin` paths.
+See [this article](https://medium.com/@valyala/speeding-up-backups-for-big-time-series-databases-533c1a927883) for more details.
+`vmbackup` can work improperly or slowly when these properties are violated.
+
+
+### Troubleshooting
+
+* If the backup is slow, then try setting higher value for `-concurrency` flag. This will increase the number of concurrent workers that upload data to backup storage.
+* If `vmbackup` eats all the network bandwidth, then set `-maxBytesPerSecond` to the desired value.
+* If `vmbackup` has been interrupted due to temporary error, then just restart it with the same args. It will resume the backup process.
+
+
+### Advanced usage
+
+Run `vmbackup -help` in order to see all the available options:
+
+```
+  -concurrency int
+    	The number of concurrent workers. Higher concurrency may reduce backup duration (default 10)
+  -configFilePath string
+    	Path to file with S3 configs. Configs are loaded from default location if not set.
+    	See https://docs.aws.amazon.com/general/latest/gr/aws-security-credentials.html
+  -configProfile string
+    	Profile name for S3 configs (default "default")
+  -credsFilePath string
+    	Path to file with GCS or S3 credentials. Credentials are loaded from default locations if not set.
+    	See https://cloud.google.com/iam/docs/creating-managing-service-account-keys and https://docs.aws.amazon.com/general/latest/gr/aws-security-credentials.html
+  -customS3Endpoint string
+    	Custom S3 endpoint for use with S3-compatible storages (e.g. MinIO). S3 is used if not set
+  -dst string
+    	Where to put the backup on the remote storage. Example: gcs://bucket/path/to/backup/dir, s3://bucket/path/to/backup/dir or fs:///path/to/local/backup/dir
+    	-dst can point to the previous backup. In this case incremental backup is performed, i.e. only changed data is uploaded
+  -loggerLevel string
+    	Minimum level of errors to log. Possible values: INFO, ERROR, FATAL, PANIC (default "INFO")
+  -maxBytesPerSecond int
+    	The maximum upload speed. There is no limit if it is set to 0
+  -memory.allowedPercent float
+    	Allowed percent of system memory VictoriaMetrics caches may occupy (default 60)
+  -origin string
+    	Optional origin directory on the remote storage with old backup for server-side copying when performing full backup. This speeds up full backups
+  -snapshotName string
+    	Name for the snapshot to backup. See https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/README.md#how-to-work-with-snapshots
+  -storageDataPath string
+    	Path to VictoriaMetrics data. Must match -storageDataPath from VictoriaMetrics or vmstorage (default "victoria-metrics-data")
+  -version
+    	Show VictoriaMetrics version
+```
+
+
+### How to build from sources
+
+It is recommended using [binary releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases) - see `vmutils-*` archives there.
+
+
+#### Development build
+
+1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.12.
+2. Run `make vmbackup` from the root folder of the repository.
+   It builds `vmbackup` binary and puts it into the `bin` folder.
+
+#### Production build
+
+1. [Install docker](https://docs.docker.com/install/).
+2. Run `make vmbackup-prod` from the root folder of the repository.
+   It builds `vmbackup-prod` binary and puts it into the `bin` folder.
+
+#### Building docker images
+
+Run `make package-vmbackup`. It builds `victoriametrics/vmbackup:<PKG_TAG>` docker image locally.
+`<PKG_TAG>` is auto-generated image tag, which depends on source code in the repository.
+The `<PKG_TAG>` may be manually set via `PKG_TAG=foobar make package-vmbackup`.
--- a/docs/vmrestore.md
+++ b/docs/vmrestore.md
@@ -0,0 +1,86 @@
+## vmrestore
+
+`vmrestore` restores data from backups created by [vmbackup](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmbackup/README.md).
+VictoriaMetrics `v1.29.0` and newer versions must be used for working with the restored data.
+
+Restore process can be interrupted at any time. It is automatically resumed from the interruption point
+when restarting `vmrestore` with the same args.
+
+
+### Usage
+
+VictoriaMetrics must be stopped during the restore process.
+
+```
+vmrestore -src=gcs://<bucket>/<path/to/backup> -storageDataPath=<local/path/to/restore>
+
+```
+
+* `<bucket>` is [GCS bucket](https://cloud.google.com/storage/docs/creating-buckets) name.
+* `<path/to/backup>` is the path to backup made with [vmbackup](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmbackup/README.md) on GCS bucket.
+* `<local/path/to/restore>` is the path to folder where data will be restored. This folder must be passed
+  to VictoriaMetrics in `-storageDataPath` command-line flag after the restore process is complete.
+
+The original `-storageDataPath` directory may contain old files. They will be substituted by the files from backup.
+
+
+### Troubleshooting
+
+* If `vmrestore` eats all the network bandwidth, then set `-maxBytesPerSecond` to the desired value.
+* If `vmrestore` has been interrupted due to temporary error, then just restart it with the same args. It will resume the restore process.
+
+
+### Advanced usage
+
+Run `vmrestore -help` in order to see all the available options:
+
+```
+  -concurrency int
+    	The number of concurrent workers. Higher concurrency may reduce restore duration (default 10)
+  -configFilePath string
+    	Path to file with S3 configs. Configs are loaded from default location if not set.
+    	See https://docs.aws.amazon.com/general/latest/gr/aws-security-credentials.html
+  -configProfile string
+    	Profile name for S3 configs (default "default")
+  -credsFilePath string
+    	Path to file with GCS or S3 credentials. Credentials are loaded from default locations if not set.
+    	See https://cloud.google.com/iam/docs/creating-managing-service-account-keys and https://docs.aws.amazon.com/general/latest/gr/aws-security-credentials.html
+  -customS3Endpoint string
+    	Custom S3 endpoint for use with S3-compatible storages (e.g. MinIO). S3 is used if not set
+  -loggerLevel string
+    	Minimum level of errors to log. Possible values: INFO, ERROR, FATAL, PANIC (default "INFO")
+  -maxBytesPerSecond int
+    	The maximum download speed. There is no limit if it is set to 0
+  -memory.allowedPercent float
+    	Allowed percent of system memory VictoriaMetrics caches may occupy (default 60)
+  -src string
+    	Source path with backup on the remote storage. Example: gcs://bucket/path/to/backup/dir, s3://bucket/path/to/backup/dir or fs:///path/to/local/backup/dir
+  -storageDataPath string
+    	Destination path where backup must be restored. VictoriaMetrics must be stopped when restoring from backup. -storageDataPath dir can be non-empty. In this case only missing data is downloaded from backup (default "victoria-metrics-data")
+  -version
+    	Show VictoriaMetrics version
+```
+
+
+### How to build from sources
+
+It is recommended using [binary releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases) - see `vmutils-*` archives there.
+
+
+#### Development build
+
+1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.12.
+2. Run `make vmrestore` from the root folder of the repository.
+   It builds `vmrestore` binary and puts it into the `bin` folder.
+
+#### Production build
+
+1. [Install docker](https://docs.docker.com/install/).
+2. Run `make vmrestore-prod` from the root folder of the repository.
+   It builds `vmrestore-prod` binary and puts it into the `bin` folder.
+
+#### Building docker images
+
+Run `make package-vmrestore`. It builds `victoriametrics/vmrestore:<PKG_TAG>` docker image locally.
+`<PKG_TAG>` is auto-generated image tag, which depends on source code in the repository.
+The `<PKG_TAG>` may be manually set via `PKG_TAG=foobar make package-vmrestore`.
--- a/go.mod
+++ b/go.mod
@@ -1,32 +1,27 @@
 module github.com/VictoriaMetrics/VictoriaMetrics

 require (
-	cloud.google.com/go v0.49.0 // indirect
-	cloud.google.com/go/storage v1.4.0
-	github.com/VictoriaMetrics/fastcache v1.5.4
-	github.com/VictoriaMetrics/metrics v1.9.2
-	github.com/aws/aws-sdk-go v1.25.43
+	cloud.google.com/go v0.51.0 // indirect
+	cloud.google.com/go/storage v1.5.0
+	github.com/VictoriaMetrics/fastcache v1.5.7
+	github.com/VictoriaMetrics/metrics v1.9.3
+	github.com/aws/aws-sdk-go v1.28.7
 	github.com/cespare/xxhash/v2 v2.1.1
-	github.com/golang/groupcache v0.0.0-20191027212112-611e8accdfc9 // indirect
+	github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e // indirect
 	github.com/golang/snappy v0.0.1
-	github.com/jstemmer/go-junit-report v0.9.1 // indirect
-	github.com/klauspost/compress v1.9.2
-	github.com/valyala/fastjson v1.4.1
+	github.com/klauspost/compress v1.9.8
+	github.com/valyala/fastjson v1.4.5
 	github.com/valyala/fastrand v1.0.0
-	github.com/valyala/gozstd v1.6.3
+	github.com/valyala/gozstd v1.6.4
 	github.com/valyala/histogram v1.0.1
 	github.com/valyala/quicktemplate v1.4.1
-	go.opencensus.io v0.22.2 // indirect
-	golang.org/x/exp v0.0.0-20191127035308-9964a5a80460 // indirect
-	golang.org/x/lint v0.0.0-20191125180803-fdd1cda4f05f // indirect
-	golang.org/x/net v0.0.0-20191126235420-ef20fe5d7933 // indirect
-	golang.org/x/oauth2 v0.0.0-20191122200657-5d9234df094c // indirect
-	golang.org/x/sys v0.0.0-20191127021746-63cb32ae39b2
-	golang.org/x/tools v0.0.0-20191127064951-724660f1afeb // indirect
-	google.golang.org/api v0.14.0
-	google.golang.org/appengine v1.6.5 // indirect
-	google.golang.org/genproto v0.0.0-20191115221424-83cc0476cb11 // indirect
-	google.golang.org/grpc v1.25.1 // indirect
+	golang.org/x/exp v0.0.0-20200119233911-0405dc783f0a // indirect
+	golang.org/x/net v0.0.0-20200114155413-6afb5195e5aa // indirect
+	golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d // indirect
+	golang.org/x/sys v0.0.0-20200122134326-e047566fdf82
+	golang.org/x/tools v0.0.0-20200122042241-dc16b66866f1 // indirect
+	google.golang.org/api v0.15.0
+	google.golang.org/genproto v0.0.0-20200117163144-32f20d992d24 // indirect
 )

 go 1.12
--- a/go.sum
+++ b/go.sum
@@ -5,45 +5,53 @@ cloud.google.com/go v0.44.1/go.mod h1:iSa0KzasP4Uvy3f1mN/7PiObzGgflwredwwASm/v6A
 cloud.google.com/go v0.44.2/go.mod h1:60680Gw3Yr4ikxnPRS/oxxkBccT6SA1yMk63TGekxKY=
 cloud.google.com/go v0.45.1/go.mod h1:RpBamKRgapWJb87xiFSdk4g1CME7QZg3uwTez+TSTjc=
 cloud.google.com/go v0.46.3/go.mod h1:a6bKKbmY7er1mI7TEI4lsAkts/mkhTSZK8w33B4RAg0=
-cloud.google.com/go v0.49.0 h1:CH+lkubJzcPYB1Ggupcq0+k8Ni2ILdG2lYjDIgavDBQ=
-cloud.google.com/go v0.49.0/go.mod h1:hGvAdzcWNbyuxS3nWhD7H2cIJxjRRTRLQVB0bdputVY=
-cloud.google.com/go/bigquery v1.0.1 h1:hL+ycaJpVE9M7nLoiXb/Pn10ENE2u+oddxbD8uu0ZVU=
+cloud.google.com/go v0.50.0/go.mod h1:r9sluTvynVuxRIOHXQEHMFffphuXHOMZMycpNR5e6To=
+cloud.google.com/go v0.51.0 h1:PvKAVQWCtlGUSlZkGW3QLelKaWq7KYv/MW1EboG8bfM=
+cloud.google.com/go v0.51.0/go.mod h1:hWtGJ6gnXH+KgDv+V0zFGDvpi07n3z8ZNj3T1RW0Gcw=
 cloud.google.com/go/bigquery v1.0.1/go.mod h1:i/xbL2UlR5RvWAURpBYZTtm/cXjCha9lbfbpx4poX+o=
+cloud.google.com/go/bigquery v1.3.0 h1:sAbMqjY1PEQKZBWfbu6Y6bsupJ9c4QdHnzg/VvYTLcE=
+cloud.google.com/go/bigquery v1.3.0/go.mod h1:PjpwJnslEMmckchkHFfq+HTD2DmtT67aNFKH1/VBDHE=
 cloud.google.com/go/datastore v1.0.0 h1:Kt+gOPPp2LEPWp8CSfxhsM8ik9CcyE/gYu+0r+RnZvM=
 cloud.google.com/go/datastore v1.0.0/go.mod h1:LXYbyblFSglQ5pkeyhO+Qmw7ukd3C+pD7TKLgZqpHYE=
-cloud.google.com/go/pubsub v1.0.1 h1:W9tAK3E57P75u0XLLR82LZyw8VpAnhmyTOxW9qzmyj8=
 cloud.google.com/go/pubsub v1.0.1/go.mod h1:R0Gpsv3s54REJCy4fxDixWD93lHJMoZTyQ2kNxGRt3I=
+cloud.google.com/go/pubsub v1.1.0 h1:9/vpR43S4aJaROxqQHQ3nH9lfyKKV0dC3vOmnw8ebQQ=
+cloud.google.com/go/pubsub v1.1.0/go.mod h1:EwwdRX2sKPjnvnqCa270oGRyludottCI76h+R3AArQw=
 cloud.google.com/go/storage v1.0.0/go.mod h1:IhtSnM/ZTZV8YYJWCY8RULGVqBDmpoyjwiyrjsg+URw=
-cloud.google.com/go/storage v1.4.0 h1:KDdqY5VTXBTqpSbctVTt0mVvfanP6JZzNzLE0qNY100=
-cloud.google.com/go/storage v1.4.0/go.mod h1:ZusYJWlOshgSBGbt6K3GnB3MT3H1xs2id9+TCl4fDBA=
+cloud.google.com/go/storage v1.5.0 h1:RPUcBvDeYgQFMfQu1eBMq6piD1SXmLH+vK3qjewZPus=
+cloud.google.com/go/storage v1.5.0/go.mod h1:tpKbwo567HUNpVclU5sGELwQWBDZ8gh0ZeosJ0Rtdos=
 dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU=
 github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ=
 github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
 github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo=
-github.com/VictoriaMetrics/fastcache v1.5.4 h1:0BaXbRH01RycJk79OOBwMCXlNryko9z4yEf6RqbP+Xo=
-github.com/VictoriaMetrics/fastcache v1.5.4/go.mod h1:ptDBkNMQI4RtmVo8VS/XwRY6RoTu1dAWCbrk+6WsEM8=
-github.com/VictoriaMetrics/metrics v1.9.2 h1:+CNV5OOPe1PuHff3AsOa5cbN8qCWkWwZZA7eOmFjOwQ=
-github.com/VictoriaMetrics/metrics v1.9.2/go.mod h1:LU2j9qq7xqZYXz8tF3/RQnB2z2MbZms5TDiIg9/NHiQ=
+github.com/VictoriaMetrics/fastcache v1.5.7 h1:4y6y0G8PRzszQUYIQHHssv/jgPHAb5qQuuDNdCbyAgw=
+github.com/VictoriaMetrics/fastcache v1.5.7/go.mod h1:ptDBkNMQI4RtmVo8VS/XwRY6RoTu1dAWCbrk+6WsEM8=
+github.com/VictoriaMetrics/metrics v1.9.3 h1:+1kZnOIb8RY825Nb9q9yMrPcOYuPE2GrZWxUh59XnHI=
+github.com/VictoriaMetrics/metrics v1.9.3/go.mod h1:LU2j9qq7xqZYXz8tF3/RQnB2z2MbZms5TDiIg9/NHiQ=
 github.com/allegro/bigcache v1.2.1-0.20190218064605-e24eb225f156 h1:eMwmnE/GDgah4HI848JfFxHt+iPb26b4zyfspmqY0/8=
 github.com/allegro/bigcache v1.2.1-0.20190218064605-e24eb225f156/go.mod h1:Cb/ax3seSYIx7SuZdm2G2xzfwmv3TPSk2ucNfQESPXM=
-github.com/aws/aws-sdk-go v1.25.43 h1:R5YqHQFIulYVfgRySz9hvBRTWBjudISa+r0C8XQ1ufg=
-github.com/aws/aws-sdk-go v1.25.43/go.mod h1:KmX6BPdI08NWTb3/sm4ZGu5ShLoqVDhKgpiN924inxo=
+github.com/aws/aws-sdk-go v1.28.7 h1:8RUfzsEmyXR8a9G7o2snfUKwrSuqks/k4C7TIfXDDrY=
+github.com/aws/aws-sdk-go v1.28.7/go.mod h1:KmX6BPdI08NWTb3/sm4ZGu5ShLoqVDhKgpiN924inxo=
 github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
 github.com/cespare/xxhash/v2 v2.1.1 h1:6MnRN8NT7+YBpUIWxHtefFZOKTAPgGjpQSxqLNn0+qY=
 github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
+github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI=
+github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI=
+github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU=
 github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw=
 github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
 github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
-github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
+github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
 github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c=
 github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU=
 github.com/go-gl/glfw/v3.3/glfw v0.0.0-20191125211704-12ad95a8df72/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8=
 github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b h1:VKtxabqXZkF25pY9ekfRL6a582T4P37/31XEstQ5p58=
 github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q=
 github.com/golang/groupcache v0.0.0-20190702054246-869f871628b6/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
-github.com/golang/groupcache v0.0.0-20191027212112-611e8accdfc9 h1:uHTyIjqVhYRhLbJ8nIiOJHkEZZ+5YoOsAbD3sk82NiE=
-github.com/golang/groupcache v0.0.0-20191027212112-611e8accdfc9/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
+github.com/golang/groupcache v0.0.0-20191227052852-215e87163ea7 h1:5ZkaAPbicIKTF2I64qf5Fh8Aa83Q/dnOafMYV0OMwjA=
+github.com/golang/groupcache v0.0.0-20191227052852-215e87163ea7/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
+github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e h1:1r7pUrabqp18hOBcwBwiTsbnFeTZHV9eER/QT5JVZxY=
+github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
 github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=
 github.com/golang/mock v1.2.0/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=
 github.com/golang/mock v1.3.1/go.mod h1:sBzyDLLjw3U8JLTeZvSv8jJB+tU5PVekmnlKIyFUx0Y=
@@ -56,18 +64,21 @@ github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEW
 github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
 github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
 github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
-github.com/google/go-cmp v0.3.0 h1:crn/baboCvb5fXaQ0IJ1SGTsTVrWpDsCWC8EGETZijY=
 github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
+github.com/google/go-cmp v0.3.1 h1:Xye71clBPdm5HgqGwUkwhbynsUJZhDbS20FvLhQ2izg=
+github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
 github.com/google/martian v2.1.0+incompatible h1:/CP5g8u/VJHijgedC/Legn3BAbAaWPgecwXBIDzw5no=
 github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs=
 github.com/google/pprof v0.0.0-20181206194817-3ea8567a2e57/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc=
 github.com/google/pprof v0.0.0-20190515194954-54271f7e092f/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc=
+github.com/google/pprof v0.0.0-20191218002539-d4f498aebedc/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM=
 github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI=
 github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg=
 github.com/googleapis/gax-go/v2 v2.0.5 h1:sjZBwGj9Jlw33ImPtvFviGYvseOtDM7hkSKB7+Tv3SM=
 github.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5mhpdKc/us6bOk=
 github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
 github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
+github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc=
 github.com/jmespath/go-jmespath v0.0.0-20180206201540-c2b33e8439af h1:pmfjZENx5imkbgOkpRUYLnmbU7UEFbjtDA2hxJ1ichM=
 github.com/jmespath/go-jmespath v0.0.0-20180206201540-c2b33e8439af/go.mod h1:Nht3zPeWKUH0NzdCt2Blrr5ys8VGpn0CEB0cQHVjt7k=
 github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU=
@@ -76,8 +87,8 @@ github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/X
 github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
 github.com/klauspost/compress v1.4.0/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
 github.com/klauspost/compress v1.4.1/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
-github.com/klauspost/compress v1.9.2 h1:LfVyl+ZlLlLDeQ/d2AqfGIIH4qEDu0Ed2S5GyhCWIWY=
-github.com/klauspost/compress v1.9.2/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
+github.com/klauspost/compress v1.9.8 h1:VMAMUUOh+gaxKTMk+zqbjsSjsIcUcL/LF4o63i82QyA=
+github.com/klauspost/compress v1.9.8/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
 github.com/klauspost/cpuid v0.0.0-20180405133222-e7e905edc00e/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
 github.com/klauspost/cpuid v1.2.0/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
 github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI=
@@ -90,19 +101,18 @@ github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZN
 github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
 github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=
 github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
-github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q=
 github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
 github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk=
 github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
 github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw=
 github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc=
 github.com/valyala/fasthttp v1.2.0/go.mod h1:4vX61m6KN+xDduDNwXrhIAVZaZaZiQ1luJk8LWSxF3s=
-github.com/valyala/fastjson v1.4.1 h1:hrltpHpIpkaxll8QltMU8c3QZ5+qIiCL8yKqPFJI/yE=
-github.com/valyala/fastjson v1.4.1/go.mod h1:nV6MsjxL2IMJQUoHDIrjEI7oLyeqK6aBD7EFWPsvP8o=
+github.com/valyala/fastjson v1.4.5 h1:uSuLfXk2LzRtzwd3Fy5zGRBe0Vs7zhs11vjdko32xb4=
+github.com/valyala/fastjson v1.4.5/go.mod h1:nV6MsjxL2IMJQUoHDIrjEI7oLyeqK6aBD7EFWPsvP8o=
 github.com/valyala/fastrand v1.0.0 h1:LUKT9aKer2dVQNUi3waewTbKV+7H17kvWFNKs2ObdkI=
 github.com/valyala/fastrand v1.0.0/go.mod h1:HWqCzkrkg6QXT8V2EXWvXCoow7vLwOFN002oeRzjapQ=
-github.com/valyala/gozstd v1.6.3 h1:kr3oF/F1RvxYr8wgPjrH04gvHuMEL99pPC9e+5pPQdU=
-github.com/valyala/gozstd v1.6.3/go.mod h1:y5Ew47GLlP37EkTB+B4s7r6A5rdaeB7ftbl9zoYiIPQ=
+github.com/valyala/gozstd v1.6.4 h1:nFLddjEf90SFl5cVWyElSHozQDsbvLljPK703/skBS0=
+github.com/valyala/gozstd v1.6.4/go.mod h1:y5Ew47GLlP37EkTB+B4s7r6A5rdaeB7ftbl9zoYiIPQ=
 github.com/valyala/histogram v1.0.1 h1:FzA7n2Tz/wKRMejgu3PV1vw3htAklTjjuoI6z3d4KDg=
 github.com/valyala/histogram v1.0.1/go.mod h1:lQy0xA4wUz2+IUnf97SivorsJIp8FxsnRd6x25q7Mto=
 github.com/valyala/quicktemplate v1.4.1 h1:tEtkSN6mTCJlYVT7As5x4wjtkk2hj2thsb0M+AcAVeM=
@@ -115,14 +125,17 @@ go.opencensus.io v0.22.2/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw=
 golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
 golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
 golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
+golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
 golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
 golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
 golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8=
 golang.org/x/exp v0.0.0-20190829153037-c13cbed26979/go.mod h1:86+5VVa7VpoJ4kLfm080zCjGlMRFzhUhsZKEZO7MGek=
-golang.org/x/exp v0.0.0-20191030013958-a1ab85dbe136 h1:A1gGSx58LAGVHUUsOf7IiR0u8Xb6W51gRwfDBhkdcaw=
 golang.org/x/exp v0.0.0-20191030013958-a1ab85dbe136/go.mod h1:JXzH8nQsPlswgeRAPE3MuO9GYsAcnJvJ4vnMwN/5qkY=
-golang.org/x/exp v0.0.0-20191127035308-9964a5a80460 h1:zNL062UG4d0GC48Bhm+lEI9lTOMsEHNL0WITb/cw7/s=
-golang.org/x/exp v0.0.0-20191127035308-9964a5a80460/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4=
+golang.org/x/exp v0.0.0-20191129062945-2f5052295587/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4=
+golang.org/x/exp v0.0.0-20191227195350-da58074b4299 h1:zQpM52jfKHG6II1ISZY1ZcpygvuSFZpLwfluuF89XOg=
+golang.org/x/exp v0.0.0-20191227195350-da58074b4299/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4=
+golang.org/x/exp v0.0.0-20200119233911-0405dc783f0a h1:7Wlg8L54In96HTWOaI4sreLJ6qfyGuvSau5el3fK41Y=
+golang.org/x/exp v0.0.0-20200119233911-0405dc783f0a/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4=
 golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js=
 golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
 golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
@@ -131,7 +144,6 @@ golang.org/x/lint v0.0.0-20190301231843-5614ed5bae6f/go.mod h1:UVdnD1Gm6xHRNCYTk
 golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
 golang.org/x/lint v0.0.0-20190409202823-959b441ac422/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
 golang.org/x/lint v0.0.0-20190909230951-414d861bb4ac/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
-golang.org/x/lint v0.0.0-20190930215403-16217165b5de h1:5hukYrvBGR8/eNkX5mdUezrA6JiaEZDtJb9Ei+1LlBs=
 golang.org/x/lint v0.0.0-20190930215403-16217165b5de/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
 golang.org/x/lint v0.0.0-20191125180803-fdd1cda4f05f h1:J5lckAjkw6qYlOZNj90mLYNTEKDvWeuc1yieZ8qUzUE=
 golang.org/x/lint v0.0.0-20191125180803-fdd1cda4f05f/go.mod h1:5qLYkcX4OjUUV8bRuDixDT3tpyyb+LUpUlRWLxfhWrs=
@@ -139,6 +151,7 @@ golang.org/x/mobile v0.0.0-20190312151609-d3739f865fa6/go.mod h1:z+o9i4GpDbdi3rU
 golang.org/x/mobile v0.0.0-20190719004257-d2bd2a29d028/go.mod h1:E/iHnbuqvinMTCcRqshq8CkpyQDoeVncDDYHnLhea+o=
 golang.org/x/mod v0.0.0-20190513183733-4bf6d317e70e/go.mod h1:mXi4GBBbnImb6dmsKGUJ2LatrhH/nqhxcFungHvyanc=
 golang.org/x/mod v0.1.0/go.mod h1:0QHyrYULN0/3qlju5TqG8bIK38QM8yzMo5ekMj3DlcY=
+golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg=
 golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
 golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
 golang.org/x/net v0.0.0-20180911220305-26e67e76b6c3/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
@@ -150,20 +163,23 @@ golang.org/x/net v0.0.0-20190501004415-9ce7a6920f09/go.mod h1:t9HGtf8HONx5eT2rtn
 golang.org/x/net v0.0.0-20190503192946-f4e77d36d62c/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
 golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks=
 golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
-golang.org/x/net v0.0.0-20191126235420-ef20fe5d7933 h1:e6HwijUxhDe+hPNjZQQn9bA5PW3vNmnN64U2ZW759Lk=
-golang.org/x/net v0.0.0-20191126235420-ef20fe5d7933/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
+golang.org/x/net v0.0.0-20190724013045-ca1201d0de80/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
+golang.org/x/net v0.0.0-20191209160850-c0dbc17a3553/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
+golang.org/x/net v0.0.0-20200114155413-6afb5195e5aa h1:F+8P+gmewFQYRk6JoLQLwjBCTu3mcIURZfNkVweuRKA=
+golang.org/x/net v0.0.0-20200114155413-6afb5195e5aa/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
 golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
 golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
-golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45 h1:SVwTIAaPC2U/AvvLNZ2a7OVsmBpC8L5BlwK1whH3hm0=
 golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
-golang.org/x/oauth2 v0.0.0-20191122200657-5d9234df094c h1:HjRaKPaiWks0f5tA6ELVF7ZfqSppfPwOEEAvsrKUTO4=
-golang.org/x/oauth2 v0.0.0-20191122200657-5d9234df094c/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
+golang.org/x/oauth2 v0.0.0-20191202225959-858c2ad4c8b6/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
+golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d h1:TzXSXBo42m9gQenoE3b9BGiEpg5IG2JkU5FkPIawgtw=
+golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
 golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sync v0.0.0-20190227155943-e225da77a7e6/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
-golang.org/x/sync v0.0.0-20190423024810-112230192c58 h1:8gQV6CLnAEikrhgkHFbMAEhagSSnXWGV915qUMm9mrU=
 golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e h1:vcxGaoTs7kV8m5Np9uUNQin4BrLOthgV7252N8V+FwY=
+golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
 golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
 golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
@@ -172,8 +188,11 @@ golang.org/x/sys v0.0.0-20190502145724-3ef323f4f1fd/go.mod h1:h1NjWce9XRLGQEsW7w
 golang.org/x/sys v0.0.0-20190507160741-ecd444e8653b/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20190606165138-5da285871e9c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20190624142023-c5567b49c5d0/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20191127021746-63cb32ae39b2 h1:/J2nHFg1MTqaRLFO7M+J78ASNsJoz3r0cvHBPQ77fsE=
-golang.org/x/sys v0.0.0-20191127021746-63cb32ae39b2/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20190726091711-fc99dfbffb4e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20191228213918-04cbcbbfeed8/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20200122134326-e047566fdf82 h1:ywK/j/KkyTHcdyYSZNXGjMwgmDSfjglYZ3vStQ/gSCU=
+golang.org/x/sys v0.0.0-20200122134326-e047566fdf82/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
 golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
 golang.org/x/text v0.3.2 h1:tW2bmiBqwgJj/UpqtC8EpXEZVYOwU0yG4iWbprSVAcs=
@@ -195,17 +214,23 @@ golang.org/x/tools v0.0.0-20190628153133-6cdbf07be9d0/go.mod h1:/rFqwRUd4F7ZHNgw
 golang.org/x/tools v0.0.0-20190816200558-6889da9d5479/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
 golang.org/x/tools v0.0.0-20190911174233-4f2ddba30aff/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
 golang.org/x/tools v0.0.0-20191012152004-8de300cfc20a/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
+golang.org/x/tools v0.0.0-20191113191852-77e3bb0ad9e7/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
 golang.org/x/tools v0.0.0-20191115202509-3a792d9c32b2/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
 golang.org/x/tools v0.0.0-20191125144606-a911d9008d1f/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
-golang.org/x/tools v0.0.0-20191127064951-724660f1afeb h1:K4JMHRJSgd1q/yXZNrKKyneQJcLm1rn7JsokEs/xE9I=
-golang.org/x/tools v0.0.0-20191127064951-724660f1afeb/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
+golang.org/x/tools v0.0.0-20191216173652-a0e659d51361/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
+golang.org/x/tools v0.0.0-20191227053925-7b8e75db28f4/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
+golang.org/x/tools v0.0.0-20200122042241-dc16b66866f1 h1:468gVSKEm8NObiNTQ3it08aAGsPfuvz+WXUHmnq8Wws=
+golang.org/x/tools v0.0.0-20200122042241-dc16b66866f1/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
 golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 google.golang.org/api v0.4.0/go.mod h1:8k5glujaEP+g9n7WNsDg8QP6cUVNI86fCNMcbazEtwE=
 google.golang.org/api v0.7.0/go.mod h1:WtwebWUNSVBH/HAw79HIFXZNqEvBhG+Ra+ax0hx3E3M=
 google.golang.org/api v0.8.0/go.mod h1:o4eAsZoiT+ibD93RtjEohWalFOjRDx6CVaqeizhEnKg=
 google.golang.org/api v0.9.0/go.mod h1:o4eAsZoiT+ibD93RtjEohWalFOjRDx6CVaqeizhEnKg=
-google.golang.org/api v0.14.0 h1:uMf5uLi4eQMRrMKhCplNik4U4H8Z6C1br3zOtAa/aDE=
+google.golang.org/api v0.13.0/go.mod h1:iLdEw5Ide6rF15KTC1Kkl0iskquN2gFfn9o9XIsbkAI=
 google.golang.org/api v0.14.0/go.mod h1:iLdEw5Ide6rF15KTC1Kkl0iskquN2gFfn9o9XIsbkAI=
+google.golang.org/api v0.15.0 h1:yzlyyDW/J0w8yNFJIhiAJy4kq74S+1DOLdawELNxFMA=
+google.golang.org/api v0.15.0/go.mod h1:iLdEw5Ide6rF15KTC1Kkl0iskquN2gFfn9o9XIsbkAI=
 google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM=
 google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
 google.golang.org/appengine v1.5.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
@@ -220,15 +245,18 @@ google.golang.org/genproto v0.0.0-20190502173448-54afdca5d873/go.mod h1:VzzqZJRn
 google.golang.org/genproto v0.0.0-20190801165951-fa694d86fc64/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc=
 google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc=
 google.golang.org/genproto v0.0.0-20190911173649-1774047e7e51/go.mod h1:IbNlFCBrqXvoKpeg0TB2l7cyZUmoaFKYIwrEpbDKLA8=
+google.golang.org/genproto v0.0.0-20191108220845-16a3f7862a1a/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc=
 google.golang.org/genproto v0.0.0-20191115194625-c23dd37a84c9/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc=
-google.golang.org/genproto v0.0.0-20191115221424-83cc0476cb11 h1:51D++eCgOHufw5VfDE9Uzqyyc+OyQIjb9hkYy9LN5Fk=
-google.golang.org/genproto v0.0.0-20191115221424-83cc0476cb11/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc=
+google.golang.org/genproto v0.0.0-20191216164720-4f79533eabd1/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc=
+google.golang.org/genproto v0.0.0-20191230161307-f3c370f40bfb/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc=
+google.golang.org/genproto v0.0.0-20200117163144-32f20d992d24 h1:wDju+RU97qa0FZT0QnZDg9Uc2dH0Ql513kFvHocz+WM=
+google.golang.org/genproto v0.0.0-20200117163144-32f20d992d24/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc=
 google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c=
 google.golang.org/grpc v1.20.1/go.mod h1:10oTOabMzJvdu6/UiuZezV6QK5dSlG84ov/aaiqXj38=
 google.golang.org/grpc v1.21.1/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM=
 google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg=
-google.golang.org/grpc v1.25.1 h1:wdKvqQk7IttEw92GoRyKG2IDrUIpgpj6H6m81yfeMW0=
-google.golang.org/grpc v1.25.1/go.mod h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQciAY=
+google.golang.org/grpc v1.26.0 h1:2dTRdpdFEEhJYQD8EMLB61nnrzSCTbG38PhqdhvOltg=
+google.golang.org/grpc v1.26.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
 gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY=
 gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
--- a/lib/backup/actions/backup.go
+++ b/lib/backup/actions/backup.go
@@ -7,6 +7,7 @@ import (
 	"time"

 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/backup/common"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/backup/fscommon"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/backup/fslocal"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/backup/fsnil"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
@@ -41,17 +42,33 @@ type Backup struct {

 // Run runs b with the provided settings.
 func (b *Backup) Run() error {
-	startTime := time.Now()
-
 	concurrency := b.Concurrency
 	src := b.Src
 	dst := b.Dst
 	origin := b.Origin

+	if origin != nil && origin.String() == dst.String() {
+		origin = nil
+	}
 	if origin == nil {
 		origin = &fsnil.FS{}
 	}

+	if err := dst.DeleteFile(fscommon.BackupCompleteFilename); err != nil {
+		return fmt.Errorf("cannot delete `backup complete` file at %s: %s", dst, err)
+	}
+	if err := runBackup(src, dst, origin, concurrency); err != nil {
+		return err
+	}
+	if err := dst.CreateFile(fscommon.BackupCompleteFilename, []byte("ok")); err != nil {
+		return fmt.Errorf("cannot create `backup complete` file at %s: %s", dst, err)
+	}
+	return nil
+}
+
+func runBackup(src *fslocal.FS, dst common.RemoteFS, origin common.OriginFS, concurrency int) error {
+	startTime := time.Now()
+
 	logger.Infof("starting backup from %s to %s using origin %s", src, dst, origin)

 	logger.Infof("obtaining list of parts at %s", src)
@@ -149,8 +166,8 @@ func (b *Backup) Run() error {
 		}
 	}

-	logger.Infof("backed up %d bytes in %s; deleted %d bytes; server-side copied %d bytes; uploaded %d bytes",
-		backupSize, time.Since(startTime), deleteSize, copySize, uploadSize)
+	logger.Infof("backed up %d bytes in %.3f seconds; deleted %d bytes; server-side copied %d bytes; uploaded %d bytes",
+		backupSize, time.Since(startTime).Seconds(), deleteSize, copySize, uploadSize)

 	return nil
 }
--- a/lib/backup/actions/restore.go
+++ b/lib/backup/actions/restore.go
@@ -7,6 +7,7 @@ import (
 	"time"

 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/backup/common"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/backup/fscommon"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/backup/fslocal"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
@@ -29,6 +30,11 @@ type Restore struct {
 	// If dst points to existing directory, then incremental restore is performed,
 	// i.e. only new data is downloaded from src.
 	Dst *fslocal.FS
+
+	// SkipBackupCompleteCheck may be set in order to skip the check for `backup complete` file in Src.
+	//
+	// This may be needed for restoring from old backups with missing `backup complete` file.
+	SkipBackupCompleteCheck bool
 }

 // Run runs r with the provided settings.
@@ -48,6 +54,18 @@ func (r *Restore) Run() error {
 	concurrency := r.Concurrency
 	src := r.Src
 	dst := r.Dst
+
+	if !r.SkipBackupCompleteCheck {
+		ok, err := src.HasFile(fscommon.BackupCompleteFilename)
+		if err != nil {
+			return err
+		}
+		if !ok {
+			return fmt.Errorf("cannot find %s file in %s; this means either incomplete backup or old backup; "+
+				"pass `-skipBackupCompleteCheck` command-line flag if you still need restoring from this backup", fscommon.BackupCompleteFilename, src)
+		}
+	}
+
 	logger.Infof("starting restore from %s to %s", src, dst)

 	logger.Infof("obtaining list of parts at %s", src)
@@ -164,7 +182,8 @@ func (r *Restore) Run() error {
 		}
 	}

-	logger.Infof("restored %d bytes from backup in %s; deleted %d bytes; downloaded %d bytes", backupSize, time.Since(startTime), deleteSize, downloadSize)
+	logger.Infof("restored %d bytes from backup in %.3f seconds; deleted %d bytes; downloaded %d bytes",
+		backupSize, time.Since(startTime).Seconds(), deleteSize, downloadSize)

 	return nil
 }
--- a/lib/backup/common/fs.go
+++ b/lib/backup/common/fs.go
@@ -38,4 +38,13 @@ type RemoteFS interface {

 	// UploadPart must upload part p from r to RemoteFS.
 	UploadPart(p Part, r io.Reader) error
+
+	// DeleteFile deletes filePath at RemoteFS.
+	DeleteFile(filePath string) error
+
+	// CreateFile creates filePath at RemoteFS and puts data into it.
+	CreateFile(filePath string, data []byte) error
+
+	// HasFile returns true if filePath exists at RemoteFS.
+	HasFile(filePath string) (bool, error)
 }
--- a/lib/backup/fscommon/fscommon.go
+++ b/lib/backup/fscommon/fscommon.go
@@ -260,3 +260,11 @@ func removeEmptyDirsInternal(d *os.File) (bool, error) {
 	}
 	return true, nil
 }
+
+// IgnorePath returns true if the given path must be ignored.
+func IgnorePath(path string) bool {
+	return strings.HasSuffix(path, ".ignore")
+}
+
+// BackupCompleteFilename is a filename, which is created in the destination fs when backup is complete.
+const BackupCompleteFilename = "backup_complete.ignore"
--- a/lib/backup/fsremote/fsremote.go
+++ b/lib/backup/fsremote/fsremote.go
@@ -3,6 +3,7 @@ package fsremote
 import (
 	"fmt"
 	"io"
+	"io/ioutil"
 	"os"
 	"path/filepath"
 	"strings"
@@ -48,6 +49,9 @@ func (fs *FS) ListParts() ([]common.Part, error) {
 		if !strings.HasPrefix(file, dir) {
 			logger.Panicf("BUG: unexpected prefix for file %q; want %q", file, dir)
 		}
+		if fscommon.IgnorePath(file) {
+			continue
+		}
 		var p common.Part
 		if !p.ParseFromRemotePath(file[len(dir):]) {
 			logger.Infof("skipping unknown file %s", file)
@@ -188,3 +192,42 @@ func (fs *FS) mkdirAll(filePath string) error {
 func (fs *FS) path(p common.Part) string {
 	return p.RemotePath(fs.Dir)
 }
+
+// DeleteFile deletes filePath at fs.
+//
+// The function does nothing if the filePath doesn't exist.
+func (fs *FS) DeleteFile(filePath string) error {
+	path := filepath.Join(fs.Dir, filePath)
+	err := os.Remove(path)
+	if err != nil && !os.IsNotExist(err) {
+		return fmt.Errorf("cannot remove %q: %s", path, err)
+	}
+	return nil
+}
+
+// CreateFile creates filePath at fs and puts data into it.
+//
+// The file is overwritten if it exists.
+func (fs *FS) CreateFile(filePath string, data []byte) error {
+	path := filepath.Join(fs.Dir, filePath)
+	if err := ioutil.WriteFile(path, data, 0600); err != nil {
+		return fmt.Errorf("cannot write %d bytes to %q: %s", len(data), path, err)
+	}
+	return nil
+}
+
+// HasFile returns true if filePath exists at fs.
+func (fs *FS) HasFile(filePath string) (bool, error) {
+	path := filepath.Join(fs.Dir, filePath)
+	fi, err := os.Stat(path)
+	if err != nil {
+		if os.IsNotExist(err) {
+			return false, nil
+		}
+		return false, fmt.Errorf("cannot stat %q: %s", path, err)
+	}
+	if fi.IsDir() {
+		return false, fmt.Errorf("%q is directory, while file is needed", path)
+	}
+	return true, nil
+}
--- a/lib/backup/gcsremote/gcs.go
+++ b/lib/backup/gcsremote/gcs.go
@@ -8,6 +8,7 @@ import (

 	"cloud.google.com/go/storage"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/backup/common"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/backup/fscommon"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
 	"google.golang.org/api/iterator"
 	"google.golang.org/api/option"
@@ -97,6 +98,9 @@ func (fs *FS) ListParts() ([]common.Part, error) {
 		if !strings.HasPrefix(file, dir) {
 			return nil, fmt.Errorf("unexpected prefix for gcs key %q; want %q", file, dir)
 		}
+		if fscommon.IgnorePath(file) {
+			continue
+		}
 		var p common.Part
 		if !p.ParseFromRemotePath(file[len(dir):]) {
 			logger.Infof("skipping unknown object %q", file)
@@ -187,3 +191,56 @@ func (fs *FS) object(p common.Part) *storage.ObjectHandle {
 	path := p.RemotePath(fs.Dir)
 	return fs.bkt.Object(path)
 }
+
+// DeleteFile deletes filePath at fs if it exists.
+//
+// The function does nothing if the filePath doesn't exist.
+func (fs *FS) DeleteFile(filePath string) error {
+	path := fs.Dir + filePath
+	o := fs.bkt.Object(path)
+	ctx := context.Background()
+	if err := o.Delete(ctx); err != nil {
+		if err != storage.ErrObjectNotExist {
+			return fmt.Errorf("cannot delete %q at %s (remote path %q): %s", filePath, fs, o.ObjectName(), err)
+		}
+	}
+	return nil
+}
+
+// CreateFile creates filePath at fs and puts data into it.
+//
+// The file is overwritten if it exists.
+func (fs *FS) CreateFile(filePath string, data []byte) error {
+	path := fs.Dir + filePath
+	o := fs.bkt.Object(path)
+	ctx := context.Background()
+	w := o.NewWriter(ctx)
+	n, err := w.Write(data)
+	if err != nil {
+		_ = w.Close()
+		return fmt.Errorf("cannot upload %d bytes to %q at %s (remote path %q): %s", len(data), filePath, fs, o.ObjectName(), err)
+	}
+	if n != len(data) {
+		_ = w.Close()
+		return fmt.Errorf("wrong data size uploaded to %q at %s (remote path %q); got %d bytes; want %d bytes", filePath, fs, o.ObjectName(), n, len(data))
+	}
+	if err := w.Close(); err != nil {
+		return fmt.Errorf("cannot close %q at %s (remote path %q): %s", filePath, fs, o.ObjectName(), err)
+	}
+	return nil
+}
+
+// HasFile returns true if filePath exists at fs.
+func (fs *FS) HasFile(filePath string) (bool, error) {
+	path := fs.Dir + filePath
+	o := fs.bkt.Object(path)
+	ctx := context.Background()
+	_, err := o.Attrs(ctx)
+	if err != nil {
+		if err == storage.ErrObjectNotExist {
+			return false, nil
+		}
+		return false, fmt.Errorf("unexpected error when obtaining attributes for %q at %s (remote path %q): %s", filePath, fs, o.ObjectName(), err)
+	}
+	return true, nil
+}
--- a/lib/backup/s3remote/s3.go
+++ b/lib/backup/s3remote/s3.go
@@ -1,14 +1,17 @@
 package s3remote

 import (
+	"bytes"
 	"context"
 	"fmt"
 	"io"
 	"strings"

 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/backup/common"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/backup/fscommon"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
 	"github.com/aws/aws-sdk-go/aws"
+	"github.com/aws/aws-sdk-go/aws/awserr"
 	"github.com/aws/aws-sdk-go/aws/session"
 	"github.com/aws/aws-sdk-go/service/s3"
 	"github.com/aws/aws-sdk-go/service/s3/s3manager"
@@ -113,6 +116,9 @@ func (fs *FS) ListParts() ([]common.Part, error) {
 				errOuter = fmt.Errorf("unexpected prefix for s3 key %q; want %q", file, dir)
 				return false
 			}
+			if fscommon.IgnorePath(file) {
+				continue
+			}
 			var p common.Part
 			if !p.ParseFromRemotePath(file[len(dir):]) {
 				logger.Infof("skipping unknown object %q", file)
@@ -220,6 +226,78 @@ func (fs *FS) UploadPart(p common.Part, r io.Reader) error {
 	return nil
 }

+// DeleteFile deletes filePath from fs if it exists.
+//
+// The function does nothing if the file doesn't exist.
+func (fs *FS) DeleteFile(filePath string) error {
+	// It looks like s3 may return `AccessDenied: Access Denied` instead of `s3.ErrCodeNoSuchKey`
+	// on an attempt to delete non-existing file.
+	// so just check whether the filePath exists before deleting it.
+	// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/284 for details.
+	ok, err := fs.HasFile(filePath)
+	if err != nil {
+		return err
+	}
+	if !ok {
+		// Missing file - nothing to delete.
+		return nil
+	}
+
+	path := fs.Dir + filePath
+	input := &s3.DeleteObjectInput{
+		Bucket: aws.String(fs.Bucket),
+		Key:    aws.String(path),
+	}
+	if _, err := fs.s3.DeleteObject(input); err != nil {
+		return fmt.Errorf("cannot delete %q at %s (remote path %q): %s", filePath, fs, path, err)
+	}
+	return nil
+}
+
+// CreateFile creates filePath at fs and puts data into it.
+//
+// The file is overwritten if it already exists.
+func (fs *FS) CreateFile(filePath string, data []byte) error {
+	path := fs.Dir + filePath
+	sr := &statReader{ // presumably accumulates the number of bytes read from r in sr.size; verify against statReader
+		r: bytes.NewReader(data),
+	}
+	input := &s3manager.UploadInput{
+		Bucket: aws.String(fs.Bucket),
+		Key:    aws.String(path),
+		Body:   sr,
+	}
+	_, err := fs.uploader.Upload(input)
+	if err != nil {
+		return fmt.Errorf("cannot upload data to %q at %s (remote path %q): %s", filePath, fs, path, err)
+	}
+	l := int64(len(data))
+	if sr.size != l { // the upload reported success, but a different number of bytes was consumed
+		return fmt.Errorf("wrong data size uploaded to %q at %s; got %d bytes; want %d bytes", filePath, fs, sr.size, l)
+	}
+	return nil
+}
+
+// HasFile returns true if filePath exists at fs.
+func (fs *FS) HasFile(filePath string) (bool, error) {
+	path := fs.Dir + filePath
+	input := &s3.GetObjectInput{
+		Bucket: aws.String(fs.Bucket),
+		Key:    aws.String(path),
+	}
+	o, err := fs.s3.GetObject(input)
+	if err != nil {
+		if ae, ok := err.(awserr.Error); ok && ae.Code() == s3.ErrCodeNoSuchKey {
+			return false, nil
+		}
+		return false, fmt.Errorf("cannot open %q at %s (remote path %q): %s", filePath, fs, path, err)
+	}
+	if err := o.Body.Close(); err != nil {
+		return false, fmt.Errorf("cannot close %q at %s (remote path %q): %s", filePath, fs, path, err)
+	}
+	return true, nil
+}
+
 func (fs *FS) path(p common.Part) string {
 	return p.RemotePath(fs.Dir)
 }
--- a/lib/encoding/encoding.go
+++ b/lib/encoding/encoding.go
@@ -166,24 +166,24 @@ func unmarshalInt64Array(dst []int64, src []byte, mt MarshalType, firstValue int
 		bb := bbPool.Get()
 		bb.B, err = DecompressZSTD(bb.B[:0], src)
 		if err != nil {
-			return nil, fmt.Errorf("cannot decompress zstd data of size %d: %s", len(src), err)
+			return nil, fmt.Errorf("cannot decompress zstd data of size %d: %s; src_zstd=%X", len(src), err, src)
 		}
 		dst, err = unmarshalInt64NearestDelta(dst, bb.B, firstValue, itemsCount)
 		bbPool.Put(bb)
 		if err != nil {
-			return nil, fmt.Errorf("cannot unmarshal nearest delta data after zstd decompression: %s", err)
+			return nil, fmt.Errorf("cannot unmarshal nearest delta data after zstd decompression: %s; src_zstd=%X", err, src)
 		}
 		return dst, nil
 	case MarshalTypeZSTDNearestDelta2:
 		bb := bbPool.Get()
 		bb.B, err = DecompressZSTD(bb.B[:0], src)
 		if err != nil {
-			return nil, fmt.Errorf("cannot decompress zstd data of size %d: %s", len(src), err)
+			return nil, fmt.Errorf("cannot decompress zstd data of size %d: %s; src_zstd=%X", len(src), err, src)
 		}
 		dst, err = unmarshalInt64NearestDelta2(dst, bb.B, firstValue, itemsCount)
 		bbPool.Put(bb)
 		if err != nil {
-			return nil, fmt.Errorf("cannot unmarshal nearest delta2 data after zstd decompression: %s", err)
+			return nil, fmt.Errorf("cannot unmarshal nearest delta2 data after zstd decompression: %s; src_zstd=%X", err, src)
 		}
 		return dst, nil
 	case MarshalTypeNearestDelta:
--- a/lib/encoding/nearest_delta.go
+++ b/lib/encoding/nearest_delta.go
@@ -60,10 +60,10 @@ func unmarshalInt64NearestDelta(dst []int64, src []byte, firstValue int64, items

 	tail, err := UnmarshalVarInt64s(is.A, src)
 	if err != nil {
-		return nil, fmt.Errorf("cannot unmarshal nearest delta: %s", err)
+		return nil, fmt.Errorf("cannot unmarshal nearest delta from %d bytes; src=%X: %s", len(src), src, err)
 	}
 	if len(tail) > 0 {
-		return nil, fmt.Errorf("unexpected tail left after unmarshaling %d items; tail size=%d, value=%X", itemsCount, len(tail), tail)
+		return nil, fmt.Errorf("unexpected tail left after unmarshaling %d items from %d bytes; tail size=%d; src=%X; tail=%X", itemsCount, len(src), len(tail), src, tail)
 	}

 	v := firstValue
--- a/lib/encoding/nearest_delta2.go
+++ b/lib/encoding/nearest_delta2.go
@@ -63,10 +63,10 @@ func unmarshalInt64NearestDelta2(dst []int64, src []byte, firstValue int64, item

 	tail, err := UnmarshalVarInt64s(is.A, src)
 	if err != nil {
-		return nil, fmt.Errorf("cannot unmarshal nearest delta: %s", err)
+		return nil, fmt.Errorf("cannot unmarshal nearest delta from %d bytes; src=%X: %s", len(src), src, err)
 	}
 	if len(tail) > 0 {
-		return nil, fmt.Errorf("unexpected tail left after unmarshaling %d items; tail size=%d, value=%X", itemsCount, len(tail), tail)
+		return nil, fmt.Errorf("unexpected tail left after unmarshaling %d items from %d bytes; tail size=%d; src=%X; tail=%X", itemsCount, len(src), len(tail), src, tail)
 	}

 	v := firstValue
--- a/lib/encoding/zstd/zstd_pure.go
+++ b/lib/encoding/zstd/zstd_pure.go
@@ -53,14 +53,16 @@ func getEncoder(compressionLevel int) *zstd.Encoder {
 	mu.Lock()
 	// Create the encoder under lock in order to prevent from wasted work
 	// when concurrent goroutines create encoder for the same compressionLevel.
-	e = newEncoder(compressionLevel)
 	r1 := av.Load().(registry)
-	r2 := make(registry)
-	for k, v := range r1 {
-		r2[k] = v
+	if e = r1[compressionLevel]; e == nil {
+		e = newEncoder(compressionLevel)
+		r2 := make(registry)
+		for k, v := range r1 {
+			r2[k] = v
+		}
+		r2[compressionLevel] = e
+		av.Store(r2)
 	}
-	r2[compressionLevel] = e
-	av.Store(r2)
 	mu.Unlock()

 	return e
--- a/lib/fs/fadvise_darwin.go
+++ b/lib/fs/fadvise_darwin.go
@@ -0,0 +1,12 @@
+package fs
+
+import (
+	"os"
+)
+
+// MustFadviseSequentialRead hints the OS that f is read mostly sequentially.
+//
+// if prefetch is set, then the OS is hinted to prefetch f data.
+func MustFadviseSequentialRead(f *os.File, prefetch bool) {
+	// TODO: implement this properly
+}
--- a/lib/fs/fadvise_unix.go
+++ b/lib/fs/fadvise_unix.go
@@ -0,0 +1,28 @@
+// +build linux freebsd
+
+package fs
+
+import (
+	"os"
+
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
+	"golang.org/x/sys/unix"
+)
+
+// MustFadviseSequentialRead hints the OS that f is read mostly sequentially.
+//
+// If prefetch is set, then the OS is additionally hinted to prefetch f data.
+func MustFadviseSequentialRead(f *os.File, prefetch bool) {
+	fd := int(f.Fd())
+	// The FADV_* advice values are enumerated constants rather than bit flags,
+	// so they cannot be OR-ed together; every advice needs its own Fadvise call.
+	advices := []int{unix.FADV_SEQUENTIAL}
+	if prefetch {
+		advices = append(advices, unix.FADV_WILLNEED)
+	}
+	for _, advice := range advices {
+		if err := unix.Fadvise(fd, 0, 0, advice); err != nil {
+			logger.Panicf("FATAL: error returned from unix.Fadvise(%d): %s", advice, err)
+		}
+	}
+}
--- a/lib/httpserver/httpserver.go
+++ b/lib/httpserver/httpserver.go
@@ -2,7 +2,6 @@ package httpserver

 import (
 	"bufio"
-	"compress/gzip"
 	"context"
 	"crypto/tls"
 	"flag"
@@ -20,6 +19,7 @@ import (
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil"
 	"github.com/VictoriaMetrics/metrics"
+	"github.com/klauspost/compress/gzip"
 )

 var (
@@ -155,33 +155,50 @@ var metricsHandlerDuration = metrics.NewHistogram(`vm_http_request_duration_seco

 func handlerWrapper(w http.ResponseWriter, r *http.Request, rh RequestHandler) {
 	requestsTotal.Inc()
-	if !checkAuth(w, r) {
-		return
-	}
 	switch r.URL.Path {
 	case "/health":
 		w.Header().Set("Content-Type", "text/plain")
 		w.Write([]byte("OK"))
 		return
-	case "/metrics":
-		startTime := time.Now()
-		metricsRequests.Inc()
-		w.Header().Set("Content-Type", "text/plain")
-		writePrometheusMetrics(w)
-		metricsHandlerDuration.UpdateDuration(startTime)
+	case "/ping":
+		// This is needed for compatibility with Influx agents.
+		// See https://docs.influxdata.com/influxdb/v1.7/tools/api/#ping-http-endpoint
+		status := http.StatusNoContent
+		if verbose := r.FormValue("verbose"); verbose == "true" {
+			status = http.StatusOK
+		}
+		w.WriteHeader(status)
 		return
 	case "/favicon.ico":
 		faviconRequests.Inc()
 		w.WriteHeader(http.StatusNoContent)
 		return
+	case "/metrics":
+		metricsRequests.Inc()
+		if len(*metricsAuthKey) > 0 && r.FormValue("authKey") != *metricsAuthKey {
+			http.Error(w, "The provided authKey doesn't match -metricsAuthKey", http.StatusUnauthorized)
+			return
+		}
+		startTime := time.Now()
+		w.Header().Set("Content-Type", "text/plain")
+		WritePrometheusMetrics(w)
+		metricsHandlerDuration.UpdateDuration(startTime)
+		return
 	default:
 		if strings.HasPrefix(r.URL.Path, "/debug/pprof/") {
 			pprofRequests.Inc()
+			if len(*pprofAuthKey) > 0 && r.FormValue("authKey") != *pprofAuthKey {
+				http.Error(w, "The provided authKey doesn't match -pprofAuthKey", http.StatusUnauthorized)
+				return
+			}
 			DisableResponseCompression(w)
 			pprofHandler(r.URL.Path[len("/debug/pprof/"):], w, r)
 			return
 		}

+		if !checkBasicAuth(w, r) {
+			return
+		}
 		if rh(w, r) {
 			return
 		}
@@ -192,27 +209,6 @@ func handlerWrapper(w http.ResponseWriter, r *http.Request, rh RequestHandler) {
 	}
 }

-func checkAuth(w http.ResponseWriter, r *http.Request) bool {
-	path := r.URL.Path
-	if path == "/metrics" && len(*metricsAuthKey) > 0 {
-		authKey := r.FormValue("authKey")
-		if *metricsAuthKey == authKey {
-			return true
-		}
-		http.Error(w, "The provided authKey doesn't match -metricsAuthKey", http.StatusUnauthorized)
-		return false
-	}
-	if strings.HasPrefix(path, "/debug/pprof/") && len(*pprofAuthKey) > 0 {
-		authKey := r.FormValue("authKey")
-		if *pprofAuthKey == authKey {
-			return true
-		}
-		http.Error(w, "The provided authKey doesn't match -pprofAuthKey", http.StatusUnauthorized)
-		return false
-	}
-	return checkBasicAuth(w, r)
-}
-
 func checkBasicAuth(w http.ResponseWriter, r *http.Request) bool {
 	if len(*httpAuthUsername) == 0 {
 		// HTTP Basic Auth is disabled.
@@ -422,7 +418,7 @@ var (
 // Errorf writes formatted error message to w and to logger.
 func Errorf(w http.ResponseWriter, format string, args ...interface{}) {
 	errStr := fmt.Sprintf(format, args...)
-	logger.Errorf("%s", errStr)
+	logger.ErrorfSkipframes(1, "%s", errStr)

 	// Extract statusCode from args
 	statusCode := http.StatusBadRequest
--- a/lib/httpserver/metrics.go
+++ b/lib/httpserver/metrics.go
@@ -12,7 +12,8 @@ import (
 	"github.com/VictoriaMetrics/metrics"
 )

-func writePrometheusMetrics(w io.Writer) {
+// WritePrometheusMetrics writes all the registered metrics to w in Prometheus exposition format.
+func WritePrometheusMetrics(w io.Writer) {
 	metrics.WritePrometheus(w, true)

 	fmt.Fprintf(w, "vm_app_version{version=%q} 1\n", buildinfo.Version)
--- a/lib/logger/logger.go
+++ b/lib/logger/logger.go
@@ -16,7 +16,10 @@ import (
 	"github.com/VictoriaMetrics/metrics"
 )

-var loggerLevel = flag.String("loggerLevel", "INFO", "Minimum level of errors to log. Possible values: INFO, ERROR, FATAL, PANIC")
+var (
+	loggerLevel  = flag.String("loggerLevel", "INFO", "Minimum level of errors to log. Possible values: INFO, ERROR, FATAL, PANIC")
+	loggerFormat = flag.String("loggerFormat", "default", "Format for logs. Possible values: default, json")
+)

 // Init initializes the logger.
 //
@@ -25,6 +28,7 @@ var loggerLevel = flag.String("loggerLevel", "INFO", "Minimum level of errors to
 // There is no need in calling Init from tests.
 func Init() {
 	validateLoggerLevel()
+	validateLoggerFormat()
 	go errorsLoggedCleaner()
 	logAllFlags()
 }
@@ -38,6 +42,15 @@ func validateLoggerLevel() {
 	}
 }

+func validateLoggerFormat() {
+	switch *loggerFormat {
+	case "default", "json":
+	default:
+		// We cannot use logger.Panicf here, since the logger isn't initialized yet.
+		panic(fmt.Errorf("FATAL: unsupported `-loggerFormat` value: %q; supported values are: default, json", *loggerFormat))
+	}
+}
+
 var stdErrorLogger = log.New(&logWriter{}, "", 0)

 // StdErrorLogger returns standard error logger.
@@ -55,6 +68,11 @@ func Errorf(format string, args ...interface{}) {
 	logLevel("ERROR", format, args...)
 }

+// ErrorfSkipframes logs error message and skips the given number of frames for the caller.
+func ErrorfSkipframes(skipframes int, format string, args ...interface{}) {
+	logLevelSkipframes(skipframes, "ERROR", format, args...)
+}
+
 // Fatalf logs fatal message and terminates the app.
 func Fatalf(format string, args ...interface{}) {
 	logLevel("FATAL", format, args...)
@@ -66,19 +84,15 @@ func Panicf(format string, args ...interface{}) {
 }

 func logLevel(level, format string, args ...interface{}) {
+	logLevelSkipframes(1, level, format, args...)
+}
+
+func logLevelSkipframes(skipframes int, level, format string, args ...interface{}) {
 	if shouldSkipLog(level) {
 		return
 	}
-
-	// rate limit ERROR log messages
-	if level == "ERROR" {
-		if n := atomic.AddUint64(&errorsLogged, 1); n > 10 {
-			return
-		}
-	}
-
 	msg := fmt.Sprintf(format, args...)
-	logMessage(level, msg, 3)
+	logMessage(level, msg, 3+skipframes)
 }

 func errorsLoggedCleaner() {
@@ -94,14 +108,19 @@ type logWriter struct {
 }

 func (lw *logWriter) Write(p []byte) (int, error) {
-	if !shouldSkipLog("ERROR") {
-		logMessage("ERROR", string(p), 4)
-	}
+	logLevelSkipframes(2, "ERROR", "%s", p)
 	return len(p), nil
 }

 func logMessage(level, msg string, skipframes int) {
-	timestamp := time.Now().UTC().Format("2006-01-02T15:04:05.000+0000")
+	// rate limit ERROR log messages
+	if level == "ERROR" {
+		if n := atomic.AddUint64(&errorsLogged, 1); n > 10 {
+			return
+		}
+	}
+
+	timestamp := time.Now().UTC().Format("2006-01-02T15:04:05.000Z")
 	levelLowercase := strings.ToLower(level)
 	_, file, line, ok := runtime.Caller(skipframes)
 	if !ok {
@@ -115,7 +134,14 @@ func logMessage(level, msg string, skipframes int) {
 	for len(msg) > 0 && msg[len(msg)-1] == '\n' {
 		msg = msg[:len(msg)-1]
 	}
-	logMsg := fmt.Sprintf("%s\t%s\t%s:%d\t%s\n", timestamp, levelLowercase, file, line, msg)
+	var logMsg string
+	switch *loggerFormat {
+	case "json":
+		caller := fmt.Sprintf("%s:%d", file, line)
+		logMsg = fmt.Sprintf(`{"ts":%q,"level":%q,"caller":%q,"msg":%q}`+"\n", timestamp, levelLowercase, caller, msg)
+	default:
+		logMsg = fmt.Sprintf("%s\t%s\t%s:%d\t%s\n", timestamp, levelLowercase, file, line, msg)
+	}

 	// Serialize writes to log.
 	mu.Lock()
--- a/lib/mergeset/part.go
+++ b/lib/mergeset/part.go
@@ -5,7 +5,7 @@ import (
 	"path/filepath"
 	"sync"
 	"sync/atomic"
-	"unsafe"
+	"time"

 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/filestream"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
@@ -44,7 +44,7 @@ var (
 	maxCachedInmemoryBlocksPerPartOnce sync.Once
 )

-type partInternals struct {
+type part struct {
 	ph partHeader

 	path string
@@ -56,16 +56,9 @@ type partInternals struct {
 	indexFile fs.ReadAtCloser
 	itemsFile fs.ReadAtCloser
 	lensFile  fs.ReadAtCloser
-}

-type part struct {
-	partInternals
-
-	// Align atomic counters inside caches by 8 bytes on 32-bit architectures.
-	// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/212 .
-	_         [(8 - (unsafe.Sizeof(partInternals{}) % 8)) % 8]byte
-	idxbCache indexBlockCache
-	ibCache   inmemoryBlockCache
+	idxbCache *indexBlockCache
+	ibCache   *inmemoryBlockCache
 }

 func openFilePart(path string) (*part, error) {
@@ -132,8 +125,8 @@ func newPart(ph *partHeader, path string, size uint64, metaindexReader filestrea
 	p.lensFile = lensFile

 	p.ph.CopyFrom(ph)
-	p.idxbCache.Init()
-	p.ibCache.Init()
+	p.idxbCache = newIndexBlockCache()
+	p.ibCache = newInmemoryBlockCache()

 	if len(errors) > 0 {
 		// Return only the first error, since it has no sense in returning all errors.
@@ -149,8 +142,8 @@ func (p *part) MustClose() {
 	p.itemsFile.MustClose()
 	p.lensFile.MustClose()

-	p.idxbCache.Reset()
-	p.ibCache.Reset()
+	p.idxbCache.MustClose()
+	p.ibCache.MustClose()
 }

 type indexBlock struct {
@@ -179,27 +172,72 @@ type indexBlockCache struct {
 	requests uint64
 	misses   uint64

-	m         map[uint64]*indexBlock
-	missesMap map[uint64]uint64
-	mu        sync.RWMutex
+	m  map[uint64]indexBlockCacheEntry
+	mu sync.RWMutex
+
+	cleanerStopCh chan struct{}
+	cleanerWG     sync.WaitGroup
 }

-func (idxbc *indexBlockCache) Init() {
-	idxbc.m = make(map[uint64]*indexBlock)
-	idxbc.missesMap = make(map[uint64]uint64)
-	idxbc.requests = 0
-	idxbc.misses = 0
+type indexBlockCacheEntry struct {
+	// Atomically updated counters must go first in the struct, so they are properly
+	// aligned to 8 bytes on 32-bit architectures.
+	// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/212
+	lastAccessTime uint64
+
+	idxb *indexBlock
 }

-func (idxbc *indexBlockCache) Reset() {
+func newIndexBlockCache() *indexBlockCache {
+	var idxbc indexBlockCache
+	idxbc.m = make(map[uint64]indexBlockCacheEntry)
+	idxbc.cleanerStopCh = make(chan struct{})
+	idxbc.cleanerWG.Add(1)
+	go func() {
+		defer idxbc.cleanerWG.Done()
+		idxbc.cleaner()
+	}()
+	return &idxbc
+}
+
+func (idxbc *indexBlockCache) MustClose() {
+	close(idxbc.cleanerStopCh)
+	idxbc.cleanerWG.Wait()
+
 	atomic.AddUint64(&indexBlockCacheRequests, idxbc.requests)
 	atomic.AddUint64(&indexBlockCacheMisses, idxbc.misses)
 	// It is safe returning idxbc.m to pool, since the Reset must be called
 	// when the idxbc entries are no longer accessed by concurrent goroutines.
-	for _, idxb := range idxbc.m {
-		putIndexBlock(idxb)
+	for _, idxbe := range idxbc.m {
+		putIndexBlock(idxbe.idxb)
 	}
-	idxbc.Init()
+	idxbc.m = nil
+}
+
+// cleaner calls cleanByTimeout every 5 seconds until cleanerStopCh is closed.
+func (idxbc *indexBlockCache) cleaner() {
+	t := time.NewTicker(5 * time.Second) // a Ticker fires repeatedly; a Timer would fire only once
+	for {
+		select {
+		case <-t.C:
+			idxbc.cleanByTimeout()
+		case <-idxbc.cleanerStopCh:
+			t.Stop()
+			return
+		}
+	}
+}
+
+func (idxbc *indexBlockCache) cleanByTimeout() {
+	currentTime := atomic.LoadUint64(&currentTimestamp) // unix seconds; loaded before taking the lock, so entries may be newer
+	idxbc.mu.Lock()
+	for k, idxbe := range idxbc.m {
+		// Delete items accessed more than 10 minutes ago. Add instead of subtracting: uint64 subtraction wraps around for entries newer than currentTime.
+		if currentTime > atomic.LoadUint64(&idxbe.lastAccessTime)+10*60 {
+			delete(idxbc.m, k)
+		}
+	}
+	idxbc.mu.Unlock()
+}

 var (
@@ -210,16 +248,17 @@ var (
 func (idxbc *indexBlockCache) Get(k uint64) *indexBlock {
 	atomic.AddUint64(&idxbc.requests, 1)
 	idxbc.mu.RLock()
-	idxb := idxbc.m[k]
+	idxbe, ok := idxbc.m[k]
 	idxbc.mu.RUnlock()

-	if idxb != nil {
-		return idxb
+	if ok {
+		currentTime := atomic.LoadUint64(&currentTimestamp)
+		if atomic.LoadUint64(&idxbe.lastAccessTime) != currentTime {
+			atomic.StoreUint64(&idxbe.lastAccessTime, currentTime) // NOTE(review): idxbe is a copy of the map value (idxbc.m stores indexBlockCacheEntry by value), so this store never reaches the cached entry
+		}
+		return idxbe.idxb
 	}
 	atomic.AddUint64(&idxbc.misses, 1)
-	idxbc.mu.Lock()
-	idxbc.missesMap[k]++
-	idxbc.mu.Unlock()
 	return nil
 }

@@ -229,13 +268,6 @@ func (idxbc *indexBlockCache) Get(k uint64) *indexBlock {
 func (idxbc *indexBlockCache) Put(k uint64, idxb *indexBlock) bool {
 	idxbc.mu.Lock()

-	if idxbc.missesMap[k] < 2 {
-		// Do not pollute cache with infrequently accessed items, since they may
-		// evict frequently accessed items.
-		idxbc.mu.Unlock()
-		return false
-	}
-
 	// Remove superflouos entries.
 	if overflow := len(idxbc.m) - getMaxCachedIndexBlocksPerPart(); overflow > 0 {
 		// Remove 10% of items from the cache.
@@ -250,21 +282,13 @@ func (idxbc *indexBlockCache) Put(k uint64, idxb *indexBlock) bool {
 			}
 		}
 	}
-	if overflow := len(idxbc.missesMap) - 8*getMaxCachedIndexBlocksPerPart(); overflow > 0 {
-		// Remove 10% of items from the cache.
-		overflow = int(float64(len(idxbc.missesMap)) * 0.1)
-		for k := range idxbc.missesMap {
-			delete(idxbc.missesMap, k)
-			overflow--
-			if overflow == 0 {
-				break
-			}
-		}
-	}

-	// Store the frequently accessed idxb in the cache.
-	delete(idxbc.missesMap, k)
-	idxbc.m[k] = idxb
+	// Store idxb in the cache.
+	idxbe := indexBlockCacheEntry{
+		lastAccessTime: atomic.LoadUint64(&currentTimestamp),
+		idxb:           idxb,
+	}
+	idxbc.m[k] = idxbe
 	idxbc.mu.Unlock()
 	return true
 }
@@ -291,9 +315,11 @@ type inmemoryBlockCache struct {
 	requests uint64
 	misses   uint64

-	m         map[inmemoryBlockCacheKey]*inmemoryBlock
-	missesMap map[inmemoryBlockCacheKey]uint64
-	mu        sync.RWMutex
+	m  map[inmemoryBlockCacheKey]*inmemoryBlockCacheEntry
+	mu sync.RWMutex
+
+	cleanerStopCh chan struct{}
+	cleanerWG     sync.WaitGroup
 }

 type inmemoryBlockCacheKey struct {
@@ -309,22 +335,66 @@ func (ibck *inmemoryBlockCacheKey) Init(bh *blockHeader) {
 	ibck.itemsBlockOffset = bh.itemsBlockOffset
 }

-func (ibc *inmemoryBlockCache) Init() {
-	ibc.m = make(map[inmemoryBlockCacheKey]*inmemoryBlock)
-	ibc.missesMap = make(map[inmemoryBlockCacheKey]uint64)
-	ibc.requests = 0
-	ibc.misses = 0
+type inmemoryBlockCacheEntry struct {
+	// Atomically updated counters must go first in the struct, so they are properly
+	// aligned to 8 bytes on 32-bit architectures.
+	// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/212
+	lastAccessTime uint64
+
+	ib *inmemoryBlock
 }

-func (ibc *inmemoryBlockCache) Reset() {
+func newInmemoryBlockCache() *inmemoryBlockCache {
+	var ibc inmemoryBlockCache
+	ibc.m = make(map[inmemoryBlockCacheKey]*inmemoryBlockCacheEntry)
+
+	ibc.cleanerStopCh = make(chan struct{})
+	ibc.cleanerWG.Add(1)
+	go func() {
+		defer ibc.cleanerWG.Done()
+		ibc.cleaner()
+	}()
+	return &ibc
+}
+
+func (ibc *inmemoryBlockCache) MustClose() {
+	close(ibc.cleanerStopCh)
+	ibc.cleanerWG.Wait()
+
 	atomic.AddUint64(&inmemoryBlockCacheRequests, ibc.requests)
 	atomic.AddUint64(&inmemoryBlockCacheMisses, ibc.misses)
 	// It is safe returning ibc.m entries to pool, since the Reset function may be called
 	// only if no other goroutines access ibc entries.
-	for _, ib := range ibc.m {
-		putInmemoryBlock(ib)
+	for _, ibe := range ibc.m {
+		putInmemoryBlock(ibe.ib)
 	}
-	ibc.Init()
+	ibc.m = nil
+}
+
+// cleaner calls cleanByTimeout every 5 seconds until cleanerStopCh is closed.
+func (ibc *inmemoryBlockCache) cleaner() {
+	t := time.NewTicker(5 * time.Second) // a Ticker fires repeatedly; a Timer would fire only once
+	for {
+		select {
+		case <-t.C:
+			ibc.cleanByTimeout()
+		case <-ibc.cleanerStopCh:
+			t.Stop()
+			return
+		}
+	}
+}
+
+func (ibc *inmemoryBlockCache) cleanByTimeout() {
+	currentTime := atomic.LoadUint64(&currentTimestamp) // unix seconds; Get may stamp entries with a newer value concurrently
+	ibc.mu.Lock()
+	for k, ibe := range ibc.m {
+		// Delete items accessed more than 10 minutes ago. Add instead of subtracting: uint64 subtraction wraps around for entries newer than currentTime.
+		if currentTime > atomic.LoadUint64(&ibe.lastAccessTime)+10*60 {
+			delete(ibc.m, k)
+		}
+	}
+	ibc.mu.Unlock()
+}

 var (
@@ -336,16 +406,17 @@ func (ibc *inmemoryBlockCache) Get(k inmemoryBlockCacheKey) *inmemoryBlock {
 	atomic.AddUint64(&ibc.requests, 1)

 	ibc.mu.RLock()
-	ib := ibc.m[k]
+	ibe := ibc.m[k]
 	ibc.mu.RUnlock()

-	if ib != nil {
-		return ib
+	if ibe != nil {
+		currentTime := atomic.LoadUint64(&currentTimestamp)
+		if atomic.LoadUint64(&ibe.lastAccessTime) != currentTime {
+			atomic.StoreUint64(&ibe.lastAccessTime, currentTime)
+		}
+		return ibe.ib
 	}
 	atomic.AddUint64(&ibc.misses, 1)
-	ibc.mu.Lock()
-	ibc.missesMap[k]++
-	ibc.mu.Unlock()
 	return nil
 }

@@ -355,14 +426,7 @@ func (ibc *inmemoryBlockCache) Get(k inmemoryBlockCacheKey) *inmemoryBlock {
 func (ibc *inmemoryBlockCache) Put(k inmemoryBlockCacheKey, ib *inmemoryBlock) bool {
 	ibc.mu.Lock()

-	if ibc.missesMap[k] < 2 {
-		// Do not cache entry with low number of accesses, since it may evict
-		// frequently accessed entries from the cache.
-		ibc.mu.Unlock()
-		return false
-	}
-
-	// Clean superflouos entries in ibc.m and ibc.missesMap.
+	// Clean superfluous entries in cache.
 	if overflow := len(ibc.m) - getMaxCachedInmemoryBlocksPerPart(); overflow > 0 {
 		// Remove 10% of items from the cache.
 		overflow = int(float64(len(ibc.m)) * 0.1)
@@ -376,21 +440,13 @@ func (ibc *inmemoryBlockCache) Put(k inmemoryBlockCacheKey, ib *inmemoryBlock) b
 			}
 		}
 	}
-	if overflow := len(ibc.missesMap) - 8*getMaxCachedInmemoryBlocksPerPart(); overflow > 0 {
-		// Remove 10% of items from the cache.
-		overflow = int(float64(len(ibc.missesMap)) * 0.1)
-		for k := range ibc.missesMap {
-			delete(ibc.missesMap, k)
-			overflow--
-			if overflow == 0 {
-				break
-			}
-		}
-	}

-	// The entry is frequently accessed, so store it in the cache.
-	delete(ibc.missesMap, k)
-	ibc.m[k] = ib
+	// Store ib in the cache.
+	ibe := &inmemoryBlockCacheEntry{
+		lastAccessTime: atomic.LoadUint64(&currentTimestamp),
+		ib:             ib,
+	}
+	ibc.m[k] = ibe
 	ibc.mu.Unlock()
 	return true
 }
@@ -409,3 +465,15 @@ func (ibc *inmemoryBlockCache) Requests() uint64 {
 func (ibc *inmemoryBlockCache) Misses() uint64 {
 	return atomic.LoadUint64(&ibc.misses)
 }
+
+func init() {
+	go func() {
+		t := time.NewTicker(time.Second) // a Ticker fires every second; a Timer would fire only once
+		for tm := range t.C {
+			atomic.StoreUint64(&currentTimestamp, uint64(tm.Unix()))
+		}
+	}()
+}
+
+// currentTimestamp is the unix time in seconds; it starts at the current time and is refreshed once per second by the goroutine started in init.
+var currentTimestamp = uint64(time.Now().Unix())
--- a/lib/mergeset/part_search.go
+++ b/lib/mergeset/part_search.go
@@ -13,7 +13,7 @@ import (
 type partSearch struct {
 	// Item contains the last item found after the call to NextItem.
 	//
-	// The Item content is valud intil the next call to NextItem.
+	// The Item content is valid until the next call to NextItem.
 	Item []byte

 	// p is a part to search.
@@ -82,8 +82,8 @@ func (ps *partSearch) Init(p *part, shouldCacheBlock func(item []byte) bool) {
 	ps.reset()

 	ps.p = p
-	ps.idxbCache = &p.idxbCache
-	ps.ibCache = &p.ibCache
+	ps.idxbCache = p.idxbCache
+	ps.ibCache = p.ibCache
 }

 // Seek seeks for the first item greater or equal to k in ps.
--- a/lib/mergeset/table.go
+++ b/lib/mergeset/table.go
@@ -187,8 +187,8 @@ func OpenTable(path string, flushCallback func(), prepareBlock PrepareBlockCallb

 	var m TableMetrics
 	tb.UpdateMetrics(&m)
-	logger.Infof("table %q has been opened in %s; partsCount: %d; blocksCount: %d, itemsCount: %d; sizeBytes: %d",
-		path, time.Since(startTime), m.PartsCount, m.BlocksCount, m.ItemsCount, m.SizeBytes)
+	logger.Infof("table %q has been opened in %.3f seconds; partsCount: %d; blocksCount: %d, itemsCount: %d; sizeBytes: %d",
+		path, time.Since(startTime).Seconds(), m.PartsCount, m.BlocksCount, m.ItemsCount, m.SizeBytes)

 	tb.convertersWG.Add(1)
 	go func() {
@@ -206,17 +206,17 @@ func (tb *Table) MustClose() {
 	logger.Infof("waiting for raw items flusher to stop on %q...", tb.path)
 	startTime := time.Now()
 	tb.rawItemsFlusherWG.Wait()
-	logger.Infof("raw items flusher stopped in %s on %q", time.Since(startTime), tb.path)
+	logger.Infof("raw items flusher stopped in %.3f seconds on %q", time.Since(startTime).Seconds(), tb.path)

 	logger.Infof("waiting for converters to stop on %q...", tb.path)
 	startTime = time.Now()
 	tb.convertersWG.Wait()
-	logger.Infof("converters stopped in %s on %q", time.Since(startTime), tb.path)
+	logger.Infof("converters stopped in %.3f seconds on %q", time.Since(startTime).Seconds(), tb.path)

 	logger.Infof("waiting for part mergers to stop on %q...", tb.path)
 	startTime = time.Now()
 	tb.partMergersWG.Wait()
-	logger.Infof("part mergers stopped in %s on %q", time.Since(startTime), tb.path)
+	logger.Infof("part mergers stopped in %.3f seconds on %q", time.Since(startTime).Seconds(), tb.path)

 	logger.Infof("flushing inmemory parts to files on %q...", tb.path)
 	startTime = time.Now()
@@ -242,7 +242,7 @@ func (tb *Table) MustClose() {
 	if err := tb.mergePartsOptimal(pws, nil); err != nil {
 		logger.Panicf("FATAL: cannot flush inmemory parts to files in %q: %s", tb.path, err)
 	}
-	logger.Infof("%d inmemory parts have been flushed to files in %s on %q", len(pws), time.Since(startTime), tb.path)
+	logger.Infof("%d inmemory parts have been flushed to files in %.3f seconds on %q", len(pws), time.Since(startTime).Seconds(), tb.path)

 	// Remove references to parts from the tb, so they may be eventually closed
 	// after all the searches are done.
@@ -447,7 +447,7 @@ func (tb *Table) convertToV1280() {
 			logger.Errorf("failed round 1 of background conversion of %q to v1.28.0 format: %s", tb.path, err)
 			return
 		}
-		logger.Infof("finished round 1 of background conversion of %q to v1.28.0 format in %s", tb.path, time.Since(startTime))
+		logger.Infof("finished round 1 of background conversion of %q to v1.28.0 format in %.3f seconds", tb.path, time.Since(startTime).Seconds())

 		// The second round is needed in order to merge small blocks
 		// with tag->metricIDs rows left after the first round.
@@ -460,7 +460,7 @@ func (tb *Table) convertToV1280() {
 				return
 			}
 		}
-		logger.Infof("finished round 2 of background conversion of %q to v1.28.0 format in %s", tb.path, time.Since(startTime))
+		logger.Infof("finished round 2 of background conversion of %q to v1.28.0 format in %.3f seconds", tb.path, time.Since(startTime).Seconds())
 	}

 	if err := fs.WriteFileAtomically(flagFilePath, []byte("ok")); err != nil {
@@ -853,7 +853,8 @@ func (tb *Table) mergeParts(pws []*partWrapper, stopCh <-chan struct{}, isOuterP

 	d := time.Since(startTime)
 	if d > 10*time.Second {
-		logger.Infof("merged %d items in %s at %d items/sec to %q; sizeBytes: %d", outItemsCount, d, int(float64(outItemsCount)/d.Seconds()), dstPartPath, newPSize)
+		logger.Infof("merged %d items in %.3f seconds at %d items/sec to %q; sizeBytes: %d",
+			outItemsCount, d.Seconds(), int(float64(outItemsCount)/d.Seconds()), dstPartPath, newPSize)
 	}

 	return nil
@@ -1057,11 +1058,17 @@ func (tb *Table) CreateSnapshotAt(dstDir string) error {
 	parentDir := filepath.Dir(dstDir)
 	fs.MustSyncPath(parentDir)

-	logger.Infof("created Table snapshot of %q at %q in %s", srcDir, dstDir, time.Since(startTime))
+	logger.Infof("created Table snapshot of %q at %q in %.3f seconds", srcDir, dstDir, time.Since(startTime).Seconds())
 	return nil
 }

 func runTransactions(txnLock *sync.RWMutex, path string) error {
+	// Wait until all the previous pending transaction deletions are finished.
+	pendingTxnDeletionsWG.Wait()
+
+	// Make sure all the current transaction deletions are finished before exiting.
+	defer pendingTxnDeletionsWG.Wait()
+
 	txnDir := path + "/txn"
 	d, err := os.Open(txnDir)
 	if err != nil {
@@ -1146,17 +1153,20 @@ func runTransaction(txnLock *sync.RWMutex, pathPrefix, txnPath string) error {
 		if err := os.Rename(srcPath, dstPath); err != nil {
 			return fmt.Errorf("cannot rename %q to %q: %s", srcPath, dstPath, err)
 		}
-	} else {
-		// Verify dstPath exists.
-		if !fs.IsPathExist(dstPath) {
-			return fmt.Errorf("cannot find both source and destination paths: %q -> %q", srcPath, dstPath)
-		}
+	} else if !fs.IsPathExist(dstPath) {
+		// Emit info message for the expected condition after unclean shutdown on NFS disk.
+		// The dstPath part may be missing because it could be already merged into bigger part
+		// while old source parts for the current txn weren't deleted yet due to NFS locks.
+		logger.Infof("cannot find both source and destination paths: %q -> %q; this may be the case after unclean shutdown (OOM, `kill -9`, hard reset) on NFS disk",
+			srcPath, dstPath)
 	}

 	// Flush pathPrefix directory metadata to the underying storage.
 	fs.MustSyncPath(pathPrefix)

+	pendingTxnDeletionsWG.Add(1)
 	go func() {
+		defer pendingTxnDeletionsWG.Done()
 		// Remove the transaction file only after all the source paths are deleted.
 		// This is required for NFS mounts. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/61 .
 		removeWG.Wait()
@@ -1168,6 +1178,8 @@ func runTransaction(txnLock *sync.RWMutex, pathPrefix, txnPath string) error {
 	return nil
 }

+var pendingTxnDeletionsWG syncwg.WaitGroup
+
 func validatePath(pathPrefix, path string) (string, error) {
 	var err error

--- a/lib/metricsql/aggr.go
+++ b/lib/metricsql/aggr.go
@@ -0,0 +1,51 @@
+package metricsql
+
+import (
+	"strings"
+)
+
+var aggrFuncs = map[string]bool{
+	// See https://prometheus.io/docs/prometheus/latest/querying/operators/#aggregation-operators
+	"sum":          true,
+	"min":          true,
+	"max":          true,
+	"avg":          true,
+	"stddev":       true,
+	"stdvar":       true,
+	"count":        true,
+	"count_values": true,
+	"bottomk":      true,
+	"topk":         true,
+	"quantile":     true,
+
+	// MetricsQL extension funcs
+	"median":         true,
+	"limitk":         true,
+	"distinct":       true,
+	"sum2":           true,
+	"geomean":        true,
+	"histogram":      true,
+	"topk_min":       true,
+	"topk_max":       true,
+	"topk_avg":       true,
+	"topk_median":    true,
+	"bottomk_min":    true,
+	"bottomk_max":    true,
+	"bottomk_avg":    true,
+	"bottomk_median": true,
+}
+
+func isAggrFunc(s string) bool {
+	s = strings.ToLower(s)
+	return aggrFuncs[s]
+}
+
+func isAggrFuncModifier(s string) bool {
+	s = strings.ToLower(s)
+	switch s {
+	case "by", "without":
+		return true
+	default:
+		return false
+	}
+}
--- a/app/vmselect/promql/aggr_test.go
+++ b/app/vmselect/promql/aggr_test.go
@@ -1,4 +1,4 @@
-package promql
+package metricsql

 import (
 	"testing"
--- a/lib/metricsql/binary_op.go
+++ b/lib/metricsql/binary_op.go
@@ -0,0 +1,205 @@
+package metricsql
+
+import (
+	"fmt"
+	"math"
+	"strings"
+
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/metricsql/binaryop"
+)
+
+var binaryOps = map[string]bool{
+	"+": true,
+	"-": true,
+	"*": true,
+	"/": true,
+	"%": true,
+	"^": true,
+
+	// cmp ops
+	"==": true,
+	"!=": true,
+	">":  true,
+	"<":  true,
+	">=": true,
+	"<=": true,
+
+	// logical set ops
+	"and":    true,
+	"or":     true,
+	"unless": true,
+
+	// New ops for MetricsQL
+	"if":      true,
+	"ifnot":   true,
+	"default": true,
+}
+
+var binaryOpPriorities = map[string]int{
+	"default": -1,
+
+	"if":    0,
+	"ifnot": 0,
+
+	// See https://prometheus.io/docs/prometheus/latest/querying/operators/#binary-operator-precedence
+	"or": 1,
+
+	"and":    2,
+	"unless": 2,
+
+	"==": 3,
+	"!=": 3,
+	"<":  3,
+	">":  3,
+	"<=": 3,
+	">=": 3,
+
+	"+": 4,
+	"-": 4,
+
+	"*": 5,
+	"/": 5,
+	"%": 5,
+
+	"^": 6,
+}
+
+func isBinaryOp(op string) bool {
+	op = strings.ToLower(op)
+	return binaryOps[op]
+}
+
+func binaryOpPriority(op string) int {
+	op = strings.ToLower(op)
+	return binaryOpPriorities[op]
+}
+
+func scanBinaryOpPrefix(s string) int {
+	n := 0
+	for op := range binaryOps {
+		if len(s) < len(op) {
+			continue
+		}
+		ss := strings.ToLower(s[:len(op)])
+		if ss == op && len(op) > n {
+			n = len(op)
+		}
+	}
+	return n
+}
+
+func isRightAssociativeBinaryOp(op string) bool {
+	// See https://prometheus.io/docs/prometheus/latest/querying/operators/#binary-operator-precedence
+	return op == "^"
+}
+
+func isBinaryOpGroupModifier(s string) bool {
+	s = strings.ToLower(s)
+	switch s {
+	// See https://prometheus.io/docs/prometheus/latest/querying/operators/#vector-matching
+	case "on", "ignoring":
+		return true
+	default:
+		return false
+	}
+}
+
+func isBinaryOpJoinModifier(s string) bool {
+	s = strings.ToLower(s)
+	switch s {
+	case "group_left", "group_right":
+		return true
+	default:
+		return false
+	}
+}
+
+func isBinaryOpBoolModifier(s string) bool {
+	s = strings.ToLower(s)
+	return s == "bool"
+}
+
+// IsBinaryOpCmp returns true if op is comparison operator such as '==', '!=', etc.
+func IsBinaryOpCmp(op string) bool {
+	switch op {
+	case "==", "!=", ">", "<", ">=", "<=":
+		return true
+	default:
+		return false
+	}
+}
+
+func isBinaryOpLogicalSet(op string) bool {
+	op = strings.ToLower(op)
+	switch op {
+	case "and", "or", "unless":
+		return true
+	default:
+		return false
+	}
+}
+
+func binaryOpEval(op string, left, right float64, isBool bool) float64 {
+	if IsBinaryOpCmp(op) {
+		evalCmp := func(cf func(left, right float64) bool) float64 {
+			if isBool {
+				if cf(left, right) {
+					return 1
+				}
+				return 0
+			}
+			if cf(left, right) {
+				return left
+			}
+			return nan
+		}
+		switch op {
+		case "==":
+			left = evalCmp(binaryop.Eq)
+		case "!=":
+			left = evalCmp(binaryop.Neq)
+		case ">":
+			left = evalCmp(binaryop.Gt)
+		case "<":
+			left = evalCmp(binaryop.Lt)
+		case ">=":
+			left = evalCmp(binaryop.Gte)
+		case "<=":
+			left = evalCmp(binaryop.Lte)
+		default:
+			panic(fmt.Errorf("BUG: unexpected comparison binaryOp: %q", op))
+		}
+	} else {
+		switch op {
+		case "+":
+			left = binaryop.Plus(left, right)
+		case "-":
+			left = binaryop.Minus(left, right)
+		case "*":
+			left = binaryop.Mul(left, right)
+		case "/":
+			left = binaryop.Div(left, right)
+		case "%":
+			left = binaryop.Mod(left, right)
+		case "^":
+			left = binaryop.Pow(left, right)
+		case "and":
+			// Nothing to do
+		case "or":
+			// Nothing to do
+		case "unless":
+			left = nan
+		case "default":
+			left = binaryop.Default(left, right)
+		case "if":
+			left = binaryop.If(left, right)
+		case "ifnot":
+			left = binaryop.Ifnot(left, right)
+		default:
+			panic(fmt.Errorf("BUG: unexpected non-comparison binaryOp: %q", op))
+		}
+	}
+	return left
+}
+
+var nan = math.NaN()
--- a/app/vmselect/promql/binary_op_test.go
+++ b/app/vmselect/promql/binary_op_test.go
@@ -1,4 +1,4 @@
-package promql
+package metricsql

 import (
 	"testing"
--- a/lib/metricsql/binaryop/funcs.go
+++ b/lib/metricsql/binaryop/funcs.go
@@ -0,0 +1,104 @@
+package binaryop
+
+import (
+	"math"
+)
+
+var nan = math.NaN()
+
+// Eq returns true if left == right.
+func Eq(left, right float64) bool {
+	// Special handling for nan == nan.
+	// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/150 .
+	if math.IsNaN(left) {
+		return math.IsNaN(right)
+	}
+	return left == right
+}
+
+// Neq returns true if left != right.
+func Neq(left, right float64) bool {
+	// Special handling for comparison with nan.
+	// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/150 .
+	if math.IsNaN(left) {
+		return !math.IsNaN(right)
+	}
+	if math.IsNaN(right) {
+		return true
+	}
+	return left != right
+}
+
+// Gt returns true if left > right
+func Gt(left, right float64) bool {
+	return left > right
+}
+
+// Lt returns true if left < right
+func Lt(left, right float64) bool {
+	return left < right
+}
+
+// Gte returns true if left >= right
+func Gte(left, right float64) bool {
+	return left >= right
+}
+
+// Lte returns true if left <= right
+func Lte(left, right float64) bool {
+	return left <= right
+}
+
+// Plus returns left + right
+func Plus(left, right float64) float64 {
+	return left + right
+}
+
+// Minus returns left - right
+func Minus(left, right float64) float64 {
+	return left - right
+}
+
+// Mul returns left * right
+func Mul(left, right float64) float64 {
+	return left * right
+}
+
+// Div returns left / right
+func Div(left, right float64) float64 {
+	return left / right
+}
+
+// Mod returns mod(left, right)
+func Mod(left, right float64) float64 {
+	return math.Mod(left, right)
+}
+
+// Pow returns pow(left, right)
+func Pow(left, right float64) float64 {
+	return math.Pow(left, right)
+}
+
+// Default returns left if it is not NaN. Otherwise right is returned.
+func Default(left, right float64) float64 {
+	if math.IsNaN(left) {
+		return right
+	}
+	return left
+}
+
+// If returns left if right is not NaN. Otherwise NaN is returned.
+func If(left, right float64) float64 {
+	if math.IsNaN(right) {
+		return nan
+	}
+	return left
+}
+
+// Ifnot returns left if right is NaN. Otherwise NaN is returned.
+func Ifnot(left, right float64) float64 {
+	if math.IsNaN(right) {
+		return left
+	}
+	return nan
+}
--- a/lib/metricsql/doc.go
+++ b/lib/metricsql/doc.go
@@ -0,0 +1,15 @@
+// Package metricsql implements MetricsQL parser.
+//
+// This parser can parse PromQL. Additionally it can parse all the MetricsQL extensions.
+// See https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/ExtendedPromQL for details about MetricsQL extensions.
+//
+// Usage:
+//
+//    expr, err := metricsql.Parse(`sum(rate(foo{bar="baz"}[5m])) by (job)`)
+//    if err != nil {
+//        // parse error
+//    }
+//    // Now expr contains parsed MetricsQL as `*Expr` structs.
+//    // See Parse examples for more details.
+//
+package metricsql
--- a/app/vmselect/promql/lexer.go
+++ b/app/vmselect/promql/lexer.go
@@ -1,11 +1,9 @@
-package promql
+package metricsql

 import (
 	"fmt"
 	"strconv"
 	"strings"
-
-	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
 )

 type lexer struct {
@@ -105,7 +103,7 @@ again:
 		token = s[:n]
 		goto tokenFoundLabel
 	}
-	if n := scanDuration(s); n > 0 {
+	if n := scanDuration(s, false); n > 0 {
 		token = s[:n]
 		goto tokenFoundLabel
 	}
@@ -222,7 +220,7 @@ func scanIdent(s string) string {
 		}
 	}
 	if i == 0 {
-		logger.Panicf("BUG: scanIdent couldn't find a single ident char; make sure isIdentPrefix called before scanIdent")
+		panic("BUG: scanIdent couldn't find a single ident char; make sure isIdentPrefix called before scanIdent")
 	}
 	return s[:i]
 }
@@ -281,7 +279,7 @@ func toHex(n byte) byte {
 	return 'a' + (n - 10)
 }

-func appendEscapedIdent(dst, s []byte) []byte {
+func appendEscapedIdent(dst []byte, s string) []byte {
 	for i := 0; i < len(s); i++ {
 		ch := s[i]
 		if isIdentChar(ch) {
@@ -368,15 +366,30 @@ func isPositiveNumberPrefix(s string) bool {
 	return isDecimalChar(s[1])
 }

-func isDuration(s string) bool {
-	n := scanDuration(s)
+func isPositiveDuration(s string) bool {
+	n := scanDuration(s, false)
 	return n == len(s)
 }

+// PositiveDurationValue returns the duration in milliseconds for the given s
+// and the given step. It returns an error if the duration is negative.
+func PositiveDurationValue(s string, step int64) (int64, error) {
+	d, err := DurationValue(s, step)
+	if err != nil {
+		return 0, err
+	}
+	if d < 0 {
+		return 0, fmt.Errorf("duration cannot be negative; got %q", s)
+	}
+	return d, nil
+}
+
 // DurationValue returns the duration in milliseconds for the given s
 // and the given step.
+//
+// The returned duration value can be negative.
 func DurationValue(s string, step int64) (int64, error) {
-	n := scanDuration(s)
+	n := scanDuration(s, true)
 	if n != len(s) {
 		return 0, fmt.Errorf("cannot parse duration %q", s)
 	}
@@ -408,8 +421,14 @@ func DurationValue(s string, step int64) (int64, error) {
 	return int64(mp * f * 1e3), nil
 }

-func scanDuration(s string) int {
+func scanDuration(s string, canBeNegative bool) int {
+	if len(s) == 0 {
+		return -1
+	}
 	i := 0
+	if s[0] == '-' && canBeNegative {
+		i++
+	}
 	for i < len(s) && isDecimalChar(s[i]) {
 		i++
 	}
--- a/Show More
+++ b/Show More