Compare commits

...

570 Commits

Author SHA1 Message Date
Aliaksandr Valialkin
7edfa4d0cc docs: use relative links 2019-12-09 23:05:39 +02:00
Aliaksandr Valialkin
e81a2bfdb3 docs: mention /api/v1/import in Single-server-VictoriaMetrics.md 2019-12-09 23:05:38 +02:00
Aliaksandr Valialkin
033d252836 docs: mention /api/v1/import in Cluster-VictoriaMetrics.md 2019-12-09 23:00:37 +02:00
Aliaksandr Valialkin
bd60dcb8ed deployment/docker: update Go builder from go1.13.4 to go1.13.5 2019-12-09 22:57:43 +02:00
Aliaksandr Valialkin
c81a89a8ed app/vminsert: add /api/v1/import handler
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6
2019-12-09 22:37:49 +02:00
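For illustration, a minimal Go sketch of pushing data through the new endpoint, assuming a single-node instance on the default localhost:8428 (in the cluster version the request goes through vminsert with a tenant prefix instead). The metric name and values are made up; each JSON line carries one series, with timestamps in milliseconds:

```go
package main

import (
	"bytes"
	"fmt"
	"net/http"
)

func main() {
	// One JSON line per time series; timestamps are in milliseconds.
	line := `{"metric":{"__name__":"example_metric","job":"test"},"values":[1.23,4.56],"timestamps":[1575900000000,1575900010000]}` + "\n"
	resp, err := http.Post("http://localhost:8428/api/v1/import",
		"application/stream+json", bytes.NewBufferString(line))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	fmt.Println("status:", resp.StatusCode) // a 2xx status is expected on success
}
```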
Aliaksandr Valialkin
0c304439d4 app/vminsert: consistency renaming for counters 2019-12-09 16:44:26 +02:00
Aliaksandr Valialkin
3694efd005 lib/{mergeset,storage}: log info message when both source and destination part paths from txn are missing during startup
This is an expected condition after an unclean shutdown (OOM, hard reset, `kill -9`) on an NFS disk.
2019-12-09 15:45:23 +02:00
Roman Khavronenko
924af22ced #251 - add Logging rate panel (#255) 2019-12-09 13:07:09 +02:00
Aliaksandr Valialkin
b809df03f8 vendor: fix broken build for GOARCH=arm64 on golang.org/x/sys/unix 2019-12-08 13:28:20 +02:00
Aliaksandr Valialkin
9442e619ea vendor: fix arm build for golang.org/x/sys/unix/zptrace_armnn_linux.go 2019-12-08 12:49:21 +02:00
Aliaksandr Valialkin
c217a53c35 make vendor-update 2019-12-07 23:11:26 +02:00
Aliaksandr Valialkin
3534e71c96 app/vminsert/influx: add a test case from https://community.librenms.org/t/integration-with-victoriametrics/9689 2019-12-07 23:00:54 +02:00
Aliaksandr Valialkin
8cf015c34f README.md: mention that VictoriaMetrics is built on shared nothing architecture 2019-12-05 20:38:20 +02:00
Aliaksandr Valialkin
7a775714ab deployment/docker: update Docker image tags from v1.30.4-cluster to v1.30.5-cluster 2019-12-05 20:15:50 +02:00
Aliaksandr Valialkin
e243429b39 docs: add docs as in the single-node branch 2019-12-05 19:28:29 +02:00
Aliaksandr Valialkin
d39bba3547 app/vmselect/promql: add {topk|bottomk}_{min|max|avg|median} aggregate functions for returning the exact k time series on the given time range
The full list of functions added:
- `topk_min(k, q)` - returns top K time series with the max minimums on the given time range
- `topk_max(k, q)` - returns top K time series with the max maximums on the given time range
- `topk_avg(k, q)` - returns top K time series with the max averages on the given time range
- `topk_median(k, q)` - returns top K time series with the max medians on the given time range
- `bottomk_min(k, q)` - returns bottom K time series with the min minimums on the given time range
- `bottomk_max(k, q)` - returns bottom K time series with the min maximums on the given time range
- `bottomk_avg(k, q)` - returns bottom K time series with the min averages on the given time range
- `bottomk_median(k, q)` - returns bottom K time series with the min medians on the given time range
2019-12-05 19:27:45 +02:00
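As a usage sketch, here is how one of these functions might be queried over the Prometheus-compatible HTTP API; the address, metric name and time range are assumed for illustration:

```go
package main

import (
	"fmt"
	"io"
	"net/http"
	"net/url"
)

func main() {
	// Pick the 3 series with the highest average rate over the selected range.
	// node_cpu_seconds_total is an assumed example metric.
	q := `topk_avg(3, rate(node_cpu_seconds_total{mode!="idle"}[5m]))`
	u := "http://localhost:8428/api/v1/query_range?" + url.Values{
		"query": {q},
		"start": {"2019-12-05T00:00:00Z"},
		"end":   {"2019-12-05T12:00:00Z"},
		"step":  {"5m"},
	}.Encode()
	resp, err := http.Get(u)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	body, _ := io.ReadAll(resp.Body)
	fmt.Printf("%s\n", body)
}
```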
Aliaksandr Valialkin
639967db59 lib/{mergeset,storage}: make sure pending transaction deletions are finished before and after runTransactions call.
`runTransactions` call issues async deletions for transaction files. The previously issued transaction deletions
can race with the next call to `runTransactions`. Prevent this by waiting until all the pending transaction
deletions are finished at the beginning of `runTransactions`. Also make sure that all the pending transaction
deletions are finished before returning from `runTransactions`.
2019-12-04 21:40:52 +02:00
Aliaksandr Valialkin
7c0dd85a7c lib/httpserver: add /ping handler for compatibility with Influx agents
Certain Influx agents check the `/ping` endpoint before starting
to send Influx line protocol data. See https://docs.influxdata.com/influxdb/v1.7/tools/api/#ping-http-endpoint
2019-12-04 19:18:18 +02:00
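For reference, such a handler boils down to very little. This is an illustrative sketch, not the actual VictoriaMetrics implementation; the Influx API answers /ping with 204 No Content by default, and the port below is an arbitrary example:

```go
package main

import "net/http"

// pingHandler mimics the Influx /ping endpoint: agents only need a quick
// success status before they start sending line protocol data.
func pingHandler(w http.ResponseWriter, r *http.Request) {
	w.WriteHeader(http.StatusNoContent)
}

func main() {
	http.HandleFunc("/ping", pingHandler)
	http.ListenAndServe(":8428", nil)
}
```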
Aliaksandr Valialkin
877b83ce97 deployment/docker: update docker image tags from v1.30.3-cluster to v1.30.4-cluster 2019-12-04 01:53:04 +02:00
Aliaksandr Valialkin
e0f43e1f66 app/vmselect: add placeholders for /api/v1/rules and /api/v1/alerts 2019-12-03 19:38:09 +02:00
Aliaksandr Valialkin
534da0a8c3 lib/storage: fall back to the global inverted index if a filter matches too many time series in the per-day index
Previously this resulted in an error message. The query may succeed via search in the global index.
2019-12-03 14:48:08 +02:00
Aliaksandr Valialkin
6eb698d1cc lib/storage: fix printing tag filters in TagFilters.String 2019-12-03 14:25:20 +02:00
Aliaksandr Valialkin
c04f60db35 lib/storage: print __name__ instead of empty string in user-visible tag filters 2019-12-03 14:18:18 +02:00
Aliaksandr Valialkin
625f6ca761 lib/storage: optimize regexp filter search 2019-12-03 00:33:53 +02:00
Aliaksandr Valialkin
47077c02ba deployment/docker: update image tags from v1.30.2-cluster to v1.30.3-cluster 2019-12-02 22:51:31 +02:00
Aliaksandr Valialkin
6bee9115aa vendor: update github.com/VictoriaMetrics/metrics from v1.9.1 to v1.9.2
This fixes a possible deadlock when metrics.WritePrometheus calls a Gauge callback, which in turn calls metrics functions that take an internal lock.
2019-12-02 22:31:47 +02:00
Aliaksandr Valialkin
b9616c017f lib/{mergeset,storage}: remove transaction files only after the mentioned dirs are really removed
This should fix the issue on NFS when incompletely removed dirs may be left
after unclean shutdown (OOM, kill -9, hard reset, etc.), while the corresponding transaction
files are already removed.

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/162
2019-12-02 21:34:37 +02:00
Aliaksandr Valialkin
4e22b521c2 lib/storage: remove metricID with missing metricID->metricName entry
The metricID->metricName entry can be missing in the indexdb after unclean shutdown
when only a part of entries for new time series is written into indexdb.

Recover from such a situation by removing the broken metricID. A new metricID
will be automatically created for the time series with the given metricName
when a new data point arrives for it.
2019-12-02 20:52:13 +02:00
Aliaksandr Valialkin
387f62f468 deployment/docker: update docker image tag from v1.30.1-cluster to v1.30.2-cluster 2019-12-02 15:17:41 +02:00
Aliaksandr Valialkin
5a62415bec lib/storage: protect from time drift during indexdb rotation
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/248
2019-12-02 14:43:11 +02:00
Aliaksandr Valialkin
cf85c567d1 lib/logger: merge file and line labels into location="file:line"
This should improve the usability of the `vm_log_messages_total` metric in practical queries
2019-12-02 14:43:09 +02:00
Aliaksandr Valialkin
f055dbefda lib/storage: generate more human-friendly result in TagFilters.String 2019-12-02 13:56:40 +02:00
Aliaksandr Valialkin
819bb36852 app/vmselect/promql: estimate per-series scrape interval as 0.6 quantile for the first 100 intervals
This should improve scrape interval estimation for time series with gaps.
2019-12-02 13:43:04 +02:00
Aliaksandr Valialkin
29f39f866e lib/logger: consistency renaming from vm_log_messages_count to vm_log_messages_total, since this is a counter 2019-12-02 00:47:12 +02:00
Aliaksandr Valialkin
15eaff1745 lib/logger: track the number of log messages by (level, file, line) in the vm_log_messages_count metric 2019-12-01 18:38:30 +02:00
Aliaksandr Valialkin
d456ec7589 lib/netutil: use IPv6 for both listening and dialing if -enableTCP6 is set
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/244
2019-12-01 02:52:53 +02:00
Aliaksandr Valialkin
1595dcd3d9 app/vminsert/influx: allow empty measurement in Influx line protocol
In this case metric names are mapped directly from field names without any prefixes.
2019-11-30 21:59:07 +02:00
Aliaksandr Valialkin
1e2019b1b6 app/vmselect/promql: fix corner case for increase over time series with gaps
In this case `increase` could return an invalid high value for the first point after the gap.
2019-11-30 01:34:18 +02:00
Aliaksandr Valialkin
4c63caa37c deployment/docker/certs: update TLS certs source from alpine:3.9 to alpine:3.10 2019-11-29 19:55:36 +02:00
Aliaksandr Valialkin
274d8bcb7b deployment/docker/docker-compose.yml: remove superfluous volume mount (#246)
The `provisioning/dashboards` folder is already mounted by the previous line.

This should fix the `/bin/sh: can't create dashboards/vm.json: Permission denied` error on `docker-compose up`
2019-11-29 18:10:50 +02:00
Aliaksandr Valialkin
7e734433a3 lib/backup: cosmetic fixes after #243 2019-11-29 18:07:41 +02:00
glebsam
4a192cb832 Add option to provide custom endpoint for S3, add option to specify S3 config profile (#243)
* Add option to provide custom endpoint for S3 for use with s3-compatible storages, add option to specify S3 config profile

* make fmt
2019-11-29 18:07:39 +02:00
Aliaksandr Valialkin
4810f1dde6 lib/netutil: add -enableTCP6 command-line flag for enabling listening on IPv6 in addition to IPv4 TCP ports 2019-11-29 17:33:07 +02:00
Aliaksandr Valialkin
93dbec971b deployment/docker: update docker image tags from v1.30.0-cluster to v1.30.1-cluster 2019-11-28 22:26:25 +02:00
Aliaksandr Valialkin
90f2530f9f README.md: add monitoring section 2019-11-28 19:16:05 +02:00
Aliaksandr Valialkin
409c939621 lib/backup: remove flock.lock file in empty dirs
This fixes an issue when VictoriaMetrics doesn't see the restored data after the following operations:

1. Stop VictoriaMetrics.
2. Delete `<-storageDataPath>` dir.
3. Start VictoriaMetrics, then stop it.
4. Restore data from backup with `vmrestore`.
5. Start VictoriaMetrics.

`vmrestore` didn't properly delete empty dirs in `<-storageDataPath>/indexdb` because of the remaining `flock.lock` files in these dirs.
2019-11-28 13:39:28 +02:00
Aliaksandr Valialkin
572fe61857 README.md: remove the unnecessary step during restoring from backups 2019-11-27 19:56:15 +02:00
Aliaksandr Valialkin
396ed27759 vendor: make vendor-update 2019-11-27 15:34:18 +02:00
Aliaksandr Valialkin
2571903522 vendor: update github.com/VictoriaMetrics/fastcache from v1.5.2 to v1.5.4 2019-11-27 15:31:36 +02:00
Aliaksandr Valialkin
093f94d2db deployment/docker: update Grafana from v6.4.4 to v6.5.0 2019-11-27 15:10:01 +02:00
Aliaksandr Valialkin
8ccbcaf99f deployment/docker: update image tags from v1.29.5-cluster to v1.30.0-cluster 2019-11-27 14:54:21 +02:00
Aliaksandr Valialkin
def9ccd360 app/vmselect/prometheus: consistently apply the nocache arg to /api/v1/query the same way as to /api/v1/query_range
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/241
2019-11-26 22:55:50 +02:00
Aliaksandr Valialkin
e0ac068112 app/vmselect/prometheus: fix content-type for /api/v1/export responses
The correct Content-Type should be `application/stream+json` instead of `application/json`
Thanks to Joshua Ryder for pointing this out.
2019-11-26 17:44:27 +02:00
Aliaksandr Valialkin
28cc4c09b5 app/vmselect/promql: remove zero timeseries from prometheus_buckets output 2019-11-25 19:10:13 +02:00
Aliaksandr Valialkin
8811bec14e app/vmselect/prometheus: reduce default value for -search.latencyOffset from 60s to 30s
30 seconds should be enough for almost all the cases
2019-11-25 16:33:36 +02:00
Aliaksandr Valialkin
f7da9b2db2 app/vmselect/promql: allow nested parens 2019-11-25 16:13:33 +02:00
Aliaksandr Valialkin
d2619d6dce vendor: update github.com/VictoriaMetrics/metrics from v1.9.0 to v1.9.1 2019-11-25 15:22:50 +02:00
Aliaksandr Valialkin
f46fb6c740 app/vmselect/promql: re-use metrics.Histogram when calculating the histogram function for each point on the graph
This should reduce the amount of memory allocations
2019-11-25 14:24:30 +02:00
Aliaksandr Valialkin
0f184affa7 app/vmselect/promql: optimize binary search over big number of samples during rollup calculations 2019-11-25 14:01:54 +02:00
Aliaksandr Valialkin
dbd07041ae app/vmselect/promql: adjust tests after the upgrade of github.com/VictoriaMetrics/metrics from v1.8.3 to v1.9.0 2019-11-25 13:44:08 +02:00
Aliaksandr Valialkin
406e36f817 vendor: update github.com/VictoriaMetrics/metrics from v1.8.3 to v1.9.0 2019-11-25 13:19:34 +02:00
Aliaksandr Valialkin
8bb254d960 app/vmselect/promql: add histogram aggregate function, which is useful for building heatmaps from multiple time series 2019-11-24 00:04:15 +02:00
Aliaksandr Valialkin
e70f543321 vendor: update github.com/VictoriaMetrics/metrics from v1.8.2 to v1.8.3 2019-11-24 00:04:14 +02:00
Aliaksandr Valialkin
d24fc87a6f lib/decimal: calculate ln2/ln10 constant during compile time 2019-11-23 15:52:39 +02:00
Aliaksandr Valialkin
414259f47b app/vmselect/promql: do not take into account buckets with negative counters in prometheus_buckets 2019-11-23 14:19:19 +02:00
Aliaksandr Valialkin
193d553f6d app/vmselect/promql: properly handle histogram_quantile(0, ...) with zero buckets 2019-11-23 14:02:25 +02:00
Aliaksandr Valialkin
f8298c7f13 app/vmselect: add vm_per_query_{rows,series}_processed_count histograms 2019-11-23 13:23:03 +02:00
Aliaksandr Valialkin
b1c3284fd0 dashboards: remove deprecated dashboards - now only victoriametrics.json is officially supported 2019-11-23 12:43:38 +02:00
Aliaksandr Valialkin
654473f6c6 vendor: update github.com/VictoriaMetrics/metrics from v1.8.1 to v1.8.2 2019-11-23 11:49:18 +02:00
Aliaksandr Valialkin
4d76977745 app/vmselect/promql: transparently apply prometheus_buckets in histogram_quantile 2019-11-23 11:49:16 +02:00
Aliaksandr Valialkin
cfeb606e73 vendor: update github.com/VictoriaMetrics/metrics from v1.8.0 to v1.8.1 2019-11-23 00:48:55 +02:00
Aliaksandr Valialkin
2af7ca1122 vendor: update github.com/VictoriaMetrics/metrics from v1.7.2 to v1.8.0. This version supports histograms 2019-11-23 00:21:57 +02:00
Aliaksandr Valialkin
5f6f03c692 app/vmselect/promql: add prometheus_buckets function for converting the upcoming histogram buckets from github.com/VictoriaMetrics/metrics to Prometheus-compatible buckets 2019-11-23 00:21:56 +02:00
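A hedged example of how the function might be combined with histogram_quantile; the metric name is an assumption, while `vmrange` is the bucket label emitted by github.com/VictoriaMetrics/metrics histograms:

```go
package main

import "fmt"

func main() {
	// prometheus_buckets() converts vmrange-labeled buckets into `le`-labeled
	// ones, so that histogram_quantile() can be applied on top of them.
	q := `histogram_quantile(0.99, prometheus_buckets(sum(rate(request_duration_seconds_bucket[5m])) by (vmrange)))`
	fmt.Println(q)
}
```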
Aliaksandr Valialkin
17d08c1fe0 app/vmselect: adjust end arg instead of adjusting start arg if start > end
The `start` arg has higher chances of being set properly compared to the `end` arg,
so it is expected that the `end` arg may be adjusted if it was set incorrectly.
2019-11-22 16:12:53 +02:00
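A tiny sketch of the described behavior; clampRange is a hypothetical helper, not the actual vmselect code:

```go
package main

import "fmt"

// clampRange trusts `start`, which is more likely to be set correctly,
// and fixes an inverted range by adjusting `end`.
func clampRange(start, end int64) (int64, int64) {
	if start > end {
		end = start
	}
	return start, end
}

func main() {
	fmt.Println(clampRange(200, 100)) // 200 200
}
```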
Aliaksandr Valialkin
14ba958e9a deployment/docker: update docker image tag from v1.29.3-cluster to v1.29.5-cluster 2019-11-22 14:05:17 +02:00
Aliaksandr Valialkin
7c48f8611f vendor: updated github.com/valyala/gozstd from v1.6.2 to v1.6.3 2019-11-21 23:56:27 +02:00
Aliaksandr Valialkin
b9e53490b9 lib/storage: move non-matching tag filters to the top at matchTagFilters
This should reduce the amount of useless work needed for matching the next metricNames.
2019-11-21 21:40:36 +02:00
Aliaksandr Valialkin
33d9d63393 lib/storage: speed up time series search for queries with multiple filters
Use optimized specialized binary search for uint64 metricIDs instead of generic sort.Search.
2019-11-21 18:43:40 +02:00
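For illustration, a specialized lower-bound search over []uint64 looks like the sketch below; it avoids the closure and interface overhead of the generic sort.Search. This is an illustrative version, not the actual lib/storage code:

```go
package main

import "fmt"

// binarySearchUint64 returns the smallest index i such that a[i] >= x.
func binarySearchUint64(a []uint64, x uint64) int {
	lo, hi := 0, len(a)
	for lo < hi {
		mid := lo + (hi-lo)/2
		if a[mid] < x {
			lo = mid + 1
		} else {
			hi = mid
		}
	}
	return lo
}

func main() {
	ids := []uint64{3, 7, 11, 42, 100}
	fmt.Println(binarySearchUint64(ids, 12)) // 3 (points at 42)
}
```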
Aliaksandr Valialkin
926290d73e Makefile: create files with sha256 checksums during make release
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/19
2019-11-20 22:45:23 +02:00
Aliaksandr Valialkin
a02a57fbe9 lib/storage: verify the number of returned metricIDs in BenchmarkHeadPostingForMatchers 2019-11-20 15:40:03 +02:00
Aliaksandr Valialkin
3d1f4408cf lib/decimal: increase decimal->float speed conversion for integer numbers 2019-11-20 14:09:10 +02:00
Aliaksandr Valialkin
f1f2eff08f lib/decimal: reduce rounding error when converting from decimal to float with negative exponent
While at it, slightly increase the conversion performance by moving the fast path to the top of the loop.
2019-11-19 23:34:41 +02:00
Aliaksandr Valialkin
2929a41e3b make vendor-update 2019-11-19 21:34:44 +02:00
Aliaksandr Valialkin
17eca31989 lib/backup: retrieve only the required metadata when reading GCS objects 2019-11-19 21:30:51 +02:00
Aliaksandr Valialkin
ccf3d143c5 make vendor-update 2019-11-19 21:30:49 +02:00
Aliaksandr Valialkin
216a260ced app/{vmbackup,vmrestore}: add -maxBytesPerSecond command-line flag for limiting the used network bandwidth during backup / restore 2019-11-19 20:32:43 +02:00
Aliaksandr Valialkin
9d1ee1e2ae lib/backup: prevent from restoring to directory which is in use by VictoriaMetrics during the restore 2019-11-19 18:35:59 +02:00
Aliaksandr Valialkin
5ae47e8940 app/vmselect/prometheus: properly adjust too big `time` on /api/v1/query
Too big `time` must be adjusted to `now()-queryOffset`.
2019-11-19 00:42:07 +02:00
Aliaksandr Valialkin
6ca4b94511 lib/storage: increase the number of created time series in BenchmarkHeadPostingForMatchers in order to be on par with Prometheus
The previous commit was accidentally creating a 10x smaller number of time series than Prometheus,
which led to invalid benchmark results.

The updated benchmark results:

benchmark                                                          old ns/op      new ns/op     delta
BenchmarkHeadPostingForMatchers/n="1"                              272756688      6194893       -97.73%
BenchmarkHeadPostingForMatchers/n="1",j="foo"                      138132923      10781372      -92.19%
BenchmarkHeadPostingForMatchers/j="foo",n="1"                      134723762      10632834      -92.11%
BenchmarkHeadPostingForMatchers/n="1",j!="foo"                     195823953      10679975      -94.55%
BenchmarkHeadPostingForMatchers/i=~".*"                            7962582919     100118510     -98.74%
BenchmarkHeadPostingForMatchers/i=~".+"                            7589543864     154955671     -97.96%
BenchmarkHeadPostingForMatchers/i=~""                              1142371741     258003769     -77.42%
BenchmarkHeadPostingForMatchers/i!=""                              9964150263     159783895     -98.40%
BenchmarkHeadPostingForMatchers/n="1",i=~".*",j="foo"              216995884      10937895      -94.96%
BenchmarkHeadPostingForMatchers/n="1",i=~".*",i!="2",j="foo"       202541348      10990027      -94.57%
BenchmarkHeadPostingForMatchers/n="1",i!=""                        486285711      87004349      -82.11%
BenchmarkHeadPostingForMatchers/n="1",i!="",j="foo"                350776931      53342793      -84.79%
BenchmarkHeadPostingForMatchers/n="1",i=~".+",j="foo"              380888565      54256156      -85.76%
BenchmarkHeadPostingForMatchers/n="1",i=~"1.+",j="foo"             89500296       21823279      -75.62%
BenchmarkHeadPostingForMatchers/n="1",i=~".+",i!="2",j="foo"       379529654      46671359      -87.70%
BenchmarkHeadPostingForMatchers/n="1",i=~".+",i!~"2.*",j="foo"     424563825      53915842      -87.30%

VictoriaMetrics uses 1GB of RAM during the benchmark (vs 3.5GB of RAM for Prometheus)
2019-11-18 19:48:27 +02:00
Aliaksandr Valialkin
6f61fd367a lib/storage: add BenchmarkHeadPostingForMatchers similar to the benchmark from Prometheus
See the corresponding benchmark in Prometheus - 23c0299d85/tsdb/head_bench_test.go (L52)

The benchmark allows performing an apples-to-apples comparison of time series search
in Prometheus and VictoriaMetrics. The following article - https://www.robustperception.io/evaluating-performance-and-correctness -
contains incorrect numbers for VictoriaMetrics, since this benchmark didn't exist yet. Fix it.

Benchmarks can be repeated with the following commands from Prometheus and VictoriaMetrics source code roots:

- Prometheus: GOMAXPROCS=1 go test ./tsdb/ -run=111 -bench=BenchmarkHeadPostingForMatchers
- VictoriaMetrics: GOMAXPROCS=1 go test ./lib/storage/ -run=111 -bench=BenchmarkHeadPostingForMatchers

Benchmark results:
benchmark                                                          old ns/op      new ns/op     delta
BenchmarkHeadPostingForMatchers/n="1"                              272756688      364977        -99.87%
BenchmarkHeadPostingForMatchers/n="1",j="foo"                      138132923      1181636       -99.14%
BenchmarkHeadPostingForMatchers/j="foo",n="1"                      134723762      1141578       -99.15%
BenchmarkHeadPostingForMatchers/n="1",j!="foo"                     195823953      1148056       -99.41%
BenchmarkHeadPostingForMatchers/i=~".*"                            7962582919     8716755       -99.89%
BenchmarkHeadPostingForMatchers/i=~".+"                            7589543864     12096587      -99.84%
BenchmarkHeadPostingForMatchers/i=~""                              1142371741     16164560      -98.59%
BenchmarkHeadPostingForMatchers/i!=""                              9964150263     12230021      -99.88%
BenchmarkHeadPostingForMatchers/n="1",i=~".*",j="foo"              216995884      1173476       -99.46%
BenchmarkHeadPostingForMatchers/n="1",i=~".*",i!="2",j="foo"       202541348      1299743       -99.36%
BenchmarkHeadPostingForMatchers/n="1",i!=""                        486285711      11555193      -97.62%
BenchmarkHeadPostingForMatchers/n="1",i!="",j="foo"                350776931      5607506       -98.40%
BenchmarkHeadPostingForMatchers/n="1",i=~".+",j="foo"              380888565      6380335       -98.32%
BenchmarkHeadPostingForMatchers/n="1",i=~"1.+",j="foo"             89500296       2078970       -97.68%
BenchmarkHeadPostingForMatchers/n="1",i=~".+",i!="2",j="foo"       379529654      6561368       -98.27%
BenchmarkHeadPostingForMatchers/n="1",i=~".+",i!~"2.*",j="foo"     424563825      6757132       -98.41%

The first column (old) is for Prometheus, the second column (new) is for VictoriaMetrics.

Prometheus was using 3.5GB of RAM during the benchmark, while VictoriaMetrics was using 400MB of RAM.
2019-11-18 18:47:02 +02:00
Aliaksandr Valialkin
77bb66a5be app/vmselect/promql: properly calculate integrate(q[d]) 2019-11-13 21:11:03 +02:00
Aliaksandr Valialkin
c33640664a app/vmselect/promql: use universal approach for determining maxByteSliceLen on 32-bit and 64-bit archs
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/235
2019-11-13 20:26:07 +02:00
Aliaksandr Valialkin
d297b65089 lib/storage: add vm_cache_size_bytes{type="storage/hour_metric_ids"} metric 2019-11-13 20:26:05 +02:00
Aliaksandr Valialkin
31376fd353 deployment/docker: update docker image tag from v1.29.2-cluster to v1.29.3-cluster 2019-11-13 18:32:08 +02:00
Aliaksandr Valialkin
494ad0fdb3 lib/storage: remove inmemory index for recent hour, since it uses too much memory
Production workload shows that the index requires ~4Kb of RAM per active time series.
This is too much for a high number of active time series, so let's delete this index.

Now the queries should fall back to the index for the current day instead of the index
for the recent hour. The query performance for the current day index should be good enough
given the 100M rows/sec scan speed per CPU core.
2019-11-13 18:08:58 +02:00
Aliaksandr Valialkin
90bde025f0 deployment/docker: update image tag from v1.29.0-cluster to v1.29.2-cluster 2019-11-13 15:24:44 +02:00
Aliaksandr Valialkin
633dd81bb5 lib/storage: add -disableRecentHourIndex flag for disabling inmemory index for recent hour
This may be useful for saving RAM on a high number of time series (aka high cardinality)
2019-11-13 15:10:12 +02:00
Aliaksandr Valialkin
f1620ba7c0 lib/storage: fix inmemory inverted index issues found in v1.29
Issues fixed:
- Slow startup times. Now the index is loaded from cache during start.
- High memory usage related to superfluous index copies every 10 seconds.
2019-11-13 13:35:38 +02:00
Aliaksandr Valialkin
87b39222be Revert "lib/fs: do not postpone directory removal on NFS error"
This reverts commit 21aeb02b46649ac9906cb37733f7b155a77a0db9.
2019-11-12 16:29:50 +02:00
Mike Poindexter
955a592106 Add test for invalid caching of tsids (#232)
* Add test for invalid caching of tsids

* Clean up error handling
2019-11-12 15:52:46 +02:00
Roman Khavronenko
ce8cc76a42 add links and fix cache metric name (#233) 2019-11-12 15:06:56 +02:00
Aliaksandr Valialkin
6afb7a50a9 deployment/docker: upgrade Grafana release from v6.4.3 to v6.4.4 2019-11-12 03:50:54 +02:00
Aliaksandr Valialkin
5b677a57e3 deployment/docker: upgrade Go from v1.13.3 to v1.13.4 2019-11-12 03:49:07 +02:00
Aliaksandr Valialkin
d420871d79 deployment/docker: upgrade docker image tag from v1.28.3-cluster to v1.29.0-cluster 2019-11-12 03:44:45 +02:00
Aliaksandr Valialkin
584d8362c8 deployment: update Prometheus from v2.13.0 to v2.14.0 2019-11-12 03:43:59 +02:00
Roman Khavronenko
828f0a2a4b prepare dashboard for external sharing (#231) 2019-11-12 00:23:24 +02:00
Oleg Kovalov
74ba42d111 fix misspelled words (#229) 2019-11-12 00:18:24 +02:00
Aliaksandr Valialkin
c48e39eea9 lib/storage: add tests for dateMetricIDCache 2019-11-11 13:21:05 +02:00
Aliaksandr Valialkin
bdc9045485 README.md: mention that replication doesn't save from disaster 2019-11-11 00:58:08 +02:00
Aliaksandr Valialkin
01801e9e03 dashboards: there will be no 1.28.4 release. It will be 1.29.0 2019-11-10 22:05:10 +02:00
Aliaksandr Valialkin
6bdde0d6d4 lib/storage: eliminate data race when updating lastSyncTime in dateMetricIDCache.Has 2019-11-10 22:04:23 +02:00
Roman Khavronenko
7247a7862d add description, churn rate panel, storage.ingestion rate panel (#228) 2019-11-10 20:32:10 +02:00
Aliaksandr Valialkin
5f52eb7653 lib/fs: do not postpone directory removal on NFS error
Continue trying to remove the NFS directory on temporary errors for up to a minute.

The previous async removal process breaks in the following case during VictoriaMetrics start

- VictoriaMetrics opens index, finds incomplete merge transactions and starts replaying them.
- The transaction instructs removing old directories for parts, which were already merged into bigger part.
- VictoriaMetrics removes these directories, but their removal is delayed due to NFS errors.
- VictoriaMetrics scans the partition directory after all the incomplete merge transactions are finished
  and finds directories that should have been removed, but still weren't due to NFS errors.
- VictoriaMetrics panics when it finds unexpected empty directory.

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/162
2019-11-10 13:27:16 +02:00
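A sketch of the retry-bounded removal described above; this is illustrative, not the actual lib/fs implementation, and the retry interval is an assumption:

```go
package main

import (
	"os"
	"time"
)

// removeDirWithRetries keeps retrying removal for up to a minute, riding out
// temporary NFS errors instead of postponing the removal asynchronously.
func removeDirWithRetries(dir string) error {
	deadline := time.Now().Add(time.Minute)
	for {
		err := os.RemoveAll(dir)
		if err == nil {
			return nil
		}
		if time.Now().After(deadline) {
			return err
		}
		time.Sleep(100 * time.Millisecond)
	}
}

func main() {
	_ = removeDirWithRetries("/tmp/example-dir")
}
```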
Aliaksandr Valialkin
9ea2bd822e lib/storage: implement per-day inverted index 2019-11-10 00:20:32 +02:00
Aliaksandr Valialkin
5d8de72414 app/vmrestore: the upcoming release will be 1.29.0 2019-11-10 00:20:18 +02:00
Aliaksandr Valialkin
dea2f3efed lib/storage: use specialized cache for (date, metricID) entries
This improves ingestion performance.
2019-11-09 23:09:18 +02:00
Aliaksandr Valialkin
9a43902bd8 lib/storage: remove unused code from getMetricIDsForTimeRange: it is expected that time range is always non-zero 2019-11-09 19:03:51 +02:00
Aliaksandr Valialkin
c16e17dede lib/storage: properly set time range when deleting time series 2019-11-09 18:50:02 +02:00
Aliaksandr Valialkin
8126007c15 lib/storage: obtain all the time series ids from (tag->metricIDs) rows instead of (metricID->TSID) rows, since this is much faster 2019-11-09 18:04:26 +02:00
Aliaksandr Valialkin
50773348d3 lib/storage: small code prettifying 2019-11-09 14:01:24 +02:00
Aliaksandr Valialkin
44fa8226df lib/uint64set: remove superfluous check for item existence before deleting it in Set.Subtract 2019-11-09 14:01:24 +02:00
Aliaksandr Valialkin
0bc54c23ce lib/storage: inmemoryInvertedIndex prettifying 2019-11-09 14:01:24 +02:00
Aliaksandr Valialkin
46e67bb78c lib/storage: export vm_new_timeseries_created_total metric for determining time series churn rate 2019-11-08 19:58:21 +02:00
Aliaksandr Valialkin
0063c857f5 lib/storage: add inmemory inverted index for the last hour
It should improve performance for `last N hours` dashboards with update intervals smaller than 1 hour.
2019-11-08 19:37:46 +02:00
Aliaksandr Valialkin
33abbec6b4 app/vmselect/promql: adjust memory limits calculations for incremental aggregate functions
Incremental aggregate functions don't keep all the selected time series in memory -
they keep only up to GOMAXPROCS time series for incremental aggregations.

Take into account that the number of time series in RAM can be higher if they are split
into many groups with `by (...)` or `without (...)` modifiers.

This should reduce the number of `not enough memory for processing ... data points` false
positive errors.
2019-11-08 19:37:43 +02:00
Aliaksandr Valialkin
7d7fbf890e app/{vmbackup,vmrestore}: add vmbackup and vmrestore tools for creating backups on s3 or gcs from instant snapshots
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/203
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/38
2019-11-07 21:26:43 +02:00
Roman Khavronenko
4e7a2a41a4 Cluster dashboard (#222)
* add dashboard for cluster version

* fix queries and panels

* review fixes

* use resident memory for memory usage panel

* fix job selectors
2019-11-07 12:09:27 +02:00
Aliaksandr Valialkin
89c03a5464 lib/storage: populate partition names from both small and big directories
Certain partition directories may be missing after restoring from backups
if they had no data. Re-create such directories on start.
2019-11-06 19:50:21 +02:00
Aliaksandr Valialkin
1c777e0245 lib/storage: substitute error message about unsorted items in the index block after metricIDs merge with counter
The origin of the error has been detected and documented in the code,
so it is enough to export a counter for such errors at `vm_index_blocks_with_metric_ids_incorrect_order_total`,
so it could be monitored and alerted on high error rates.

Export also the counter for processed index blocks with metricIDs - `vm_index_blocks_with_metric_ids_processed_total`,
so its' rate could be compared to `rate(vm_index_blocks_with_metric_ids_incorrect_order_total)`.
2019-11-06 14:32:41 +02:00
Aliaksandr Valialkin
c567a4353a lib/storage: take into account the requested time range when caching TSIDs for the given tag filters 2019-11-06 14:32:41 +02:00
Aliaksandr Valialkin
c6564c5d26 lib/storage: dump incorrectly sorted items on a single line; this should simplify error reporting 2019-11-05 18:41:50 +02:00
Aliaksandr Valialkin
2ef5082ead deployment/docker: update docker images from v1.28.2-cluster to v1.28.3-cluster 2019-11-05 18:08:50 +02:00
Aliaksandr Valialkin
a10c4cad85 lib/storage: return finalPartsToMerge back from 2 to 3 in order to prevent excessive merges in old partitions 2019-11-05 17:28:57 +02:00
Aliaksandr Valialkin
e5b1fa0c38 lib/storage: separate the max inverted index scan loops per metric into fast and slow loops
Slow loops could require seeks and expensive regexp matching, while fast loops just scan
all the metricIDs for the given `tag=value` prefix. So these operations must have separate
max loop multipliers.
2019-11-05 17:28:57 +02:00
Aliaksandr Valialkin
f93c4f2493 lib/storage: skip repeated useless work when the intersection of metricIDs with the given filter is too expensive
This should improve performance for query filters over a big number of time series.
2019-11-05 14:35:55 +02:00
Aliaksandr Valialkin
f48e97263c lib/storage: reduce the maximum number of inverted index scans before falling back to matching by metric name
The new value reduces the amount of wasted work during index scans over a big number of time series.
2019-11-05 14:35:53 +02:00
Aliaksandr Valialkin
d2f688c550 lib/storage: try potentially faster tag filters at first, then apply slower tag filters
The fastest tag filters are non-negative non-regexp, since they are the most specific.
The slowest tag filters are negative regexp, since they require scanning
all the entries for the given label.
2019-11-05 14:35:48 +02:00
Aliaksandr Valialkin
a72b22a8b1 Makefile: add pprof-cpu rule for inspecting CPU profiles with PPROF_FILE=/path/to/cpu.pprof make pprof-cpu 2019-11-04 12:43:57 +02:00
Aliaksandr Valialkin
2a38d30f93 lib/storage: pass pointer to MetricName in Fatalf, so it is properly detected as an interface with String() method
This fixes lint errors
2019-11-04 01:06:45 +02:00
Artem Navoiev
e05500cbd4 add unittests for bytesutil and storage (#221) 2019-11-04 00:57:24 +02:00
Aliaksandr Valialkin
f5fbc3ffd7 lib/{storage,uint64set}: add Set.Union() function and use it 2019-11-04 00:48:32 +02:00
Aliaksandr Valialkin
23e078261e lib/storage: tune the returned value from adjustMaxMetricsAdaptive 2019-11-04 00:45:28 +02:00
Aliaksandr Valialkin
386c349c8c lib/storage: remove interface conversion in hot path during block merging
This should improve merge speed a bit for parts with a big number of small blocks.
2019-11-03 12:33:48 +02:00
Aliaksandr Valialkin
26ffc77622 lib/{storage,mergeset}: create missing partition directories after restoring from backups
Backup tools could skip empty directories, so re-create such directories on the first run.
2019-11-02 02:27:19 +02:00
Aliaksandr Valialkin
5d439cc6f2 lib/{decimal,encoding}: optimize float64<->decimal conversion for arrays with zeros or ones
Time series with only zeros or ones frequently occur in monitoring, so it is worth optimizing their handling.
2019-11-01 16:46:08 +02:00
Aliaksandr Valialkin
1037053fed lib/{encoding,decimal}: add benchmarks for blocks containing zeros or ones
Time series with such values are quite common in monitoring space,
so it would be great to have benchmarks for them.
2019-11-01 16:46:08 +02:00
Aliaksandr Valialkin
46b8e13d8c deployment/docker: update image tag from v1.28.1-cluster to v1.28.2-cluster 2019-11-01 16:33:46 +02:00
Aliaksandr Valialkin
44fab198e2 lib/uint64set: return an empty set instead of a nil set from Set.Clone, since the caller may add data to the cloned set
This fixes the following panic in v1.28.1:

panic: runtime error: invalid memory address or nil pointer dereference
[signal SIGSEGV: segmentation violation code=0x1 addr=0x10 pc=0x783a7e]

goroutine 1155 [running]:
github.com/VictoriaMetrics/VictoriaMetrics/lib/uint64set.(*Set).Add(0x0, 0x15b3bfb41e8b71ec)
  github.com/VictoriaMetrics/VictoriaMetrics@/lib/uint64set/uint64set.go:57 +0x2e
github.com/VictoriaMetrics/VictoriaMetrics/lib/storage.(*indexSearch).getMetricIDsForRecentHours(0xc5bdc0dd40, 0x16e273f6b50, 0x16e2745d3f0, 0x5b8d95, 0x10, 0x4a2f51, 0xaa01000000000000)
  github.com/VictoriaMetrics/VictoriaMetrics@/lib/storage/index_db.go:1951 +0x260
github.com/VictoriaMetrics/VictoriaMetrics/lib/storage.(*indexSearch).getMetricIDsForTimeRange(0xc5bdc0dd40, 0x16e273f6b50, 0x16e2745d3f0, 0x5b8d95, 0x10, 0xb296c0, 0xc00009cd80, 0x9bc640)
2019-11-01 16:12:21 +02:00
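The failure mode can be illustrated with the lib/uint64set API named in the traceback (Clone and Add); this sketch assumes Clone tolerates a nil receiver, as the commit message implies:

```go
package main

import (
	"fmt"

	"github.com/VictoriaMetrics/VictoriaMetrics/lib/uint64set"
)

func main() {
	var s *uint64set.Set   // a nil *Set acts as an empty set
	c := s.Clone()         // v1.28.1 could return a nil clone for an empty source
	c.Add(42)              // ...which made this Add panic with a nil pointer dereference
	fmt.Println(c.Has(42)) // true after the fix: the clone is a usable empty set
}
```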
Aliaksandr Valialkin
4a8251feff app/vmselect/promql: add lag(q[d]) function, which returns the lag between the current timestamp and the timestamp of the last data point in q 2019-11-01 12:21:43 +02:00
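A possible use of the new function, e.g. for spotting stale series; `up` is just an example selector, and lag() is assumed to return seconds:

```go
package main

import "fmt"

func main() {
	// Series whose most recent sample is older than 5 minutes (300 seconds).
	fmt.Println(`lag(up[1h]) > 300`)
}
```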
Aliaksandr Valialkin
bd065aad5e deployment/docker: update docker images from v1.28.0-cluster to v1.28.1-cluster 2019-10-31 17:03:27 +02:00
Aliaksandr Valialkin
6ab9c98a1e app/vmstorage: add -bigMergeConcurrency and -smallMergeConcurrency flags for tuning the maximum number of CPU cores used during merges 2019-10-31 16:17:29 +02:00
Aliaksandr Valialkin
6a22727676 lib/storage: optimize getMetricIDsForRecentHours for per-tenant lookups 2019-10-31 15:51:09 +02:00
Aliaksandr Valialkin
ca480915ca lib/storage: small cleanup in Storage.add 2019-10-31 14:30:22 +02:00
Aliaksandr Valialkin
22030b558f lib/decimal: speed up FromFloat for common case with integers 2019-10-31 13:25:09 +02:00
Aliaksandr Valialkin
6510258a80 lib/decimal: increase float64->decimal conversion precision a bit
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/213
2019-10-30 02:04:28 +02:00
Aliaksandr Valialkin
a27e034a40 lib/storage: get parts to merge after applying the limit on the number of concurrent merges
This should reduce write amplification under high ingestion rate.
2019-10-30 00:09:44 +02:00
Aliaksandr Valialkin
5d2276dbf7 lib/{mergeset,storage}: limit the maximum number of concurrent merges; leave smaller number of parts during final merge 2019-10-29 12:45:37 +02:00
Aliaksandr Valialkin
78166cc478 vendor: update github.com/VictoriaMetrics/fastcache from v1.5.1 to v1.5.2 2019-10-29 11:31:36 +02:00
Aliaksandr Valialkin
f581b2736a lib/fs: typo fix in comment to WriteFileAtomically 2019-10-29 11:31:34 +02:00
Aliaksandr Valialkin
a638c6d4f8 vendor: make vendor-update 2019-10-28 13:41:13 +02:00
Roman Khavronenko
1750ee1575 * #215: update klauspost/compress lib
* #215: bump klauspost/compress lib to 1.9.1
2019-10-28 13:41:10 +02:00
Aliaksandr Valialkin
eb513e7ba3 lib/decimal: increase float->decimal conversion precision for big numbers
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/213
2019-10-28 13:23:54 +02:00
Aliaksandr Valialkin
4e6bf6f538 app/vmselect: add -search.latencyOffset flag for tuning the time after data collection when data points become visible in query results
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/218
2019-10-28 12:32:36 +02:00
Aliaksandr Valialkin
121be98325 deployment/docker: upgrade Go builder from go1.13.1 to go1.13.3 2019-10-20 23:49:18 +03:00
hanzai
52778da1f3 warns during rows addition (#214) 2019-10-20 23:38:51 +03:00
Aliaksandr Valialkin
6823aaaf08 README.md: add capacity planning chapter 2019-10-19 10:48:00 +03:00
Aliaksandr Valialkin
78fc35c9b1 all: make fmt 2019-10-17 20:05:12 +03:00
Aliaksandr Valialkin
88d793305d Makefile: disable structcheck in golangci-lint, since it gives false positive on embedded structs 2019-10-17 20:00:17 +03:00
Aliaksandr Valialkin
5b01b7fb01 all: add support for GOARCH=386 and fix all the issues related to 32-bit architectures such as GOARCH=arm
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/212
2019-10-17 18:27:49 +03:00
Aliaksandr Valialkin
5d2af2cfa2 vendor: update github.com/valyala/quicktemplate from v1.2.0 to v1.3.1 2019-10-17 18:27:49 +03:00
Aliaksandr Valialkin
12c8afc3f2 lib/memory: properly handle int overflow in sysTotalMemory
This should fix builds on 32-bit architectures such as arm.

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/212
2019-10-17 18:27:49 +03:00
Aliaksandr Valialkin
7d7d7a7d4e deployment/docker/docker-compose.yml: update Prometheus from v2.12.0 to v2.13.0 2019-10-16 12:39:17 +03:00
Aliaksandr Valialkin
e0109fc316 deployment/docker/docker-compose.yml: update VictoriaMetrics image from v1.27.0-cluster to v1.28.0-cluster 2019-10-16 12:39:17 +03:00
Aliaksandr Valialkin
469d169a5d README.md: mention our Slack 2019-10-16 12:31:53 +03:00
Aliaksandr Valialkin
99786c2864 app/vmselect/prometheus: add -search.maxLookback command-line flag for overriding dynamic calculations for max lookback interval
If set, this flag is similar to `-search.lookback-delta`. If the flag isn't set, the max lookback interval is determined dynamically
from the interval between data points for each input time series.

The interval can be overridden on a per-query basis by passing the `max_lookback=<duration>` query arg to `/api/v1/query` and `/api/v1/query_range`.

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/209
2019-10-15 21:37:17 +03:00
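A sketch of the per-query override; the endpoint address and the 5m value are arbitrary examples:

```go
package main

import (
	"fmt"
	"net/url"
)

func main() {
	// Override the dynamically calculated max lookback for a single query.
	v := url.Values{
		"query":        {"up"},
		"max_lookback": {"5m"},
	}
	fmt.Println("http://localhost:8428/api/v1/query?" + v.Encode())
}
```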
Aliaksandr Valialkin
ce266d157d README.md: mention contact email for consulting and support 2019-10-15 00:11:39 +03:00
Aliaksandr Valialkin
dc2f822577 lib/prompb: removed outdated README.md 2019-10-14 22:16:36 +03:00
Aliaksandr Valialkin
8ecdb04b7c Makefile: remove obsolete Makefile include from /helm/ directory 2019-10-13 23:22:54 +03:00
Aliaksandr Valialkin
92e0ca6bbf vendor: make vendor-update 2019-10-13 23:18:28 +03:00
Artem Navoiev
75504747c8 Update README.md change helm section 2019-10-13 23:05:37 +03:00
Artem Navoiev
3d3d87f718 [deployment] remove helm chart 2019-10-13 23:03:52 +03:00
Aliaksandr Valialkin
bf6fe234b2 README.md: mention the delete_series handler
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/207
2019-10-10 02:09:38 +03:00
Aliaksandr Valialkin
f1a7965676 README.md: refer to comment about ingestion rate scalability
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/175
2019-10-09 17:28:25 +03:00
Aliaksandr Valialkin
7b6570489a README.md: add a few words about scalability 2019-10-09 13:01:59 +03:00
Aliaksandr Valialkin
661b8ede5b lib/storage: harden the check that the original items are sorted after mergeTagToMetricIDsRows fails to preserve sort order 2019-10-09 12:13:43 +03:00
Aliaksandr Valialkin
7f4a04ee6a lib/storage: typo fix in comment to maxRowsPerSmallPart. 2019-10-08 18:51:56 +03:00
Aliaksandr Valialkin
7e410e1412 lib/storage: add tests for mergeTagToMetricIDsRows and return the original items if the function breaks items' ordering.
This should protect from the data corruption issues revealed in the previous releases up to v1.28.0-beta5.
2019-10-08 16:35:39 +03:00
Aliaksandr Valialkin
a5302a6651 app/vmselect/promql: take into account the previous point when calculating max_over_time and min_over_time
This lines up with `first_over_time` function used in `rollup_candlestick`, so `rollup=low` always returns
the minimum value.

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/204
2019-10-08 12:30:16 +03:00
Aliaksandr Valialkin
95d0f1bfd1 vendor: make vendor-update 2019-10-06 15:48:45 +03:00
Aliaksandr Valialkin
84b3b29644 README.md: add binaries section with urls to pre-compiled binaries and docker images 2019-10-06 11:42:36 +03:00
Aliaksandr Valialkin
39b18b1dcd vendor: update github.com/VictoriaMetrics/metrics from v1.7.1 to v1.7.2 2019-10-06 11:20:03 +03:00
Stian Ovrevage
ef6e01b1fa Add bool to extraLabels. Fix tls indentation 2019-10-03 21:47:00 +03:00
Stian Ovrevage
4fb63d7d61 Fix helm template indentation 2019-10-03 21:47:00 +03:00
Aliaksandr Valialkin
9fce611fbb lib/mergeset: reduce the maximum number of cached blocks, since there are reports on OOMs due to too big caches
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/189
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/195
2019-09-30 12:27:30 +03:00
Aliaksandr Valialkin
483af3a97a app/vmselect/netstorage: hint the OS that tmpBlocksFile is read almost sequentially
This became the case after b7ee2e7af2.
2019-09-30 00:13:33 +03:00
Aliaksandr Valialkin
946ca438a6 app/vmselect/netstorage: marshal block outside tmpBlocksFile.WriteBlock
This also allows marshaling outside the lock, thus reducing the amount of work under the lock.
2019-09-28 20:57:20 +03:00
Aliaksandr Valialkin
e92e39eddf app/vmselect/netstorage: reduce the number of disk seeks when the query processes a big number of time series 2019-09-28 20:57:20 +03:00
Aliaksandr Valialkin
56dff57f77 app/vmselect/netstorage: reduce memory usage when fetching a big number of data blocks from vmstorage
Dump data blocks directly to a temporary file instead of buffering them in RAM
2019-09-28 12:21:57 +03:00
Aliaksandr Valialkin
ba460f62e6 app/vmselect/promql: do not generate timestamps for NaN values in timestamp function according to Prometheus logic 2019-09-27 18:55:16 +03:00
Stian Øvrevåge
a9dac3829e Update README.md - Fix helm command typos
`victoria-mertrics` -> `victoria-metrics` in helm commands.
2019-09-27 12:33:48 +03:00
Aliaksandr Valialkin
de919574a5 deployment/docker: switch Go builder image from v1.13.0 to v1.13.1 2019-09-26 17:12:52 +03:00
Aliaksandr Valialkin
d0b4590099 lib/storage: optimize TSID comparison 2019-09-26 14:20:02 +03:00
Aliaksandr Valialkin
95e3d648cb lib/storage: verify whether items are sorted at the end of the call to mergeTagToMetricIDsRows
This should protect from inverted index corruption if a bug in mergeTagToMetricIDsRows is discovered.
2019-09-26 13:13:58 +03:00
Aliaksandr Valialkin
2b8358726f lib/storage: properly match labels against regexp with (?i) flag
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/161
2019-09-26 11:03:26 +03:00
Aliaksandr Valialkin
bd1cf053f6 app/vmselect/promql: add increases_over_time and decreases_over_time functions
`increases_over_time(q[d])` returns the number of `q` increases during the given duration `d`.
`decreases_over_time(q[d])` returns the number of `q` decreases during the given duration `d`.
2019-09-25 20:38:51 +03:00
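Example queries for the two new functions; process_open_fds is an assumed example metric:

```go
package main

import "fmt"

func main() {
	// How many times the gauge went up / went down during the last hour.
	fmt.Println(`increases_over_time(process_open_fds[1h])`)
	fmt.Println(`decreases_over_time(process_open_fds[1h])`)
}
```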
Aliaksandr Valialkin
4e3871ac1e lib/storage: add missing break in removeDuplicateMetricIDs 2019-09-25 18:23:13 +03:00
Aliaksandr Valialkin
4468f9f966 lib/storage: remove duplicate MetricIDs in tag->metricIDs items before writing them into inverted index 2019-09-25 17:57:36 +03:00
Aliaksandr Valialkin
adc18c3ee6 lib/{mergeset,storage}: do not cache inverted index blocks containing tag->metricIDs items
This should reduce the amount of RAM used during queries with filters over a big number of time series.
2019-09-25 13:48:24 +03:00
Aliaksandr Valialkin
8d398af92f app/vminsert/netstorage: mention the data size that cannot be sent to vmstorage
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/175
2019-09-25 12:53:41 +03:00
Aliaksandr Valialkin
73ac7b8dd6 app/vminsert/netstorage: make sure the conn exists before closing it in storageNode.closeBrokenConn
The conn can be missing or already closed during the call to storageNode.closeBrokenConn.
Prevent `nil pointer dereference` panic by verifying whether the conn is already closed.

Thanks to @CH-anhngo for reporting the issue.

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/189
2019-09-25 10:36:50 +03:00
Aliaksandr Valialkin
c64fb91a43 lib/uint64set: optimize Set.AppendTo 2019-09-25 00:34:31 +03:00
Aliaksandr Valialkin
de0e4eee2c lib/storage: create and use lib/uint64set instead of map[uint64]struct{}
This should improve inverted index search performance for filters matching a big number of time series,
since `lib/uint64set.Set` is faster than `map[uint64]struct{}` for both `Add` and `Has` calls.
See the corresponding benchmarks in `lib/uint64set`.
2019-09-24 21:18:04 +03:00
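A before/after sketch of the change at the call-site level, assuming the Add/Has API mentioned above:

```go
package main

import (
	"fmt"

	"github.com/VictoriaMetrics/VictoriaMetrics/lib/uint64set"
)

func main() {
	// Before: a generic map pays hashing and bucket costs on every operation.
	m := make(map[uint64]struct{})
	m[12345] = struct{}{}
	_, ok := m[12345]
	fmt.Println(ok)

	// After: a set specialized for uint64 metricIDs.
	var s uint64set.Set
	s.Add(12345)
	fmt.Println(s.Has(12345))
}
```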
Aliaksandr Valialkin
2212d0e421 lib/storage: typo fix: return dstData instead of data from mergeTagToMetricIDsRows 2019-09-24 19:32:58 +03:00
Aliaksandr Valialkin
9307de1b92 lib/storage: limit the number of metricIDs in tag->metricIDs row
This reduces the overhead on index and metaindex in lib/mergeset
2019-09-24 00:50:47 +03:00
Aliaksandr Valialkin
7734fc8012 lib/storage: share tsids across all the partSearch instances
This should reduce memory usage when a big number of time series match the given query.
2019-09-23 22:36:16 +03:00
Aliaksandr Valialkin
67a2bcb98a lib/{storage,mergeset}: verify PrepareBlock callback results
Do not touch the first and the last item passed to PrepareBlock
in order to preserve sort order of mergeset blocks.
2019-09-23 20:46:33 +03:00
Aliaksandr Valialkin
3304dc1e85 lib/mergeset: detect whether we are in test by executable suffix 2019-09-22 23:12:35 +03:00
Aliaksandr Valialkin
d2ed8cb0b2 lib/storage: generate the first tag->metricIDs item in a mergeset block with a single metricID
The first item from each mergeset block goes into the index (lib/mergeset.blockHeader),
so it must be short in order to reduce the index size.
2019-09-22 19:37:50 +03:00
Aliaksandr Valialkin
0a9cb6368e lib/workingsetcache: remove data race when resetting c.misses 2019-09-22 19:37:09 +03:00
Aliaksandr Valialkin
7d13c31566 lib/{storage,mergeset}: merge tag->metricID rows into tag->metricIDs rows for common tag values
This should improve lookup performance if the same `label=value` pair exists
in a big number of time series.
This should also reduce memory usage for mergeset data cache, since `tag->metricIDs` rows
occupy less space than the original `tag->metricID` rows.
2019-09-20 22:06:23 +03:00
Aliaksandr Valialkin
272e2f77c9 lib/encoding: optimize UnmarshalUint* and UnmarshalInt* 2019-09-20 13:08:24 +03:00
Aliaksandr Valialkin
7e0c6d4ca6 lib/storage: optimize selecting all the metricIDs by scanning MetricID->TSID entries instead of tag->MetricID entries
The number of MetricID->TSID entries is smaller than the number of tag->MetricID entries
and MetricID->TSID entries are usually shorter than tag->MetricID entries.
This should improve performance when selecting all the metricIDs.
2019-09-20 11:57:57 +03:00
Aliaksandr Valialkin
b0c738ae8b app/vminsert/opentsdbhttp: remove FATAL prefix from logger.Fatalf errors for the sake of consistency with other logger.Fatalf calls 2019-09-19 22:16:11 +03:00
Aliaksandr Valialkin
bf8505353a lib/mergeset: rename misleading mergeSmallParts to mergeExistingParts 2019-09-19 21:48:36 +03:00
Aliaksandr Valialkin
ebbef20535 lib/mergeset: use sort.IsSorted instead of sort.SliceIsSorted in inmemoryBlock.isSorted in order to reduce memory allocations 2019-09-19 20:13:54 +03:00
Aliaksandr Valialkin
89234f395d lib/storage: use sort.Sort instead of sort.slice in getSortedMetricIDs 2019-09-19 20:08:13 +03:00
Aliaksandr Valialkin
6e586fa09c lib/storage: skip duplicate call to intersectMetricIDsWithTagFilter on zero successful intersects 2019-09-19 17:51:10 +03:00
Aliaksandr Valialkin
410f993bf6 lib/mergeset: fill partHeader.firstItem on first block flush 2019-09-19 17:48:22 +03:00
Aliaksandr Valialkin
c05885fb5f lib/storage: mark tag filter returning errFallbackToMetricNameMatch as useless
This will save CPU on subsequent calls for this filter
2019-09-18 19:11:44 +03:00
Aliaksandr Valialkin
e041a196a7 deployment/docker/docker-compose.yml: update Prometheus from v2.3.2 to v2.12.0 2019-09-18 18:30:02 +03:00
Aliaksandr Valialkin
db71c940ea lib/storage: properly construct keys for uselessTagFiltersCache and register useless negative tag filters there 2019-09-17 23:18:37 +03:00
Artem Navoiev
ccb6dc6925 [deployment] clean up helm (#185) 2019-09-16 21:58:16 +03:00
Aliaksandr Valialkin
491b1317f4 vendor: update github.com/valyala/gozstd from v1.6.1 to v1.6.2 2019-09-16 21:50:02 +03:00
Aliaksandr Valialkin
5666112de2 deployment: switch docker image tag from v1.27.2-cluster to v1.27.3-cluster 2019-09-14 11:33:18 +03:00
Aliaksandr Valialkin
ba21622b78 vendor: make vendor-update 2019-09-13 22:49:34 +03:00
Aliaksandr Valialkin
020341d13a deployment/docker: remove file system paths from the compiled binary 2019-09-13 22:46:07 +03:00
Aliaksandr Valialkin
550a12415a app/vminsert/netstorage: log network errors when sending data to vmstorage nodes
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/175
2019-09-13 22:26:24 +03:00
Aliaksandr Valialkin
41ef6b060e lib/mergeset: properly check for sorted block headers
Fix a typo for https://github.com/VictoriaMetrics/VictoriaMetrics/issues/181
2019-09-13 21:59:38 +03:00
Aliaksandr Valialkin
ee4585db33 app/vmselect/promql: properly handle subqueries like aggr_func(rollup_func(metric[window:step]))
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/184
2019-09-13 21:42:11 +03:00
hanzai
08cde5e3f4 lib/workingsetcache: smoothly switch from mode=split to mode=whole and successfully load the cache file 2019-09-13 19:13:16 +03:00
Aliaksandr Valialkin
828e5f6d26 app/vmselect/promql: binary operation fixes according to Prometheus behaviour
The following issues were fixed:
- VictoriaMetrics could leave superfluous labels when using `on` or `ignoring` modifiers
- VictoriaMetrics could return a `duplicate timeseries` error when using `group_left` or `group_right` with a non-empty label list
2019-09-13 17:43:09 +03:00
Artem Navoiev
62b424bc4c [ci] github actions - run pipeline on pull request. Fix running of test in external PR from forks 2019-09-11 14:54:45 +03:00
Aliaksandr Valialkin
ed50b8792b app/vminsert/netstorage: reduce the maximum buffer size for rerouted rows, so it occupies less RAM 2019-09-11 14:50:30 +03:00
Aliaksandr Valialkin
b101064f8b all: report the number of bytes read on io.ReadFull error
This should simplify error investigation similar to https://github.com/VictoriaMetrics/VictoriaMetrics/issues/175
2019-09-11 14:50:24 +03:00
Aliaksandr Valialkin
2f4c950fe9 app/vminsert/netstorage: send per-storageNode bufs to vmstorage nodes in parallel
This should improve the maximum ingestion throughput.

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/175
2019-09-11 14:50:19 +03:00
Aliaksandr Valialkin
694cc59ed1 app/vminsert/netstorage: dynamically adjust timeouts for sending packets from vminsert to vmstorage depending on packet size
Bigger packets get longer timeouts, so they have better chances of being sent to vmstorage.
2019-09-11 14:50:14 +03:00
Aliaksandr Valialkin
568ff61dcf lib/mergeset: dynamically calculate the maximum number of items per part, which can be cached in OS page cache 2019-09-09 11:42:45 +03:00
Artem Navoiev
dc6e4151b0 [ci] bump version of go to 1.13 in github actions config 2019-09-08 19:52:05 +03:00
Aliaksandr Valialkin
9b8af27786 vendor: update github.com/klauspost/compress from v1.7.6 to v1.8.2 2019-09-06 00:49:57 +03:00
Aliaksandr Valialkin
b71d828e84 vendor: update golang.org/x/sys 2019-09-06 00:49:57 +03:00
Aliaksandr Valialkin
1f4e0b722d deployment: switch docker image tag from v1.27.1-cluster to v1.27.2-cluster 2019-09-05 12:30:03 +03:00
Aliaksandr Valialkin
2c654258ef lib/fs: add MustStopDirRemover for waiting until pending directories are removed on graceful shutdown
This patch is mainly required for laggy NFS. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/162
2019-09-05 11:17:17 +03:00
Aliaksandr Valialkin
d0953e9f02 app/vmselect/promql: ignore grouping by destination label in count_values, since such a grouping is performed automatically 2019-09-04 19:59:02 +03:00
Aliaksandr Valialkin
2c2bd897dd lib/storage: remove duplicate tag keys on MetricName.Marshal call
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/172
2019-09-04 18:13:51 +03:00
Aliaksandr Valialkin
5a9b1d85bb deployment/docker: switch Go builder from Go 1.12.9 to Go 1.13.0 2019-09-04 17:17:52 +03:00
Aliaksandr Valialkin
f78ffe565f app/vmselect/promql: do not return artificial points beyond the last point in time series 2019-09-04 16:34:29 +03:00
Aliaksandr Valialkin
a7d5d611fe app/vmselect/prometheus: do not adjust start and end args in /api/v1/query_range if nocache=1 arg is set
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/171
2019-09-04 13:10:17 +03:00
Aliaksandr Valialkin
82bfe818d0 lib/fs: try harder with directory removal on NFS in the event of temporary lock
Do not give up after 11 attempts of directory removal on laggy NFS.

Add `vm_nfs_dir_remove_failed_attempts_total` metric for counting the number of failed attempts
on directory removal.

Log failed attempts on directory removal after long sleep times.

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/162
2019-09-04 12:24:41 +03:00
Alfred Krohmer
7cde25bac4 Don't render extraLabels in ServiceMonitor if they are not given
This produced invalid YAML before.
2019-09-03 22:11:24 +03:00
Aliaksandr Valialkin
3182e2a66b deployment: update docker images from v1.27.0-cluster to v1.27.1-cluster 2019-09-03 21:05:50 +03:00
Aliaksandr Valialkin
b08f085082 app/vmselect/promql: reset timeseries name on group_left and group_right as Prometheus does 2019-09-03 20:43:29 +03:00
Aliaksandr Valialkin
458d412bb6 app/vmselect/netstorage: adaptively adjust the maximum inmemory file size for storing temporary blocks
The maximum inmemory file size now depends on `-memory.allowedPercent`.
This should improve performance and reduce the number of filesystem calls
on machines with big amounts of RAM when performing heavy queries
over a big number of samples and time series.
2019-09-03 13:32:18 +03:00
Aliaksandr Valialkin
0b0153ba3d lib/storage: invalidate the tagFilters -> TSIDs cache when newly added index data becomes visible to search
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/163
2019-08-29 15:08:44 +03:00
Aliaksandr Valialkin
8504a38214 lib/prompb: apply ba06b47c16
The following commands used:

gofmt -r '(uint64(x)&0x7F)<<shift -> uint64(x&0x7F)<<shift' -w ./lib/prompb/
gofmt -r '(int64(x)&0x7F)<<shift -> int64(x&0x7F)<<shift' -w ./lib/prompb/
2019-08-29 13:35:54 +03:00
Aliaksandr Valialkin
fb719bfb23 deployment: update docker images from v1.26.0-cluster to v1.27.0-cluster 2019-08-29 00:09:51 +03:00
Aliaksandr Valialkin
8f81908b1f .github/workflows: added GitHub actions 2019-08-28 23:11:26 +03:00
Aliaksandr Valialkin
604a4312f9 all: port to FreeBSD on GOARCH=amd64 2019-08-28 01:46:09 +03:00
Aliaksandr Valialkin
5893a9f9a3 app/vmstorage: increase default values for search.maxTagKeys, search.maxTagValues and search.maxUniqueTimeseries 2019-08-27 14:28:26 +03:00
Aliaksandr Valialkin
da07a6fb38 lib/storage: go fmt 2019-08-27 14:28:24 +03:00
Aliaksandr Valialkin
a63b69e9e2 lib/storage: report proper maxMetrics limit when more than -search.maxUniqueTimeseries series match the given filters 2019-08-27 14:21:31 +03:00
Aliaksandr Valialkin
82e813bad3 lib/storage: properly handle (?i) in the tag filter regexp
Fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/161
2019-08-26 00:44:56 +03:00
Aliaksandr Valialkin
e2eac858b5 lib/storage: calculate the maximum number of rows per small part from -memory.allowedPercent
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/159

This simplifies error detection in addition to the `vm_rows_ignored_total` counters.
2019-08-25 15:29:09 +03:00
Aliaksandr Valialkin
0a8dd9cc9a lib/storage: calculate the maximum number of rows per small part from -memory.allowedPercent
This should improve query speed over recent data on machines with big amounts of RAM
2019-08-25 14:41:32 +03:00
Aliaksandr Valialkin
bc576fb386 lib/storage: properly limit the number of output rows in small and big parts storage
Previously small parts storage didn't take into account the available disk space for big parts.
2019-08-25 14:41:32 +03:00
Aliaksandr Valialkin
947decb3dd lib/storage: remove outdated comment on maxRowsPerSmallPart
The comment became outdated after the commit ed6ac1a5df027f0dfc22448e3b27c26b6f77c67a,
which stops merging of small parts on graceful shutdown instead of waiting
for their completion.
2019-08-25 13:46:10 +03:00
Artem Navoiev
ce7798a6a2 [deployment] add ingresses and service monitors 2019-08-25 01:04:56 +03:00
Aliaksandr Valialkin
38711526d3 app/vminsert/influx: set db label only if the Influx line doesn't have a db tag 2019-08-24 13:55:01 +03:00
Aliaksandr Valialkin
023675c33e vendor: update github.com/valyala/quicktemplate from v1.1.1 to v1.2.0 2019-08-24 13:39:46 +03:00
Aliaksandr Valialkin
1ee536f9fd app/vminsert: skip empty tags 2019-08-24 13:36:41 +03:00
Aliaksandr Valialkin
a283023d16 app/vminsert/opentsdbhttp: skip invalid rows and continue parsing the remaining rows
Invalid rows are logged and counted in `vm_rows_invalid_total{type="opentsdb-http"}` metric
2019-08-24 13:36:41 +03:00
Aliaksandr Valialkin
38b9615c53 app/vminsert/opentsdb: skip invalid rows and continue parsing the remaining rows
Invalid rows are logged and counted in `vm_rows_invalid_total{type="opentsdb"}` metric
2019-08-24 13:36:41 +03:00
Aliaksandr Valialkin
2a8fc41bab app/vminsert/graphite: skip invalid rows and continue parsing the remaining rows
Invalid rows are logged and counted in `vm_rows_invalid_total{type="graphite"}` metric
2019-08-24 13:36:41 +03:00
Aliaksandr Valialkin
22685ef94d app/vminsert/influx: skip invalid rows and continue parsing the remaining rows
Invalid influx lines are logged and counted in `vm_rows_invalid_total{type="influx"}` metric.
2019-08-24 13:36:41 +03:00
Aliaksandr Valialkin
425a81a6c7 app/vminsert/influx: do not allow escaping newline char, since it doesn't occur in real life
The previous report about escaped newline chars in the Influx line protocol was a false alarm.
2019-08-23 18:43:00 +03:00
Aliaksandr Valialkin
8da8dd0876 app/vminsert/opentsdbhttp: allow timestamp as float64 and as string, since it occurs in real life 2019-08-23 18:35:52 +03:00
Aliaksandr Valialkin
0ea21eb9dc app/vminsert/influx: handle \r\n aka CRLF Influx line endings from the Windows world
Such lines exist in real life.
2019-08-23 18:28:54 +03:00
Aliaksandr Valialkin
b3502b2b39 app/vminsert/influx: allow escaping newline char
Though newline char isn't mentioned in escape rules at https://docs.influxdata.com/influxdb/v1.7/write_protocols/line_protocol_tutorial/ ,
there are reports that such chars occur in real life
2019-08-23 15:14:58 +03:00
Aliaksandr Valialkin
f1f8fce4f7 app/vminsert/influx: skip comments starting with # in influx line protocol 2019-08-23 14:43:24 +03:00
Aliaksandr Valialkin
697de90893 app/vminsert: do not drop data in reroutedBuf if all the storage nodes are unhealthy 2019-08-23 10:38:19 +03:00
Aliaksandr Valialkin
a5dc54efc3 app/vminsert: properly limit the size of reroutedBuf 2019-08-23 10:29:51 +03:00
Aliaksandr Valialkin
c50975e12d vendor: make vendor-update 2019-08-23 10:03:42 +03:00
Aliaksandr Valialkin
c197641978 all: return 503 http error if service is temporarily unavailable
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/156
2019-08-23 09:49:50 +03:00
Aliaksandr Valialkin
e734076f0f app/vminsert: allow setting the maximum number of labels per time series via -maxLabelsPerTimeseries 2019-08-23 08:47:18 +03:00
Aliaksandr Valialkin
4ed63d033a lib/storage: add benchmarks for regexp filter match / mismatch
These benchmarks allow estimating the performance of regexp filters in PromQL
2019-08-22 16:37:19 +03:00
Aliaksandr Valialkin
559dd03181 deployment: update docker image tags from v1.25.2-cluster to v1.26.0-cluster 2019-08-22 14:58:58 +03:00
Aliaksandr Valialkin
e9db22a551 app/vmselect/promql: attempt to repair invalid bucket counts passed to histogram_quantile
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/136
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/154
2019-08-22 14:39:24 +03:00
Aliaksandr Valialkin
0697164b4f app/vminsert: add ability to ingest data via HTTP OpenTSDB /api/put requests
This is a manual merge of the https://github.com/VictoriaMetrics/VictoriaMetrics/pull/152
Thanks to nustinov@gmail.com for the initial pull request.
2019-08-22 12:46:54 +03:00
Aliaksandr Valialkin
4d555c7c87 app/vminsert/opentsdb: fix BenchmarkRowsUnmarshal by adding missing put prefixes to each line 2019-08-21 19:15:04 +03:00
Aliaksandr Valialkin
90a4b00b10 app/vmselect/promql: fix panic on -search.disableCache
Reset the cache instead of stopping it when it is disabled, since the cache is also stopped on graceful shutdown.
2019-08-21 17:12:01 +03:00
Aliaksandr Valialkin
491b1762c8 app/vmselect/promql: explain why empty timeseries aren't removed in transformLabelValue 2019-08-21 11:29:41 +03:00
Aliaksandr Valialkin
db1de4277c app/vmselect/promql: remove NaNs from /api/v1/query_range output like Prometheus does
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/153
2019-08-20 23:01:59 +03:00
Aliaksandr Valialkin
99331606e1 app/vmselect/promql: pre-allocate memory for the map used for checking for duplicate timeseries
This should reduce memory allocations for a big number of timeseries
2019-08-20 23:01:57 +03:00
Aliaksandr Valialkin
1101765adb app/vmselect/promql: add label_value(q, label_name) func, which returns numeric values of the label with the name label_name in q 2019-08-20 00:28:44 +03:00
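For example, given a hypothetical series `backup_info{level="2"}`:

    label_value(backup_info, "level")

returns 2 for that series.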
Aliaksandr Valialkin
6ec6a8d7c1 lib/storage: try a slower path for searching the tag filter with the minimum number of matching time series before giving up with the `increase -search.maxUniqueTimeseries` error 2019-08-19 16:07:05 +03:00
Aliaksandr Valialkin
940349ccb9 app/vmselect/promql: independently track offset hints for tStart and tEnd
This should improve performance if time series start or end inside the selected time range
2019-08-19 13:40:24 +03:00
Aliaksandr Valialkin
6ae4b4190f app/vmselect/promql: optimize search for timestamp boundaries in rollupConfig.Do
This should improve the performance of queries over a big number of time series
with a big number of output points.
2019-08-19 13:03:38 +03:00
Aliaksandr Valialkin
c59f5c4865 lib/storage: pre-allocate memory for blockHeader slice in unmarshalBlockHeaders
This reduces memory usage and memory fragmentation when working with a big number of time series
2019-08-19 12:46:45 +03:00
Aliaksandr Valialkin
45e57be590 deployment: update docker image tags from v1.25.1-cluster to v1.25.2-cluster 2019-08-18 22:56:11 +03:00
Aliaksandr Valialkin
0f45273e20 deployment/docker: switch Go builder from go1.12.8 to go1.12.9 2019-08-18 22:09:21 +03:00
Aliaksandr Valialkin
005aabd305 app/vmselect/promql: add scrape_interval(q[d]) function, which returns the scrape interval for q over d 2019-08-18 21:08:15 +03:00
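For example (hypothetical metric name):

    scrape_interval(node_cpu_seconds_total[1h])

should return the estimated interval in seconds between adjacent samples over the trailing hour for each matching series.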
Aliaksandr Valialkin
218cb4623a app/vmselect/promql: handle comparisons with NaN similarly to Prometheus
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/150
2019-08-18 00:25:58 +03:00
Aliaksandr Valialkin
dcce92c63c app/vmselect/promql: add lifetime(q[d]) function, which returns the lifetime of q over d in seconds.
This function is useful for determining time series lifetime.
`d` must exceed the expected lifetime of the time series, otherwise
the function would return values close to `d`.
2019-08-16 11:59:51 +03:00
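For example, for short-lived series (hypothetical metric name):

    lifetime(container_cpu_usage_seconds_total[1d])

returns the per-series lifetime in seconds, provided the series live much shorter than the 1d window.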
Aliaksandr Valialkin
0cb66a8f95 app/vmselect/promql: fix corner-case calculation for ideriv 2019-08-16 11:59:50 +03:00
Aliaksandr Valialkin
1b5b9ced27 app/vmselect/promql: properly handle corner cases for rollup functions 2019-08-15 23:31:28 +03:00
Aliaksandr Valialkin
f696cc503a lib/workingsetcache: automatically detect when it is better to double cache capacity 2019-08-15 22:58:04 +03:00
Aliaksandr Valialkin
97634d7101 deployment/docker: switch Go builder from go1.12.7 to go1.12.8 2019-08-15 20:43:23 +03:00
Aliaksandr Valialkin
e6541a7676 deployment: update docker images 2019-08-15 14:18:43 +03:00
Aliaksandr Valialkin
e399b948de Makefile: remove duplicate -cluster suffix from tar.gz file generated by make release, since this suffix must be already present in PKG_TAG 2019-08-15 14:07:43 +03:00
Aliaksandr Valialkin
1dd736a75c Makefile: add make release rule for building release tar.gz file with cluster binaries
Fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/146
2019-08-15 14:05:56 +03:00
Aliaksandr Valialkin
c15dfc6cea vendor: update github.com/valyala/gozstd from v1.5.1 to v1.6.0 2019-08-15 12:57:59 +03:00
Aliaksandr Valialkin
83ed5d3109 deployment: update docker images 2019-08-14 03:12:49 +03:00
Aliaksandr Valialkin
99eed2ca14 lib/storage: properly cache tagFilters -> TSIDs entries from historical index 2019-08-14 02:32:25 +03:00
Aliaksandr Valialkin
f1d81b9405 lib/storage: compress contents of cache for tagFilters -> TSIDs
This should increase cache capacity
2019-08-14 02:32:22 +03:00
Aliaksandr Valialkin
b8bbe92de1 app/vmselect/promql: store compressed results in the cache
This should increase rollup results cache capacity.
2019-08-14 02:32:16 +03:00
Aliaksandr Valialkin
8c2158af24 all: use workingsetcache instead of fastcache
This should reduce the amount of RAM required for processing time series
with non-zero churn rate.

The previous cache behavior can be restored with `-cache.oldBehavior` command-line flag.
2019-08-13 21:40:28 +03:00
Aliaksandr Valialkin
51263b1a45 lib/fs: add test for IsTemporaryFileName 2019-08-13 21:33:54 +03:00
Aliaksandr Valialkin
867612a4a4 Makefile: consistency renaming: check_all -> check-all 2019-08-13 21:32:08 +03:00
Aliaksandr Valialkin
5a7ab0d90b lib/storage: remove broken BenchmarkIndexDBSearchTSIDs 2019-08-13 20:21:23 +03:00
Aliaksandr Valialkin
39f3f3a517 lib: move common code for creating flock.lock file into fs.CreateFlockFile 2019-08-13 01:46:20 +03:00
Aliaksandr Valialkin
73f866d874 lib/fs: atomically create file with the given contents on WriteFileAtomically
This should prevent corruption of `transaction` and `metadata.json` files
on unclean shutdown such as OOM, `kill -9`, power loss, etc.

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/148
2019-08-12 15:02:04 +03:00
Aliaksandr Valialkin
ad5be625f8 deployment: update docker images 2019-08-06 16:10:03 +03:00
Aliaksandr Valialkin
4fb635b0c9 lib/storage: do not change timestamps to constant rate if values are constant or have constant delta
Changing timestamps to a constant rate breaks the original timestamps, which results in issues like
https://github.com/VictoriaMetrics/VictoriaMetrics/issues/120 and
https://github.com/VictoriaMetrics/VictoriaMetrics/issues/141 .
2019-08-06 15:40:17 +03:00
Aliaksandr Valialkin
f56c1298ad app/vmstorage: add vm_concurrent_addrows_* metrics for tracking concurrency for Storage.AddRows calls
Also track the number of rows dropped due to the exceeded timeout
on the concurrency limit for Storage.AddRows. This number is tracked in `vm_concurrent_addrows_dropped_rows_total`
2019-08-06 15:08:43 +03:00
Aliaksandr Valialkin
2d869c6d9b vendor: update github.com/VictoriaMetrics/metrics to v1.7.1 2019-08-05 19:21:53 +03:00
Aliaksandr Valialkin
8e05758ff5 app: add vm_concurrent_ metrics for visibility in concurrency limiters for vminsert and vmselect 2019-08-05 18:30:29 +03:00
Aliaksandr Valialkin
1258c9ef10 vendor: make vendor-update 2019-08-05 10:34:38 +03:00
Aliaksandr Valialkin
a3ecf3c1f7 lib/storage: properly reset partSearch.fetchData in partSearch.reset 2019-08-05 09:55:50 +03:00
Artem Navoiev
dd4ea63ed2 [deployment] add statefulset for vmselect (#140) 2019-08-04 23:34:05 +03:00
Aliaksandr Valialkin
a868f8607f deployment: update docker images to v1.24.0-cluster 2019-08-04 23:31:57 +03:00
Aliaksandr Valialkin
53c8f56436 app/vmselect: allow passing match[], start and time to /api/v1/label/<label_name>/values
`/api/v1/label/<label_name>/values?match[]=q` emulates the `label_values(q, <label_name>)`
call in Grafana templating.
2019-08-04 23:07:00 +03:00
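A sketch of such a request (host, tenant prefix and label name are illustrative):

    curl 'http://vmselect:8481/select/0/prometheus/api/v1/label/job/values?match[]=up'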
Aliaksandr Valialkin
880b1d80b1 app/vmselect: optimize /api/v1/series by skipping storage data
Fetch and process only time series metainfo.
2019-08-04 23:00:46 +03:00
Aliaksandr Valialkin
7f5afae1e3 app/vmselect/prometheus: prevent fetching and scanning all the data on /api/v1/series calls by default 2019-08-04 19:42:45 +03:00
Aliaksandr Valialkin
000c154641 app/vmselect/promql: tune automatic window adjustment
Increase the window adjustment for small scrape intervals,
since they usually have higher jitter.

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/139
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/134
2019-08-04 19:34:11 +03:00
Aliaksandr Valialkin
1d4ddadbb1 app/vmselect/promql: further increase the allowed jitter for scrape interval
Real-world production data shows higher jitter than 1/8 of scrape interval.
This may result in gaps on the graph. So increase the allowed jitter to 1/4
of scrape interval in order to reduce the probability of gaps on the graphs
over time series with high jitter for scrape_interval.
2019-08-02 20:16:41 +03:00
Aliaksandr Valialkin
8ed84a4713 app/vminsert/influx: round automatically generated timestamp according to the given precision arg 2019-08-02 00:24:39 +03:00
Aliaksandr Valialkin
ade7bc30db app/vmselect/promql: tolerate higher jitter in scrape interval
Allow jitter of up to 1/8 instead of 1/16 of the scrape interval.
This should improve graphs when `step` is smaller than the `scrape_interval`.
2019-08-01 23:25:53 +03:00
Aliaksandr Valialkin
a99e89945e lib/decimal: modernize tests a bit 2019-07-31 21:09:54 +03:00
Aliaksandr Valialkin
6fceedccce deployment: update docker images 2019-07-31 16:38:39 +03:00
Aliaksandr Valialkin
c994fbf500 app/vmselect/promql: add vm_slow_queries_total metric for counting slow queries
The query is slow if its execution time exceeds `-search.logSlowQueryDuration`
2019-07-31 03:36:45 +03:00
Aliaksandr Valialkin
071a122119 app/vmselect/promql: return NaN from histogram_quantile if at least a single bucket is broken 2019-07-31 01:18:11 +03:00
Aliaksandr Valialkin
b9a16b93e7 app/vmselect/promql: allow adjusting window for default rollup function
Default rollup function is `last_over_time`. It must support adjusting
the provided window in order to prevent from gaps on the graph
for window values smaller than scrape interval.

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/134
2019-07-31 00:45:58 +03:00
Aliaksandr Valialkin
c901a6472f app/vmselect/promql: return NaN values if invalid bucket counts are passed to histogram_quantile
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/136
2019-07-30 22:05:55 +03:00
Aliaksandr Valialkin
b7c4b0c6d2 lib/storage: fix matching against tag filter with empty name
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/137
2019-07-30 15:15:21 +03:00
Aliaksandr Valialkin
5b8526e925 app/vmselect/netstorage: improve error message when reading data blocks from storage
Mention the block number in the error. This should simplify troubleshooting in this code.
2019-07-28 12:17:33 +03:00
Aliaksandr Valialkin
b7089705b7 app/vminsert: add vm_rows_per_insert summary metric
This metric should help tuning batch sizes on clients writing data to VictoriaMetrics
2019-07-27 13:28:20 +03:00
Aliaksandr Valialkin
1fd4e9fb5c app/vminsert: improve error messages for Influx, OpenTSDB and Graphite parsing
Include in the error message the line which failed to parse.

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/127
2019-07-26 22:09:21 +03:00
Aliaksandr Valialkin
34b21a8671 deployment: update cluster image to v1.23.0-cluster 2019-07-26 20:07:47 +03:00
Aliaksandr Valialkin
8253790157 app/vmstorage: consistency renaming for ignored rows metrics
    vm_too_big_timestamp_rows_total -> vm_rows_ignored_total{reason="big_timestamp"}
    vm_too_small_timestamp_rows_total -> vm_rows_ignored_total{reason="small_timestamp"}
2019-07-26 20:02:24 +03:00
Aliaksandr Valialkin
c6bec48927 lib/storage: add metrics for calculating skipped rows outside the retention
The metrics are:

    - vm_too_big_timestamp_rows_total
    - vm_too_small_timestamp_rows_total
2019-07-26 14:11:56 +03:00
Aliaksandr Valialkin
aac482517f app/vmselect/promql: return NaN from count() over zero time series
This aligns `count` behavior with Prometheus.
2019-07-25 22:02:34 +03:00
Aliaksandr Valialkin
0e52357f35 app/vmselect/promql: properly calculate incremental aggregations grouped by __name__
Previously the following query could fail when multiple distinct metric names matched:

    sum(count_over_time{__name__!=''}) by (__name__)
2019-07-25 21:53:26 +03:00
Aliaksandr Valialkin
f2e8d54fb0 lib/encoding/zstd: go fmt 2019-07-25 01:37:57 +03:00
Aliaksandr Valialkin
97b5dc7122 lib/encoding/zstd: disable CRC checks in pure Go build
This should give slightly better compression and decompression performance.
Additionally this shaves off 4 bytes per each compressed block.
2019-07-24 19:17:32 +03:00
Aliaksandr Valialkin
54f035d4ce all: small updates after PR #114 2019-07-24 17:43:43 +03:00
Aliaksandr Valialkin
7a133567fb lib/encoding: small fixes in tests after the PR #114 2019-07-24 17:43:39 +03:00
Roman Khavronenko
fcf09aaa3c all: add Pure Go build (pull request #114)
Updates #94
2019-07-24 17:43:32 +03:00
Aliaksandr Valialkin
dd7bba94a3 dashboards: use rate instead of irate, because irate doesn't capture spikes
See https://medium.com/@valyala/why-irate-from-prometheus-doesnt-capture-spikes-45f9896d7832 for details
2019-07-20 15:55:48 +03:00
Aliaksandr Valialkin
3fae34eeb4 lib/encoding: improve gauge series detection
- Series with negative values are always gauges
- Counters may only have increasing values with possible counter resets

This should improve compression ratio for gauge series which
were previously mistakenly detected as counters.
2019-07-20 14:05:25 +03:00
Aliaksandr Valialkin
b335a811c3 deployment: switch builder from go1.12.6 to go1.12.7 2019-07-20 12:14:05 +03:00
Jiri Tyr
0aed0e0b5d Adding Grafana dashboards for VM cluster (#105) 2019-07-20 10:25:09 +03:00
Aliaksandr Valialkin
cb8104cf77 app: clarify error messages when -storageNode arg is missing in vminsert and vmselect 2019-07-20 10:21:59 +03:00
Aliaksandr Valialkin
fab1962e02 deployment/k8s/helm: use correct default ports for -storageNode
Previously these ports were swapped. Correct ports are:

- vminsert: -storageNode=*:8400
- vmselect: -storageNode=*:8401
2019-07-20 01:24:32 +03:00
Aliaksandr Valialkin
e3dcfe5851 deployment/docker/docker-compose.yml: use default ports for vminsert and vmselect services
These ports were swapped. Correct default ports are:

- vminsert: -httpListenAddr=:8480, -storageNode=*:8400
- vmselect: -httpListenAddr=:8481, -storageNode=*:8401
2019-07-20 01:20:08 +03:00
Thor Anker Kvisgård Lange
f576b267eb Fixed small bug in vmstorage name template
Signed-off-by: Thor Anker Kvisgård Lange <thanl@mhivestasoffshore.com>
2019-07-17 13:30:23 +03:00
Aliaksandr Valialkin
76b947dcb4 deployment: update Docker images 2019-07-15 23:56:24 +03:00
Aliaksandr Valialkin
7abb96b454 lib/netutil: do not count timeouts as network errors 2019-07-15 23:06:13 +03:00
Aliaksandr Valialkin
2b4254d01f app/vminsert: use netutil.TCPListener for collecting network-related metrics for Graphite and OpenTSDB TCP traffic 2019-07-15 22:58:35 +03:00
Aliaksandr Valialkin
092c9b39a8 app/vmselect/promql: remove empty time series after applying filters like q > 0
This should reduce CPU and RAM usage for queries over a high number of time series.
2019-07-12 19:59:49 +03:00
Aliaksandr Valialkin
3bc9d3a14c vendor: update github.com/VictoriaMetrics/metrics to v1.7.0
This version adds support for `process_*` metrics similar
to metrics exposed by https://github.com/prometheus/client_golang .

Fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/92
2019-07-12 17:24:58 +03:00
Aliaksandr Valialkin
6875fb411a app/vmselect/promql: parallelize incremental aggregation to multiple CPU cores
This may reduce response times for aggregation over a big number of time series
with small step between output data points.
2019-07-12 15:53:12 +03:00
Aliaksandr Valialkin
be0ce54010 deployment: update docker images 2019-07-12 02:35:09 +03:00
Aliaksandr Valialkin
73a47d2a53 lib/storage: remove unused function isTooBigTimeRangeForDateMetricIDs 2019-07-12 02:28:40 +03:00
Aliaksandr Valialkin
97f9397687 lib/storage: do not reduce maxMetrics on time ranges exceeding maxDaysForDateMetricIDs
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/95
2019-07-12 02:21:52 +03:00
Aliaksandr Valialkin
1de6ef5f51 deployment: update Docker images 2019-07-11 19:10:35 +03:00
Aliaksandr Valialkin
4a8e6f47fe app/vmselect/prometheus: set start arg in /api/v1/series to the minimum allowed time by default as Prometheus does
Fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/91
2019-07-11 17:11:37 +03:00
Aliaksandr Valialkin
3313cdf816 app/vmselect/prometheus: convert negative times to 0, since they aren't supported by the storage 2019-07-11 17:11:35 +03:00
Aliaksandr Valialkin
4ca66344ee lib/storage: do not pollute inverted index with data for samples outside the retention period 2019-07-11 17:11:33 +03:00
Aliaksandr Valialkin
0522efb2d6 lib/storage: add missing tagFilter.Marshal func 2019-07-11 15:01:01 +03:00
Aliaksandr Valialkin
12b1d67b41 lib/storage: use fast path for orSuffix when searching for metricIDs against plain tag value 2019-07-11 14:48:51 +03:00
Aliaksandr Valialkin
bf2e1b0ac1 lib/storage: remember and skip individual tag filters matching too many metrics
This saves CPU time by skipping useless matching for individual tag filters.
2019-07-11 14:48:47 +03:00
Aliaksandr Valialkin
cbab86fd9d app/vmselect/promql: reduce RAM usage for aggregates over big number of time series
Calculate incremental aggregates for `aggr(metric_selector)` function instead of
keeping all the time series matching the given `metric_selector` in memory.
2019-07-10 13:03:36 +03:00
Aliaksandr Valialkin
ba8195c58e all: consistency renaming: bytesSize -> sizeBytes 2019-07-10 00:47:42 +03:00
Aliaksandr Valialkin
df6f17b82c app/vmselect/promql: mention -search.logSlowQueryDuration flag value in the slow query log message 2019-07-10 00:43:01 +03:00
Aliaksandr Valialkin
73ae889244 app/vmselect/promql: extract removeGroupTags function for removing unneeded tags from MetricName according to the given modifierExpr 2019-07-09 23:20:58 +03:00
Aliaksandr Valialkin
603b34edbd app/vmselect/promql: properly preserve metric name after applying functions in any case from transformFuncsKeepMetricGroup 2019-07-09 23:10:49 +03:00
Aliaksandr Valialkin
d6ec95693d app/vmselect/prometheus: typo fix 2019-07-07 23:34:04 +03:00
Aliaksandr Valialkin
61f6f63964 README.md: mention that vminsert spreads data among vmstorage nodes using consistent hashing 2019-07-07 22:00:45 +03:00
Aliaksandr Valialkin
36636c1f6f app/vmselect/prometheus: handle minTime and maxTime values that may be set by Promxy or Prometheus client
Fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/88
2019-07-07 21:53:52 +03:00
Aliaksandr Valialkin
50c5894dc0 deployment: update docker images 2019-07-04 19:54:27 +03:00
Aliaksandr Valialkin
bba07d05fe app/vmselect/promql: remove empty timeseries left after topk call 2019-07-04 19:43:07 +03:00
Aliaksandr Valialkin
41f512af1c all: add vm_data_size_bytes metrics for easy monitoring of on-disk data size and on-disk inverted index size 2019-07-04 19:43:04 +03:00
Aliaksandr Valialkin
512a627855 app/vmselect/prometheus: update adjustLastPoints function
- Do not overwrite last points by the previous NaNs, since this may result in empty time series.
- Overwrite the last 2 points instead of 3. This should be enough in most cases.
2019-07-04 09:30:56 +03:00
Aliaksandr Valialkin
858746fa6c app/vmselect/promql: gracefully handle duplicate timestamps in irate and rollup_rate funcs
Previously such timestamps resulted in `+Inf` values. Now the previous timestamp is used
for the calculations.
2019-07-03 12:41:30 +03:00
Aliaksandr Valialkin
81da1c7b47 README.md: stylistic updates 2019-07-02 15:57:45 +03:00
Aliaksandr Valialkin
a3abed80ff app/vmselect: do not return empty time series in /api/v1/query result 2019-07-01 17:16:26 +03:00
Aliaksandr Valialkin
6682a35731 lib/memory: attempt #3 to determine memory limit for LXC container
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/84
2019-07-01 14:01:57 +03:00
Aliaksandr Valialkin
c3c60bee45 app/vmselect: add -search.denyPartialResponse flag for disabling partial responses if some of vmstorage nodes are unavailable
Also accept the `deny_partial_response` query arg in Prometheus API handlers. If it is set to true,
then an error is returned if some of the vmstorage nodes are unavailable.
2019-06-30 01:27:07 +03:00
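For example (host and tenant prefix are illustrative):

    curl 'http://vmselect:8481/select/0/prometheus/api/v1/query?query=up&deny_partial_response=true'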
Aliaksandr Valialkin
60cff62586 deployment: update docker images 2019-06-29 21:25:20 +03:00
Aliaksandr Valialkin
b6ea1a7d5e lib/mergeset: make fmt 2019-06-29 14:25:46 +03:00
Aliaksandr Valialkin
ffc1bb00f6 lib/storage: skip non-matching metricIDs in sortedFilter
This should improve performance for big sortedFilter lists.
2019-06-29 13:49:40 +03:00
Aliaksandr Valialkin
2257dcd278 lib/mergeset: speed up binarySearchKey by skipping the first item during binary search 2019-06-29 13:49:32 +03:00
Aliaksandr Valialkin
72a3050c41 app/vmselect/promql: consistency renaming: candlestick -> rollup_candlestick 2019-06-29 03:13:25 +03:00
Aliaksandr Valialkin
6ea12a079e lib/memory: remove TestReadLXCMemoryLimit, since it doesn't work in Travis 2019-06-28 18:23:06 +03:00
Aliaksandr Valialkin
d0732d3137 lib/memory: attempt #2 to determine memory limit inside LXC container
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/84
2019-06-28 18:08:57 +03:00
Aliaksandr Valialkin
628571a837 README.md: update cluster scheme 2019-06-28 17:54:13 +03:00
Aliaksandr Valialkin
ad436757c3 lib/memory: an attempt to read proper memory limit inside LXC container
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/84
2019-06-28 15:35:02 +03:00
Aliaksandr Valialkin
c6598a8507 vendor: update github.com/VictoriaMetrics/metrics to v1.6.2
This fixes Summary printing for *_count and *_sum values with metric names containing labels.
2019-06-28 14:18:24 +03:00
Aliaksandr Valialkin
4f8cbc0782 vendor: update github.com/VictoriaMetrics/metrics to v1.6.1 2019-06-28 14:06:32 +03:00
Aliaksandr Valialkin
391bc8bf38 app/vmselect: fix 32bit arm build
Fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/83
2019-06-27 19:37:17 +03:00
Aliaksandr Valialkin
2d497c3b8e README.md: add cluster scheme image 2019-06-27 19:23:36 +03:00
Aliaksandr Valialkin
96342f1422 app/vmselect: add candlestick(m[d]) func for returning open, close, low and high rollups on the given time range d
This function is frequently used in financial apps. See https://en.wikipedia.org/wiki/Candlestick_chart
2019-06-27 18:46:54 +03:00
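For example, over a hypothetical `trade_price` series:

    candlestick(trade_price[5m])

returns open, close, low and high rollups calculated over 5-minute windows.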
Aliaksandr Valialkin
416d27ef11 lib/storage: optimize time series search by regexp filter
This should improve search speed on label filters like `{foo=~"bar.+baz"}`
2019-06-27 16:18:00 +03:00
Aliaksandr Valialkin
5850a9ea78 README.md: improve wording on gossip protocol 2019-06-26 23:50:34 +03:00
Aliaksandr Valialkin
05b7cb1d42 README.md: restore the link to the unsuccessful attempt to implement the Gossip protocol in Thanos
This link provides good information on gossip fragility
2019-06-26 23:48:48 +03:00
Jiri Tyr
e7a0bf1a71 Change the default influxMeasurementFieldSeparator 2019-06-26 13:22:54 +03:00
Aliaksandr Valialkin
d5cb9fddd8 app/vminsert: fix infinite loop when reading two lines without a trailing newline
Fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/82
2019-06-26 02:52:56 +03:00
Aliaksandr Valialkin
916d9ef5b3 deployment: update docker images 2019-06-25 21:49:23 +03:00
Aliaksandr Valialkin
4f54bcf90b app/vmselect/promql: suppress error when template func is used inside modifier list. Just leave it as is
Fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/78
2019-06-25 20:43:57 +03:00
Aliaksandr Valialkin
72873f67aa README.md: improve docs for Helm chapter 2019-06-25 20:13:47 +03:00
Aliaksandr Valialkin
ee23a143b9 lib/storage: make sure non-nil args are passed to openIndexDB 2019-06-25 20:10:08 +03:00
Aliaksandr Valialkin
8b0a63722f lib/storage: reduce too big maxMetrics in getTagFilterWithMinMetricIDsCountAdaptive
This should improve performance of inverted index search for a big number of unique time series
when a big -search.maxUniqueTimeseries is set.
2019-06-25 19:57:31 +03:00
Aliaksandr Valialkin
0263cb0adc lib/storage: free up memory from caches owned by indexDB when it is deleted 2019-06-25 14:41:16 +03:00
Aliaksandr Valialkin
362e187011 lib/storage: use unversioned keys for tag cache in extDB
Data in ExtDB cannot be changed, so it is OK to use unversioned keys for tag cache.
This should improve performance for index lookups over a big number of time series.
2019-06-25 13:15:42 +03:00
Aliaksandr Valialkin
51e2f3b48f lib/storage: skip searching in extDB if it doesn't contain items for the given time range
This should improve inverted index search performance for a big number
of unique time series when the search is performed only on recent data.
2019-06-25 12:57:56 +03:00
Aliaksandr Valialkin
dbc1e87bac deployment: update docker images 2019-06-24 23:11:03 +03:00
Aliaksandr Valialkin
d0bf4393a9 app/vmselect/promql: increase default value for -search.maxPointsPerTimeSeries from 10k to 30k
This may be required for subqueries with small steps. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/77
2019-06-24 22:53:25 +03:00
Aliaksandr Valialkin
334cf253c7 app/vmselect/promql: adjust value returned by linearRegression to the end of time range like Prometheus does
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/71
2019-06-24 22:46:03 +03:00
Aliaksandr Valialkin
14cd628948 app/vmselect/promql: add sum2 and sum2_over_time, geomean and geomean_over_time funcs.
These functions may be useful for statistical calculations.
2019-06-24 16:45:00 +03:00
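For example (hypothetical metric names):

    geomean(process_cpu_seconds_total)
    sum2_over_time(request_latency_seconds[5m])

`sum2` returns the sum of squares, which is handy for variance-style calculations.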
Aliaksandr Valialkin
fb9358635d lib/storage: mention source parts on merge error
This should simplify determining the broken source part.

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/76
2019-06-24 14:09:46 +03:00
Aliaksandr Valialkin
0eac538fc8 app/vmselect/promql: adjust the provided window only for range functions with dt in denominator
This should fix range function calculations such as `changes(m[d])` where `d` is smaller
than the scrape interval.

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/72
2019-06-23 19:27:25 +03:00
Aliaksandr Valialkin
ec57e59154 app/vmselect/promql: use deriv_fast instead of deriv in ttf, since deriv calculations have been changed recently 2019-06-23 15:54:12 +03:00
Aliaksandr Valialkin
516062b162 app/vmselect/promql: adjust ttf calculation, so deriv(freev) for freev=m[d] could be properly calculated 2019-06-23 14:31:36 +03:00
Aliaksandr Valialkin
5ea5ec4f44 vendor: update github.com/valyala/gozstd to v1.5.1 2019-06-22 00:14:11 +03:00
Aliaksandr Valialkin
ef6ca22c1d deployment: update docker images 2019-06-21 23:35:48 +03:00
Aliaksandr Valialkin
a4e040f5ef app/vmselect/promql: typo fixes in comments 2019-06-21 23:22:54 +03:00
Aliaksandr Valialkin
c05d443791 app/vmselect/promql: add deriv_fast function for calculating fast derivative
`deriv_fast` calculates derivative based on the first and the last point on the interval
instead of calculating linear regression based on all the data points on the interval.

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/73
2019-06-21 23:05:48 +03:00
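For example:

    deriv_fast(process_resident_memory_bytes[10m])

divides the difference between the last and the first points by the time between them, while `deriv` fits a linear regression over all the points in the window.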
Aliaksandr Valialkin
98eafdbd58 app/vmselect/promql: use linear regression in deriv func like Prometheus does
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/73
2019-06-21 22:54:34 +03:00
Aliaksandr Valialkin
f334908c22 app/vmselect/promql: adjust the data model to the model used in Prometheus
Do not take into account data points on the range `[timestamp .. timestamp+step)`
when calculating value on the given `timestamp`.
Use only data points from the past when performing these calculations like Prometheus does.

This should reduce discrepancies between results returned by VictoriaMetrics
and results returned by Prometheus.

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/72
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/71
2019-06-21 21:55:25 +03:00
Aliaksandr Valialkin
0fc4cb67dc deployment: update docker images 2019-06-21 13:39:45 +03:00
Aliaksandr Valialkin
837e349b7d app/vmselect/promql: do not strip __name__ from time series after binary comparison operation
Example:

  foo > 10

Would leave the `foo` name for all the matching time series on the left.
2019-06-21 13:08:02 +03:00
Aliaksandr Valialkin
9164c223ec all: initial stubs for Windows support; see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/70 2019-06-20 20:07:41 +03:00
Aliaksandr Valialkin
786beb8fc8 Makefile: enable golangci-lint in make check_all 2019-06-20 15:00:58 +03:00
Aliaksandr Valialkin
9cac11db64 lib/storage: typo fixes found by golangci-lint; updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/69 2019-06-20 14:38:45 +03:00
Aliaksandr Valialkin
7778030f9f lib/netutil: remove unused TCPListener.name; updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/69 2019-06-20 14:36:19 +03:00
Aliaksandr Valialkin
e84b7641ef app/vmselect/promql: remove unused func keepLastValue; updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/69 2019-06-20 14:35:19 +03:00
Aliaksandr Valialkin
db042bf6d6 app/vmselect/promql: typo fix; updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/69 2019-06-20 14:33:52 +03:00
Aliaksandr Valialkin
dec2bdf89f Makefile: add make golangci-lint rule for running golangci-lint run; updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/69 2019-06-20 14:32:34 +03:00
Aliaksandr Valialkin
3838d224d5 app/vminsert/opentsdb: remove unused const maxReadPacketSize; update https://github.com/VictoriaMetrics/VictoriaMetrics/issues/69 2019-06-20 14:30:02 +03:00
Aliaksandr Valialkin
a3a53647ba app/vmselect/prometheus: return better error messages on missing args to /api/v1/* 2019-06-20 14:07:44 +03:00
Aliaksandr Valialkin
a0c22a6830 app/vmstorage: add vm_cache_entries{type="storage/hour_metric_ids"} metric for tracking active time series count 2019-06-19 18:37:38 +03:00
Aliaksandr Valialkin
08e255a206 README.md: add link to source codes for cluster branch 2019-06-19 17:56:56 +03:00
Aliaksandr Valialkin
24ae3ef532 lib/prompb: remove superfluous bytes copying in ReadSnappy 2019-06-18 21:02:02 +03:00
Aliaksandr Valialkin
d4ed6189d4 app/vminsert/graphite: allow skipping timestamps in Graphite plaintext protocol
In this case VictoriaMetrics uses the ingestion time as a timestamp.
2019-06-18 19:05:46 +03:00
Aliaksandr Valialkin
7b93da5b57 vendor: update golang.org/x/sys 2019-06-18 16:20:09 +03:00
Aliaksandr Valialkin
2ebcd0c98b deployment: update docker images 2019-06-18 13:36:42 +03:00
Aliaksandr Valialkin
e40224d5de lib/flagutil: add NewArray helper func 2019-06-18 10:44:09 +03:00
Aliaksandr Valialkin
02417071cd README.md: use link to Wikipedia about broken gossip protocol instead of a link to document about Gossip protocol removal from Thanos
Thanos removed the non-working gossip protocol a few months ago - https://github.com/improbable-eng/thanos/issues/734 ,
so the link to the design document https://github.com/improbable-eng/thanos/blob/master/docs/proposals/approved/201809_gossip-removal.md
became unavailable. Use a link to the Wikipedia article instead.

Closes https://github.com/VictoriaMetrics/VictoriaMetrics/pull/68
2019-06-17 19:10:29 +03:00
Aliaksandr Valialkin
3b16d49514 app/vminsert/influx: add -influxSkipSingleField flag for using {measurement} instead of {measurement}{separator}{field_name} for Influx lines with a single field
Fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/66
2019-06-17 19:05:46 +03:00
Aliaksandr Valialkin
5f0b3589b2 app/vminsert/influx: add -influxMeasurementFieldSeparator flag for the ability to change separator for {measurement}{separator}{field_name} metric name 2019-06-14 09:57:13 +03:00
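For example, a hypothetical Influx line

    cpu,host=h1 usage=0.5

is ingested as `cpu_usage` with the default `_` separator, as `cpu.usage` with `-influxMeasurementFieldSeparator=.`, and as just `cpu` when `-influxSkipSingleField` is set, since the line contains a single field.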
Aliaksandr Valialkin
14edd122a6 deployment/docker: switch builder from go1.12.5 to go1.12.6 2019-06-14 09:31:56 +03:00
Aliaksandr Valialkin
f9e1d32168 lib/storage: persist metric ids for the current and the previous hour on graceful shutdown
This should improve performance after restart when the db contains a lot of time series
with high time series churn (e.g. metrics from Kubernetes with many pods and frequent deployments)
2019-06-14 07:55:09 +03:00
Aliaksandr Valialkin
ba3cccd471 deployment: update docker images 2019-06-12 23:31:06 +03:00
Aliaksandr Valialkin
947bc16f8c app/vmselect/promql: use dynamic limit on memory for concurrent queries 2019-06-12 23:18:23 +03:00
Aliaksandr Valialkin
fe1b33ef1a README.md: mention that accountID is known as tenant 2019-06-12 21:32:10 +03:00
Aliaksandr Valialkin
8567e3463d app/vmselect/promql: merge non-overlapping duplicate time series in group_left and group_right joins 2019-06-12 20:33:01 +03:00
Aliaksandr Valialkin
345ecc37b6 deployment: update docker images 2019-06-12 18:36:17 +03:00
Aliaksandr Valialkin
88005237f4 app/vmselect/promql: swap binary operation with modifier in the error message for improved readability 2019-06-12 17:14:33 +03:00
Aliaksandr Valialkin
a71381ad2a app/vmselect/promql: list a sample of duplicate time series in the error message for group_left or group_right
This should improve troubleshooting for complex queries involving `group_left` and `group_right` modifiers.
2019-06-12 16:57:34 +03:00
Aliaksandr Valialkin
b0b93e3d50 lib/fs: sync parent dir in MustRemoveAll only if it exists
The parent directory may not exist when the deleted directory
didn't exist before the MustRemoveAll call
2019-06-12 02:16:15 +03:00
Aliaksandr Valialkin
18d6f293f7 lib/fs: consolidate *RemoveAll* funcs into a single MustRemoveAll func
The func syncs parent dir in order to persist directory removal
in the event of power loss
2019-06-12 01:55:18 +03:00
Aliaksandr Valialkin
28d9904efc lib/fs: panic with fatal error when directories cannot be removed
Unremoved directories may lead to an inconsistent data directory,
so VictoriaMetrics will fail to start next time.

So panic on the first error when trying to remove a directory in order
to simplify the recovery process.
2019-06-12 01:20:10 +03:00
Aliaksandr Valialkin
d897bc3f08 lib/fs: attempt #2 to work around NFS issue with directory removal
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/61
2019-06-12 01:07:29 +03:00
Aliaksandr Valialkin
f165500225 vendor: update github.com/VictoriaMetrics/fastcache to v1.5.1 2019-06-11 23:57:15 +03:00
Aliaksandr Valialkin
d1ca2e5a2d deployment: update docker images 2019-06-11 23:22:51 +03:00
Aliaksandr Valialkin
51e2e255a6 lib/fs: consistency renaming SyncPath -> MustSyncPath, since it doesn't return an error 2019-06-11 23:13:45 +03:00
Aliaksandr Valialkin
3fa4c28f6b lib/fs: make sure the created directory remains visible in the fs in the event of power loss
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/63
2019-06-11 23:08:17 +03:00
Aliaksandr Valialkin
0b7f751f60 lib/fs: use filepath.Dir instead of filepath.Split, since the filename is unused 2019-06-11 22:54:23 +03:00
Aliaksandr Valialkin
cb9e746484 deployment: update docker images 2019-06-11 22:02:08 +03:00
Aliaksandr Valialkin
b491045a4b lib/{storage,mergeset}: sync filenames inside part when finalizing the part
Fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/63
2019-06-11 21:51:19 +03:00
Aliaksandr Valialkin
3437c30180 all: try harder at removing a directory with contents
Fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/61
2019-06-11 01:58:08 +03:00
Aliaksandr Valialkin
f2a8599908 deployment: update docker images 2019-06-11 01:09:52 +03:00
Aliaksandr Valialkin
eea7da8e0c app/vmselect/promql: prevent count_values from exploding the number of timeseries, which could result in OOM 2019-06-11 01:03:18 +03:00
Aliaksandr Valialkin
e87a602209 app/vmselect/promql: skip superfluous timestamp copying in count_values 2019-06-11 00:44:09 +03:00
Aliaksandr Valialkin
ec84febc1c app/vmselect/promql: remove superfluous timeseries copy in histogram_quantile func 2019-06-11 00:39:35 +03:00
Aliaksandr Valialkin
1fab34fb5c app/vmselect/promql: remove superfluous timeseries copy in union func 2019-06-11 00:35:09 +03:00
Aliaksandr Valialkin
a6f368499d app/vmselect/promql: skip NaN values in count_values func 2019-06-10 22:42:41 +03:00
Aliaksandr Valialkin
2d7165033a deployment: update docker images 2019-06-10 20:38:18 +03:00
Aliaksandr Valialkin
945894e049 app/vmselect: properly handle empty label (aka __name__) in LabelEntries handler 2019-06-10 19:55:02 +03:00
Aliaksandr Valialkin
75a0acf72d app/vmselect: add /api/v1/labels/count handler for quick detection of labels with the maximum number of distinct values 2019-06-10 19:54:55 +03:00
Aliaksandr Valialkin
547bcdce63 app/vmstorage: enable compression of responses to vmselect by default
This should save vmstorage => vmselect network bandwidth in the common case
when recently added data is queried.
2019-06-10 14:54:59 +03:00
Aliaksandr Valialkin
0ccedbdfd2 lib/storage: mention the accountID and projectID in error message when filtering out other (accountID, projectID) entries 2019-06-10 14:43:53 +03:00
Aliaksandr Valialkin
d54f5fec0b lib/storage: skip adaptive searching for the tag filter matching the minimum number of metrics if the identical previous search didn't find such a filter
This should improve speed for searching metrics among a high number of time series
with a high churn rate like in big Kubernetes clusters with frequent deployments.
2019-06-10 14:07:47 +03:00
Aliaksandr Valialkin
27e50e86f4 lib/storage: factor out getTagFilterWithMinMetricIDsCountAdaptive from updateMetricIDsForTagFilters 2019-06-10 13:26:00 +03:00
Aliaksandr Valialkin
b69d3dbd0c lib/storage: filter out metricIDs from another (AccountID, ProjectID) in getMetricIDsForRecentHours 2019-06-10 13:05:16 +03:00
Aliaksandr Valialkin
3059ae7be0 lib/storage: give clearer names to more functions 2019-06-10 12:59:33 +03:00
Aliaksandr Valialkin
d3a024d2d6 lib/storage: give clearer names to functions 2019-06-10 12:50:22 +03:00
Aliaksandr Valialkin
00e0760608 lib/storage: test GetSeriesCount 2019-06-10 12:40:33 +03:00
Aliaksandr Valialkin
e4cba5a7ed lib/storage: make getSeriesCount func an indexSearch method 2019-06-10 12:29:24 +03:00
Aliaksandr Valialkin
4c3913290a app/vmstorage: add missing _total suffixes to newly added metrics 2019-06-09 22:11:41 +03:00
Aliaksandr Valialkin
d882afa905 lib/storage: optimize time series lookup for recent hours when the db contains many millions of time series with high churn rate (aka frequent deployments in Kubernetes) 2019-06-09 19:14:04 +03:00
Aliaksandr Valialkin
5fcdb4a59a app/vminsert: improve handling of unhealthy vmstorage nodes
* Spread load evenly among remaining healthy nodes instead of hammering
  the next node after the unhealthy node.
* Make sure that the packet is flushed to storage node before returning success.
  Previously packets could stay in local buffers and thus be lost on connection errors.
* Keep rows in limited memory when all the storage nodes are unhealthy.
2019-06-09 00:42:36 +03:00
Aliaksandr Valialkin
0f64673327 app/vminsert/concurrencylimiter: typo fix in the error message 2019-06-08 22:43:56 +03:00
Aliaksandr Valialkin
89a113cb5d app/vminsert: really fix #60
ReadLinesBlock may accept dstBuf with non-zero length. In this case the last line without trailing newline isn't read.
Fix this by comparing len(dstBuf) to 0 instead of its original length.
2019-06-07 23:40:10 +03:00
Aliaksandr Valialkin
e1c45b314a app/vminsert: properly read trailing line without newline in the end
This fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/60
2019-06-07 23:18:34 +03:00
Aliaksandr Valialkin
8cf0a0e59c app/vminsert: split vm_rows_inserted_total into per-(accountID, projectID) metrics
Fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/59
2019-06-07 22:11:20 +03:00
Aliaksandr Valialkin
8b2a6c6182 deployment: update docker images 2019-06-07 11:58:33 +03:00
Aliaksandr Valialkin
30c7652bad deployment/docker: move cluster docker images from valyala/vm* to victoriametrics/vm* docker hub path 2019-06-07 11:55:37 +03:00
Aliaksandr Valialkin
41d087662c deployment: update docker image 2019-06-07 11:40:54 +03:00
Aliaksandr Valialkin
913f888d0c app/vmselect/promql: properly handle {__name__ op "string"} queries
This has been broken in 7294ef333ad26f4f6578b783e97649e58b1f8945 .
2019-06-07 02:02:09 +03:00
Seua Polyakov
5e51ce386e Add SIGINT as STOPSIGNAL to the Dockerfile (#54)
Add SIGINT as STOPSIGNAL to the Dockerfile. You can find more here: https://docs.docker.com/engine/reference/builder/#usage
With this change, the main process inside the container will receive SIGINT, and after a grace period, SIGKILL.

(cherry picked from commit f4e63cd070)
2019-06-06 22:38:48 +03:00
Aliaksandr Valialkin
11979e4d85 app/vmselect/prometheus: report about incorrect time or duration instead of silently using the default value
This should prevent incorrect usage of the querying API.

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/52
2019-06-06 22:17:15 +03:00
Aliaksandr Valialkin
5f2aa4539a app/vminsert: add multi-tenancy support for OpenTSDB and Graphite ingestion via custom tags
* VictoriaMetrics_AccountID tag may be used for setting AccountID
* VictoriaMetrics_ProjectID tag may be used for setting ProjectID
2019-06-06 18:07:30 +03:00
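For example, a Graphite plaintext line with these tags (values are illustrative):

    foo.bar;VictoriaMetrics_AccountID=42;VictoriaMetrics_ProjectID=7 10.5 1560000000

routes the sample to accountID=42, projectID=7.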
Aliaksandr Valialkin
c98582695f deployment: update docker images 2019-06-06 17:39:31 +03:00
Aliaksandr Valialkin
8f4790625d app/vmselect/promql: return the correct time series from quantile
Previously arbitrary time series could be returned from `quantile`
depending on sort order for the last data point in the selected range.

Fix this by returning the calculated time series.

Fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/55
2019-06-06 17:33:53 +03:00
Aliaksandr Valialkin
2ff0d595b0 app/vmselect/promql: add -search.disableCache flag for disabling response caching
This may be useful for data back-filling, when the response caching
could interfere badly with newly added data points with timestamps
in the past.
2019-06-04 17:30:41 +03:00
Aliaksandr Valialkin
595a421295 deployment: update docker images 2019-06-03 23:00:10 +03:00
Aliaksandr Valialkin
ba58af9d8c app/vminsert/influx: take into account all the tags for consistent hash calculations 2019-06-03 22:54:21 +03:00
Aliaksandr Valialkin
db21d46417 app/vminsert: emulate influx/query request, which is required for TSBS benchmark 2019-06-03 18:39:46 +03:00
Aliaksandr Valialkin
8ad0fb5689 deployment: update docker images 2019-06-03 18:21:18 +03:00
Aliaksandr Valialkin
31d6566aff app/vminsert: accept data on /insert/<accountID>/prometheus/api/v1/write 2019-06-03 18:18:09 +03:00
Aliaksandr Valialkin
c3d73e347c deployment/k8s/helm: update NOTES.txt 2019-06-03 17:53:36 +03:00
Aliaksandr Valialkin
cf75d1f0fc README.md: mention that unused snapshots must be deleted 2019-06-03 17:26:35 +03:00
Aliaksandr Valialkin
a06b7f7f84 app/vmselect/netstorage: remove spammy error message when certain vmstorage nodes are unavailable during query execution
The number of partial responses may be tracked via the `vm_partial_search_results_total` metric.
2019-06-03 17:09:50 +03:00
Aliaksandr Valialkin
1d87abc8eb lib/procutil: typo fix in comment to WaitForSigterm 2019-06-03 16:54:37 +03:00
Aliaksandr Valialkin
a2986cde70 lib/storage: tune updating a map with today's metric ids
- Increase update interval from 1s to 10s. This should reduce CPU usage
  for large amounts of metric ids with constant churn.
- Reduce pendingTodayMetricIDsLock lock duration during the update.
2019-06-02 22:00:13 +03:00
Aliaksandr Valialkin
e27fd5148a lib/storage: speed up checking metricID existence in the list for the current date 2019-06-02 18:34:20 +03:00
Aliaksandr Valialkin
d7bafde77e vendor: update deps with make vendor-update 2019-06-01 23:40:56 +03:00
Aliaksandr Valialkin
53242105fb app/vmselect/promql: allow escaping identifiers with \ and \xXX
Fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/42
2019-05-31 17:35:54 +03:00
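For example (illustrative selector):

    temperature\-outdoor{city="NY"}

where `\-` makes the dash a part of the metric name.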
Aliaksandr Valialkin
25269682c2 deployment: update docker images 2019-05-29 17:44:14 +03:00
Aliaksandr Valialkin
950310d1c3 Add sections about replication and backups 2019-05-29 13:14:01 +03:00
Aliaksandr Valialkin
ee776ca8fc app/vminsert: add -maxConcurrentInserts command-line flag for limiting the number of concurrent inserts 2019-05-29 12:40:22 +03:00
Aliaksandr Valialkin
a1289d7343 Makefile: run go vet with -mod=vendor in order to disable downloading vendored deps 2019-05-29 01:38:24 +03:00
Aliaksandr Valialkin
a4ec139a4a app/vminsert: reduce memory usage for Influx, Graphite and OpenTSDB protocols
Do not buffer per-connection data and just store it as it arrives
2019-05-28 18:47:52 +03:00
Aliaksandr Valialkin
a6d02ff275 lib/timerpool: use timer pool in concurrency limiters
This should reduce the number of memory allocations in highly loaded systems
2019-05-28 17:30:10 +03:00
Aliaksandr Valialkin
6e90aaeb8c Makefile: add -mod=vendor to go test, so tests use external deps from vendor folder 2019-05-27 00:35:59 +03:00
Aliaksandr Valialkin
3b52adaf3f Makefile: pass GO111MODULE=on to all the go invocations 2019-05-26 23:23:21 +03:00
Aliaksandr Valialkin
c944de68cd vendor: update dependencies with make vendor-update 2019-05-26 23:18:42 +03:00
Aliaksandr Valialkin
b7a91d6ba7 app/vmselect: update comment according to the updated code 2019-05-26 22:39:09 +03:00
Aliaksandr Valialkin
15d1e15ae6 app/vminsert/influx: try converting string values to numeric values, since Influx agents may send numeric values as strings
Fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/34
2019-05-26 22:12:55 +03:00
Aliaksandr Valialkin
a2c71f18a3 app/vmselect/promql: misspelling fix 2019-05-25 21:53:48 +03:00
Aliaksandr Valialkin
bdf696ef18 all: fix misspellings 2019-05-25 21:51:24 +03:00
Aliaksandr Valialkin
121a920a18 Makefile: add -s flag to go fmt in make fmt command 2019-05-25 21:44:36 +03:00
Aliaksandr Valialkin
a10d27eccd README.md: mention that new vmstorage node must have the same -retentionPeriod as the previous nodes 2019-05-25 17:18:44 +03:00
Aliaksandr Valialkin
c254adba7c README.md: mention that VictoriaMetrics is scalable 2019-05-25 17:09:15 +03:00
Aliaksandr Valialkin
affeb677cc README.md: mention that the majority of users should use single-node version instead of cluster version 2019-05-25 14:09:17 +03:00
Aliaksandr Valialkin
2ff996e276 app/vmselect: log slow queries if their execution time exceeds -search.logSlowQueryDuration 2019-05-24 16:14:46 +03:00
Aliaksandr Valialkin
628708ad76 app/vmselect: consume resultsCh data in exportHandler if writeResponseFunc failed to consume it 2019-05-24 14:54:54 +03:00
Aliaksandr Valialkin
209ad975ae deployment: update docker images 2019-05-24 13:16:20 +03:00
Aliaksandr Valialkin
9b64dfee4b lib/encoding: add vm_zstd_block_{compress|decompress}_calls_total for determining the number of CompressZSTD / DecompressZSTD calls 2019-05-24 13:01:15 +03:00
Aliaksandr Valialkin
364f4ec3bb all: remove -p XXXX:XXXX from docker run options, since it is unnecessary if --net=host is set 2019-05-24 12:53:12 +03:00
Aliaksandr Valialkin
f37903adb3 app/vminsert: add -rpc.disableCompression command-line flag for reducing CPU usage at the cost of higher network bandwidth usage 2019-05-24 12:51:07 +03:00
Aliaksandr Valialkin
b23352dc9e lib/encoding: add vm_zstd_block_{original|compressed}_bytes_total metrics for rough estimation of block compression ratio 2019-05-24 12:34:51 +03:00
Aliaksandr Valialkin
f67f40d63a lib/encoding: substitute CompressZSTD with CompressZSTDLevel 2019-05-24 12:32:49 +03:00
Aliaksandr Valialkin
a26e774eca lib/httpserver: add -http.disableResponseCompression flag, which may help saving CPU resources at the cost of higher network bandwidth usage 2019-05-24 12:20:36 +03:00
Aliaksandr Valialkin
8e3eb5b39d app/vmselect/promql: add alias(q, name) function that sets the given name to all the time series in q 2019-05-24 02:42:10 +03:00
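For example:

    alias(up, "instance_availability")

returns the `up` series renamed to `instance_availability`.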
Aliaksandr Valialkin
820cdae88d lib/decimal: add a comment explaining weird code in maxUpExponent. Fixes #29 2019-05-23 17:18:50 +03:00
Aliaksandr Valialkin
bb048937bc app/vmselect/promql: add label_transform(q, label, regexp, replacement) function for replacing all the occurrences of regexp with replacement in the given label for q 2019-05-23 16:26:07 +03:00
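For example (label values are illustrative):

    label_transform(up, "instance", "-", "_")

replaces all dashes with underscores in the `instance` label.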
Aliaksandr Valialkin
54346de548 README.md: typo fix 2019-05-23 02:25:54 +03:00
Aliaksandr Valialkin
b98789ae9f README.md: mention that VictoriaMetrics is a high-performance, cost-effective TSDB 2019-05-23 00:41:08 +03:00
Aliaksandr Valialkin
24578b4bb1 all: open-sourcing cluster version 2019-05-23 00:25:38 +03:00
1439 changed files with 431792 additions and 40413 deletions

.github/workflows/main.yml (vendored, new file, 41 changed lines)

@@ -0,0 +1,41 @@
name: main
on:
- push
- pull_request
jobs:
  build:
    name: Build
    runs-on: ubuntu-latest
    steps:
    - name: Setup Go
      uses: actions/setup-go@v1
      with:
        go-version: 1.13
      id: go
    - name: Code checkout
      uses: actions/checkout@v1
    - name: Dependencies
      env:
        GO111MODULE: off
      run: |
        go get -v golang.org/x/lint/golint
        go get -u github.com/kisielk/errcheck
    - name: Build
      env:
        GO111MODULE: on
      run: |
        export PATH=$PATH:$(go env GOPATH)/bin # temporary fix. See https://github.com/actions/setup-go/issues/14
        make check-all
        git diff --exit-code
        make test-full
        make test-pure
        make test-full-386
        make vminsert vmselect vmstorage
        make vminsert-pure vmselect-pure vmstorage-pure
        make vmutils
        GOOS=freebsd go build -mod=vendor ./app/vminsert
        GOOS=freebsd go build -mod=vendor ./app/vmselect
        GOOS=freebsd go build -mod=vendor ./app/vmstorage
        GOOS=darwin go build -mod=vendor ./app/vminsert
        GOOS=darwin go build -mod=vendor ./app/vmselect
        GOOS=darwin go build -mod=vendor ./app/vmstorage

.gitignore (vendored, 7 changed lines)

@@ -9,3 +9,10 @@
/victoria-metrics-data
/vmstorage-data
/vmselect-cache
.DS_Store
### terraform
terraform.tfstate
terraform.tfstate.*
.terraform/

Makefile (102 changed lines)

@@ -1,7 +1,7 @@
PKG_PREFIX := github.com/VictoriaMetrics/VictoriaMetrics
BUILDINFO_TAG ?= $(shell echo $$(git describe --long --all | tr '/' '-')$$( \
    git diff-index --quiet HEAD -- || echo '-dirty-'$$(git diff-index -u HEAD | sha1sum | grep -oP '^.{8}')))
    git diff-index --quiet HEAD -- || echo '-dirty-'$$(git diff-index -u HEAD | openssl sha1 | cut -c 10-17)))
PKG_TAG ?= $(shell git tag -l --points-at HEAD)
ifeq ($(PKG_TAG),)
@@ -11,7 +11,14 @@ endif
GO_BUILDINFO = -X '$(PKG_PREFIX)/lib/buildinfo.Version=$(APP_NAME)-$(shell date -u +'%Y%m%d-%H%M%S')-$(BUILDINFO_TAG)'
all: \
    victoria-metrics-prod
    vminsert \
    vmselect \
    vmstorage
all-pure: \
    vminsert-pure \
    vmselect-pure \
    vmstorage-pure
include app/*/Makefile
include deployment/*/Makefile
@@ -19,46 +26,113 @@ include deployment/*/Makefile
clean:
    rm -rf bin/*
release: victoria-metrics-prod
    cd bin && tar czf victoria-metrics-$(PKG_TAG).tar.gz victoria-metrics-prod
publish: \
    publish-vmstorage \
    publish-vmselect \
    publish-vminsert \
    publish-vmbackup \
    publish-vmrestore
package: \
    package-vmstorage \
    package-vmselect \
    package-vminsert \
    package-vmbackup \
    package-vmrestore
vmutils: \
    vmbackup \
    vmrestore
release: \
    release-vmcluster \
    release-vmutils
release-vmcluster: \
    vminsert-prod \
    vmselect-prod \
    vmstorage-prod
    cd bin && tar czf victoria-metrics-$(PKG_TAG).tar.gz vminsert-prod vmselect-prod vmstorage-prod && \
        sha256sum victoria-metrics-$(PKG_TAG).tar.gz > victoria-metrics-$(PKG_TAG)_checksums.txt
release-vmutils: \
    vmbackup-prod \
    vmrestore-prod
    cd bin && tar czf vmutils-$(PKG_TAG).tar.gz vmbackup-prod vmrestore-prod && \
        sha256sum vmutils-$(PKG_TAG).tar.gz > vmutils-$(PKG_TAG)_checksums.txt
pprof-cpu:
    go tool pprof -trim_path=github.com/VictoriaMetrics/VictoriaMetrics@ $(PPROF_FILE)
fmt:
    go fmt $(PKG_PREFIX)/lib/...
    go fmt $(PKG_PREFIX)/app/...
    GO111MODULE=on gofmt -l -w -s ./lib
    GO111MODULE=on gofmt -l -w -s ./app
vet:
    go vet $(PKG_PREFIX)/lib/...
    go vet $(PKG_PREFIX)/app/...
    GO111MODULE=on go vet -mod=vendor ./lib/...
    GO111MODULE=on go vet -mod=vendor ./app/...
lint: install-golint
    golint lib/...
    golint app/...
install-golint:
    which golint || GO111MODULE=off go get -u github.com/golang/lint/golint
    which golint || GO111MODULE=off go get -u golang.org/x/lint/golint
errcheck: install-errcheck
    errcheck -exclude=errcheck_excludes.txt ./lib/...
    errcheck -exclude=errcheck_excludes.txt ./app/vminsert/...
    errcheck -exclude=errcheck_excludes.txt ./app/vmselect/...
    errcheck -exclude=errcheck_excludes.txt ./app/vmstorage/...
    errcheck -exclude=errcheck_excludes.txt ./app/vmbackup/...
    errcheck -exclude=errcheck_excludes.txt ./app/vmrestore/...
install-errcheck:
    which errcheck || GO111MODULE=off go get -u github.com/kisielk/errcheck
check-all: fmt vet lint errcheck golangci-lint
test:
    go test $(PKG_PREFIX)/lib/...
    GO111MODULE=on go test -mod=vendor ./lib/... ./app/...
test-pure:
    GO111MODULE=on CGO_ENABLED=0 go test -mod=vendor ./lib/... ./app/...
test-full:
    GO111MODULE=on go test -mod=vendor -coverprofile=coverage.txt -covermode=atomic ./lib/... ./app/...
test-full-386:
    GO111MODULE=on GOARCH=386 go test -tags=integration -mod=vendor -coverprofile=coverage.txt -covermode=atomic ./lib/... ./app/...
benchmark:
    go test -bench=. $(PKG_PREFIX)/lib/...
    GO111MODULE=on go test -mod=vendor -bench=. ./lib/...
    GO111MODULE=on go test -mod=vendor -bench=. ./app/...
benchmark-pure:
    GO111MODULE=on CGO_ENABLED=0 go test -mod=vendor -bench=. ./lib/...
    GO111MODULE=on CGO_ENABLED=0 go test -mod=vendor -bench=. ./app/...
vendor-update:
    go get -u
    go mod tidy
    go mod vendor
    GO111MODULE=on go get -u ./lib/...
    GO111MODULE=on go get -u ./app/...
    GO111MODULE=on go mod tidy
    GO111MODULE=on go mod vendor
app-local:
    CGO_ENABLED=1 GO111MODULE=on go build $(RACE) -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/$(APP_NAME)$(RACE) $(PKG_PREFIX)/app/$(APP_NAME)
app-local-pure:
    CGO_ENABLED=0 GO111MODULE=on go build $(RACE) -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/$(APP_NAME)-pure$(RACE) $(PKG_PREFIX)/app/$(APP_NAME)
quicktemplate-gen: install-qtc
    qtc
install-qtc:
    which qtc || GO111MODULE=off go get -u github.com/valyala/quicktemplate/qtc
golangci-lint: install-golangci-lint
    golangci-lint run --exclude '(SA4003|SA1019):' -D errcheck -D structcheck
install-golangci-lint:
    which golangci-lint || GO111MODULE=off go get -u github.com/golangci/golangci-lint/cmd/golangci-lint

577
README.md

@@ -1,386 +1,278 @@
<img text-align="center" alt="Victoria Metrics" src="logo.png">
<img alt="Victoria Metrics" src="logo.png" height="200px">
## Single-node VictoriaMetrics
# Cluster version
[![Latest Release](https://img.shields.io/github/release/VictoriaMetrics/VictoriaMetrics.svg?style=flat-square)](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest)
VictoriaMetrics is fast, cost-effective and scalable time series database. It can be used as a long-term remote storage for Prometheus.
VictoriaMetrics is a long-term remote storage for Prometheus.
It is available in [binary releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases),
[docker images](https://hub.docker.com/r/valyala/victoria-metrics/) and
in [source code](https://github.com/VictoriaMetrics/VictoriaMetrics).
It is recommended to use the [single-node version](https://github.com/VictoriaMetrics/VictoriaMetrics) instead of the cluster version
for ingestion rates lower than 10 million data points per second.
The single-node version [scales perfectly](https://medium.com/@valyala/measuring-vertical-scalability-for-time-series-databases-in-google-cloud-92550d78d8ae)
with the number of CPU cores, RAM and available storage space.
The single-node version is easier to configure and operate compared to the cluster version, so think twice before choosing the cluster version.
Cluster version is available [here](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/cluster).
Join [our Slack](http://slack.victoriametrics.com/) or [contact us](mailto:info@victoriametrics.com) with consulting and support questions.
## Prominent features
* Supports [Prometheus querying API](https://prometheus.io/docs/prometheus/latest/querying/api/), so it can be used as a drop-in Prometheus replacement in Grafana.
Additionally, VictoriaMetrics extends PromQL with opt-in [useful features](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/ExtendedPromQL).
* High performance and good scalability for both [inserts](https://medium.com/@valyala/high-cardinality-tsdb-benchmarks-victoriametrics-vs-timescaledb-vs-influxdb-13e6ee64dd6b)
and [selects](https://medium.com/@valyala/when-size-matters-benchmarking-victoriametrics-vs-timescale-and-influxdb-6035811952d4).
[Outperforms InfluxDB and TimescaleDB by up to 20x](https://medium.com/@valyala/measuring-vertical-scalability-for-time-series-databases-in-google-cloud-92550d78d8ae).
* [Uses 10x less RAM than InfluxDB](https://medium.com/@valyala/insert-benchmarks-with-inch-influxdb-vs-victoriametrics-e31a41ae2893) when working with millions of unique time series (aka high cardinality).
* High data compression, so [up to 70x more data points](https://medium.com/@valyala/when-size-matters-benchmarking-victoriametrics-vs-timescale-and-influxdb-6035811952d4)
may be crammed into limited storage compared to TimescaleDB.
* Optimized for storage with high-latency IO and low iops (HDD and network storage in AWS, Google Cloud, Microsoft Azure, etc). See [graphs from these benchmarks](https://medium.com/@valyala/high-cardinality-tsdb-benchmarks-victoriametrics-vs-timescaledb-vs-influxdb-13e6ee64dd6b).
* A single-node VictoriaMetrics may substitute moderately sized clusters built with competing solutions such as Thanos, Uber M3, Cortex, InfluxDB or TimescaleDB.
See [vertical scalability benchmarks](https://medium.com/@valyala/measuring-vertical-scalability-for-time-series-databases-in-google-cloud-92550d78d8ae).
* Easy operation:
* VictoriaMetrics consists of a single executable without external dependencies.
* All the configuration is done via explicit command-line flags with reasonable defaults.
* All the data is stored in a single directory pointed by `-storageDataPath` flag.
* Easy backups from [instant snapshots](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282).
* Storage is protected from corruption on unclean shutdown (i.e. hardware reset or `kill -9`) thanks to [the storage architecture](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282).
* Supports metrics' ingestion and backfilling via the following protocols:
* [Prometheus remote write API](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#remote_write)
* [InfluxDB line protocol](https://docs.influxdata.com/influxdb/v1.7/write_protocols/line_protocol_tutorial/)
* [Graphite plaintext protocol](https://graphite.readthedocs.io/en/latest/feeding-carbon.html) with [tags](https://graphite.readthedocs.io/en/latest/tags.html#carbon)
if `-graphiteListenAddr` is set.
* [OpenTSDB put message](http://opentsdb.net/docs/build/html/api_telnet/put.html) if `-opentsdbListenAddr` is set.
* Works ideally with large amounts of time series data from IoT sensors, connected car sensors and industrial sensors.
* Has open source [cluster version](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/cluster).
- Supports all the features of [single-node version](https://github.com/VictoriaMetrics/VictoriaMetrics).
- Performance and capacity scales horizontally.
- Supports multiple independent namespaces for time series data (aka multi-tenancy).
## Operation
## Architecture overview
VictoriaMetrics cluster consists of the following services:
- `vmstorage` - stores the data
- `vminsert` - proxies the ingested data to `vmstorage` shards using consistent hashing
- `vmselect` - performs incoming queries using the data from `vmstorage`
Each service may scale independently and may run on the most suitable hardware.
`vmstorage` nodes don't know about each other, don't communicate with each other and don't share any data.
This is [shared nothing architecture](https://en.wikipedia.org/wiki/Shared-nothing_architecture).
It increases cluster availability, simplifies cluster maintenance and cluster scaling.
<img src="https://docs.google.com/drawings/d/e/2PACX-1vTvk2raU9kFgZ84oF-OKolrGwHaePhHRsZEcfQ1I_EC5AB_XPWwB392XshxPramLJ8E4bqptTnFn5LL/pub?w=1104&amp;h=746">
### Table of contents
## Binaries
* [How to build from sources](#how-to-build-from-sources)
* [How to start VictoriaMetrics](#how-to-start-victoriametrics)
* [Prometheus setup](#prometheus-setup)
* [Grafana setup](#grafana-setup)
* [How to send data from InfluxDB-compatible agents such as Telegraf](#how-to-send-data-from-influxdb-compatible-agents-such-as-telegraf)
* [How to send data from Graphite-compatible agents such as StatsD](#how-to-send-data-from-graphite-compatible-agents-such-as-statsd)
* [How to send data from OpenTSDB-compatible agents](#how-to-send-data-from-opentsdb-compatible-agents)
* [How to apply new config / upgrade VictoriaMetrics](#how-to-apply-new-config--upgrade-victoriametrics)
* [How to work with snapshots](#how-to-work-with-snapshots)
* [How to delete time series](#how-to-delete-time-series)
* [How to export time series](#how-to-export-time-series)
* [Federation](#federation)
* [Capacity planning](#capacity-planning)
* [High Availability](#high-availability)
* [Multiple retentions](#multiple-retentions)
* [Scalability and cluster version](#scalability-and-cluster-version)
* [Security](#security)
* [Tuning](#tuning)
* [Monitoring](#monitoring)
* [Troubleshooting](#troubleshooting)
* [Community and contributions](#community-and-contributions)
* [Reporting bugs](#reporting-bugs)
Compiled binaries for cluster version are available in the `assets` section of [releases page](https://github.com/VictoriaMetrics/VictoriaMetrics/releases).
See the archives whose names contain the word `cluster`.
Docker images for cluster version are available here:
- `vminsert` - https://hub.docker.com/r/victoriametrics/vminsert/tags
- `vmselect` - https://hub.docker.com/r/victoriametrics/vmselect/tags
- `vmstorage` - https://hub.docker.com/r/victoriametrics/vmstorage/tags
### How to build from sources
## Building from sources
We recommend using either [binary releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases) or
[docker images](https://hub.docker.com/r/valyala/victoria-metrics/) instead of building VictoriaMetrics
from sources. Building from sources is reasonable when developing additional features specific
to your needs.
Source code for cluster version is available at [cluster branch](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/cluster).
#### Development build
### Development Builds
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.12.
2. Run `go build ./app/victoria-metrics` from the root folder of the repository.
It will build `victoria-metrics` binary in the root folder of the repository.
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.12.
2. Run `make` from the repository root. It should build `vmstorage`, `vmselect`
and `vminsert` binaries and put them into the `bin` folder.
#### Production build
1. [Install docker](https://docs.docker.com/install/).
2. Run `make victoria-metrics-prod` from the root folder of the repository.
It will build `victoria-metrics-prod` binary and put it into the `bin` folder.
### Production builds
#### Building docker images
There is no need to install Go on the host system, since binaries are built
inside [the official docker container for Go](https://hub.docker.com/_/golang).
This enables reproducible builds.
So [install docker](https://docs.docker.com/install/) and run the following command:
```
make vminsert-prod vmselect-prod vmstorage-prod
```
Production binaries are built into statically linked binaries for `GOARCH=amd64`, `GOOS=linux`.
They are put into `bin` folder with `-prod` suffixes:
```
$ make vminsert-prod vmselect-prod vmstorage-prod
$ ls -1 bin
vminsert-prod
vmselect-prod
vmstorage-prod
```
### Building docker images
Run `make package`. It will build the following docker images locally:
* `victoriametrics/vminsert:<PKG_TAG>`
* `victoriametrics/vmselect:<PKG_TAG>`
* `victoriametrics/vmstorage:<PKG_TAG>`
Run `make package-victoria-metrics`. It will build `valyala/victoria-metrics:<PKG_TAG>` docker image locally.
`<PKG_TAG>` is an auto-generated image tag, which depends on the source code in the repository.
The `<PKG_TAG>` may be manually set via `PKG_TAG=foobar make package`.
### How to start VictoriaMetrics
## Operation
Just start VictoriaMetrics executable or docker image with the desired command-line flags.
### Cluster setup
The following command line flags are used the most:
A minimal cluster must contain the following nodes:
* `-storageDataPath` - path to data directory. VictoriaMetrics stores all the data in this directory.
* `-retentionPeriod` - retention period in months for the data. Older data is automatically deleted.
* `-httpListenAddr` - TCP address to listen to for http requests. By default it listens port `8428` on all the network interfaces.
* `-graphiteListenAddr` - TCP and UDP address to listen to for Graphite data. By default it is disabled.
* `-opentsdbListenAddr` - TCP and UDP address to listen to for OpenTSDB data. By default it is disabled.
* a single `vmstorage` node with `-retentionPeriod` and `-storageDataPath` flags
* a single `vminsert` node with `-storageNode=<vmstorage_host>:8400`
* a single `vmselect` node with `-storageNode=<vmstorage_host>:8401`
Pass `-help` to see all the available flags with description and default values.
It is recommended to run at least two nodes for each service
for high availability purposes.
An HTTP load balancer must be put in front of `vminsert` and `vmselect` nodes:
- requests starting with `/insert` must be routed to port `8480` on `vminsert` nodes.
- requests starting with `/select` must be routed to port `8481` on `vmselect` nodes.
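For illustration only, here is a toy Go reverse proxy implementing these routing rules. The host names are placeholders; a real deployment would use a dedicated load balancer:
```
package main

import (
    "log"
    "net/http"
    "net/http/httputil"
    "net/url"
)

// proxyTo returns a reverse proxy forwarding requests to the given base URL.
func proxyTo(rawURL string) *httputil.ReverseProxy {
    u, err := url.Parse(rawURL)
    if err != nil {
        log.Fatalf("cannot parse %q: %s", rawURL, err)
    }
    return httputil.NewSingleHostReverseProxy(u)
}

func main() {
    mux := http.NewServeMux()
    // Route /insert requests to vminsert and /select requests to vmselect.
    mux.Handle("/insert/", proxyTo("http://vminsert-host:8480"))
    mux.Handle("/select/", proxyTo("http://vmselect-host:8481"))
    log.Fatal(http.ListenAndServe(":8080", mux))
}
```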
Ports may be altered by setting `-httpListenAddr` on the corresponding nodes.
### Prometheus setup
Add the following lines to Prometheus config file (it is usually located at `/etc/prometheus/prometheus.yml`):
```yml
remote_write:
  - url: http://<victoriametrics-addr>:8428/api/v1/write
    queue_config:
      max_samples_per_send: 10000
```
Substitute `<victoriametrics-addr>` with the hostname or IP address of VictoriaMetrics.
Then apply the new config via the following command:
```
kill -HUP `pidof prometheus`
```
Prometheus writes incoming data to local storage and to remote storage in parallel.
This means the data remains available in local storage for `--storage.tsdb.retention.time` duration
if remote storage stops working.
If you plan to send data to VictoriaMetrics from multiple Prometheus instances, then add the following lines into the `global` section
of [Prometheus config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#configuration-file):
```yml
global:
  external_labels:
    datacenter: dc-123
```
This instructs Prometheus to add `datacenter=dc-123` label to each time series sent to remote storage.
The label name may be arbitrary - `datacenter` is just an example. The label value must be unique
across Prometheus instances, so time series may be filtered and grouped by this label.
### Grafana setup
Create [Prometheus datasource](http://docs.grafana.org/features/datasources/prometheus/) in Grafana with the following Url:
```
http://<victoriametrics-addr>:8428
```
Substitute `<victoriametrics-addr>` with the hostname or IP address of VictoriaMetrics.
Then build graphs with the created datasource using [Prometheus query language](https://prometheus.io/docs/prometheus/latest/querying/basics/).
VictoriaMetrics supports native PromQL and [extends it with useful features](ExtendedPromQL).
### How to send data from InfluxDB-compatible agents such as [Telegraf](https://www.influxdata.com/time-series-platform/telegraf/)?
Just use the `http://<victoriametrics-addr>:8428` url instead of the InfluxDB url in agents' configs.
For instance, put the following lines into `Telegraf` config, so it sends data to VictoriaMetrics instead of InfluxDB:
```
[[outputs.influxdb]]
  urls = ["http://<victoriametrics-addr>:8428"]
```
Do not forget to substitute `<victoriametrics-addr>` with the real address where VictoriaMetrics runs.
VictoriaMetrics maps Influx data using the following rules:
* [`db` query arg](https://docs.influxdata.com/influxdb/v1.7/tools/api/#write-http-endpoint) is mapped into `db` label value
* Field names are mapped to time series names prefixed by `{measurement}.` value
* Field values are mapped to time series values
* Tags are mapped to Prometheus labels as-is
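For illustration, the following Go sketch (not the actual vminsert code) applies these rules to a single Influx line; escaping, timestamps and the `db` label are omitted, and the helper name is hypothetical:
```
package main

import (
    "fmt"
    "strings"
)

// mapInfluxLine illustrates the mapping rules above: every field becomes a
// time series named "<measurement>.<field>", while tags become labels.
func mapInfluxLine(line string) {
    parts := strings.SplitN(line, " ", 2)    // "measurement,tags" and "fields"
    head := strings.SplitN(parts[0], ",", 2) // split measurement from tags
    measurement, tags := head[0], ""
    if len(head) > 1 {
        tags = head[1]
    }
    for _, field := range strings.Split(parts[1], ",") {
        kv := strings.SplitN(field, "=", 2)
        fmt.Printf("%s.%s{%s} %s\n", measurement, kv[0], tags, kv[1])
    }
}

func main() {
    mapInfluxLine("cpu,host=server01 usage_idle=99.5,usage_user=0.5")
    // Output:
    // cpu.usage_idle{host=server01} 99.5
    // cpu.usage_user{host=server01} 0.5
}
```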
### How to send data from Graphite-compatible agents such as [StatsD](https://github.com/etsy/statsd)?
1) Enable Graphite receiver in VictoriaMetrics by setting `-graphiteListenAddr` command line flag. For instance,
the following command will enable Graphite receiver in VictoriaMetrics on TCP and UDP port `2003`:
```
/path/to/victoria-metrics-prod ... -graphiteListenAddr=:2003
```
2) Use the configured address in Graphite-compatible agents. For instance, set `graphiteHost`
to the VictoriaMetrics host in `StatsD` configs.
### How to send data from OpenTSDB-compatible agents?
1) Enable OpenTSDB receiver in VictoriaMetrics by setting `-opentsdbListenAddr` command line flag. For instance,
the following command will enable OpenTSDB receiver in VictoriaMetrics on TCP and UDP port `4242`:
```
/path/to/victoria-metrics-prod ... -opentsdbListenAddr=:4242
```
2) Send data to the given address from OpenTSDB-compatible agents.
### How to apply new config / upgrade VictoriaMetrics?
VictoriaMetrics must be restarted in order to upgrade it or apply a new config:
1) Send `SIGINT` signal to VictoriaMetrics process in order to gracefully stop it.
2) Wait until the process stops. This can take a few seconds.
3) Start the upgraded VictoriaMetrics with new config.
### How to work with snapshots?
Navigate to `http://<victoriametrics-addr>:8428/snapshot/create` in order to create an instant snapshot.
The page will return the following JSON response:
```
{"status":"ok","snapshot":"<snapshot-name>"}
```
Snapshots are created under `<-storageDataPath>/snapshots` directory, where `<-storageDataPath>`
is the command-line flag value. Snapshots can be archived to backup storage via `rsync -L`, `scp -r`
or any similar tool that follows symlinks during copying.
The `http://<victoriametrics-addr>:8428/snapshot/list` page contains the list of available snapshots.
Navigate to `http://<victoriametrics-addr>:8428/snapshot/delete?snapshot=<snapshot-name>` in order
to delete `<snapshot-name>` snapshot.
Navigate to `http://<victoriametrics-addr>:8428/snapshot/delete_all` in order to delete all the snapshots.
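A minimal Go sketch for creating a snapshot programmatically and parsing the JSON response shown above; it assumes VictoriaMetrics listens on `localhost:8428`:
```
package main

import (
    "encoding/json"
    "fmt"
    "log"
    "net/http"
)

// snapshotResponse matches the JSON returned by /snapshot/create (see above).
type snapshotResponse struct {
    Status   string `json:"status"`
    Snapshot string `json:"snapshot"`
}

func main() {
    resp, err := http.Get("http://localhost:8428/snapshot/create")
    if err != nil {
        log.Fatalf("cannot create snapshot: %s", err)
    }
    defer resp.Body.Close()

    var sr snapshotResponse
    if err := json.NewDecoder(resp.Body).Decode(&sr); err != nil {
        log.Fatalf("cannot parse response: %s", err)
    }
    // The snapshot can now be archived from <-storageDataPath>/snapshots/<name>.
    fmt.Printf("created snapshot %q\n", sr.Snapshot)
}
```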
### How to delete time series?
Send a request to `http://<victoriametrics-addr>:8428/api/v1/admin/tsdb/delete_series?match[]=<timeseries_selector_for_delete>`,
where `<timeseries_selector_for_delete>` may contain any [time series selector](https://prometheus.io/docs/prometheus/latest/querying/basics/#time-series-selectors)
for metrics to delete. After that all the time series matching the given selector are deleted. Storage space for
the deleted time series isn't freed instantly - it is freed during subsequent merges of data files.
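A minimal Go sketch issuing such a delete request; the address and the series selector are examples:
```
package main

import (
    "fmt"
    "log"
    "net/http"
    "net/url"
)

func main() {
    // Build the delete_series URL with a properly escaped selector.
    u := "http://localhost:8428/api/v1/admin/tsdb/delete_series?match[]=" +
        url.QueryEscape(`{job="node_exporter"}`)
    resp, err := http.Get(u)
    if err != nil {
        log.Fatalf("delete request failed: %s", err)
    }
    defer resp.Body.Close()
    fmt.Println("status:", resp.Status)
}
```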
### How to export time series?
Send a request to `http://<victoriametrics-addr>:8428/api/v1/export?match[]=<timeseries_selector_for_export>`,
where `<timeseries_selector_for_export>` may contain any [time series selector](https://prometheus.io/docs/prometheus/latest/querying/basics/#time-series-selectors)
for metrics to export. The response would contain all the data for the selected time series in [JSON streaming format](https://en.wikipedia.org/wiki/JSON_streaming#Line-delimited_JSON).
Each JSON line would contain data for a single time series. An example output:
```
{"metric":{"__name__":"up","job":"node_exporter","instance":"localhost:9100"},"values":[0,0,0],"timestamps":[1549891472010,1549891487724,1549891503438]}
{"metric":{"__name__":"up","job":"prometheus","instance":"localhost:9090"},"values":[1,1,1],"timestamps":[1549891461511,1549891476511,1549891491511]}
```
Optional `start` and `end` args may be added to the request in order to limit the time frame for the exported data. These args may contain either
unix timestamp in seconds or [RFC3339](https://www.ietf.org/rfc/rfc3339.txt) values.
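A minimal Go sketch for consuming this stream line by line; the address and the `match[]` selector are examples:
```
package main

import (
    "bufio"
    "encoding/json"
    "fmt"
    "log"
    "net/http"
)

// exportLine matches the JSON streaming format shown above.
type exportLine struct {
    Metric     map[string]string `json:"metric"`
    Values     []float64         `json:"values"`
    Timestamps []int64           `json:"timestamps"`
}

func main() {
    resp, err := http.Get(`http://localhost:8428/api/v1/export?match[]=up`)
    if err != nil {
        log.Fatalf("export request failed: %s", err)
    }
    defer resp.Body.Close()

    sc := bufio.NewScanner(resp.Body)
    sc.Buffer(make([]byte, 1024*1024), 16*1024*1024) // exported lines may be long
    for sc.Scan() {
        var el exportLine
        if err := json.Unmarshal(sc.Bytes(), &el); err != nil {
            log.Fatalf("cannot parse line: %s", err)
        }
        fmt.Printf("%s: %d samples\n", el.Metric["__name__"], len(el.Values))
    }
    if err := sc.Err(); err != nil {
        log.Fatalf("read error: %s", err)
    }
}
```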
### Federation
VictoriaMetrics exports [Prometheus-compatible federation data](https://prometheus.io/docs/prometheus/latest/federation/)
at `http://<victoriametrics-addr>:8428/federate?match[]=<timeseries_selector_for_federation>`.
Optional `start` and `end` args may be added to the request in order to scrape the last point for each selected time series on the `[start ... end]` interval.
`start` and `end` may contain either unix timestamp in seconds or [RFC3339](https://www.ietf.org/rfc/rfc3339.txt) values. By default the last point
on the interval `[now - max_lookback ... now]` is scraped for each time series. The default value for `max_lookback` is `5m` (5 minutes), but it can be overridden.
For instance, `/federate?match[]=up&max_lookback=1h` would return last points on the `[now - 1h ... now]` interval. This may be useful for time series federation
with scrape intervals exceeding `5m`.
### Capacity planning
Rough estimation of the required resources:
* RAM size: less than 1KB per active time series. So, ~1GB of RAM is required for 1M active time series.
A time series is considered active if new data points have been added to it recently or if it has been recently queried.
VictoriaMetrics stores various caches in RAM. Memory size for these caches may be limited with the `-memory.allowedPercent` flag.
* CPU cores: a CPU core per 300K inserted data points per second. So, ~4 CPU cores are required for processing
the insert stream of 1M data points per second.
If you see lower numbers per CPU core, then it is likely that the active time series info doesn't fit in the caches,
so you need more RAM to lower CPU usage.
* Storage size: less than a byte per data point on average. So, ~260GB is required for storing a month-long insert stream
of 100K data points per second.
The actual storage size heavily depends on data randomness (entropy). Higher randomness means higher storage size requirements.
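These rules of thumb are easy to turn into a quick estimate. A Go sketch with example inputs; the constants are the stated upper bounds from the text, not measurements:
```
package main

import (
    "fmt"
    "math"
)

func main() {
    const (
        activeSeries    = 1e6            // active time series
        ingestionRate   = 1e6            // ingested data points per second
        secondsPerMonth = 30 * 24 * 3600 // roughly one month of retention
    )
    ramGB := activeSeries * 1024 / 1e9                 // less than 1KB per active series
    cpuCores := math.Ceil(ingestionRate / 300e3)       // a CPU core per 300K points/second
    storageGB := ingestionRate * secondsPerMonth / 1e9 // less than a byte per data point
    fmt.Printf("RAM ~%.0fGB, CPU ~%.0f cores, storage ~%.0fGB per month\n",
        ramGB, cpuCores, storageGB)
}
```
The printed values match the rules above: ~1GB of RAM and ~4 CPU cores for 1M active series ingested at 1M data points per second, and ~2600GB of storage per month (i.e. ~260GB per month at 100K data points per second).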
### High availability
1) Install multiple VictoriaMetrics instances in distinct datacenters.
2) Add addresses of these instances to `remote_write` section in Prometheus config:
```yml
remote_write:
  - url: http://<victoriametrics-addr-1>:8428/api/v1/write
    queue_config:
      max_samples_per_send: 10000
  # ...
  - url: http://<victoriametrics-addr-N>:8428/api/v1/write
    queue_config:
      max_samples_per_send: 10000
```
3) Apply the updated config:
```
kill -HUP `pidof prometheus`
```
4) Now Prometheus should write data into all the configured `remote_write` urls in parallel.
5) Set up [Promxy](https://github.com/jacksontj/promxy) in front of all the VictoriaMetrics replicas.
6) Set up Prometheus datasource in Grafana that points to Promxy.
### Multiple retentions
Just start multiple VictoriaMetrics instances with distinct values for the following flags:
* `-retentionPeriod`
* `-storageDataPath`, so the data for each retention period is saved in a separate directory
* `-httpListenAddr`, so clients may reach VictoriaMetrics instance with proper retention
### Scalability and cluster version
Though single-node VictoriaMetrics cannot scale to multiple nodes, it is optimized for resource usage - storage size / bandwidth / IOPS, RAM, CPU.
This means that a single-node VictoriaMetrics may scale vertically and substitute moderately sized cluster built with competing solutions
such as Thanos, Uber M3, InfluxDB or TimescaleDB.
So try single-node VictoriaMetrics at first and then [switch to cluster version](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/cluster) if you still need
horizontally scalable long-term remote storage for really large Prometheus deployments.
[Contact us](mailto:info@victoriametrics.com) for paid support.
### Security
Do not forget to protect sensitive endpoints in VictoriaMetrics when exposing it to untrusted networks such as the internet.
Consider setting the following command-line flags:
* `-tls`, `-tlsCertFile` and `-tlsKeyFile` for switching from HTTP to HTTPS.
* `-httpAuth.username` and `-httpAuth.password` for protecting all the HTTP endpoints
with [HTTP Basic Authentication](https://en.wikipedia.org/wiki/Basic_access_authentication).
* `-deleteAuthKey` for protecting `/api/v1/admin/tsdb/delete_series` endpoint. See [how to delete time series](#how-to-delete-time-series).
* `-snapshotAuthKey` for protecting `/snapshot*` endpoints. See [how to work with snapshots](#how-to-work-with-snapshots).
Explicitly set internal network interface for TCP and UDP ports for data ingestion with Graphite and OpenTSDB formats.
For example, substitute `-graphiteListenAddr=:2003` with `-graphiteListenAddr=<internal_iface_ip>:2003`.
### Tuning
* There is no need to tune VictoriaMetrics, since it uses reasonable defaults for command-line flags,
which are automatically adjusted for the available CPU and RAM resources.
* There is no need for operating system tuning, since VictoriaMetrics is optimized for default OS settings.
The only option is increasing the limit on [the number of open files in the OS](https://medium.com/@muhammadtriwibowo/set-permanently-ulimit-n-open-files-in-ubuntu-4d61064429a),
so Prometheus instances can establish more connections to VictoriaMetrics.
It is recommended to set up [monitoring](#monitoring) for the cluster.
### Monitoring
VictoriaMetrics exports internal metrics in Prometheus format on the `/metrics` page.
Add this page to Prometheus' scrape config in order to collect VictoriaMetrics metrics.
There is [an official Grafana dashboard for single-node VictoriaMetrics](https://grafana.com/dashboards/10229).
All the cluster components expose various metrics in Prometheus-compatible format at `/metrics` page on the TCP port set in `-httpListenAddr` command-line flag.
By default the following TCP ports are used:
- `vminsert` - 8480
- `vmselect` - 8481
- `vmstorage` - 8482
It is recommended to set up Prometheus to scrape `/metrics` pages from all the cluster components, so they can be monitored and analyzed
with [the official Grafana dashboard for VictoriaMetrics cluster](https://grafana.com/grafana/dashboards/11176).
### Troubleshooting
### URL format
* If VictoriaMetrics works slowly and eats more than a CPU core per 100K ingested data points per second,
then it is likely you have too many active time series for the current amount of RAM.
It is recommended to increase the amount of RAM on the node with VictoriaMetrics in order to improve
ingestion performance.
Another option is to increase the `-memory.allowedPercent` command-line flag value. Be careful with this
option, since too large a value for `-memory.allowedPercent` may result in high I/O usage.
* URLs for data ingestion: `http://<vminsert>:8480/insert/<accountID>/<suffix>`, where:
- `<accountID>` is an arbitrary number identifying namespace for data ingestion (aka tenant)
- `<suffix>` may have the following values:
- `prometheus` - for inserting data with [Prometheus remote write API](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#remote_write)
- `influx/write` or `influx/api/v2/write` - for inserting data with [Influx line protocol](https://docs.influxdata.com/influxdb/v1.7/write_protocols/line_protocol_tutorial/)
- `prometheus/api/v1/import` - for importing data obtained via `api/v1/export` on `vmselect` (see below).
* URLs for querying: `http://<vmselect>:8481/select/<accountID>/prometheus/<suffix>`, where:
- `<accountID>` is an arbitrary number identifying data namespace for the query (aka tenant)
- `<suffix>` may have the following values:
- `api/v1/query` - performs [PromQL instant query](https://prometheus.io/docs/prometheus/latest/querying/api/#instant-queries)
- `api/v1/query_range` - performs [PromQL range query](https://prometheus.io/docs/prometheus/latest/querying/api/#range-queries)
- `api/v1/series` - performs [series query](https://prometheus.io/docs/prometheus/latest/querying/api/#finding-series-by-label-matchers)
- `api/v1/labels` - returns a [list of label names](https://prometheus.io/docs/prometheus/latest/querying/api/#getting-label-names)
- `api/v1/label/<label_name>/values` - returns values for the given `<label_name>` according [to API](https://prometheus.io/docs/prometheus/latest/querying/api/#querying-label-values)
- `federate` - returns [federated metrics](https://prometheus.io/docs/prometheus/latest/federation/)
- `api/v1/export` - exports raw data. See [this article](https://medium.com/@valyala/analyzing-prometheus-data-with-external-tools-5f3e5e147639) for details
* URL for time series deletion: `http://<vmselect>:8481/delete/<accountID>/prometheus/api/v1/admin/tsdb/delete_series?match[]=<timeseries_selector_for_delete>`.
Note that the `delete_series` handler should be used only in exceptional cases such as deletion of accidentally ingested incorrect time series. It shouldn't
be used on a regular basis, since it carries non-zero overhead.
* `vmstorage` nodes provide the following HTTP endpoints on `8482` port:
- `/snapshot/create` - create [instant snapshot](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282),
which can be used for backups in background. Snapshots are created in `<storageDataPath>/snapshots` folder, where `<storageDataPath>` is the corresponding
command-line flag value.
- `/snapshot/list` - list available snapshots.
- `/snapshot/delete?snapshot=<id>` - delete the given snapshot.
- `/snapshot/delete_all` - delete all the snapshots.
Snapshots may be created independently on each `vmstorage` node. There is no need to synchronize snapshot creation
across `vmstorage` nodes.
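The per-tenant URL scheme above is easy to construct programmatically. A minimal Go sketch; the helper names and host names are hypothetical:
```
package main

import "fmt"

// insertURL builds an ingestion URL according to the scheme described above.
func insertURL(vminsertAddr string, accountID int, suffix string) string {
    return fmt.Sprintf("http://%s:8480/insert/%d/%s", vminsertAddr, accountID, suffix)
}

// selectURL builds a query URL according to the scheme described above.
func selectURL(vmselectAddr string, accountID int, suffix string) string {
    return fmt.Sprintf("http://%s:8481/select/%d/prometheus/%s", vmselectAddr, accountID, suffix)
}

func main() {
    fmt.Println(insertURL("vminsert-host", 42, "prometheus"))
    // http://vminsert-host:8480/insert/42/prometheus
    fmt.Println(selectURL("vmselect-host", 42, "api/v1/query_range"))
    // http://vmselect-host:8481/select/42/prometheus/api/v1/query_range
}
```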
### Cluster resizing and scalability
Cluster performance and capacity scales with adding new nodes.
* `vminsert` and `vmselect` nodes are stateless and may be added / removed at any time.
Do not forget to update the list of these nodes on the HTTP load balancer.
Adding more `vminsert` nodes scales data ingestion rate. See [this comment](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/175#issuecomment-536925841)
about ingestion rate scalability.
Adding more `vmselect` nodes scales select queries rate.
* `vmstorage` nodes own the ingested data, so they cannot be removed without data loss.
Adding more `vmstorage` nodes scales cluster capacity.
Steps to add `vmstorage` node:
1. Start new `vmstorage` node with the same `-retentionPeriod` as existing nodes in the cluster.
2. Gradually restart all the `vmselect` nodes with new `-storageNode` arg containing `<new_vmstorage_host>:8401`.
3. Gradually restart all the `vminsert` nodes with new `-storageNode` arg containing `<new_vmstorage_host>:8400`.
### Cluster availability
* HTTP load balancer must stop routing requests to unavailable `vminsert` and `vmselect` nodes.
* The cluster remains available if at least a single `vmstorage` node exists:
- `vminsert` re-routes incoming data from unavailable `vmstorage` nodes to healthy `vmstorage` nodes
- `vmselect` continues serving partial responses if at least a single `vmstorage` node is available.
### Updating / reconfiguring cluster nodes
All the node types - `vminsert`, `vmselect` and `vmstorage` - may be updated via graceful shutdown.
Send `SIGINT` signal to the corresponding process, wait until it finishes and then start new version
with new configs.
The cluster should remain in a working state if at least a single node of each type remains available during
the update process. See the [cluster availability](#cluster-availability) section for details.
### Capacity planning
Each instance type - `vminsert`, `vmselect` and `vmstorage` - can run on the most suitable hardware.
#### vminsert
* The recommended total number of vCPU cores for all the `vminsert` instances can be calculated from the ingestion rate: `vCPUs = ingestion_rate / 150K`.
* The recommended number of vCPU cores per `vminsert` instance should equal the number of `vmstorage` instances in the cluster.
* The amount of RAM per `vminsert` instance should be 1GB or more. RAM is used as a buffer for spikes in ingestion rate.
* Sometimes setting the `-rpc.disableCompression` command-line flag on `vminsert` instances can increase ingestion capacity at the cost
of higher network bandwidth usage between `vminsert` and `vmstorage`.
#### vmstorage
* The recommended total number of vCPU cores for all the `vmstorage` instances can be calculated from the ingestion rate: `vCPUs = ingestion_rate / 150K`.
* The recommended total amount of RAM for all the `vmstorage` instances can be calculated from the number of active time series: `RAM = active_time_series * 1KB`.
A time series is active if it has received at least a single data point during the last hour or if it has been queried during the last hour.
* The recommended total amount of storage space for all the `vmstorage` instances can be calculated
from the ingestion rate and retention: `storage_space = ingestion_rate * retention_seconds`.
#### vmselect
The recommended hardware for `vmselect` instances highly depends on the type of queries. Lightweight queries over a small number of time series usually require
a small number of vCPU cores and a small amount of RAM on `vmselect`, while heavy queries over a big number of time series (>10K) usually require
a bigger number of vCPU cores and bigger amounts of RAM.
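The formulas above can be combined into a quick sizing estimate. A Go sketch; the inputs are examples, substitute your own numbers:
```
package main

import "fmt"

func main() {
    const (
        ingestionRate    = 600e3                // ingested data points per second
        activeTimeSeries = 10e6                 // series written or queried during the last hour
        retentionSeconds = 3 * 30 * 24 * 3600.0 // roughly 3 months of retention
    )
    vminsertCPUs := ingestionRate / 150e3           // total vCPUs across all vminsert instances
    vmstorageCPUs := ingestionRate / 150e3          // total vCPUs across all vmstorage instances
    vmstorageRAMGB := activeTimeSeries * 1024 / 1e9 // 1KB per active time series
    storageTB := ingestionRate * retentionSeconds / 1e12
    fmt.Printf("vminsert: ~%.0f vCPUs total\n", vminsertCPUs)
    fmt.Printf("vmstorage: ~%.0f vCPUs, ~%.0f GB RAM, ~%.1f TB of disk\n",
        vmstorageCPUs, vmstorageRAMGB, storageTB)
}
```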
### Helm
The Helm chart simplifies managing the cluster version of VictoriaMetrics in Kubernetes.
It is available in the [helm-charts](https://github.com/VictoriaMetrics/helm-charts) repository.
Upgrades follow the `Cluster resizing procedure` under the hood.
### Replication and data safety
VictoriaMetrics offloads replication to the underlying storage pointed by `-storageDataPath`.
It is recommended to store data on [Google Compute Engine persistent disks](https://cloud.google.com/compute/docs/disks/#pdspecs),
since they are protected from data loss and data corruption. They also provide consistently high performance
and [may be resized](https://cloud.google.com/compute/docs/disks/add-persistent-disk) without downtime.
HDD-based persistent disks should be enough for the majority of use cases.
It is recommended to use durable replicated persistent volumes in Kubernetes.
Note that [replication doesn't save from disaster](https://medium.com/@valyala/speeding-up-backups-for-big-time-series-databases-533c1a927883).
### Backups
It is recommended to perform periodic backups from [instant snapshots](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282)
to protect against user errors such as accidental data deletion.
The following steps must be performed on each `vmstorage` node to create a backup:
1. Create an instant snapshot by navigating to the `/snapshot/create` HTTP handler. It will create a snapshot and return its name.
2. Archive the created snapshot from `<-storageDataPath>/snapshots/<snapshot_name>` folder using [vmbackup](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/cluster/app/vmbackup/README.md).
The archival process doesn't interfere with `vmstorage` work, so it may be performed at any suitable time.
3. Delete unused snapshots via `/snapshot/delete?snapshot=<snapshot_name>` or `/snapshot/delete_all` in order to free up occupied storage space.
There is no need to synchronize backups across all the `vmstorage` nodes.
Restoring from backup:
1. Stop `vmstorage` node with `kill -INT`.
2. Restore data from backup using [vmrestore](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/cluster/app/vmrestore/README.md) into `-storageDataPath` directory.
3. Start `vmstorage` node.
## Community and contributions
Feel free to ask any questions regarding VictoriaMetrics [here](https://groups.google.com/forum/#!forum/victorametrics-users).
We are open to third-party pull requests provided they follow [KISS design principle](https://en.wikipedia.org/wiki/KISS_principle):
- Prefer simple code and architecture.
@@ -392,6 +284,17 @@ We are open to third-party pull requests provided they follow [KISS design princ
Adhering to the `KISS` principle simplifies the resulting code and architecture, so it can be reviewed, understood and verified by many people.
Due to `KISS`, the cluster version of VictoriaMetrics lacks the following "features" popular in the distributed computing world:
- Fragile gossip protocols. See [failed attempt in Thanos](https://github.com/improbable-eng/thanos/blob/030bc345c12c446962225221795f4973848caab5/docs/proposals/completed/201809_gossip-removal.md).
- Hard-to-understand-and-implement-properly [Paxos protocols](https://www.quora.com/In-distributed-systems-what-is-a-simple-explanation-of-the-Paxos-algorithm).
- Complex replication schemes, which may go nuts in unforeseen edge cases. The replication is offloaded to the underlying durable replicated storage
such as [persistent disks in Google Compute Engine](https://cloud.google.com/compute/docs/disks/#pdspecs).
- Automatic data reshuffling between storage nodes, which may hurt cluster performance and availability.
- Automatic cluster resizing, which may cost you a lot of money if improperly configured.
- Automatic discovering and addition of new nodes in the cluster, which may mix data between dev and prod clusters :)
- Automatic leader election, which may result in split brain disaster on network errors.
## Reporting bugs


@@ -1,21 +0,0 @@
# All these commands must run from repository root.

victoria-metrics-prod:
    APP_NAME=victoria-metrics $(MAKE) app-via-docker

package-victoria-metrics:
    APP_NAME=victoria-metrics \
        $(MAKE) package-via-docker

publish-victoria-metrics:
    APP_NAME=victoria-metrics $(MAKE) publish-via-docker

run-victoria-metrics:
    mkdir -p victoria-metrics-data
    DOCKER_OPTS='-v $(shell pwd)/victoria-metrics-data:/victoria-metrics-data -p 8428:8428 -p 2003:2003 -p 2003:2003/udp' \
        APP_NAME=victoria-metrics \
        ARGS='-graphiteListenAddr=:2003 -opentsdbListenAddr=:4242 -retentionPeriod=12 -search.maxUniqueTimeseries=1000000 -search.maxQueryDuration=10m' \
        $(MAKE) run-via-docker

victoria-metrics-arm:
    CC=arm-linux-gnueabi-gcc CGO_ENABLED=1 GOARCH=arm GO111MODULE=on go build -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/victoria-metrics-arm ./app/victoria-metrics


@@ -1,5 +0,0 @@
FROM scratch
COPY --from=local/certs:1.0.2 /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ca-certificates.crt
COPY bin/victoria-metrics-prod .
EXPOSE 8428
ENTRYPOINT ["/victoria-metrics-prod"]


@@ -1,60 +0,0 @@
package main

import (
    "flag"
    "net/http"
    "time"

    "github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert"
    "github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect"
    "github.com/VictoriaMetrics/VictoriaMetrics/app/vmstorage"
    "github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
    "github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
    "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
    "github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
)

var httpListenAddr = flag.String("httpListenAddr", ":8428", "TCP address to listen for http connections")

func main() {
    flag.Parse()
    buildinfo.Init()
    logger.Init()
    logger.Infof("starting VictoriaMetrics at %q...", *httpListenAddr)
    startTime := time.Now()
    vmstorage.Init()
    vmselect.Init()
    vminsert.Init()
    go httpserver.Serve(*httpListenAddr, requestHandler)
    logger.Infof("started VictoriaMetrics in %s", time.Since(startTime))

    sig := procutil.WaitForSigterm()
    logger.Infof("received signal %s", sig)

    logger.Infof("gracefully shutting down webservice at %q", *httpListenAddr)
    startTime = time.Now()
    if err := httpserver.Stop(*httpListenAddr); err != nil {
        logger.Fatalf("cannot stop the webservice: %s", err)
    }
    vminsert.Stop()
    logger.Infof("successfully shut down the webservice in %s", time.Since(startTime))

    vmstorage.Stop()
    vmselect.Stop()
    logger.Infof("the VictoriaMetrics has been stopped in %s", time.Since(startTime))
}

func requestHandler(w http.ResponseWriter, r *http.Request) bool {
    if vminsert.RequestHandler(w, r) {
        return true
    }
    if vmselect.RequestHandler(w, r) {
        return true
    }
    if vmstorage.RequestHandler(w, r) {
        return true
    }
    return false
}

37
app/vmbackup/Makefile Normal file

@@ -0,0 +1,37 @@
# All these commands must run from repository root.

vmbackup:
    APP_NAME=vmbackup $(MAKE) app-local

vmbackup-prod:
    APP_NAME=vmbackup $(MAKE) app-via-docker

package-vmbackup:
    APP_NAME=vmbackup $(MAKE) package-via-docker

publish-vmbackup:
    APP_NAME=vmbackup $(MAKE) publish-via-docker

vmbackup-arm:
    CGO_ENABLED=0 GOOS=linux GOARCH=arm GO111MODULE=on go build -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/vmbackup-arm ./app/vmbackup

vmbackup-arm-prod:
    APP_NAME=vmbackup APP_SUFFIX='-arm' DOCKER_OPTS='--env CGO_ENABLED=0 --env GOARCH=arm' $(MAKE) app-via-docker

vmbackup-arm64:
    CGO_ENABLED=0 GOOS=linux GOARCH=arm64 GO111MODULE=on go build -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/vmbackup-arm64 ./app/vmbackup

vmbackup-arm64-prod:
    APP_NAME=vmbackup APP_SUFFIX='-arm64' DOCKER_OPTS='--env CGO_ENABLED=0 --env GOARCH=arm64' $(MAKE) app-via-docker

vmbackup-386:
    CGO_ENABLED=0 GOOS=linux GOARCH=386 GO111MODULE=on go build -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/vmbackup-386 ./app/vmbackup

vmbackup-386-prod:
    APP_NAME=vmbackup APP_SUFFIX='-386' DOCKER_OPTS='--env CGO_ENABLED=0 --env GOARCH=386' $(MAKE) app-via-docker

vmbackup-pure:
    APP_NAME=vmbackup $(MAKE) app-local-pure

vmbackup-pure-prod:
    APP_NAME=vmbackup APP_SUFFIX='-pure' DOCKER_OPTS='--env CGO_ENABLED=0' $(MAKE) app-via-docker

178
app/vmbackup/README.md Normal file

@@ -0,0 +1,178 @@
## vmbackup
`vmbackup` creates VictoriaMetrics data backups from [instant snapshots](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/README.md#how-to-work-with-snapshots).
Supported storage systems for backups:
* [GCS](https://cloud.google.com/storage/). Example: `gcs://<bucket>/<path/to/backup>`
* [S3](https://aws.amazon.com/s3/). Example: `s3://<bucket>/<path/to/backup>`
* Any S3-compatible storage such as [MinIO](https://github.com/minio/minio). See `-customS3Endpoint` command-line flag.
* Local filesystem. Example: `fs://</absolute/path/to/backup>`
Incremental backups and full backups are supported. Incremental backups are created automatically if the destination path already contains data from the previous backup.
Full backups can be sped up with `-origin` pointing to an already existing backup on the same remote storage. In this case `vmbackup` makes a server-side copy of the shared
data between the existing backup and the new backup. This saves time and costs on data transfer.
The backup process can be interrupted at any time. It is automatically resumed from the interruption point when `vmbackup` is restarted with the same args.
Backed up data can be restored with [vmrestore](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmrestore/README.md).
### Use cases
#### Regular backups
Regular backup can be performed with the following command:
```
vmbackup -storageDataPath=</path/to/victoria-metrics-data> -snapshotName=<local-snapshot> -dst=gcs://<bucket>/<path/to/new/backup>
```
* `</path/to/victoria-metrics-data>` - path to VictoriaMetrics data pointed by `-storageDataPath` command-line flag in single-node VictoriaMetrics or in cluster `vmstorage`.
There is no need to stop VictoriaMetrics for creating backups, since they are performed from immutable [instant snapshots](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/README.md#how-to-work-with-snapshots).
* `<local-snapshot>` is the snapshot to backup. See [how to create instant snapshots](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/README.md#how-to-work-with-snapshots).
* `<bucket>` is the name of an already existing [GCS bucket](https://cloud.google.com/storage/docs/creating-buckets).
* `<path/to/new/backup>` is the destination path where the new backup will be placed.
#### Regular backups with server-side copy from existing backup
If the destination GCS bucket already contains the previous backup at the `-origin` path, then the new backup can be sped up
with the following command:
```
vmbackup -storageDataPath=</path/to/victoria-metrics-data> -snapshotName=<local-snapshot> -dst=gcs://<bucket>/<path/to/new/backup> -origin=gcs://<bucket>/<path/to/existing/backup>
```
This saves time and network bandwidth costs by performing server-side copy for the shared data from the `-origin` to `-dst`.
#### Incremental backups
Incremental backups are performed if `-dst` points to an already existing backup. In this case only new data is uploaded to remote storage.
This saves time and network bandwidth costs when working with big backups:
```
vmbackup -storageDataPath=</path/to/victoria-metrics-data> -snapshotName=<local-snapshot> -dst=gcs://<bucket>/<path/to/existing/backup>
```
#### Smart backups
Smart backups mean storing full daily backups in `YYYYMMDD` folders and an incremental hourly backup in the `latest` folder:
* Run the following command every hour:
```
vmbackup -snapshotName=<latest-snapshot> -dst=gcs://<bucket>/latest
```
Where `<latest-snapshot>` is the latest [snapshot](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/README.md#how-to-work-with-snapshots).
The command will upload only changed data to `gcs://<bucket>/latest`.
* Run the following command once a day:
```
vmbackup -snapshotName=<daily-snapshot> -dst=gcs://<bucket>/<YYYYMMDD> -origin=gcs://<bucket>/latest
```
Where `<daily-snapshot>` is the snapshot for the last day `<YYYYMMDD>`.
This approach saves network bandwidth costs on hourly backups (since they are incremental) and allows recovering data from either the last hour (`latest` backup)
or from any day (`YYYYMMDD` backups). Note that the hourly backup shouldn't run while the daily backup is being created.
Do not forget to remove old snapshots and backups when they are no longer needed in order to save storage costs.
### How does it work?
The backup algorithm is the following:
1. Collect information about files in the `-snapshotName`, in the `-dst` and in the `-origin`.
2. Determine files in `-dst`, which are missing in `-snapshotName`, and delete them. These are usually small files, which are already merged into bigger files in the snapshot.
3. Determine files from `-snapshotName`, which are missing in `-dst`. These are usually small new files and bigger merged files.
4. Determine files from step 3, which exist in the `-origin`, and perform server-side copy of these files from `-origin` to `-dst`.
These are usually the biggest and oldest files, which are shared between backups.
5. Upload the remaining files from step 3 from `-snapshotName` to `-dst`.
The algorithm splits source files into 100MB chunks in the backup. Each chunk is stored as a separate file in the backup.
Such splitting minimizes the amount of data to re-transfer after temporary errors.
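A simplified Go sketch of steps 2-5, treating backups as plain sets of file names; the real implementation also tracks file sizes, chunking and retries:
```
package main

import "fmt"

// planBackup illustrates steps 2-5 above. Each argument is the set of file
// names present in the snapshot, in -dst and in -origin respectively.
func planBackup(snapshot, dst, origin map[string]bool) (toDelete, toCopy, toUpload []string) {
    for f := range dst {
        if !snapshot[f] {
            toDelete = append(toDelete, f) // step 2: stale files in -dst
        }
    }
    for f := range snapshot {
        if dst[f] {
            continue // already backed up
        }
        if origin[f] {
            toCopy = append(toCopy, f) // step 4: server-side copy from -origin
        } else {
            toUpload = append(toUpload, f) // step 5: upload from the snapshot
        }
    }
    return toDelete, toCopy, toUpload
}

func main() {
    snapshot := map[string]bool{"big_part": true, "new_part": true}
    dst := map[string]bool{"old_small_part": true}
    origin := map[string]bool{"big_part": true}
    toDelete, toCopy, toUpload := planBackup(snapshot, dst, origin)
    fmt.Println(toDelete, toCopy, toUpload) // [old_small_part] [big_part] [new_part]
}
```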
`vmbackup` relies on [instant snapshot](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282) properties:
- All the files in the snapshot are immutable.
- Old files are periodically merged into new files.
- Smaller files have a higher probability of being merged.
- Consecutive snapshots share many identical files.
These properties allow performing fast and cheap incremental backups and server-side copying from `-origin` paths.
`vmbackup` can work improperly or slowly when these properties are violated.
### Troubleshooting
* If the backup is slow, then try setting a higher value for the `-concurrency` flag. This will increase the number of concurrent workers that upload data to backup storage.
* If `vmbackup` eats all the network bandwidth, then set `-maxBytesPerSecond` to the desired value.
* If `vmbackup` has been interrupted due to a temporary error, then just restart it with the same args. It will resume the backup process.
### Advanced usage
Run `vmbackup -help` in order to see all the available options:
```
  -concurrency int
        The number of concurrent workers. Higher concurrency may reduce backup duration (default 10)
  -configFilePath string
        Path to file with S3 configs. Configs are loaded from default location if not set.
        See https://docs.aws.amazon.com/general/latest/gr/aws-security-credentials.html
  -configProfile string
        Profile name for S3 configs (default "default")
  -credsFilePath string
        Path to file with GCS or S3 credentials. Credentials are loaded from default locations if not set.
        See https://cloud.google.com/iam/docs/creating-managing-service-account-keys and https://docs.aws.amazon.com/general/latest/gr/aws-security-credentials.html
  -customS3Endpoint string
        Custom S3 endpoint for use with S3-compatible storages (e.g. MinIO). S3 is used if not set
  -dst string
        Where to put the backup on the remote storage. Example: gcs://bucket/path/to/backup/dir, s3://bucket/path/to/backup/dir or fs:///path/to/local/backup/dir
        -dst can point to the previous backup. In this case incremental backup is performed, i.e. only changed data is uploaded
  -loggerLevel string
        Minimum level of errors to log. Possible values: INFO, ERROR, FATAL, PANIC (default "INFO")
  -maxBytesPerSecond int
        The maximum upload speed. There is no limit if it is set to 0
  -memory.allowedPercent float
        Allowed percent of system memory VictoriaMetrics caches may occupy (default 60)
  -origin string
        Optional origin directory on the remote storage with old backup for server-side copying when performing full backup. This speeds up full backups
  -snapshotName string
        Name for the snapshot to backup. See https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/README.md#how-to-work-with-snapshots
  -storageDataPath string
        Path to VictoriaMetrics data. Must match -storageDataPath from VictoriaMetrics or vmstorage (default "victoria-metrics-data")
  -version
        Show VictoriaMetrics version
```
### How to build from sources
It is recommended to use [binary releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases) - see the `vmutils-*` archives there.
#### Development build
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.12.
2. Run `make vmbackup` from the root folder of the repository.
It builds `vmbackup` binary and puts it into the `bin` folder.
#### Production build
1. [Install docker](https://docs.docker.com/install/).
2. Run `make vmbackup-prod` from the root folder of the repository.
It builds `vmbackup-prod` binary and puts it into the `bin` folder.
#### Building docker images
Run `make package-vmbackup`. It builds `victoriametrics/vmbackup:<PKG_TAG>` docker image locally.
`<PKG_TAG>` is an auto-generated image tag, which depends on the source code in the repository.
The `<PKG_TAG>` may be manually set via `PKG_TAG=foobar make package-vmbackup`.


@@ -0,0 +1,5 @@
FROM scratch
COPY --from=local/certs:1.0.3 /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ca-certificates.crt
COPY bin/vmbackup-prod .
EXPOSE 8428
ENTRYPOINT ["/vmbackup-prod"]

114
app/vmbackup/main.go Normal file

@@ -0,0 +1,114 @@
package main

import (
    "flag"
    "fmt"
    "os"

    "github.com/VictoriaMetrics/VictoriaMetrics/lib/backup/actions"
    "github.com/VictoriaMetrics/VictoriaMetrics/lib/backup/common"
    "github.com/VictoriaMetrics/VictoriaMetrics/lib/backup/fslocal"
    "github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
    "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
)

var (
    storageDataPath = flag.String("storageDataPath", "victoria-metrics-data", "Path to VictoriaMetrics data. Must match -storageDataPath from VictoriaMetrics or vmstorage")
    snapshotName    = flag.String("snapshotName", "", "Name for the snapshot to backup. See https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/README.md#how-to-work-with-snapshots")
    dst             = flag.String("dst", "", "Where to put the backup on the remote storage. "+
        "Example: gcs://bucket/path/to/backup/dir, s3://bucket/path/to/backup/dir or fs:///path/to/local/backup/dir\n"+
        "-dst can point to the previous backup. In this case incremental backup is performed, i.e. only changed data is uploaded")
    origin            = flag.String("origin", "", "Optional origin directory on the remote storage with old backup for server-side copying when performing full backup. This speeds up full backups")
    concurrency       = flag.Int("concurrency", 10, "The number of concurrent workers. Higher concurrency may reduce backup duration")
    maxBytesPerSecond = flag.Int("maxBytesPerSecond", 0, "The maximum upload speed. There is no limit if it is set to 0")
)

func main() {
    flag.Usage = usage
    flag.Parse()
    buildinfo.Init()

    srcFS, err := newSrcFS()
    if err != nil {
        logger.Fatalf("%s", err)
    }
    dstFS, err := newDstFS()
    if err != nil {
        logger.Fatalf("%s", err)
    }
    originFS, err := newOriginFS()
    if err != nil {
        logger.Fatalf("%s", err)
    }
    a := &actions.Backup{
        Concurrency: *concurrency,
        Src:         srcFS,
        Dst:         dstFS,
        Origin:      originFS,
    }
    if err := a.Run(); err != nil {
        logger.Fatalf("cannot create backup: %s", err)
    }
}

func usage() {
    const s = `
vmbackup performs backups for VictoriaMetrics data from instant snapshots to gcs, s3
or local filesystem. Backed up data can be restored with vmrestore.

See the docs at https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmbackup/README.md .
`
    f := flag.CommandLine.Output()
    fmt.Fprintf(f, "%s\n", s)
    flag.PrintDefaults()
}

func newSrcFS() (*fslocal.FS, error) {
    if len(*snapshotName) == 0 {
        return nil, fmt.Errorf("`-snapshotName` cannot be empty")
    }
    snapshotPath := *storageDataPath + "/snapshots/" + *snapshotName

    // Verify the snapshot exists.
    f, err := os.Open(snapshotPath)
    if err != nil {
        return nil, fmt.Errorf("cannot open snapshot at %q: %s", snapshotPath, err)
    }
    fi, err := f.Stat()
    _ = f.Close()
    if err != nil {
        return nil, fmt.Errorf("cannot stat %q: %s", snapshotPath, err)
    }
    if !fi.IsDir() {
        return nil, fmt.Errorf("snapshot %q must be a directory", snapshotPath)
    }

    fs := &fslocal.FS{
        Dir:               snapshotPath,
        MaxBytesPerSecond: *maxBytesPerSecond,
    }
    if err := fs.Init(); err != nil {
        return nil, fmt.Errorf("cannot initialize fs: %s", err)
    }
    return fs, nil
}

func newDstFS() (common.RemoteFS, error) {
    fs, err := actions.NewRemoteFS(*dst)
    if err != nil {
        return nil, fmt.Errorf("cannot parse `-dst`=%q: %s", *dst, err)
    }
    return fs, nil
}

func newOriginFS() (common.RemoteFS, error) {
    if len(*origin) == 0 {
        return nil, nil
    }
    fs, err := actions.NewRemoteFS(*origin)
    if err != nil {
        return nil, fmt.Errorf("cannot parse `-origin`=%q: %s", *origin, err)
    }
    return fs, nil
}

36
app/vminsert/Makefile Normal file

@@ -0,0 +1,36 @@
# All these commands must run from repository root.

run-vminsert:
    APP_NAME=vminsert \
        ARGS='-storageNode=localhost:8400' \
        $(MAKE) run-via-docker

vminsert:
    APP_NAME=vminsert $(MAKE) app-local

vminsert-race:
    APP_NAME=vminsert RACE=-race $(MAKE) app-local

vminsert-prod:
    APP_NAME=vminsert $(MAKE) app-via-docker

vminsert-prod-race:
    APP_NAME=vminsert RACE=-race $(MAKE) app-via-docker

vminsert-pure:
    APP_NAME=vminsert $(MAKE) app-local-pure

vminsert-pure-prod:
    APP_NAME=vminsert APP_SUFFIX='-pure' DOCKER_OPTS='--env CGO_ENABLED=0' $(MAKE) app-via-docker

package-vminsert:
    APP_NAME=vminsert $(MAKE) package-via-docker

package-vminsert-race:
    APP_NAME=vminsert RACE=-race $(MAKE) package-via-docker

publish-vminsert:
    APP_NAME=vminsert $(MAKE) publish-via-docker

publish-vminsert-race:
    APP_NAME=vminsert RACE=-race $(MAKE) publish-via-docker


@@ -1 +1 @@
`vminsert` routes the ingested data to `vmstorage`.
`vminsert` routes the ingested data to `vmstorage` nodes.


@@ -0,0 +1,30 @@
package common

import (
    "compress/gzip"
    "io"
    "sync"
)

// GetGzipReader returns a new gzip reader from the pool.
//
// Return the gzip reader to the pool with PutGzipReader when it is no longer needed.
func GetGzipReader(r io.Reader) (*gzip.Reader, error) {
    v := gzipReaderPool.Get()
    if v == nil {
        return gzip.NewReader(r)
    }
    zr := v.(*gzip.Reader)
    if err := zr.Reset(r); err != nil {
        return nil, err
    }
    return zr, nil
}

// PutGzipReader returns the gzip reader obtained via GetGzipReader back to the pool.
func PutGzipReader(zr *gzip.Reader) {
    _ = zr.Close()
    gzipReaderPool.Put(zr)
}

var gzipReaderPool sync.Pool
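A hypothetical caller sketch showing the intended usage pattern for the pool: take a reader, use it, and always return it. The function name is illustrative, not part of the actual codebase:
```
package common

import (
    "io/ioutil"
    "net/http"
)

// readGzippedBody decompresses a gzipped request body using the pooled reader
// and always returns the reader to the pool afterwards.
func readGzippedBody(r *http.Request) ([]byte, error) {
    zr, err := GetGzipReader(r.Body)
    if err != nil {
        return nil, err
    }
    defer PutGzipReader(zr) // makes the underlying gzip.Reader reusable
    return ioutil.ReadAll(zr)
}
```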


@@ -1,106 +0,0 @@
package common
import (
"fmt"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmstorage"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
)
// InsertCtx contains common bits for data points insertion.
type InsertCtx struct {
Labels []prompb.Label
mrs []storage.MetricRow
metricNamesBuf []byte
}
// Reset resets ctx for future fill with rowsLen rows.
func (ctx *InsertCtx) Reset(rowsLen int) {
for _, label := range ctx.Labels {
label.Name = nil
label.Value = nil
}
ctx.Labels = ctx.Labels[:0]
for i := range ctx.mrs {
mr := &ctx.mrs[i]
mr.MetricNameRaw = nil
}
ctx.mrs = ctx.mrs[:0]
if n := rowsLen - cap(ctx.mrs); n > 0 {
ctx.mrs = append(ctx.mrs[:cap(ctx.mrs)], make([]storage.MetricRow, n)...)
}
ctx.mrs = ctx.mrs[:rowsLen]
ctx.metricNamesBuf = ctx.metricNamesBuf[:0]
}
func (ctx *InsertCtx) marshalMetricNameRaw(prefix []byte, labels []prompb.Label) []byte {
start := len(ctx.metricNamesBuf)
ctx.metricNamesBuf = append(ctx.metricNamesBuf, prefix...)
ctx.metricNamesBuf = storage.MarshalMetricNameRaw(ctx.metricNamesBuf, labels)
metricNameRaw := ctx.metricNamesBuf[start:]
return metricNameRaw[:len(metricNameRaw):len(metricNameRaw)]
}
// WriteDataPoint writes (timestamp, value) with the given prefix and labels into ctx buffer.
func (ctx *InsertCtx) WriteDataPoint(prefix []byte, labels []prompb.Label, timestamp int64, value float64) {
metricNameRaw := ctx.marshalMetricNameRaw(prefix, labels)
ctx.addRow(metricNameRaw, timestamp, value)
}
// WriteDataPointExt writes (timestamp, value) with the given metricNameRaw and labels into ctx buffer.
//
// It returns metricNameRaw for the given labels if len(metricNameRaw) == 0.
func (ctx *InsertCtx) WriteDataPointExt(metricNameRaw []byte, labels []prompb.Label, timestamp int64, value float64) []byte {
if len(metricNameRaw) == 0 {
metricNameRaw = ctx.marshalMetricNameRaw(nil, labels)
}
ctx.addRow(metricNameRaw, timestamp, value)
return metricNameRaw
}
func (ctx *InsertCtx) addRow(metricNameRaw []byte, timestamp int64, value float64) {
mrs := ctx.mrs
if cap(mrs) > len(mrs) {
mrs = mrs[:len(mrs)+1]
} else {
mrs = append(mrs, storage.MetricRow{})
}
mr := &mrs[len(mrs)-1]
ctx.mrs = mrs
mr.MetricNameRaw = metricNameRaw
mr.Timestamp = timestamp
mr.Value = value
}
// AddLabel adds (name, value) label to ctx.Labels.
//
// name and value must exist until ctx.Labels is used.
func (ctx *InsertCtx) AddLabel(name, value string) {
labels := ctx.Labels
if cap(labels) > len(labels) {
labels = labels[:len(labels)+1]
} else {
labels = append(labels, prompb.Label{})
}
label := &labels[len(labels)-1]
// Do not copy name and value contents for performance reasons.
// This reduces GC overhead on the number of objects and allocations.
label.Name = bytesutil.ToUnsafeBytes(name)
label.Value = bytesutil.ToUnsafeBytes(value)
ctx.Labels = labels
}
// FlushBufs flushes buffered rows to the underlying storage.
func (ctx *InsertCtx) FlushBufs() error {
if err := vmstorage.AddRows(ctx.mrs); err != nil {
return fmt.Errorf("cannot store metrics: %s", err)
}
return nil
}


@@ -0,0 +1,79 @@
package common
import (
"bytes"
"fmt"
"io"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
)
// The maximum size of a single line returned by ReadLinesBlock.
const maxLineSize = 256 * 1024
// Default size in bytes of a single block returned by ReadLinesBlock.
const defaultBlockSize = 64 * 1024
// ReadLinesBlock reads a block of lines delimited by '\n' from tailBuf and r into dstBuf.
//
// Trailing chars after the last newline are put into tailBuf.
//
// Returns (dstBuf, tailBuf).
func ReadLinesBlock(r io.Reader, dstBuf, tailBuf []byte) ([]byte, []byte, error) {
return ReadLinesBlockExt(r, dstBuf, tailBuf, maxLineSize)
}
// ReadLinesBlockExt reads a block of lines delimited by '\n' from tailBuf and r into dstBuf.
//
// Trailing chars after the last newline are put into tailBuf.
//
// Returns (dstBuf, tailBuf).
//
// maxLineLen limits the maximum length of a single line.
func ReadLinesBlockExt(r io.Reader, dstBuf, tailBuf []byte, maxLineLen int) ([]byte, []byte, error) {
if cap(dstBuf) < defaultBlockSize {
dstBuf = bytesutil.Resize(dstBuf, defaultBlockSize)
}
dstBuf = append(dstBuf[:0], tailBuf...)
tailBuf = tailBuf[:0]
again:
n, err := r.Read(dstBuf[len(dstBuf):cap(dstBuf)])
// Check for error only if zero bytes read from r, i.e. no forward progress made.
// Otherwise process the read data.
if n == 0 {
if err == nil {
return dstBuf, tailBuf, fmt.Errorf("no forward progress made")
}
if err == io.EOF && len(dstBuf) > 0 {
// Missing newline at the end of the stream. This is OK,
// so suppress io.EOF for now. It will be returned during the next
// call to ReadLinesBlock.
// This fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/60 .
return dstBuf, tailBuf, nil
}
return dstBuf, tailBuf, err
}
dstBuf = dstBuf[:len(dstBuf)+n]
// Search for the last newline in dstBuf and put the rest into tailBuf.
nn := bytes.LastIndexByte(dstBuf[len(dstBuf)-n:], '\n')
if nn < 0 {
// Didn't find a single complete line.
if len(dstBuf) > maxLineLen {
return dstBuf, tailBuf, fmt.Errorf("too long line: more than %d bytes", maxLineLen)
}
if cap(dstBuf) < 2*len(dstBuf) {
// Increase dstBuf capacity, so more data can be read into it.
dstBufLen := len(dstBuf)
dstBuf = bytesutil.Resize(dstBuf, 2*cap(dstBuf))
dstBuf = dstBuf[:dstBufLen]
}
goto again
}
// Found at least a single line. Return it.
nn += len(dstBuf) - n
tailBuf = append(tailBuf[:0], dstBuf[nn+1:]...)
dstBuf = dstBuf[:nn]
return dstBuf, tailBuf, nil
}
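The intended consumption loop, sketched with a made-up input (io.EOF terminates the loop; each returned dstBuf holds whole '\n'-separated lines, while the incomplete tail is carried between calls via tailBuf, mirroring the tests below):

package main

import (
	"fmt"
	"io"
	"strings"

	"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/common"
)

func main() {
	r := strings.NewReader("foo\nbar\nbaz")
	var (
		dstBuf, tailBuf []byte
		err             error
	)
	for {
		dstBuf, tailBuf, err = common.ReadLinesBlock(r, dstBuf, tailBuf)
		if err != nil {
			if err == io.EOF {
				break
			}
			panic(err)
		}
		// The first iteration prints "foo\nbar"; the trailing "baz"
		// without a newline arrives on the next iteration.
		fmt.Printf("block: %q\n", dstBuf)
	}
}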


@@ -0,0 +1,213 @@
package common
import (
"bytes"
"fmt"
"io"
"reflect"
"testing"
)
func TestReadLinesBlockFailure(t *testing.T) {
f := func(s string) {
t.Helper()
r := bytes.NewBufferString(s)
if _, _, err := ReadLinesBlock(r, nil, nil); err == nil {
t.Fatalf("expecting non-nil error")
}
sbr := &singleByteReader{
b: []byte(s),
}
if _, _, err := ReadLinesBlock(sbr, nil, nil); err == nil {
t.Fatalf("expecting non-nil error")
}
fr := &failureReader{}
if _, _, err := ReadLinesBlock(fr, nil, nil); err == nil {
t.Fatalf("expecting non-nil error")
}
}
// empty string
f("")
// too long string
b := make([]byte, maxLineSize+1)
f(string(b))
}
type failureReader struct{}
func (fr *failureReader) Read(p []byte) (int, error) {
return 0, fmt.Errorf("some error")
}
func TestReadLinesBlockMultiLinesSingleByteReader(t *testing.T) {
f := func(s string, linesExpected []string) {
t.Helper()
r := &singleByteReader{
b: []byte(s),
}
var err error
var dstBuf, tailBuf []byte
var lines []string
for {
dstBuf, tailBuf, err = ReadLinesBlock(r, dstBuf, tailBuf)
if err != nil {
if err == io.EOF {
break
}
t.Fatalf("unexpected error in ReadLinesBlock(%q): %s", s, err)
}
lines = append(lines, string(dstBuf))
}
if !reflect.DeepEqual(lines, linesExpected) {
t.Fatalf("unexpected lines after reading %q: got %q; want %q", s, lines, linesExpected)
}
}
f("", nil)
f("foo", []string{"foo"})
f("foo\n", []string{"foo"})
f("foo\nbar", []string{"foo", "bar"})
f("\nfoo\nbar", []string{"", "foo", "bar"})
f("\nfoo\nbar\n", []string{"", "foo", "bar"})
f("\nfoo\nbar\n\n", []string{"", "foo", "bar", ""})
}
func TestReadLinesBlockMultiLinesBytesBuffer(t *testing.T) {
f := func(s string, linesExpected []string) {
t.Helper()
r := bytes.NewBufferString(s)
var err error
var dstBuf, tailBuf []byte
var lines []string
for {
dstBuf, tailBuf, err = ReadLinesBlock(r, dstBuf, tailBuf)
if err != nil {
if err == io.EOF {
break
}
t.Fatalf("unexpected error in ReadLinesBlock(%q): %s", s, err)
}
lines = append(lines, string(dstBuf))
}
if !reflect.DeepEqual(lines, linesExpected) {
t.Fatalf("unexpected lines after reading %q: got %q; want %q", s, lines, linesExpected)
}
}
f("", nil)
f("foo", []string{"foo"})
f("foo\n", []string{"foo"})
f("foo\nbar", []string{"foo", "bar"})
f("\nfoo\nbar", []string{"\nfoo", "bar"})
f("\nfoo\nbar\n", []string{"\nfoo\nbar"})
f("\nfoo\nbar\n\n", []string{"\nfoo\nbar\n"})
}
func TestReadLinesBlockSuccessSingleByteReader(t *testing.T) {
f := func(s, dstBufExpected, tailBufExpected string) {
t.Helper()
r := &singleByteReader{
b: []byte(s),
}
dstBuf, tailBuf, err := ReadLinesBlock(r, nil, nil)
if err != nil {
t.Fatalf("unexpected error: %s", err)
}
if string(dstBuf) != dstBufExpected {
t.Fatalf("unexpected dstBuf; got %q; want %q; tailBuf=%q", dstBuf, dstBufExpected, tailBuf)
}
if string(tailBuf) != tailBufExpected {
t.Fatalf("unexpected tailBuf; got %q; want %q; dstBuf=%q", tailBuf, tailBufExpected, dstBuf)
}
// Verify the same with non-empty dstBuf and tailBuf
r = &singleByteReader{
b: []byte(s),
}
dstBuf, tailBuf, err = ReadLinesBlock(r, dstBuf, tailBuf[:0])
if err != nil {
t.Fatalf("non-empty bufs: unexpected error: %s", err)
}
if string(dstBuf) != dstBufExpected {
t.Fatalf("non-empty bufs: unexpected dstBuf; got %q; want %q; tailBuf=%q", dstBuf, dstBufExpected, tailBuf)
}
if string(tailBuf) != tailBufExpected {
t.Fatalf("non-empty bufs: unexpected tailBuf; got %q; want %q; dstBuf=%q", tailBuf, tailBufExpected, dstBuf)
}
}
f("\n", "", "")
f("foo\n", "foo", "")
f("\nfoo", "", "")
f("foo\nbar", "foo", "")
f("foo\nbar\nbaz", "foo", "")
f("foo", "foo", "")
// The maximum line size
b := make([]byte, maxLineSize+10)
b[maxLineSize] = '\n'
f(string(b), string(b[:maxLineSize]), "")
}
func TestReadLinesBlockSuccessBytesBuffer(t *testing.T) {
f := func(s, dstBufExpected, tailBufExpected string) {
t.Helper()
r := bytes.NewBufferString(s)
dstBuf, tailBuf, err := ReadLinesBlock(r, nil, nil)
if err != nil {
t.Fatalf("unexpected error: %s", err)
}
if string(dstBuf) != dstBufExpected {
t.Fatalf("unexpected dstBuf; got %q; want %q; tailBuf=%q", dstBuf, dstBufExpected, tailBuf)
}
if string(tailBuf) != tailBufExpected {
t.Fatalf("unexpected tailBuf; got %q; want %q; dstBuf=%q", tailBuf, tailBufExpected, dstBuf)
}
// Verify the same with non-empty dstBuf and tailBuf
r = bytes.NewBufferString(s)
dstBuf, tailBuf, err = ReadLinesBlock(r, dstBuf, tailBuf[:0])
if err != nil {
t.Fatalf("non-empty bufs: unexpected error: %s", err)
}
if string(dstBuf) != dstBufExpected {
t.Fatalf("non-empty bufs: unexpected dstBuf; got %q; want %q; tailBuf=%q", dstBuf, dstBufExpected, tailBuf)
}
if string(tailBuf) != tailBufExpected {
t.Fatalf("non-empty bufs: unexpected tailBuf; got %q; want %q; dstBuf=%q", tailBuf, tailBufExpected, dstBuf)
}
}
f("\n", "", "")
f("foo\n", "foo", "")
f("\nfoo", "", "foo")
f("foo\nbar", "foo", "bar")
f("foo\nbar\nbaz", "foo\nbar", "baz")
// The maximum line size
b := make([]byte, maxLineSize+10)
b[maxLineSize] = '\n'
f(string(b), string(b[:maxLineSize]), string(b[maxLineSize+1:]))
}
type singleByteReader struct {
b []byte
}
func (sbr *singleByteReader) Read(p []byte) (int, error) {
if len(sbr.b) == 0 {
return 0, io.EOF
}
n := copy(p, sbr.b[:1])
sbr.b = sbr.b[n:]
if len(sbr.b) == 0 {
return n, io.EOF
}
return n, nil
}


@@ -1,34 +1,75 @@
package concurrencylimiter
import (
"flag"
"fmt"
"net/http"
"runtime"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timerpool"
"github.com/VictoriaMetrics/metrics"
)
var maxConcurrentInserts = flag.Int("maxConcurrentInserts", runtime.GOMAXPROCS(-1)*4, "The maximum number of concurrent inserts")
var (
// ch is the channel for limiting concurrent inserts.
// Put an item into it before performing an insert and remove
// the item after the insert is complete.
ch = make(chan struct{}, runtime.GOMAXPROCS(-1)*2)
// ch is the channel for limiting concurrent calls to Do.
ch chan struct{}
// waitDuration is the amount of time to wait until at least a single
// concurrent insert out of cap(Ch) inserts is complete.
// concurrent Do call out of cap(ch) inserts is complete.
waitDuration = time.Second * 30
)
// Init initializes concurrencylimiter.
//
// Init must be called after flag.Parse call.
func Init() {
ch = make(chan struct{}, *maxConcurrentInserts)
}
// Do calls f with the limited concurrency.
func Do(f func() error) error {
// Limit the number of concurrent inserts in order to prevent excess
// Limit the number of concurrent f calls in order to prevent excess
// memory usage and CPU thrashing.
t := time.NewTimer(waitDuration)
select {
case ch <- struct{}{}:
t.Stop()
err := f()
<-ch
return err
default:
}
// All the workers are busy.
// Sleep for up to waitDuration.
concurrencyLimitReached.Inc()
t := timerpool.Get(waitDuration)
select {
case ch <- struct{}{}:
timerpool.Put(t)
err := f()
<-ch
return err
case <-t.C:
return fmt.Errorf("the server is overloaded with %d concurrent inserts; either increase the number of CPUs or reduce the load", cap(ch))
timerpool.Put(t)
concurrencyLimitTimeout.Inc()
return &httpserver.ErrorWithStatusCode{
Err: fmt.Errorf("the server is overloaded with %d concurrent inserts; either increase -maxConcurrentInserts or reduce the load", cap(ch)),
StatusCode: http.StatusServiceUnavailable,
}
}
}
var (
concurrencyLimitReached = metrics.NewCounter(`vm_concurrent_insert_limit_reached_total`)
concurrencyLimitTimeout = metrics.NewCounter(`vm_concurrent_insert_limit_timeout_total`)
_ = metrics.NewGauge(`vm_concurrent_insert_capacity`, func() float64 {
return float64(cap(ch))
})
_ = metrics.NewGauge(`vm_concurrent_insert_current`, func() float64 {
return float64(len(ch))
})
)
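A sketch of the intended call pattern (the callback body is a placeholder): Init must run after flag.Parse so -maxConcurrentInserts takes effect, and Do then admits at most that many callbacks at a time, returning the 503 error above if the 30-second wait expires:

package main

import (
	"flag"
	"fmt"

	"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/concurrencylimiter"
)

func main() {
	flag.Parse()
	concurrencylimiter.Init()

	err := concurrencylimiter.Do(func() error {
		// Insert work goes here; at most -maxConcurrentInserts
		// of these callbacks run concurrently.
		fmt.Println("inserting...")
		return nil
	})
	if err != nil {
		fmt.Println("insert failed:", err)
	}
}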


@@ -0,0 +1,5 @@
FROM scratch
COPY --from=local/certs:1.0.3 /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ca-certificates.crt
COPY bin/vminsert-prod .
EXPOSE 8480
ENTRYPOINT ["/vminsert-prod"]


@@ -4,6 +4,8 @@ import (
"fmt"
"strings"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/metrics"
"github.com/valyala/fastjson/fastfloat"
)
@@ -34,13 +36,8 @@ func (rs *Rows) Reset() {
// See https://graphite.readthedocs.io/en/latest/feeding-carbon.html#the-plaintext-protocol
//
// s must be unchanged until rs is in use.
func (rs *Rows) Unmarshal(s string) error {
var err error
rs.Rows, rs.tagsPool, err = unmarshalRows(rs.Rows[:0], s, rs.tagsPool[:0])
if err != nil {
return err
}
return err
func (rs *Rows) Unmarshal(s string) {
rs.Rows, rs.tagsPool = unmarshalRows(rs.Rows[:0], s, rs.tagsPool[:0])
}
// Row is a single graphite row.
@@ -83,49 +80,61 @@ func (r *Row) unmarshal(s string, tagsPool []Tag) ([]Tag, error) {
tags := tagsPool[tagsStart:]
r.Tags = tags[:len(tags):len(tags)]
}
if len(r.Metric) == 0 {
return tagsPool, fmt.Errorf("metric cannot be empty")
}
n = strings.IndexByte(tail, ' ')
if n < 0 {
return tagsPool, fmt.Errorf("cannot find whitespace between value and timestamp in %q", s)
// There is no timestamp. Use default timestamp instead.
r.Value = fastfloat.ParseBestEffort(tail)
return tagsPool, nil
}
r.Value = fastfloat.ParseBestEffort(tail[:n])
r.Timestamp = fastfloat.ParseInt64BestEffort(tail[n+1:])
return tagsPool, nil
}
func unmarshalRows(dst []Row, s string, tagsPool []Tag) ([]Row, []Tag, error) {
func unmarshalRows(dst []Row, s string, tagsPool []Tag) ([]Row, []Tag) {
for len(s) > 0 {
n := strings.IndexByte(s, '\n')
if n == 0 {
// Skip empty line
s = s[1:]
continue
}
if cap(dst) > len(dst) {
dst = dst[:len(dst)+1]
} else {
dst = append(dst, Row{})
}
r := &dst[len(dst)-1]
if n < 0 {
// The last line.
var err error
tagsPool, err = r.unmarshal(s, tagsPool)
if err != nil {
return dst, tagsPool, err
}
return dst, tagsPool, nil
}
var err error
tagsPool, err = r.unmarshal(s[:n], tagsPool)
if err != nil {
return dst, tagsPool, err
return unmarshalRow(dst, s, tagsPool)
}
dst, tagsPool = unmarshalRow(dst, s[:n], tagsPool)
s = s[n+1:]
}
return dst, tagsPool, nil
return dst, tagsPool
}
func unmarshalRow(dst []Row, s string, tagsPool []Tag) ([]Row, []Tag) {
if len(s) > 0 && s[len(s)-1] == '\r' {
s = s[:len(s)-1]
}
if len(s) == 0 {
// Skip empty line
return dst, tagsPool
}
if cap(dst) > len(dst) {
dst = dst[:len(dst)+1]
} else {
dst = append(dst, Row{})
}
r := &dst[len(dst)-1]
var err error
tagsPool, err = r.unmarshal(s, tagsPool)
if err != nil {
dst = dst[:len(dst)-1]
logger.Errorf("cannot unmarshal Graphite line %q: %s", s, err)
invalidLines.Inc()
}
return dst, tagsPool
}
var invalidLines = metrics.NewCounter(`vm_rows_invalid_total{type="graphite"}`)
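// As a concrete illustration (sample data made up): feeding the block below to
// Rows.Unmarshal now yields a single parsed row. The malformed first line is
// logged, counted in vm_rows_invalid_total{type="graphite"} and dropped instead
// of failing the whole block:
//
//	aaa
//	cpu.usage 0.95 1575900000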
func unmarshalTags(dst []Tag, s string) ([]Tag, error) {
for {
if cap(dst) > len(dst) {
@@ -141,12 +150,20 @@ func unmarshalTags(dst []Tag, s string) ([]Tag, error) {
if err := tag.unmarshal(s); err != nil {
return dst[:len(dst)-1], err
}
if len(tag.Key) == 0 || len(tag.Value) == 0 {
// Skip empty tag
dst = dst[:len(dst)-1]
}
return dst, nil
}
if err := tag.unmarshal(s[:n]); err != nil {
return dst[:len(dst)-1], err
}
s = s[n+1:]
if len(tag.Key) == 0 || len(tag.Value) == 0 {
// Skip empty tag
dst = dst[:len(dst)-1]
}
}
}
@@ -168,9 +185,6 @@ func (t *Tag) unmarshal(s string) error {
return fmt.Errorf("missing tag value for %q", s)
}
t.Key = s[:n]
if len(t.Key) == 0 {
return fmt.Errorf("tag key cannot be empty for %q", s)
}
t.Value = s[n+1:]
return nil
}


@@ -9,48 +9,42 @@ func TestRowsUnmarshalFailure(t *testing.T) {
f := func(s string) {
t.Helper()
var rows Rows
if err := rows.Unmarshal(s); err == nil {
t.Fatalf("expecting non-nil error when parsing %q", s)
rows.Unmarshal(s)
if len(rows.Rows) != 0 {
t.Fatalf("unexpected number of rows parsed; got %d; want 0", len(rows.Rows))
}
// Try again
if err := rows.Unmarshal(s); err == nil {
t.Fatalf("expecting non-nil error when parsing %q", s)
rows.Unmarshal(s)
if len(rows.Rows) != 0 {
t.Fatalf("unexpected number of rows parsed; got %d; want 0", len(rows.Rows))
}
}
// Missing metric
f(" 123 455")
// Missing value
f("aaa")
// Missing timestamp
f("aaa 1123")
// Invalid multiline
f("aaa\nbbb 123 34")
// missing tag
f("aa; 12 34")
// missing tag value
f("aa;bb 23 34")
f("aa;=dsd 234 45")
}
func TestRowsUnmarshalSuccess(t *testing.T) {
f := func(s string, rowsExpected *Rows) {
t.Helper()
var rows Rows
if err := rows.Unmarshal(s); err != nil {
t.Fatalf("cannot unmarshal %q: %s", s, err)
}
rows.Unmarshal(s)
if !reflect.DeepEqual(rows.Rows, rowsExpected.Rows) {
t.Fatalf("unexpected rows;\ngot\n%+v;\nwant\n%+v", rows.Rows, rowsExpected.Rows)
}
// Try unmarshaling again
if err := rows.Unmarshal(s); err != nil {
t.Fatalf("cannot unmarshal %q: %s", s, err)
}
rows.Unmarshal(s)
if !reflect.DeepEqual(rows.Rows, rowsExpected.Rows) {
t.Fatalf("unexpected rows;\ngot\n%+v;\nwant\n%+v", rows.Rows, rowsExpected.Rows)
}
@@ -63,7 +57,9 @@ func TestRowsUnmarshalSuccess(t *testing.T) {
// Empty line
f("", &Rows{})
f("\r", &Rows{})
f("\n\n", &Rows{})
f("\n\r\n", &Rows{})
// Single line
f("foobar -123.456 789", &Rows{
@@ -81,6 +77,23 @@ func TestRowsUnmarshalSuccess(t *testing.T) {
}},
})
// Missing timestamp
f("aaa 1123", &Rows{
Rows: []Row{{
Metric: "aaa",
Value: 1123,
}},
})
// Timestamp bigger than 1<<31
f("aaa 1123 429496729600", &Rows{
Rows: []Row{{
Metric: "aaa",
Value: 1123,
Timestamp: 429496729600,
}},
})
// Tags
f("foo;bar=baz 1 2", &Rows{
Rows: []Row{{
@@ -93,7 +106,8 @@ func TestRowsUnmarshalSuccess(t *testing.T) {
Timestamp: 2,
}},
})
f("foo;bar=baz;aa=;x=y 1 2", &Rows{
// Empty tags
f("foo;bar=baz;aa=;x=y;=z 1 2", &Rows{
Rows: []Row{{
Metric: "foo",
Tags: []Tag{
@@ -101,10 +115,6 @@ func TestRowsUnmarshalSuccess(t *testing.T) {
Key: "bar",
Value: "baz",
},
{
Key: "aa",
Value: "",
},
{
Key: "x",
Value: "y",
@@ -116,7 +126,27 @@ func TestRowsUnmarshalSuccess(t *testing.T) {
})
// Multi lines
f("foo 0.3 2\nbar.baz 0.34 43\n", &Rows{
f("foo 0.3 2\naaa 3\nbar.baz 0.34 43\n", &Rows{
Rows: []Row{
{
Metric: "foo",
Value: 0.3,
Timestamp: 2,
},
{
Metric: "aaa",
Value: 3,
},
{
Metric: "bar.baz",
Value: 0.34,
Timestamp: 43,
},
},
})
// Multi lines with invalid line
f("foo 0.3 2\naaa\nbar.baz 0.34 43\n", &Rows{
Rows: []Row{
{
Metric: "foo",


@@ -16,8 +16,9 @@ cpu.usage_irq 0.34432 1234556768
b.RunParallel(func(pb *testing.PB) {
var rows Rows
for pb.Next() {
if err := rows.Unmarshal(s); err != nil {
panic(fmt.Errorf("cannot unmarshal %q: %s", s, err))
rows.Unmarshal(s)
if len(rows.Rows) != 4 {
panic(fmt.Errorf("unexpected number of rows unmarshaled: got %d; want 4", len(rows.Rows)))
}
}
})


@@ -1,7 +1,6 @@
package graphite
import (
"bytes"
"fmt"
"io"
"net"
@@ -11,110 +10,125 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/common"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/concurrencylimiter"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/netstorage"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
"github.com/VictoriaMetrics/metrics"
"github.com/valyala/fastjson/fastfloat"
)
var rowsInserted = metrics.NewCounter(`vm_rows_inserted_total{type="graphite"}`)
var (
rowsInserted = tenantmetrics.NewCounterMap(`vm_rows_inserted_total{type="graphite"}`)
rowsPerInsert = metrics.NewSummary(`vm_rows_per_insert{type="graphite"}`)
)
// insertHandler processes remote write for graphite plaintext protocol.
//
// See https://graphite.readthedocs.io/en/latest/feeding-carbon.html#the-plaintext-protocol
func insertHandler(r io.Reader) error {
func insertHandler(at *auth.Token, r io.Reader) error {
return concurrencylimiter.Do(func() error {
return insertHandlerInternal(r)
return insertHandlerInternal(at, r)
})
}
func insertHandlerInternal(r io.Reader) error {
func insertHandlerInternal(at *auth.Token, r io.Reader) error {
ctx := getPushCtx()
defer putPushCtx(ctx)
for ctx.Read(r) {
if err := ctx.InsertRows(); err != nil {
if err := ctx.InsertRows(at); err != nil {
return err
}
}
return ctx.Error()
}
func (ctx *pushCtx) InsertRows() error {
func (ctx *pushCtx) InsertRows(at *auth.Token) error {
rows := ctx.Rows.Rows
ic := &ctx.Common
ic.Reset(len(rows))
ic.Reset()
atCopy := *at
for i := range rows {
r := &rows[i]
ic.Labels = ic.Labels[:0]
ic.AddLabel("", r.Metric)
for j := range r.Tags {
tag := &r.Tags[j]
if atCopy.AccountID == 0 {
// Multi-tenancy support via custom tags.
// Do not allow overriding AccountID and ProjectID from atCopy for security reasons.
if tag.Key == "VictoriaMetrics_AccountID" {
atCopy.AccountID = uint32(fastfloat.ParseUint64BestEffort(tag.Value))
}
if atCopy.ProjectID == 0 && tag.Key == "VictoriaMetrics_ProjectID" {
atCopy.ProjectID = uint32(fastfloat.ParseUint64BestEffort(tag.Value))
}
}
ic.AddLabel(tag.Key, tag.Value)
}
ic.WriteDataPoint(nil, ic.Labels, r.Timestamp, r.Value)
if err := ic.WriteDataPoint(&atCopy, ic.Labels, r.Timestamp, r.Value); err != nil {
return err
}
}
rowsInserted.Add(len(rows))
// Assume that all the rows for a single connection belong to the same (AccountID, ProjectID).
rowsInserted.Get(&atCopy).Add(len(rows))
rowsPerInsert.Update(float64(len(rows)))
return ic.FlushBufs()
}
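// For example (metric name and values made up), when the connection-level auth
// token has AccountID=0, the special tags handled above route this plaintext
// line to AccountID=42 and ProjectID=7:
//
//	cpu.usage;VictoriaMetrics_AccountID=42;VictoriaMetrics_ProjectID=7 0.95 1575900000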
const maxReadPacketSize = 4 * 1024 * 1024
const flushTimeout = 3 * time.Second
func (ctx *pushCtx) Read(r io.Reader) bool {
graphiteReadCalls.Inc()
readCalls.Inc()
if ctx.err != nil {
return false
}
if c, ok := r.(net.Conn); ok {
if err := c.SetReadDeadline(time.Now().Add(flushTimeout)); err != nil {
graphiteReadErrors.Inc()
readErrors.Inc()
ctx.err = fmt.Errorf("cannot set read deadline: %s", err)
return false
}
}
lr := io.LimitReader(r, maxReadPacketSize)
ctx.reqBuf.Reset()
ctx.reqBuf.B = append(ctx.reqBuf.B[:0], ctx.tailBuf...)
n, err := io.CopyBuffer(&ctx.reqBuf, lr, ctx.copyBuf[:])
if err != nil {
if ne, ok := err.(net.Error); ok && ne.Timeout() {
ctx.reqBuf, ctx.tailBuf, ctx.err = common.ReadLinesBlock(r, ctx.reqBuf, ctx.tailBuf)
if ctx.err != nil {
if ne, ok := ctx.err.(net.Error); ok && ne.Timeout() {
// Flush the read data on timeout and try reading again.
ctx.err = nil
} else {
graphiteReadErrors.Inc()
ctx.err = fmt.Errorf("cannot read graphite plaintext protocol data: %s", err)
if ctx.err != io.EOF {
readErrors.Inc()
ctx.err = fmt.Errorf("cannot read graphite plaintext protocol data: %s", ctx.err)
}
return false
}
} else if n < maxReadPacketSize {
// Mark the end of stream.
ctx.err = io.EOF
}
ctx.Rows.Unmarshal(bytesutil.ToUnsafeString(ctx.reqBuf))
// Fill missing timestamps with the current timestamp rounded to seconds.
currentTimestamp := time.Now().Unix()
rows := ctx.Rows.Rows
for i := range rows {
r := &rows[i]
if r.Timestamp == 0 {
r.Timestamp = currentTimestamp
}
}
// Parse all the rows until the last newline in ctx.reqBuf.B
nn := bytes.LastIndexByte(ctx.reqBuf.B, '\n')
ctx.tailBuf = ctx.tailBuf[:0]
if nn >= 0 {
ctx.tailBuf = append(ctx.tailBuf[:0], ctx.reqBuf.B[nn+1:]...)
ctx.reqBuf.B = ctx.reqBuf.B[:nn]
}
if err = ctx.Rows.Unmarshal(bytesutil.ToUnsafeString(ctx.reqBuf.B)); err != nil {
graphiteUnmarshalErrors.Inc()
ctx.err = fmt.Errorf("cannot unmarshal graphite plaintext protocol data with size %d: %s", len(ctx.reqBuf.B), err)
return false
// Convert timestamps from seconds to milliseconds.
for i := range rows {
rows[i].Timestamp *= 1e3
}
// Convert timestamps from seconds to milliseconds
for i := range ctx.Rows.Rows {
ctx.Rows.Rows[i].Timestamp *= 1e3
}
return true
}
type pushCtx struct {
Rows Rows
Common common.InsertCtx
Common netstorage.InsertCtx
reqBuf bytesutil.ByteBuffer
reqBuf []byte
tailBuf []byte
copyBuf [16 * 1024]byte
err error
}
@@ -128,17 +142,16 @@ func (ctx *pushCtx) Error() error {
func (ctx *pushCtx) reset() {
ctx.Rows.Reset()
ctx.Common.Reset(0)
ctx.reqBuf.Reset()
ctx.Common.Reset()
ctx.reqBuf = ctx.reqBuf[:0]
ctx.tailBuf = ctx.tailBuf[:0]
ctx.err = nil
}
var (
graphiteReadCalls = metrics.NewCounter(`vm_read_calls_total{name="graphite"}`)
graphiteReadErrors = metrics.NewCounter(`vm_read_errors_total{name="graphite"}`)
graphiteUnmarshalErrors = metrics.NewCounter(`vm_unmarshal_errors_total{name="graphite"}`)
readCalls = metrics.NewCounter(`vm_read_calls_total{name="graphite"}`)
readErrors = metrics.NewCounter(`vm_read_errors_total{name="graphite"}`)
)
func getPushCtx() *pushCtx {

View File

@@ -7,8 +7,10 @@ import (
"sync"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil"
"github.com/VictoriaMetrics/metrics"
)
@@ -23,7 +25,7 @@ var (
// Serve starts graphite server on the given addr.
func Serve(addr string) {
logger.Infof("starting TCP Graphite server at %q", addr)
lnTCP, err := net.Listen("tcp4", addr)
lnTCP, err := netutil.NewTCPListener("graphite", addr)
if err != nil {
logger.Fatalf("cannot start TCP Graphite server at %q: %s", addr, err)
}
@@ -70,7 +72,8 @@ func serveTCP(ln net.Listener) {
}
go func() {
writeRequestsTCP.Inc()
if err := insertHandler(c); err != nil {
var at auth.Token // TODO: properly initialize auth token
if err := insertHandler(&at, c); err != nil {
writeErrorsTCP.Inc()
logger.Errorf("error in TCP Graphite conn %q<->%q: %s", c.LocalAddr(), c.RemoteAddr(), err)
}
@@ -88,6 +91,7 @@ func serveUDP(ln net.PacketConn) {
defer wg.Done()
var bb bytesutil.ByteBuffer
bb.B = bytesutil.Resize(bb.B, 64*1024)
var at auth.Token // TODO: properly initialize auth token
for {
bb.Reset()
bb.B = bb.B[:cap(bb.B)]
@@ -108,7 +112,7 @@ func serveUDP(ln net.PacketConn) {
}
bb.B = bb.B[:n]
writeRequestsUDP.Inc()
if err := insertHandler(bb.NewReader()); err != nil {
if err := insertHandler(&at, bb.NewReader()); err != nil {
writeErrorsUDP.Inc()
logger.Errorf("error in UDP Graphite conn %q<->%q: %s", ln.LocalAddr(), addr, err)
continue


@@ -4,6 +4,8 @@ import (
"fmt"
"strings"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/metrics"
"github.com/valyala/fastjson/fastfloat"
)
@@ -41,13 +43,8 @@ func (rs *Rows) Reset() {
// See https://docs.influxdata.com/influxdb/v1.7/write_protocols/line_protocol_tutorial/
//
// s must be unchanged until rs is in use.
func (rs *Rows) Unmarshal(s string) error {
var err error
rs.Rows, rs.tagsPool, rs.fieldsPool, err = unmarshalRows(rs.Rows[:0], s, rs.tagsPool[:0], rs.fieldsPool[:0])
if err != nil {
return err
}
return err
func (rs *Rows) Unmarshal(s string) {
rs.Rows, rs.tagsPool, rs.fieldsPool = unmarshalRows(rs.Rows[:0], s, rs.tagsPool[:0], rs.fieldsPool[:0])
}
// Row is a single influx row.
@@ -65,9 +62,8 @@ func (r *Row) reset() {
r.Timestamp = 0
}
func (r *Row) unmarshal(s string, tagsPool []Tag, fieldsPool []Field) ([]Tag, []Field, error) {
func (r *Row) unmarshal(s string, tagsPool []Tag, fieldsPool []Field, noEscapeChars bool) ([]Tag, []Field, error) {
r.reset()
noEscapeChars := strings.IndexByte(s, '\\') < 0
n := nextUnescapedChar(s, ' ', noEscapeChars)
if n < 0 {
return tagsPool, fieldsPool, fmt.Errorf("cannot find Whitespace I in %q", s)
@@ -89,9 +85,7 @@ func (r *Row) unmarshal(s string, tagsPool []Tag, fieldsPool []Field) ([]Tag, []
measurementTags = measurementTags[:n]
}
r.Measurement = unescapeTagValue(measurementTags, noEscapeChars)
if len(r.Measurement) == 0 {
return tagsPool, fieldsPool, fmt.Errorf("measurement cannot be empty. measurementTags=%q", s)
}
// Allow empty r.Measurement. In this case the metric name is constructed directly from field keys.
// Parse fields
fieldsStart := len(fieldsPool)
@@ -141,9 +135,6 @@ func (tag *Tag) unmarshal(s string, noEscapeChars bool) error {
return fmt.Errorf("missing tag value for %q", s)
}
tag.Key = unescapeTagValue(s[:n], noEscapeChars)
if len(tag.Key) == 0 {
return fmt.Errorf("tag key cannot be empty")
}
tag.Value = unescapeTagValue(s[n+1:], noEscapeChars)
return nil
}
@@ -177,39 +168,51 @@ func (f *Field) unmarshal(s string, noEscapeChars, hasQuotedFields bool) error {
return nil
}
func unmarshalRows(dst []Row, s string, tagsPool []Tag, fieldsPool []Field) ([]Row, []Tag, []Field, error) {
func unmarshalRows(dst []Row, s string, tagsPool []Tag, fieldsPool []Field) ([]Row, []Tag, []Field) {
noEscapeChars := strings.IndexByte(s, '\\') < 0
for len(s) > 0 {
n := strings.IndexByte(s, '\n')
if n == 0 {
// Skip empty line
s = s[1:]
continue
}
if cap(dst) > len(dst) {
dst = dst[:len(dst)+1]
} else {
dst = append(dst, Row{})
}
r := &dst[len(dst)-1]
if n < 0 {
// The last line.
var err error
tagsPool, fieldsPool, err = r.unmarshal(s, tagsPool, fieldsPool)
if err != nil {
return dst, tagsPool, fieldsPool, err
}
return dst, tagsPool, fieldsPool, nil
}
var err error
tagsPool, fieldsPool, err = r.unmarshal(s[:n], tagsPool, fieldsPool)
if err != nil {
return dst, tagsPool, fieldsPool, err
return unmarshalRow(dst, s, tagsPool, fieldsPool, noEscapeChars)
}
dst, tagsPool, fieldsPool = unmarshalRow(dst, s[:n], tagsPool, fieldsPool, noEscapeChars)
s = s[n+1:]
}
return dst, tagsPool, fieldsPool, nil
return dst, tagsPool, fieldsPool
}
func unmarshalRow(dst []Row, s string, tagsPool []Tag, fieldsPool []Field, noEscapeChars bool) ([]Row, []Tag, []Field) {
if len(s) > 0 && s[len(s)-1] == '\r' {
s = s[:len(s)-1]
}
if len(s) == 0 {
// Skip empty line
return dst, tagsPool, fieldsPool
}
if s[0] == '#' {
// Skip comment
return dst, tagsPool, fieldsPool
}
if cap(dst) > len(dst) {
dst = dst[:len(dst)+1]
} else {
dst = append(dst, Row{})
}
r := &dst[len(dst)-1]
var err error
tagsPool, fieldsPool, err = r.unmarshal(s, tagsPool, fieldsPool, noEscapeChars)
if err != nil {
dst = dst[:len(dst)-1]
logger.Errorf("cannot unmarshal Influx line %q: %s; skipping it", s, err)
invalidLines.Inc()
}
return dst, tagsPool, fieldsPool
}
var invalidLines = metrics.NewCounter(`vm_rows_invalid_total{type="influx"}`)
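// As with the Graphite parser, an invalid line no longer aborts the whole block
// (sample data made up): the first line below is logged, counted in
// vm_rows_invalid_total{type="influx"} and skipped, so Rows.Unmarshal returns a
// single row:
//
//	not-a-valid-line
//	cpu,host=a usage=0.5 1575900000000000000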
func unmarshalTags(dst []Tag, s string, noEscapeChars bool) ([]Tag, error) {
for {
if cap(dst) > len(dst) {
@@ -221,14 +224,22 @@ func unmarshalTags(dst []Tag, s string, noEscapeChars bool) ([]Tag, error) {
n := nextUnescapedChar(s, ',', noEscapeChars)
if n < 0 {
if err := tag.unmarshal(s, noEscapeChars); err != nil {
return dst, err
return dst[:len(dst)-1], err
}
if len(tag.Key) == 0 || len(tag.Value) == 0 {
// Skip empty tag
dst = dst[:len(dst)-1]
}
return dst, nil
}
if err := tag.unmarshal(s[:n], noEscapeChars); err != nil {
return dst, err
return dst[:len(dst)-1], err
}
s = s[n+1:]
if len(tag.Key) == 0 || len(tag.Value) == 0 {
// Skip empty tag
dst = dst[:len(dst)-1]
}
}
}
@@ -293,8 +304,10 @@ func parseFieldValue(s string, hasQuotedFields bool) (float64, error) {
if len(s) < 2 || s[len(s)-1] != '"' {
return 0, fmt.Errorf("missing closing quote for quoted field value %s", s)
}
// Quoted string is translated to empty value.
return 0, nil
// Try converting quoted string to number, since sometimes Influx agents
// send numbers as strings.
s = s[1 : len(s)-1]
return fastfloat.ParseBestEffort(s), nil
}
ch := s[len(s)-1]
if ch == 'i' {


@@ -74,19 +74,18 @@ func TestRowsUnmarshalFailure(t *testing.T) {
f := func(s string) {
t.Helper()
var rows Rows
if err := rows.Unmarshal(s); err == nil {
t.Fatalf("expecting non-nil error when parsing %q", s)
rows.Unmarshal(s)
if len(rows.Rows) != 0 {
t.Fatalf("expecting zero rows; got %d rows", len(rows.Rows))
}
// Try again
if err := rows.Unmarshal(s); err == nil {
t.Fatalf("expecting non-nil error when parsing %q", s)
rows.Unmarshal(s)
if len(rows.Rows) != 0 {
t.Fatalf("expecting zero rows; got %d rows", len(rows.Rows))
}
}
// Missing measurement
f(",foo=bar baz=123")
// No fields
f("foo")
f("foo,bar=baz 1234")
@@ -94,12 +93,8 @@ func TestRowsUnmarshalFailure(t *testing.T) {
// Missing tag value
f("foo,bar")
f("foo,bar baz")
f("foo,bar= baz")
f("foo,bar=123, 123")
// Missing tag name
f("foo,=bar baz=234")
// Missing field value
f("foo bar")
f("foo bar=")
@@ -122,17 +117,13 @@ func TestRowsUnmarshalSuccess(t *testing.T) {
f := func(s string, rowsExpected *Rows) {
t.Helper()
var rows Rows
if err := rows.Unmarshal(s); err != nil {
t.Fatalf("cannot unmarshal %q: %s", s, err)
}
rows.Unmarshal(s)
if !reflect.DeepEqual(rows.Rows, rowsExpected.Rows) {
t.Fatalf("unexpected rows;\ngot\n%+v;\nwant\n%+v", rows.Rows, rowsExpected.Rows)
}
// Try unmarshaling again
if err := rows.Unmarshal(s); err != nil {
t.Fatalf("cannot unmarshal %q: %s", s, err)
}
rows.Unmarshal(s)
if !reflect.DeepEqual(rows.Rows, rowsExpected.Rows) {
t.Fatalf("unexpected rows;\ngot\n%+v;\nwant\n%+v", rows.Rows, rowsExpected.Rows)
}
@@ -146,6 +137,36 @@ func TestRowsUnmarshalSuccess(t *testing.T) {
// Empty line
f("", &Rows{})
f("\n\n", &Rows{})
f("\n\r\n", &Rows{})
// Comment
f("\n# foobar\n", &Rows{})
f("#foobar baz", &Rows{})
f("#foobar baz\n#sss", &Rows{})
// Missing measurement
f(" baz=123", &Rows{
Rows: []Row{{
Measurement: "",
Fields: []Field{{
Key: "baz",
Value: 123,
}},
}},
})
f(",foo=bar baz=123", &Rows{
Rows: []Row{{
Measurement: "",
Tags: []Tag{{
Key: "foo",
Value: "bar",
}},
Fields: []Field{{
Key: "baz",
Value: 123,
}},
}},
})
// Minimal line without tags and timestamp
f("foo bar=123", &Rows{
@@ -157,6 +178,15 @@ func TestRowsUnmarshalSuccess(t *testing.T) {
}},
}},
})
f("# comment\nfoo bar=123\r\n#comment2 sdsf dsf", &Rows{
Rows: []Row{{
Measurement: "foo",
Fields: []Field{{
Key: "bar",
Value: 123,
}},
}},
})
f("foo bar=123\n", &Rows{
Rows: []Row{{
Measurement: "foo",
@@ -216,7 +246,7 @@ func TestRowsUnmarshalSuccess(t *testing.T) {
})
// Line with empty tag values
f("foo,tag1=xyz,tagN=,tag2=43as bar=123", &Rows{
f("foo,tag1=xyz,tagN=,tag2=43as,=xxx bar=123", &Rows{
Rows: []Row{{
Measurement: "foo",
Tags: []Tag{
@@ -224,10 +254,6 @@ func TestRowsUnmarshalSuccess(t *testing.T) {
Key: "tag1",
Value: "xyz",
},
{
Key: "tagN",
Value: "",
},
{
Key: "tag2",
Value: "43as",
@@ -241,17 +267,27 @@ func TestRowsUnmarshalSuccess(t *testing.T) {
})
// Line with multiple tags, multiple fields and timestamp
f(`system,host=ip-172-16-10-144 uptime_format="3 days, 21:01" 1557761040000000000`, &Rows{
f(`system,host=ip-172-16-10-144 uptime_format="3 days, 21:01",quoted_float="-1.23",quoted_int="123" 1557761040000000000`, &Rows{
Rows: []Row{{
Measurement: "system",
Tags: []Tag{{
Key: "host",
Value: "ip-172-16-10-144",
}},
Fields: []Field{{
Key: "uptime_format",
Value: 0,
}},
Fields: []Field{
{
Key: "uptime_format",
Value: 0,
},
{
Key: "quoted_float",
Value: -1.23,
},
{
Key: "quoted_int",
Value: 123,
},
},
Timestamp: 1557761040000000000,
}},
})
@@ -299,11 +335,11 @@ func TestRowsUnmarshalSuccess(t *testing.T) {
})
// Escape chars
f(`fo\,bar\=baz,x\==\\a\,\=\q\ \\\a\=\,=4.34`, &Rows{
f(`fo\,bar\=baz,x\=\b=\\a\,\=\q\ \\\a\=\,=4.34`, &Rows{
Rows: []Row{{
Measurement: `fo,bar=baz`,
Tags: []Tag{{
Key: `x=`,
Key: `x=\b`,
Value: `\a,=\q `,
}},
Fields: []Field{{
@@ -312,6 +348,36 @@ func TestRowsUnmarshalSuccess(t *testing.T) {
}},
}},
})
// Test case from https://community.librenms.org/t/integration-with-victoriametrics/9689
f("ports,foo=a,bar=et\\ +\\ V,baz=ype INDISCARDS=245333676,OUTDISCARDS=1798680", &Rows{
Rows: []Row{{
Measurement: "ports",
Tags: []Tag{
{
Key: "foo",
Value: "a",
},
{
Key: "bar",
Value: "et + V",
},
{
Key: "baz",
Value: "ype",
},
},
Fields: []Field{
{
Key: "INDISCARDS",
Value: 245333676,
},
{
Key: "OUTDISCARDS",
Value: 1798680,
},
},
}},
})
// Multiple lines
f("foo,tag=xyz field=1.23 48934\n"+
@@ -338,4 +404,78 @@ func TestRowsUnmarshalSuccess(t *testing.T) {
},
},
})
// Multiple lines with invalid line in the middle.
f("foo,tag=xyz field=1.23 48934\n"+
"invalid line\n"+
"bar x=-1i\n\n", &Rows{
Rows: []Row{
{
Measurement: "foo",
Tags: []Tag{{
Key: "tag",
Value: "xyz",
}},
Fields: []Field{{
Key: "field",
Value: 1.23,
}},
Timestamp: 48934,
},
{
Measurement: "bar",
Fields: []Field{{
Key: "x",
Value: -1,
}},
},
},
})
// No newline after the second line.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/82
f("foo,tag=xyz field=1.23 48934\n"+
"bar x=-1i", &Rows{
Rows: []Row{
{
Measurement: "foo",
Tags: []Tag{{
Key: "tag",
Value: "xyz",
}},
Fields: []Field{{
Key: "field",
Value: 1.23,
}},
Timestamp: 48934,
},
{
Measurement: "bar",
Fields: []Field{{
Key: "x",
Value: -1,
}},
},
},
})
f("x,y=z,g=p:\\ \\ 5432\\,\\ gp\\ mon\\ [lol]\\ con10\\ cmd5\\ SELECT f=1", &Rows{
Rows: []Row{{
Measurement: "x",
Tags: []Tag{
{
Key: "y",
Value: "z",
},
{
Key: "g",
Value: "p: 5432, gp mon [lol] con10 cmd5 SELECT",
},
},
Fields: []Field{{
Key: "f",
Value: 1,
}},
}},
})
}


@@ -6,14 +6,19 @@ import (
)
func BenchmarkRowsUnmarshal(b *testing.B) {
s := `cpu usage_user=1.23,usage_system=4.34,usage_iowait=0.1112 1234556768`
s := `cpu usage_user=1.23,usage_system=4.34,usage_iowait=0.1112 1234556768
cpu usage_user=1.23,usage_system=4.34,usage_iowait=0.1112 123455676344
aaa usage_user=1.23,usage_system=4.34,usage_iowait=0.1112 123455676344
bbb usage_user=1.23,usage_system=4.34,usage_iowait=0.1112 123455676344
`
b.SetBytes(int64(len(s)))
b.ReportAllocs()
b.RunParallel(func(pb *testing.PB) {
var rows Rows
for pb.Next() {
if err := rows.Unmarshal(s); err != nil {
panic(fmt.Errorf("cannot unmarshal %q: %s", s, err))
rows.Unmarshal(s)
if len(rows.Rows) != 4 {
panic(fmt.Errorf("unexpected number of rows parsed; got %d; want 4", len(rows.Rows)))
}
}
})


@@ -1,8 +1,7 @@
package influx
import (
"bytes"
"compress/gzip"
"flag"
"fmt"
"io"
"net/http"
@@ -12,32 +11,43 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/common"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/concurrencylimiter"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/netstorage"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
"github.com/VictoriaMetrics/metrics"
)
var rowsInserted = metrics.NewCounter(`vm_rows_inserted_total{type="influx"}`)
var (
measurementFieldSeparator = flag.String("influxMeasurementFieldSeparator", "_", "Separator for `{measurement}{separator}{field_name}` metric name when inserted via Influx line protocol")
skipSingleField = flag.Bool("influxSkipSingleField", false, "Uses `{measurement}` instead of `{measurement}{separator}{field_name}` for metric name if Influx line contains only a single field")
)
var (
rowsInserted = tenantmetrics.NewCounterMap(`vm_rows_inserted_total{type="influx"}`)
rowsPerInsert = metrics.NewSummary(`vm_rows_per_insert{type="influx"}`)
)
// InsertHandler processes remote write for influx line protocol.
//
// See https://github.com/influxdata/influxdb/blob/4cbdc197b8117fee648d62e2e5be75c6575352f0/tsdb/README.md
func InsertHandler(req *http.Request) error {
func InsertHandler(at *auth.Token, req *http.Request) error {
return concurrencylimiter.Do(func() error {
return insertHandlerInternal(req)
return insertHandlerInternal(at, req)
})
}
func insertHandlerInternal(req *http.Request) error {
influxReadCalls.Inc()
func insertHandlerInternal(at *auth.Token, req *http.Request) error {
readCalls.Inc()
r := req.Body
if req.Header.Get("Content-Encoding") == "gzip" {
zr, err := getGzipReader(r)
zr, err := common.GetGzipReader(r)
if err != nil {
return fmt.Errorf("cannot read gzipped influx line protocol data: %s", err)
}
defer putGzipReader(zr)
defer common.PutGzipReader(zr)
r = zr
}
@@ -64,97 +74,76 @@ func insertHandlerInternal(req *http.Request) error {
ctx := getPushCtx()
defer putPushCtx(ctx)
for ctx.Read(r, tsMultiplier) {
if err := ctx.InsertRows(db); err != nil {
if err := ctx.InsertRows(at, db); err != nil {
return err
}
}
return ctx.Error()
}
func (ctx *pushCtx) InsertRows(db string) error {
func (ctx *pushCtx) InsertRows(at *auth.Token, db string) error {
rows := ctx.Rows.Rows
rowsLen := 0
for i := range rows {
rowsLen += len(rows[i].Tags)
}
ic := &ctx.Common
ic.Reset(rowsLen)
ic.Reset()
rowsTotal := 0
for i := range rows {
r := &rows[i]
ic.Labels = ic.Labels[:0]
ic.AddLabel("db", db)
hasDBLabel := false
for j := range r.Tags {
tag := &r.Tags[j]
if tag.Key == "db" {
hasDBLabel = true
}
ic.AddLabel(tag.Key, tag.Value)
}
ctx.metricNameBuf = storage.MarshalMetricNameRaw(ctx.metricNameBuf[:0], ic.Labels)
if len(db) > 0 && !hasDBLabel {
ic.AddLabel("db", db)
}
ic.MetricNameBuf = storage.MarshalMetricNameRaw(ic.MetricNameBuf[:0], at.AccountID, at.ProjectID, ic.Labels)
metricNameBufLen := len(ic.MetricNameBuf)
ctx.metricGroupBuf = append(ctx.metricGroupBuf[:0], r.Measurement...)
ctx.metricGroupBuf = append(ctx.metricGroupBuf, '.')
skipFieldKey := len(r.Fields) == 1 && *skipSingleField
if len(ctx.metricGroupBuf) > 0 && !skipFieldKey {
ctx.metricGroupBuf = append(ctx.metricGroupBuf, *measurementFieldSeparator...)
}
metricGroupPrefixLen := len(ctx.metricGroupBuf)
ic.AddLabel("", "placeholder")
placeholderLabel := &ic.Labels[len(ic.Labels)-1]
for j := range r.Fields {
f := &r.Fields[j]
ctx.metricGroupBuf = append(ctx.metricGroupBuf[:metricGroupPrefixLen], f.Key...)
if !skipFieldKey {
ctx.metricGroupBuf = append(ctx.metricGroupBuf[:metricGroupPrefixLen], f.Key...)
}
metricGroup := bytesutil.ToUnsafeString(ctx.metricGroupBuf)
ic.Labels = ic.Labels[:0]
ic.Labels = ic.Labels[:len(ic.Labels)-1]
ic.AddLabel("", metricGroup)
ic.WriteDataPoint(ctx.metricNameBuf, ic.Labels[:1], r.Timestamp, f.Value)
ic.MetricNameBuf = storage.MarshalMetricLabelRaw(ic.MetricNameBuf[:metricNameBufLen], placeholderLabel)
storageNodeIdx := ic.GetStorageNodeIdx(at, ic.Labels)
if err := ic.WriteDataPointExt(at, storageNodeIdx, ic.MetricNameBuf, r.Timestamp, f.Value); err != nil {
return err
}
}
rowsInserted.Add(len(r.Fields))
rowsTotal += len(r.Fields)
}
rowsInserted.Get(at).Add(rowsTotal)
rowsPerInsert.Update(float64(rowsTotal))
return ic.FlushBufs()
}
func getGzipReader(r io.Reader) (*gzip.Reader, error) {
v := gzipReaderPool.Get()
if v == nil {
return gzip.NewReader(r)
}
zr := v.(*gzip.Reader)
if err := zr.Reset(r); err != nil {
return nil, err
}
return zr, nil
}
func putGzipReader(zr *gzip.Reader) {
_ = zr.Close()
gzipReaderPool.Put(zr)
}
var gzipReaderPool sync.Pool
const maxReadPacketSize = 4 * 1024 * 1024
func (ctx *pushCtx) Read(r io.Reader, tsMultiplier int64) bool {
if ctx.err != nil {
return false
}
lr := io.LimitReader(r, maxReadPacketSize)
ctx.reqBuf.Reset()
ctx.reqBuf.B = append(ctx.reqBuf.B[:0], ctx.tailBuf...)
n, err := io.CopyBuffer(&ctx.reqBuf, lr, ctx.copyBuf[:])
if err != nil {
influxReadErrors.Inc()
ctx.err = fmt.Errorf("cannot read influx line protocol data: %s", err)
return false
}
if n < maxReadPacketSize {
// Mark the end of stream.
ctx.err = io.EOF
}
// Parse all the rows until the last newline in ctx.reqBuf.B
nn := bytes.LastIndexByte(ctx.reqBuf.B, '\n')
ctx.tailBuf = ctx.tailBuf[:0]
if nn >= 0 {
ctx.tailBuf = append(ctx.tailBuf[:0], ctx.reqBuf.B[nn+1:]...)
ctx.reqBuf.B = ctx.reqBuf.B[:nn]
}
if err = ctx.Rows.Unmarshal(bytesutil.ToUnsafeString(ctx.reqBuf.B)); err != nil {
influxUnmarshalErrors.Inc()
ctx.err = fmt.Errorf("cannot unmarshal influx line protocol data with size %d: %s", len(ctx.reqBuf.B), err)
ctx.reqBuf, ctx.tailBuf, ctx.err = common.ReadLinesBlock(r, ctx.reqBuf, ctx.tailBuf)
if ctx.err != nil {
if ctx.err != io.EOF {
readErrors.Inc()
ctx.err = fmt.Errorf("cannot read influx line protocol data: %s", ctx.err)
}
return false
}
ctx.Rows.Unmarshal(bytesutil.ToUnsafeString(ctx.reqBuf))
// Adjust timestamps according to tsMultiplier
currentTs := time.Now().UnixNano() / 1e6
@@ -169,6 +158,7 @@ func (ctx *pushCtx) Read(r io.Reader, tsMultiplier int64) bool {
}
} else if tsMultiplier < 0 {
tsMultiplier = -tsMultiplier
currentTs -= currentTs % tsMultiplier
for i := range ctx.Rows.Rows {
row := &ctx.Rows.Rows[i]
if row.Timestamp == 0 {
@@ -182,19 +172,16 @@ func (ctx *pushCtx) Read(r io.Reader, tsMultiplier int64) bool {
}
var (
influxReadCalls = metrics.NewCounter(`vm_read_calls_total{name="influx"}`)
influxReadErrors = metrics.NewCounter(`vm_read_errors_total{name="influx"}`)
influxUnmarshalErrors = metrics.NewCounter(`vm_unmarshal_errors_total{name="influx"}`)
readCalls = metrics.NewCounter(`vm_read_calls_total{name="influx"}`)
readErrors = metrics.NewCounter(`vm_read_errors_total{name="influx"}`)
)
type pushCtx struct {
Rows Rows
Common common.InsertCtx
Common netstorage.InsertCtx
reqBuf bytesutil.ByteBuffer
reqBuf []byte
tailBuf []byte
copyBuf [16 * 1024]byte
metricNameBuf []byte
metricGroupBuf []byte
err error
@@ -209,11 +196,9 @@ func (ctx *pushCtx) Error() error {
func (ctx *pushCtx) reset() {
ctx.Rows.Reset()
ctx.Common.Reset(0)
ctx.reqBuf.Reset()
ctx.Common.Reset()
ctx.reqBuf = ctx.reqBuf[:0]
ctx.tailBuf = ctx.tailBuf[:0]
ctx.metricNameBuf = ctx.metricNameBuf[:0]
ctx.metricGroupBuf = ctx.metricGroupBuf[:0]
ctx.err = nil


@@ -1,69 +1,146 @@
package vminsert
package main
import (
"flag"
"fmt"
"net/http"
"strings"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/concurrencylimiter"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/graphite"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/influx"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/netstorage"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/opentsdb"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/opentsdbhttp"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/prometheus"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/vmimport"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
"github.com/VictoriaMetrics/metrics"
)
var (
graphiteListenAddr = flag.String("graphiteListenAddr", "", "TCP and UDP address to listen for Graphite plaintext data. Usually :2003 must be set. Doesn't work if empty")
opentsdbListenAddr = flag.String("opentsdbListenAddr", "", "TCP and UDP address to listen for OpenTSDB put messages. Usually :4242 must be set. Doesn't work if empty")
maxInsertRequestSize = flag.Int("maxInsertRequestSize", 32*1024*1024, "The maximum size of a single insert request in bytes")
graphiteListenAddr = flag.String("graphiteListenAddr", "", "TCP and UDP address to listen for Graphite plaintext data. Usually :2003 must be set. Doesn't work if empty")
opentsdbListenAddr = flag.String("opentsdbListenAddr", "", "TCP and UDP address to listen for OpenTSDB put messages. Usually :4242 must be set. Doesn't work if empty")
opentsdbHTTPListenAddr = flag.String("opentsdbHTTPListenAddr", "", "TCP address to listen for OpenTSDB HTTP put requests. Usually :4242 must be set. Doesn't work if empty")
httpListenAddr = flag.String("httpListenAddr", ":8480", "Address to listen for http connections")
maxInsertRequestSize = flag.Int("maxInsertRequestSize", 32*1024*1024, "The maximum size of a single insert request in bytes")
maxLabelsPerTimeseries = flag.Int("maxLabelsPerTimeseries", 30, "The maximum number of labels accepted per time series. Superfluous labels are dropped")
storageNodes = flagutil.NewArray("storageNode", "Address of vmstorage nodes; usage: -storageNode=vmstorage-host1:8400 -storageNode=vmstorage-host2:8400")
)
// Init initializes vminsert.
func Init() {
func main() {
flag.Parse()
buildinfo.Init()
logger.Init()
logger.Infof("initializing netstorage for storageNodes %s...", *storageNodes)
startTime := time.Now()
if len(*storageNodes) == 0 {
logger.Fatalf("missing -storageNode arg")
}
netstorage.InitStorageNodes(*storageNodes)
logger.Infof("successfully initialized netstorage in %s", time.Since(startTime))
storage.SetMaxLabelsPerTimeseries(*maxLabelsPerTimeseries)
concurrencylimiter.Init()
if len(*graphiteListenAddr) > 0 {
go graphite.Serve(*graphiteListenAddr)
}
if len(*opentsdbListenAddr) > 0 {
go opentsdb.Serve(*opentsdbListenAddr)
}
}
if len(*opentsdbHTTPListenAddr) > 0 {
go opentsdbhttp.Serve(*opentsdbHTTPListenAddr, int64(*maxInsertRequestSize))
}
go func() {
httpserver.Serve(*httpListenAddr, requestHandler)
}()
sig := procutil.WaitForSigterm()
logger.Infof("service received signal %s", sig)
logger.Infof("gracefully shutting down the service at %q", *httpListenAddr)
startTime = time.Now()
if err := httpserver.Stop(*httpListenAddr); err != nil {
logger.Fatalf("cannot stop the service: %s", err)
}
logger.Infof("successfully shut down the service in %s", time.Since(startTime))
// Stop stops vminsert.
func Stop() {
if len(*graphiteListenAddr) > 0 {
graphite.Stop()
}
if len(*opentsdbListenAddr) > 0 {
opentsdb.Stop()
}
if len(*opentsdbHTTPListenAddr) > 0 {
opentsdbhttp.Stop()
}
logger.Infof("shutting down neststorage...")
startTime = time.Now()
netstorage.Stop()
logger.Infof("successfully stopped netstorage in %s", time.Since(startTime))
fs.MustStopDirRemover()
logger.Infof("the vminsert has been stopped")
}
// RequestHandler is a handler for Prometheus remote storage write API
func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
path := strings.Replace(r.URL.Path, "//", "/", -1)
switch path {
case "/api/v1/write":
func requestHandler(w http.ResponseWriter, r *http.Request) bool {
p, err := httpserver.ParsePath(r.URL.Path)
if err != nil {
httpserver.Errorf(w, "cannot parse path %q: %s", r.URL.Path, err)
return true
}
if p.Prefix != "insert" {
// This is not our link.
return false
}
at, err := auth.NewToken(p.AuthToken)
if err != nil {
httpserver.Errorf(w, "auth error: %s", err)
return true
}
switch p.Suffix {
case "prometheus/", "prometheus", "prometheus/api/v1/write":
prometheusWriteRequests.Inc()
if err := prometheus.InsertHandler(r, int64(*maxInsertRequestSize)); err != nil {
if err := prometheus.InsertHandler(at, r, int64(*maxInsertRequestSize)); err != nil {
prometheusWriteErrors.Inc()
httpserver.Errorf(w, "error in %q: %s", r.URL.Path, err)
return true
}
w.WriteHeader(http.StatusNoContent)
return true
case "/write", "/api/v2/write":
case "prometheus/api/v1/import":
vmimportRequests.Inc()
if err := vmimport.InsertHandler(at, r); err != nil {
vmimportErrors.Inc()
httpserver.Errorf(w, "error in %q: %s", r.URL.Path, err)
return true
}
return true
case "influx/write", "influx/api/v2/write":
influxWriteRequests.Inc()
if err := influx.InsertHandler(r); err != nil {
if err := influx.InsertHandler(at, r); err != nil {
influxWriteErrors.Inc()
httpserver.Errorf(w, "error in %q: %s", r.URL.Path, err)
return true
}
w.WriteHeader(http.StatusNoContent)
return true
case "/query":
// Emulate fake response for influx query
case "influx/query":
// Emulate fake response for influx query.
// This is required for TSBS benchmark.
influxQueryRequests.Inc()
fmt.Fprintf(w, `{"results":[{"series":[{"values":[]}]}]}`)
return true
@@ -74,11 +151,14 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
}
var (
prometheusWriteRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/write", protocol="prometheus"}`)
prometheusWriteErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/api/v1/write", protocol="prometheus"}`)
prometheusWriteRequests = metrics.NewCounter(`vm_http_requests_total{path="/insert/{}/prometheus/", protocol="prometheus"}`)
prometheusWriteErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/insert/{}/prometheus/", protocol="prometheus"}`)
influxWriteRequests = metrics.NewCounter(`vm_http_requests_total{path="/write", protocol="influx"}`)
influxWriteErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/write", protocol="influx"}`)
vmimportRequests = metrics.NewCounter(`vm_http_requests_total{path="/insert/{}/prometheus/api/v1/import", protocol="vm"}`)
vmimportErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/insert/{}/prometheus/api/v1/import", protocol="vm"}`)
influxQueryRequests = metrics.NewCounter(`vm_http_requests_total{path="/query", protocol="influx"}`)
influxWriteRequests = metrics.NewCounter(`vm_http_requests_total{path="/insert/{}/influx/", protocol="influx"}`)
influxWriteErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/insert/{}/influx/", protocol="influx"}`)
influxQueryRequests = metrics.NewCounter(`vm_http_requests_total{path="/insert/{}/influx/query", protocol="influx"}`)
)
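With the handler above, vminsert addresses all write endpoints as /insert/<accountID>/<suffix> (the tenant number below is made up):

  POST /insert/42/prometheus/api/v1/write
  POST /insert/42/prometheus/api/v1/import
  POST /insert/42/influx/write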


@@ -0,0 +1,194 @@
package netstorage
import (
"fmt"
"net/http"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/consts"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
xxhash "github.com/cespare/xxhash/v2"
jump "github.com/lithammer/go-jump-consistent-hash"
)
// InsertCtx is a generic context for inserting data.
//
// InsertCtx.Reset must be called before the first usage.
type InsertCtx struct {
Labels []prompb.Label
MetricNameBuf []byte
bufRowss []bufRows
labelsBuf []byte
resultCh chan error
}
type bufRows struct {
buf []byte
rows int
}
func (br *bufRows) pushTo(sn *storageNode) error {
bufLen := len(br.buf)
err := sn.push(br.buf, br.rows)
br.buf = br.buf[:0]
br.rows = 0
if err != nil {
return &httpserver.ErrorWithStatusCode{
Err: fmt.Errorf("cannot send %d bytes to storageNode %q: %s", bufLen, sn.dialer.Addr(), err),
StatusCode: http.StatusServiceUnavailable,
}
}
return nil
}
// Reset resets ctx.
func (ctx *InsertCtx) Reset() {
for _, label := range ctx.Labels {
label.Name = nil
label.Value = nil
}
ctx.Labels = ctx.Labels[:0]
ctx.MetricNameBuf = ctx.MetricNameBuf[:0]
if ctx.bufRowss == nil {
ctx.bufRowss = make([]bufRows, len(storageNodes))
}
for i := range ctx.bufRowss {
br := &ctx.bufRowss[i]
br.buf = br.buf[:0]
br.rows = 0
}
ctx.labelsBuf = ctx.labelsBuf[:0]
if ctx.resultCh == nil {
ctx.resultCh = make(chan error, len(storageNodes))
} else if len(ctx.resultCh) > 0 {
logger.Panicf("BUG: ctx.resultCh must be empty on Reset; got %d items", len(ctx.resultCh))
}
}
// AddLabelBytes adds (name, value) label to ctx.Labels.
//
// name and value must exist until ctx.Labels is used.
func (ctx *InsertCtx) AddLabelBytes(name, value []byte) {
labels := ctx.Labels
if cap(labels) > len(labels) {
labels = labels[:len(labels)+1]
} else {
labels = append(labels, prompb.Label{})
}
label := &labels[len(labels)-1]
// Do not copy name and value contents for performance reasons.
// This reduces GC overhead on the number of objects and allocations.
label.Name = name
label.Value = value
ctx.Labels = labels
}
// AddLabel adds (name, value) label to ctx.Labels.
//
// name and value must exist until ctx.Labels is used.
func (ctx *InsertCtx) AddLabel(name, value string) {
labels := ctx.Labels
if cap(labels) > len(labels) {
labels = labels[:len(labels)+1]
} else {
labels = append(labels, prompb.Label{})
}
label := &labels[len(labels)-1]
// Do not copy name and value contents for performance reasons.
// This reduces GC overhead on the number of objects and allocations.
label.Name = bytesutil.ToUnsafeBytes(name)
label.Value = bytesutil.ToUnsafeBytes(value)
ctx.Labels = labels
}
// WriteDataPoint writes (timestamp, value) data point with the given at and labels to ctx buffer.
func (ctx *InsertCtx) WriteDataPoint(at *auth.Token, labels []prompb.Label, timestamp int64, value float64) error {
ctx.MetricNameBuf = storage.MarshalMetricNameRaw(ctx.MetricNameBuf[:0], at.AccountID, at.ProjectID, labels)
storageNodeIdx := ctx.GetStorageNodeIdx(at, labels)
return ctx.WriteDataPointExt(at, storageNodeIdx, ctx.MetricNameBuf, timestamp, value)
}
// WriteDataPointExt writes the given metricNameRaw with (timestamp, value) to ctx buffer with the given storageNodeIdx.
func (ctx *InsertCtx) WriteDataPointExt(at *auth.Token, storageNodeIdx int, metricNameRaw []byte, timestamp int64, value float64) error {
br := &ctx.bufRowss[storageNodeIdx]
sn := storageNodes[storageNodeIdx]
bufNew := storage.MarshalMetricRow(br.buf, metricNameRaw, timestamp, value)
if len(bufNew) >= consts.MaxInsertPacketSize {
// Send buf to storageNode, since it is too big.
if err := br.pushTo(sn); err != nil {
return err
}
br.buf = storage.MarshalMetricRow(bufNew[:0], metricNameRaw, timestamp, value)
} else {
br.buf = bufNew
}
br.rows++
return nil
}
// FlushBufs flushes ctx bufs to remote storage nodes.
func (ctx *InsertCtx) FlushBufs() error {
// Send per-storageNode bufs in parallel.
resultCh := ctx.resultCh
resultChLen := 0
for i := range ctx.bufRowss {
br := &ctx.bufRowss[i]
if len(br.buf) == 0 {
continue
}
resultChLen++
go func(br *bufRows, sn *storageNode) {
resultCh <- br.pushTo(sn)
}(br, storageNodes[i])
}
var lastErr error
for i := 0; i < resultChLen; i++ {
err := <-resultCh
if err != nil {
lastErr = err
}
}
return lastErr
}
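Taken together, a protocol handler drives InsertCtx roughly like this (a sketch with error handling elided and the auth token, timestamp and value assumed in scope; it mirrors the OpenTSDB and vmimport handlers later in this diff):

ic := &ctx.Common // a netstorage.InsertCtx embedded in the handler's pushCtx
ic.Reset()
ic.Labels = ic.Labels[:0]
ic.AddLabel("", "cpu.usage_user") // the empty label name carries the metric name
ic.AddLabel("host", "web1")       // "host" and "web1" are made-up sample values
if err := ic.WriteDataPoint(at, ic.Labels, timestampMs, value); err != nil {
return err
}
return ic.FlushBufs()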
// GetStorageNodeIdx returns storage node index for the given at and labels.
//
// The returned index must be passed to WriteDataPoint.
func (ctx *InsertCtx) GetStorageNodeIdx(at *auth.Token, labels []prompb.Label) int {
if len(storageNodes) == 1 {
// Fast path - only a single storage node.
return 0
}
buf := ctx.labelsBuf[:0]
buf = encoding.MarshalUint32(buf, at.AccountID)
buf = encoding.MarshalUint32(buf, at.ProjectID)
for i := range labels {
label := &labels[i]
buf = marshalBytesFast(buf, label.Name)
buf = marshalBytesFast(buf, label.Value)
}
h := xxhash.Sum64(buf)
ctx.labelsBuf = buf
idx := int(jump.Hash(h, int32(len(storageNodes))))
return idx
}
func marshalBytesFast(dst []byte, s []byte) []byte {
dst = encoding.MarshalUint16(dst, uint16(len(s)))
dst = append(dst, s...)
return dst
}
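The jump consistent hash above keeps routing stable as the cluster grows: when the number of vmstorage nodes changes from N to N+1, only about 1/(N+1) of all series move to a different node. A standalone illustration (the hashed payload is a made-up example):

key := xxhash.Sum64([]byte(`accountID=1 projectID=0 job="node" instance="host:9100"`))
idx3 := jump.Hash(key, 3) // stable index while there are 3 vmstorage nodes
idx4 := jump.Hash(key, 4) // after adding a 4th node, ~3/4 of all keys keep their index
_, _ = idx3, idx4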

View File

@@ -0,0 +1,472 @@
package netstorage
import (
"flag"
"fmt"
"sync"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/consts"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/handshake"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/memory"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
"github.com/VictoriaMetrics/metrics"
xxhash "github.com/cespare/xxhash/v2"
)
var disableRPCCompression = flag.Bool(`rpc.disableCompression`, false, "Disable compression of RPC traffic. This reduces CPU usage at the cost of higher network bandwidth usage")
// push pushes buf to sn.
//
// It falls back to sending data to another vmstorage node if sn is currently
// unavailable.
//
// rows is the number of rows in the buf.
func (sn *storageNode) push(buf []byte, rows int) error {
if len(buf) > consts.MaxInsertPacketSize {
logger.Panicf("BUG: len(buf)=%d cannot exceed %d", len(buf), consts.MaxInsertPacketSize)
}
sn.rowsPushed.Add(rows)
sn.mu.Lock()
defer sn.mu.Unlock()
if sn.broken {
// The vmstorage node is broken. Re-route buf to healthy vmstorage nodes.
if err := addToReroutedBuf(buf, rows); err != nil {
rowsLostTotal.Add(rows)
return err
}
sn.rowsReroutedFromHere.Add(rows)
return nil
}
if len(sn.buf)+len(buf) <= consts.MaxInsertPacketSize {
// Fast path: the buf contents fit into sn.buf.
sn.buf = append(sn.buf, buf...)
sn.rows += rows
return nil
}
// Slow path: the buf contents don't fit into sn.buf.
// Flush sn.buf to vmstorage and then add buf to sn.buf.
if err := sn.flushBufLocked(); err != nil {
// Failed to flush or re-route sn.buf to vmstorage nodes.
// The sn.buf is already dropped by flushBufLocked.
// Drop buf too, since there is little sense in trying to rescue it.
rowsLostTotal.Add(rows)
return err
}
// Successful flush.
sn.buf = append(sn.buf, buf...)
sn.rows += rows
return nil
}
func (sn *storageNode) sendReroutedRow(buf []byte) error {
sn.mu.Lock()
defer sn.mu.Unlock()
if sn.broken {
return errBrokenStorageNode
}
if len(sn.buf)+len(buf) > consts.MaxInsertPacketSize {
return fmt.Errorf("cannot put %d bytes into vmstorage buffer, since its size cannot exceed %d bytes", len(sn.buf)+len(buf), consts.MaxInsertPacketSize)
}
sn.buf = append(sn.buf, buf...)
sn.rows++
return nil
}
var errBrokenStorageNode = fmt.Errorf("the vmstorage node is temporarily broken")
func (sn *storageNode) flushBufLocked() error {
err := sn.sendBufLocked(sn.buf)
if err == nil {
// Successful flush. Remove broken flag.
sn.broken = false
sn.rowsSent.Add(sn.rows)
sn.buf = sn.buf[:0]
sn.rows = 0
return nil
}
// Couldn't flush sn.buf to vmstorage. Mark sn as broken
// and try re-routing sn.buf to healthy vmstorage nodes.
logger.Errorf("cannot send data to vmstorage %s: %s; re-routing data to healthy vmstorage nodes", sn.dialer.Addr(), err)
sn.broken = true
err = addToReroutedBuf(sn.buf, sn.rows)
if err != nil {
rowsLostTotal.Add(sn.rows)
}
sn.buf = sn.buf[:0]
sn.rows = 0
return err
}
func (sn *storageNode) sendBufLocked(buf []byte) error {
if len(buf) == 0 {
return nil
}
if sn.bc == nil {
if err := sn.dial(); err != nil {
return fmt.Errorf("cannot dial %q: %s", sn.dialer.Addr(), err)
}
}
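// The write deadline below assumes the connection can sustain roughly 1 MB/s
// (one second per 1e6 bytes), with a 60-second floor for small batches.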
timeoutSeconds := len(buf) / 1e6
if timeoutSeconds < 60 {
timeoutSeconds = 60
}
timeout := time.Duration(timeoutSeconds) * time.Second
deadline := time.Now().Add(timeout)
if err := sn.bc.SetWriteDeadline(deadline); err != nil {
sn.closeBrokenConn()
return fmt.Errorf("cannot set write deadline to %s: %s", deadline, err)
}
// sizeBuf guarantees that the rows batch will be either fully
// read or fully discarded on the vmstorage side.
// sizeBuf is used for read optimization in vmstorage.
sn.sizeBuf = encoding.MarshalUint64(sn.sizeBuf[:0], uint64(len(buf)))
if _, err := sn.bc.Write(sn.sizeBuf); err != nil {
sn.closeBrokenConn()
return fmt.Errorf("cannot write data size %d: %s", len(buf), err)
}
if _, err := sn.bc.Write(buf); err != nil {
sn.closeBrokenConn()
return fmt.Errorf("cannot write data with size %d: %s", len(buf), err)
}
if err := sn.bc.Flush(); err != nil {
sn.closeBrokenConn()
return fmt.Errorf("cannot flush data with size %d: %s", len(buf), err)
}
return nil
}
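The vmstorage read side is outside this diff; a minimal receive-side sketch of the size-prefixed framing, assuming encoding.UnmarshalUint64 mirrors encoding.MarshalUint64:

// Hypothetical receiver; readPacket is not part of this changeset.
func readPacket(r io.Reader) ([]byte, error) {
var sizeBuf [8]byte
if _, err := io.ReadFull(r, sizeBuf[:]); err != nil {
return nil, err
}
size := encoding.UnmarshalUint64(sizeBuf[:])
buf := make([]byte, int(size))
if _, err := io.ReadFull(r, buf); err != nil {
// The batch size is known up front, so a partial batch can be discarded atomically.
return nil, err
}
return buf, nil
}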
func (sn *storageNode) dial() error {
c, err := sn.dialer.Dial()
if err != nil {
sn.dialErrors.Inc()
return err
}
compressionLevel := 1
if *disableRPCCompression {
compressionLevel = 0
}
bc, err := handshake.VMInsertClient(c, compressionLevel)
if err != nil {
_ = c.Close()
sn.handshakeErrors.Inc()
return fmt.Errorf("handshake error: %s", err)
}
sn.bc = bc
return nil
}
func (sn *storageNode) closeBrokenConn() {
if sn.bc == nil {
return
}
_ = sn.bc.Close()
sn.bc = nil
sn.connectionErrors.Inc()
}
func (sn *storageNode) run(stopCh <-chan struct{}) {
t := time.NewTimer(time.Second)
mustStop := false
for !mustStop {
select {
case <-stopCh:
mustStop = true
// Make sure flushBufLocked is called one last time before returning
// in order to send the remaining buffered data.
case <-t.C:
}
sn.mu.Lock()
if err := sn.flushBufLocked(); err != nil {
sn.closeBrokenConn()
logger.Errorf("cannot flush data to storageNode %q: %s", sn.dialer.Addr(), err)
}
sn.mu.Unlock()
t.Reset(time.Second)
}
t.Stop()
}
func rerouteWorker(stopCh <-chan struct{}) {
t := time.NewTimer(time.Second)
var buf []byte
mustStop := false
for !mustStop {
select {
case <-stopCh:
mustStop = true
// Make sure spreadReroutedBufToStorageNodes is called one last time before returning
// in order to reroute the remaining data to healthy vmstorage nodes.
case <-t.C:
}
var err error
buf, err = spreadReroutedBufToStorageNodes(buf[:0])
if err != nil {
rerouteErrors.Inc()
logger.Errorf("cannot reroute data among healthy vmstorage nodes: %s", err)
}
t.Reset(time.Second)
}
t.Stop()
}
// storageNode is a client sending data to vmstorage node.
type storageNode struct {
mu sync.Mutex
// Buffer with data that needs to be written to vmstorage node.
buf []byte
// The number of rows buf contains at the moment.
rows int
// Temporary buffer for encoding marshaled buf size.
sizeBuf []byte
// broken is set to true if the given vmstorage node is temporarily unhealthy.
// In this case the data is re-routed to the remaining healthy vmstorage nodes.
broken bool
dialer *netutil.TCPDialer
bc *handshake.BufferedConn
// The number of dial errors to vmstorage node.
dialErrors *metrics.Counter
// The number of handshake errors to vmstorage node.
handshakeErrors *metrics.Counter
// The number of connection errors to vmstorage node.
connectionErrors *metrics.Counter
// The number of rows pushed to storageNode with push method.
rowsPushed *metrics.Counter
// The number of rows sent to vmstorage node.
rowsSent *metrics.Counter
// The number of rows rerouted from the given vmstorage node
// to healthy nodes when the given node was unhealthy.
rowsReroutedFromHere *metrics.Counter
// The number of rows rerouted to the given vmstorage node
// from other nodes when they were unhealthy.
rowsReroutedToHere *metrics.Counter
}
// storageNodes contains a list of vmstorage node clients.
var storageNodes []*storageNode
var (
storageNodesWG sync.WaitGroup
rerouteWorkerWG sync.WaitGroup
)
var (
storageNodesStopCh = make(chan struct{})
rerouteWorkerStopCh = make(chan struct{})
)
// InitStorageNodes initializes vmstorage nodes' connections to the given addrs.
func InitStorageNodes(addrs []string) {
if len(addrs) == 0 {
logger.Panicf("BUG: addrs must be non-empty")
}
if len(addrs) > 255 {
logger.Panicf("BUG: too much addresses: %d; max supported %d addresses", len(addrs), 255)
}
for _, addr := range addrs {
sn := &storageNode{
dialer: netutil.NewTCPDialer("vminsert", addr),
dialErrors: metrics.NewCounter(fmt.Sprintf(`vm_rpc_dial_errors_total{name="vminsert", addr=%q}`, addr)),
handshakeErrors: metrics.NewCounter(fmt.Sprintf(`vm_rpc_handshake_errors_total{name="vminsert", addr=%q}`, addr)),
connectionErrors: metrics.NewCounter(fmt.Sprintf(`vm_rpc_connection_errors_total{name="vminsert", addr=%q}`, addr)),
rowsPushed: metrics.NewCounter(fmt.Sprintf(`vm_rpc_rows_pushed_total{name="vminsert", addr=%q}`, addr)),
rowsSent: metrics.NewCounter(fmt.Sprintf(`vm_rpc_rows_sent_total{name="vminsert", addr=%q}`, addr)),
rowsReroutedFromHere: metrics.NewCounter(fmt.Sprintf(`vm_rpc_rows_rerouted_from_here_total{name="vminsert", addr=%q}`, addr)),
rowsReroutedToHere: metrics.NewCounter(fmt.Sprintf(`vm_rpc_rows_rerouted_to_here_total{name="vminsert", addr=%q}`, addr)),
}
_ = metrics.NewGauge(fmt.Sprintf(`vm_rpc_rows_pending{name="vminsert", addr=%q}`, addr), func() float64 {
sn.mu.Lock()
n := sn.rows
sn.mu.Unlock()
return float64(n)
})
_ = metrics.NewGauge(fmt.Sprintf(`vm_rpc_buf_pending_bytes{name="vminsert", addr=%q}`, addr), func() float64 {
sn.mu.Lock()
n := len(sn.buf)
sn.mu.Unlock()
return float64(n)
})
storageNodes = append(storageNodes, sn)
storageNodesWG.Add(1)
go func() {
sn.run(storageNodesStopCh)
storageNodesWG.Done()
}()
}
reroutedBufMaxSize = memory.Allowed() / 16
rerouteWorkerWG.Add(1)
go func() {
rerouteWorker(rerouteWorkerStopCh)
rerouteWorkerWG.Done()
}()
}
// Stop gracefully stops netstorage.
func Stop() {
close(rerouteWorkerStopCh)
rerouteWorkerWG.Wait()
close(storageNodesStopCh)
storageNodesWG.Wait()
}
func addToReroutedBuf(buf []byte, rows int) error {
reroutedLock.Lock()
defer reroutedLock.Unlock()
if len(reroutedBuf)+len(buf) > reroutedBufMaxSize {
reroutedBufOverflows.Inc()
return fmt.Errorf("%d rows dropped because of reroutedBuf overflows %d bytes", rows, reroutedBufMaxSize)
}
reroutedBuf = append(reroutedBuf, buf...)
reroutedRows += rows
reroutesTotal.Inc()
return nil
}
func spreadReroutedBufToStorageNodes(swapBuf []byte) ([]byte, error) {
healthyStorageNodes := getHealthyStorageNodes()
if len(healthyStorageNodes) == 0 {
// No more vmstorage nodes to write data to.
return swapBuf, fmt.Errorf("all the storage nodes are unhealthy")
}
reroutedLock.Lock()
reroutedBuf, swapBuf = swapBuf[:0], reroutedBuf
rows := reroutedRows
reroutedRows = 0
reroutedLock.Unlock()
if len(swapBuf) == 0 {
// Nothing to re-route.
return swapBuf, nil
}
var mr storage.MetricRow
src := swapBuf
rowsProcessed := 0
for len(src) > 0 {
tail, err := mr.Unmarshal(src)
if err != nil {
logger.Panicf("BUG: cannot unmarshal recently marshaled MetricRow: %s", err)
}
rowBuf := src[:len(src)-len(tail)]
src = tail
// Use plain modulo hashing instead of the jump consistent hash in order to
// re-route rows evenly among the healthy vmstorage nodes.
// This spreads the increased load across all the healthy nodes.
h := xxhash.Sum64(mr.MetricNameRaw)
idx := h % uint64(len(healthyStorageNodes))
attempts := 0
for {
sn := healthyStorageNodes[idx]
err := sn.sendReroutedRow(rowBuf)
if err == nil {
sn.rowsReroutedToHere.Inc()
break
}
// Cannot send data to sn. Try sending to the next vmstorage node.
idx++
if idx >= uint64(len(healthyStorageNodes)) {
idx = 0
}
attempts++
if attempts < len(healthyStorageNodes) {
continue
}
// The row could not be sent to any node that was healthy at the start of this pass.
// Try returning the remaining data to reroutedBuf if it has enough free space.
rowsRemaining := rows - rowsProcessed
recovered := false
reroutedLock.Lock()
if len(rowBuf)+len(tail)+len(reroutedBuf) <= reroutedBufMaxSize {
swapBuf = append(swapBuf[:0], rowBuf...)
swapBuf = append(swapBuf, tail...)
swapBuf = append(swapBuf, reroutedBuf...)
reroutedBuf, swapBuf = swapBuf, reroutedBuf[:0]
reroutedRows += rowsRemaining
recovered = true
}
reroutedLock.Unlock()
if recovered {
return swapBuf, nil
}
rowsLostTotal.Add(rowsRemaining)
return swapBuf, fmt.Errorf("all the %d vmstorage nodes are unavailable; lost %d rows; last error: %s", len(storageNodes), rowsRemaining, err)
}
rowsProcessed++
}
if rowsProcessed != rows {
logger.Panicf("BUG: unexpected number of rows processed; got %d; want %d", rowsProcessed, rows)
}
reroutedRowsProcessed.Add(rowsProcessed)
return swapBuf, nil
}
var (
reroutedLock sync.Mutex
reroutedBuf []byte
reroutedRows int
reroutedBufMaxSize int
reroutedRowsProcessed = metrics.NewCounter(`vm_rpc_rerouted_rows_processed_total{name="vminsert"}`)
reroutedBufOverflows = metrics.NewCounter(`vm_rpc_rerouted_buf_overflows_total{name="vminsert"}`)
reroutesTotal = metrics.NewCounter(`vm_rpc_reroutes_total{name="vminsert"}`)
_ = metrics.NewGauge(`vm_rpc_rerouted_rows_pending{name="vminsert"}`, func() float64 {
reroutedLock.Lock()
n := reroutedRows
reroutedLock.Unlock()
return float64(n)
})
_ = metrics.NewGauge(`vm_rpc_rerouted_buf_pending_bytes{name="vminsert"}`, func() float64 {
reroutedLock.Lock()
n := len(reroutedBuf)
reroutedLock.Unlock()
return float64(n)
})
rerouteErrors = metrics.NewCounter(`vm_rpc_reroute_errors_total{name="vminsert"}`)
rowsLostTotal = metrics.NewCounter(`vm_rpc_rows_lost_total{name="vminsert"}`)
)
func getHealthyStorageNodes() []*storageNode {
sns := make([]*storageNode, 0, len(storageNodes)-1)
for _, sn := range storageNodes {
sn.mu.Lock()
if !sn.broken {
sns = append(sns, sn)
}
sn.mu.Unlock()
}
return sns
}

View File

@@ -4,6 +4,8 @@ import (
"fmt"
"strings"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/metrics"
"github.com/valyala/fastjson/fastfloat"
)
@@ -34,13 +36,8 @@ func (rs *Rows) Reset() {
// See http://opentsdb.net/docs/build/html/api_telnet/put.html
//
// s must remain unchanged while rs is in use.
func (rs *Rows) Unmarshal(s string) error {
var err error
rs.Rows, rs.tagsPool, err = unmarshalRows(rs.Rows[:0], s, rs.tagsPool[:0])
if err != nil {
return err
}
return err
func (rs *Rows) Unmarshal(s string) {
rs.Rows, rs.tagsPool = unmarshalRows(rs.Rows[:0], s, rs.tagsPool[:0])
}
// Row is a single OpenTSDB row.
@@ -69,6 +66,9 @@ func (r *Row) unmarshal(s string, tagsPool []Tag) ([]Tag, error) {
return tagsPool, fmt.Errorf("cannot find whitespace between metric and timestamp in %q", s)
}
r.Metric = s[:n]
if len(r.Metric) == 0 {
return tagsPool, fmt.Errorf("metric cannot be empty")
}
tail := s[n+1:]
n = strings.IndexByte(tail, ' ')
if n < 0 {
@@ -92,39 +92,46 @@ func (r *Row) unmarshal(s string, tagsPool []Tag) ([]Tag, error) {
return tagsPool, nil
}
func unmarshalRows(dst []Row, s string, tagsPool []Tag) ([]Row, []Tag, error) {
func unmarshalRows(dst []Row, s string, tagsPool []Tag) ([]Row, []Tag) {
for len(s) > 0 {
n := strings.IndexByte(s, '\n')
if n == 0 {
// Skip empty line
s = s[1:]
continue
}
if cap(dst) > len(dst) {
dst = dst[:len(dst)+1]
} else {
dst = append(dst, Row{})
}
r := &dst[len(dst)-1]
if n < 0 {
// The last line.
var err error
tagsPool, err = r.unmarshal(s, tagsPool)
if err != nil {
return dst, tagsPool, err
}
return dst, tagsPool, nil
}
var err error
tagsPool, err = r.unmarshal(s[:n], tagsPool)
if err != nil {
return dst, tagsPool, err
return unmarshalRow(dst, s, tagsPool)
}
dst, tagsPool = unmarshalRow(dst, s[:n], tagsPool)
s = s[n+1:]
}
return dst, tagsPool, nil
return dst, tagsPool
}
func unmarshalRow(dst []Row, s string, tagsPool []Tag) ([]Row, []Tag) {
if len(s) > 0 && s[len(s)-1] == '\r' {
s = s[:len(s)-1]
}
if len(s) == 0 {
// Skip empty line
return dst, tagsPool
}
if cap(dst) > len(dst) {
dst = dst[:len(dst)+1]
} else {
dst = append(dst, Row{})
}
r := &dst[len(dst)-1]
var err error
tagsPool, err = r.unmarshal(s, tagsPool)
if err != nil {
dst = dst[:len(dst)-1]
logger.Errorf("cannot unmarshal OpenTSDB line %q: %s", s, err)
invalidLines.Inc()
}
return dst, tagsPool
}
var invalidLines = metrics.NewCounter(`vm_rows_invalid_total{type="opentsdb"}`)
func unmarshalTags(dst []Tag, s string) ([]Tag, error) {
for {
if cap(dst) > len(dst) {
@@ -140,12 +147,20 @@ func unmarshalTags(dst []Tag, s string) ([]Tag, error) {
if err := tag.unmarshal(s); err != nil {
return dst[:len(dst)-1], err
}
if len(tag.Key) == 0 || len(tag.Value) == 0 {
// Skip empty tag
dst = dst[:len(dst)-1]
}
return dst, nil
}
if err := tag.unmarshal(s[:n]); err != nil {
return dst[:len(dst)-1], err
}
s = s[n+1:]
if len(tag.Key) == 0 || len(tag.Value) == 0 {
// Skip empty tag
dst = dst[:len(dst)-1]
}
}
}
@@ -167,9 +182,6 @@ func (t *Tag) unmarshal(s string) error {
return fmt.Errorf("missing tag value for %q", s)
}
t.Key = s[:n]
if len(t.Key) == 0 {
return fmt.Errorf("tag key cannot be empty for %q", s)
}
t.Value = s[n+1:]
return nil
}
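For reference, a well-formed telnet put line accepted by this parser looks as follows (a made-up sample in the shape used by the tests below):

var rows Rows
rows.Unmarshal("put cpu.usage_user 1234556768 1.23 host=web1 dc=eu\n")
// -> Metric: "cpu.usage_user", Timestamp: 1234556768 (still in seconds at this stage),
//    Value: 1.23, Tags: host=web1 and dc=eu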

View File

@@ -9,19 +9,24 @@ func TestRowsUnmarshalFailure(t *testing.T) {
f := func(s string) {
t.Helper()
var rows Rows
if err := rows.Unmarshal(s); err == nil {
t.Fatalf("expecting non-nil error when parsing %q", s)
rows.Unmarshal(s)
if len(rows.Rows) != 0 {
t.Fatalf("unexpected number of rows parsed; got %d; want 0", len(rows.Rows))
}
// Try again
if err := rows.Unmarshal(s); err == nil {
t.Fatalf("expecting non-nil error when parsing %q", s)
rows.Unmarshal(s)
if len(rows.Rows) != 0 {
t.Fatalf("unexpected number of rows parsed; got %d; want 0", len(rows.Rows))
}
}
// Missing put prefix
f("xx")
// Missing metric
f("put 111 34")
// Missing timestamp
f("put aaa")
@@ -42,26 +47,19 @@ func TestRowsUnmarshalFailure(t *testing.T) {
// Invalid tag
f("put aaa 123 4.5 foo")
f("put aaa 123 4.5 =")
f("put aaa 123 4.5 =foo")
f("put aaa 123 4.5 =foo a=b")
}
func TestRowsUnmarshalSuccess(t *testing.T) {
f := func(s string, rowsExpected *Rows) {
t.Helper()
var rows Rows
if err := rows.Unmarshal(s); err != nil {
t.Fatalf("cannot unmarshal %q: %s", s, err)
}
rows.Unmarshal(s)
if !reflect.DeepEqual(rows.Rows, rowsExpected.Rows) {
t.Fatalf("unexpected rows;\ngot\n%+v;\nwant\n%+v", rows.Rows, rowsExpected.Rows)
}
// Try unmarshaling again
if err := rows.Unmarshal(s); err != nil {
t.Fatalf("cannot unmarshal %q: %s", s, err)
}
rows.Unmarshal(s)
if !reflect.DeepEqual(rows.Rows, rowsExpected.Rows) {
t.Fatalf("unexpected rows;\ngot\n%+v;\nwant\n%+v", rows.Rows, rowsExpected.Rows)
}
@@ -74,7 +72,9 @@ func TestRowsUnmarshalSuccess(t *testing.T) {
// Empty line
f("", &Rows{})
f("\r", &Rows{})
f("\n\n", &Rows{})
f("\n\r\n", &Rows{})
// Single line
f("put foobar 789 -123.456 a=b", &Rows{
@@ -88,17 +88,13 @@ func TestRowsUnmarshalSuccess(t *testing.T) {
}},
}},
})
// Empty tag value
f("put foobar 789 -123.456 a= b=c", &Rows{
// Empty tag
f("put foobar 789 -123.456 a= b=c =d", &Rows{
Rows: []Row{{
Metric: "foobar",
Value: -123.456,
Timestamp: 789,
Tags: []Tag{
{
Key: "a",
Value: "",
},
{
Key: "b",
Value: "c",
@@ -200,4 +196,27 @@ func TestRowsUnmarshalSuccess(t *testing.T) {
},
},
})
// Multi lines with invalid line
f("put foo 2 0.3 a=b\naaa bbb\nput bar.baz 43 0.34 a=b\n", &Rows{
Rows: []Row{
{
Metric: "foo",
Value: 0.3,
Timestamp: 2,
Tags: []Tag{{
Key: "a",
Value: "b",
}},
},
{
Metric: "bar.baz",
Value: 0.34,
Timestamp: 43,
Tags: []Tag{{
Key: "a",
Value: "b",
}},
},
},
})
}

View File

@@ -6,18 +6,19 @@ import (
)
func BenchmarkRowsUnmarshal(b *testing.B) {
s := `cpu.usage_user 1234556768 1.23 a=b
cpu.usage_system 1234556768 23.344 a=b
cpu.usage_iowait 1234556769 3.3443 a=b
cpu.usage_irq 1234556768 0.34432 a=b
s := `put cpu.usage_user 1234556768 1.23 a=b
put cpu.usage_system 1234556768 23.344 a=b
put cpu.usage_iowait 1234556769 3.3443 a=b
put cpu.usage_irq 1234556768 0.34432 a=b
`
b.SetBytes(int64(len(s)))
b.ReportAllocs()
b.RunParallel(func(pb *testing.PB) {
var rows Rows
for pb.Next() {
if err := rows.Unmarshal(s); err != nil {
panic(fmt.Errorf("cannot unmarshal %q: %s", s, err))
rows.Unmarshal(s)
if len(rows.Rows) != 4 {
panic(fmt.Errorf("unexpected number of parsed rows; got %d; want 4", len(rows.Rows)))
}
}
})

View File

@@ -1,7 +1,6 @@
package opentsdb
import (
"bytes"
"fmt"
"io"
"net"
@@ -11,110 +10,124 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/common"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/concurrencylimiter"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/netstorage"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
"github.com/VictoriaMetrics/metrics"
"github.com/valyala/fastjson/fastfloat"
)
var rowsInserted = metrics.NewCounter(`vm_rows_inserted_total{type="opentsdb"}`)
var (
rowsInserted = tenantmetrics.NewCounterMap(`vm_rows_inserted_total{type="opentsdb"}`)
rowsPerInsert = metrics.NewSummary(`vm_rows_per_insert{type="opentsdb"}`)
)
// insertHandler processes remote write for OpenTSDB put protocol.
//
// See http://opentsdb.net/docs/build/html/api_telnet/put.html
func insertHandler(r io.Reader) error {
func insertHandler(at *auth.Token, r io.Reader) error {
return concurrencylimiter.Do(func() error {
return insertHandlerInternal(r)
return insertHandlerInternal(at, r)
})
}
func insertHandlerInternal(r io.Reader) error {
func insertHandlerInternal(at *auth.Token, r io.Reader) error {
ctx := getPushCtx()
defer putPushCtx(ctx)
for ctx.Read(r) {
if err := ctx.InsertRows(); err != nil {
if err := ctx.InsertRows(at); err != nil {
return err
}
}
return ctx.Error()
}
func (ctx *pushCtx) InsertRows() error {
func (ctx *pushCtx) InsertRows(at *auth.Token) error {
rows := ctx.Rows.Rows
ic := &ctx.Common
ic.Reset(len(rows))
ic.Reset()
atCopy := *at
for i := range rows {
r := &rows[i]
ic.Labels = ic.Labels[:0]
ic.AddLabel("", r.Metric)
for j := range r.Tags {
tag := &r.Tags[j]
if atCopy.AccountID == 0 {
// Multi-tenancy support via custom tags.
// Do not allow overriding AccountID and ProjectID from atCopy for security reasons.
if tag.Key == "VictoriaMetrics_AccountID" {
atCopy.AccountID = uint32(fastfloat.ParseUint64BestEffort(tag.Value))
}
if atCopy.ProjectID == 0 && tag.Key == "VictoriaMetrics_ProjectID" {
atCopy.ProjectID = uint32(fastfloat.ParseUint64BestEffort(tag.Value))
}
}
ic.AddLabel(tag.Key, tag.Value)
}
ic.WriteDataPoint(nil, ic.Labels, r.Timestamp, r.Value)
if err := ic.WriteDataPoint(&atCopy, ic.Labels, r.Timestamp, r.Value); err != nil {
return err
}
}
rowsInserted.Add(len(rows))
// Assume that all the rows for a single connection belong to the same (AccountID, ProjectID).
rowsInserted.Get(&atCopy).Add(len(rows))
rowsPerInsert.Update(float64(len(rows)))
return ic.FlushBufs()
}
const maxReadPacketSize = 4 * 1024 * 1024
const flushTimeout = 3 * time.Second
func (ctx *pushCtx) Read(r io.Reader) bool {
opentsdbReadCalls.Inc()
readCalls.Inc()
if ctx.err != nil {
return false
}
if c, ok := r.(net.Conn); ok {
if err := c.SetReadDeadline(time.Now().Add(flushTimeout)); err != nil {
opentsdbReadErrors.Inc()
readErrors.Inc()
ctx.err = fmt.Errorf("cannot set read deadline: %s", err)
return false
}
}
lr := io.LimitReader(r, maxReadPacketSize)
ctx.reqBuf.Reset()
ctx.reqBuf.B = append(ctx.reqBuf.B[:0], ctx.tailBuf...)
n, err := io.CopyBuffer(&ctx.reqBuf, lr, ctx.copyBuf[:])
if err != nil {
if ne, ok := err.(net.Error); ok && ne.Timeout() {
ctx.reqBuf, ctx.tailBuf, ctx.err = common.ReadLinesBlock(r, ctx.reqBuf, ctx.tailBuf)
if ctx.err != nil {
if ne, ok := ctx.err.(net.Error); ok && ne.Timeout() {
// Flush the read data on timeout and try reading again.
ctx.err = nil
} else {
opentsdbReadErrors.Inc()
ctx.err = fmt.Errorf("cannot read OpenTSDB put protocol data: %s", err)
if ctx.err != io.EOF {
readErrors.Inc()
ctx.err = fmt.Errorf("cannot read OpenTSDB put protocol data: %s", ctx.err)
}
return false
}
} else if n < maxReadPacketSize {
// Mark the end of stream.
ctx.err = io.EOF
}
ctx.Rows.Unmarshal(bytesutil.ToUnsafeString(ctx.reqBuf))
// Parse all the rows until the last newline in ctx.reqBuf.B
nn := bytes.LastIndexByte(ctx.reqBuf.B, '\n')
ctx.tailBuf = ctx.tailBuf[:0]
if nn >= 0 {
ctx.tailBuf = append(ctx.tailBuf[:0], ctx.reqBuf.B[nn+1:]...)
ctx.reqBuf.B = ctx.reqBuf.B[:nn]
}
if err = ctx.Rows.Unmarshal(bytesutil.ToUnsafeString(ctx.reqBuf.B)); err != nil {
opentsdbUnmarshalErrors.Inc()
ctx.err = fmt.Errorf("cannot unmarshal OpenTSDB put protocol data with size %d: %s", len(ctx.reqBuf.B), err)
return false
// Fill in missing timestamps
currentTimestamp := time.Now().Unix()
rows := ctx.Rows.Rows
for i := range rows {
r := &rows[i]
if r.Timestamp == 0 {
r.Timestamp = currentTimestamp
}
}
// Convert timestamps from seconds to milliseconds
for i := range ctx.Rows.Rows {
ctx.Rows.Rows[i].Timestamp *= 1e3
for i := range rows {
rows[i].Timestamp *= 1e3
}
return true
}
type pushCtx struct {
Rows Rows
Common common.InsertCtx
Common netstorage.InsertCtx
reqBuf bytesutil.ByteBuffer
reqBuf []byte
tailBuf []byte
copyBuf [16 * 1024]byte
err error
}
@@ -128,17 +141,16 @@ func (ctx *pushCtx) Error() error {
func (ctx *pushCtx) reset() {
ctx.Rows.Reset()
ctx.Common.Reset(0)
ctx.reqBuf.Reset()
ctx.Common.Reset()
ctx.reqBuf = ctx.reqBuf[:0]
ctx.tailBuf = ctx.tailBuf[:0]
ctx.err = nil
}
var (
opentsdbReadCalls = metrics.NewCounter(`vm_read_calls_total{name="opentsdb"}`)
opentsdbReadErrors = metrics.NewCounter(`vm_read_errors_total{name="opentsdb"}`)
opentsdbUnmarshalErrors = metrics.NewCounter(`vm_unmarshal_errors_total{name="opentsdb"}`)
readCalls = metrics.NewCounter(`vm_read_calls_total{name="opentsdb"}`)
readErrors = metrics.NewCounter(`vm_read_errors_total{name="opentsdb"}`)
)
func getPushCtx() *pushCtx {

View File

@@ -7,8 +7,10 @@ import (
"sync"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil"
"github.com/VictoriaMetrics/metrics"
)
@@ -23,7 +25,7 @@ var (
// Serve starts OpenTSDB collector on the given addr.
func Serve(addr string) {
logger.Infof("starting TCP OpenTSDB collector at %q", addr)
lnTCP, err := net.Listen("tcp4", addr)
lnTCP, err := netutil.NewTCPListener("opentsdb", addr)
if err != nil {
logger.Fatalf("cannot start TCP OpenTSDB collector at %q: %s", addr, err)
}
@@ -70,7 +72,8 @@ func serveTCP(ln net.Listener) {
}
go func() {
writeRequestsTCP.Inc()
if err := insertHandler(c); err != nil {
var at auth.Token // TODO: properly initialize the auth token
if err := insertHandler(&at, c); err != nil {
writeErrorsTCP.Inc()
logger.Errorf("error in TCP OpenTSDB conn %q<->%q: %s", c.LocalAddr(), c.RemoteAddr(), err)
}
@@ -88,6 +91,7 @@ func serveUDP(ln net.PacketConn) {
defer wg.Done()
var bb bytesutil.ByteBuffer
bb.B = bytesutil.Resize(bb.B, 64*1024)
var at auth.Token // TODO: properly initialize the auth token
for {
bb.Reset()
bb.B = bb.B[:cap(bb.B)]
@@ -108,7 +112,7 @@ func serveUDP(ln net.PacketConn) {
}
bb.B = bb.B[:n]
writeRequestsUDP.Inc()
if err := insertHandler(bb.NewReader()); err != nil {
if err := insertHandler(&at, bb.NewReader()); err != nil {
writeErrorsUDP.Inc()
logger.Errorf("error in UDP OpenTSDB conn %q<->%q: %s", ln.LocalAddr(), addr, err)
continue

View File

@@ -0,0 +1,198 @@
package opentsdbhttp
import (
"fmt"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/metrics"
"github.com/valyala/fastjson"
"github.com/valyala/fastjson/fastfloat"
)
// Rows contains parsed OpenTSDB rows.
type Rows struct {
Rows []Row
tagsPool []Tag
}
// Reset resets rs.
func (rs *Rows) Reset() {
// Release references to objects, so they can be GC'ed.
for i := range rs.Rows {
rs.Rows[i].reset()
}
rs.Rows = rs.Rows[:0]
for i := range rs.tagsPool {
rs.tagsPool[i].reset()
}
rs.tagsPool = rs.tagsPool[:0]
}
// Unmarshal unmarshals OpenTSDB rows from av.
//
// See http://opentsdb.net/docs/build/html/api_http/put.html
//
// av must remain unchanged while rs is in use.
func (rs *Rows) Unmarshal(av *fastjson.Value) {
rs.Rows, rs.tagsPool = unmarshalRows(rs.Rows[:0], av, rs.tagsPool[:0])
}
// Row is a single OpenTSDB row.
type Row struct {
Metric string
Tags []Tag
Value float64
Timestamp int64
}
func (r *Row) reset() {
r.Metric = ""
r.Tags = nil
r.Value = 0
r.Timestamp = 0
}
func (r *Row) unmarshal(o *fastjson.Value, tagsPool []Tag) ([]Tag, error) {
r.reset()
m := o.GetStringBytes("metric")
if len(m) == 0 {
return tagsPool, fmt.Errorf("missing `metric` in %s", o)
}
r.Metric = bytesutil.ToUnsafeString(m)
rawTs := o.Get("timestamp")
if rawTs != nil {
ts, err := getFloat64(rawTs)
if err != nil {
return tagsPool, fmt.Errorf("invalid `timestamp` in %s: %s", o, err)
}
r.Timestamp = int64(ts)
} else {
// Allow missing timestamp. It is automatically populated
// with the current time in this case.
r.Timestamp = 0
}
rawV := o.Get("value")
if rawV == nil {
return tagsPool, fmt.Errorf("missing `value` in %s", o)
}
v, err := getFloat64(rawV)
if err != nil {
return tagsPool, fmt.Errorf("invalid `value` in %s: %s", o, err)
}
r.Value = v
vt := o.Get("tags")
if vt == nil {
// Allow empty tags.
return tagsPool, nil
}
rawTags, err := vt.Object()
if err != nil {
return tagsPool, fmt.Errorf("invalid `tags` in %s: %s", o, err)
}
tagsStart := len(tagsPool)
tagsPool, err = unmarshalTags(tagsPool, rawTags)
if err != nil {
return tagsPool, fmt.Errorf("cannot parse tags %s: %s", rawTags, err)
}
tags := tagsPool[tagsStart:]
r.Tags = tags[:len(tags):len(tags)]
return tagsPool, nil
}
func getFloat64(v *fastjson.Value) (float64, error) {
switch v.Type() {
case fastjson.TypeNumber:
return v.Float64()
case fastjson.TypeString:
vStr, _ := v.StringBytes()
vFloat := fastfloat.ParseBestEffort(bytesutil.ToUnsafeString(vStr))
if vFloat == 0 && string(vStr) != "0" && string(vStr) != "0.0" {
return 0, fmt.Errorf("invalid float64 value: %q", vStr)
}
return vFloat, nil
default:
return 0, fmt.Errorf("value doesn't contain float64; it contains %s", v.Type())
}
}
func unmarshalRows(dst []Row, av *fastjson.Value, tagsPool []Tag) ([]Row, []Tag) {
switch av.Type() {
case fastjson.TypeObject:
return unmarshalRow(dst, av, tagsPool)
case fastjson.TypeArray:
a, _ := av.Array()
for _, o := range a {
dst, tagsPool = unmarshalRow(dst, o, tagsPool)
}
return dst, tagsPool
default:
logger.Errorf("OpenTSDB JSON must be either object or array; got %s; body=%s", av.Type(), av)
invalidLines.Inc()
return dst, tagsPool
}
}
func unmarshalRow(dst []Row, o *fastjson.Value, tagsPool []Tag) ([]Row, []Tag) {
if cap(dst) > len(dst) {
dst = dst[:len(dst)+1]
} else {
dst = append(dst, Row{})
}
r := &dst[len(dst)-1]
var err error
tagsPool, err = r.unmarshal(o, tagsPool)
if err != nil {
dst = dst[:len(dst)-1]
logger.Errorf("cannot unmarshal OpenTSDB object %s: %s", o, err)
invalidLines.Inc()
}
return dst, tagsPool
}
var invalidLines = metrics.NewCounter(`vm_rows_invalid_total{type="opentsdb-http"}`)
func unmarshalTags(dst []Tag, o *fastjson.Object) ([]Tag, error) {
var err error
o.Visit(func(k []byte, v *fastjson.Value) {
if v.Type() != fastjson.TypeString {
err = fmt.Errorf("tag value must be string; got %s; value=%s", v.Type(), v)
return
}
if len(k) == 0 {
// Skip empty tags
return
}
vStr, _ := v.StringBytes()
if len(vStr) == 0 {
// Skip empty tags
return
}
if cap(dst) > len(dst) {
dst = dst[:len(dst)+1]
} else {
dst = append(dst, Tag{})
}
tag := &dst[len(dst)-1]
tag.Key = bytesutil.ToUnsafeString(k)
tag.Value = bytesutil.ToUnsafeString(vStr)
})
return dst, err
}
// Tag is an OpenTSDB tag.
type Tag struct {
Key string
Value string
}
func (t *Tag) reset() {
t.Key = ""
t.Value = ""
}

View File

@@ -0,0 +1,246 @@
package opentsdbhttp
import (
"reflect"
"testing"
)
func TestRowsUnmarshalFailure(t *testing.T) {
f := func(s string) {
t.Helper()
var rows Rows
p := parserPool.Get()
defer parserPool.Put(p)
v, err := p.Parse(s)
if err != nil {
// Expected JSON parser error
return
}
// Verify OpenTSDB body parsing error
rows.Unmarshal(v)
if len(rows.Rows) != 0 {
t.Fatalf("unexpected number of rows parsed; got %d; want 0", len(rows.Rows))
}
// Try again
rows.Unmarshal(v)
if len(rows.Rows) != 0 {
t.Fatalf("unexpected number of rows parsed; got %d; want 0", len(rows.Rows))
}
}
// invalid json
f("{g")
// Invalid json type
f(`1`)
f(`"foo"`)
f(`[1,2]`)
f(`null`)
// Incomplete object
f(`{}`)
f(`{"metric": "aaa"}`)
f(`{"metric": "aaa", "timestamp": 1122}`)
f(`{"metric": "aaa", "timestamp": "tststs"}`)
f(`{"timestamp": 1122, "value": 33}`)
f(`{"value": 33}`)
f(`{"value": 33, "tags": {"fooo":"bar"}}`)
// Invalid value
f(`{"metric": "aaa", "timestamp": 1122, "value": "0.0.0"}`)
// Invalid metric type
f(`{"metric": "", "timestamp": 1122, "value": 0.45, "tags": {"foo": "bar"}}`)
f(`{"metric": ["aaa"], "timestamp": 1122, "value": 0.45, "tags": {"foo": "bar"}}`)
f(`{"metric": {"aaa":1}, "timestamp": 1122, "value": 0.45, "tags": {"foo": "bar"}}`)
f(`{"metric": 1, "timestamp": 1122, "value": 0.45, "tags": {"foo": "bar"}}`)
// Invalid timestamp type
f(`{"metric": "aaa", "timestamp": "foobar", "value": 0.45, "tags": {"foo": "bar"}}`)
f(`{"metric": "aaa", "timestamp": [1,2], "value": 0.45, "tags": {"foo": "bar"}}`)
f(`{"metric": "aaa", "timestamp": {"a":1}, "value": 0.45, "tags": {"foo": "bar"}}`)
// Invalid value type
f(`{"metric": "aaa", "timestamp": 1122, "value": [0,1], "tags": {"foo":"bar"}}`)
f(`{"metric": "aaa", "timestamp": 1122, "value": {"a":1}, "tags": {"foo":"bar"}}`)
f(`{"metric": "aaa", "timestamp": 1122, "value": "foobar", "tags": {"foo":"bar"}}`)
// Invalid tags type
f(`{"metric": "aaa", "timestamp": 1122, "value": 0.45, "tags": 1}`)
f(`{"metric": "aaa", "timestamp": 1122, "value": 0.45, "tags": [1,2]}`)
f(`{"metric": "aaa", "timestamp": 1122, "value": 0.45, "tags": "foo"}`)
// Invalid tag value type
f(`{"metric": "aaa", "timestamp": 1122, "value": 0.45, "tags": {"foo": ["bar"]}}`)
f(`{"metric": "aaa", "timestamp": 1122, "value": 0.45, "tags": {"foo": {"bar":"baz"}}}`)
f(`{"metric": "aaa", "timestamp": 1122, "value": 0.45, "tags": {"foo": 1}}`)
// Invalid multiline
f(`[{"metric": "aaa", "timestamp": 1122, "value": "trt", "tags":{"foo":"bar"}}, {"metric": "aaa", "timestamp": [1122], "value": 111}]`)
}
func TestRowsUnmarshalSuccess(t *testing.T) {
f := func(s string, rowsExpected *Rows) {
t.Helper()
var rows Rows
p := parserPool.Get()
defer parserPool.Put(p)
v, err := p.Parse(s)
if err != nil {
t.Fatalf("cannot parse json %s: %s", s, err)
}
rows.Unmarshal(v)
if !reflect.DeepEqual(rows.Rows, rowsExpected.Rows) {
t.Fatalf("unexpected rows;\ngot\n%+v;\nwant\n%+v", rows.Rows, rowsExpected.Rows)
}
// Try unmarshaling again
rows.Unmarshal(v)
if !reflect.DeepEqual(rows.Rows, rowsExpected.Rows) {
t.Fatalf("unexpected rows;\ngot\n%+v;\nwant\n%+v", rows.Rows, rowsExpected.Rows)
}
rows.Reset()
if len(rows.Rows) != 0 {
t.Fatalf("non-empty rows after reset: %+v", rows.Rows)
}
}
// Normal line
f(`{"metric": "foobar", "timestamp": 789, "value": -123.456, "tags": {"a":"b"}}`, &Rows{
Rows: []Row{{
Metric: "foobar",
Value: -123.456,
Timestamp: 789,
Tags: []Tag{{
Key: "a",
Value: "b",
}},
}},
})
// Timestamp as string
f(`{"metric": "foobar", "timestamp": "1789", "value": -123.456, "tags": {"a":"b"}}`, &Rows{
Rows: []Row{{
Metric: "foobar",
Value: -123.456,
Timestamp: 1789,
Tags: []Tag{{
Key: "a",
Value: "b",
}},
}},
})
// Timestamp as float64 (it is truncated to integer)
f(`{"metric": "foobar", "timestamp": 17.89, "value": -123.456, "tags": {"a":"b"}}`, &Rows{
Rows: []Row{{
Metric: "foobar",
Value: -123.456,
Timestamp: 17,
Tags: []Tag{{
Key: "a",
Value: "b",
}},
}},
})
// Empty tags
f(`{"metric": "foobar", "timestamp": 789, "value": -123.456, "tags": {}}`, &Rows{
Rows: []Row{{
Metric: "foobar",
Value: -123.456,
Timestamp: 789,
Tags: nil,
}},
})
// Missing tags
f(`{"metric": "foobar", "timestamp": 789, "value": -123.456}`, &Rows{
Rows: []Row{{
Metric: "foobar",
Value: -123.456,
Timestamp: 789,
Tags: nil,
}},
})
// Empty tag value
f(`{"metric": "foobar", "timestamp": 123, "value": -123.456, "tags": {"a":"", "b":"c", "": "d"}}`, &Rows{
Rows: []Row{{
Metric: "foobar",
Value: -123.456,
Timestamp: 123,
Tags: []Tag{
{
Key: "b",
Value: "c",
},
},
}},
})
// Value as string
f(`{"metric": "foobar", "timestamp": 789, "value": "-12.456", "tags": {"a":"b"}}`, &Rows{
Rows: []Row{{
Metric: "foobar",
Value: -12.456,
Timestamp: 789,
Tags: []Tag{{
Key: "a",
Value: "b",
}},
}},
})
// Missing timestamp
f(`{"metric": "foobar", "value": "-12.456", "tags": {"a":"b"}}`, &Rows{
Rows: []Row{{
Metric: "foobar",
Value: -12.456,
Timestamp: 0,
Tags: []Tag{{
Key: "a",
Value: "b",
}},
}},
})
// Multiple tags
f(`{"metric": "foo", "value": 1, "timestamp": 2, "tags": {"bar":"baz", "x": "y"}}`, &Rows{
Rows: []Row{{
Metric: "foo",
Tags: []Tag{
{
Key: "bar",
Value: "baz",
},
{
Key: "x",
Value: "y",
},
},
Value: 1,
Timestamp: 2,
}},
})
// Multi lines
f(`[{"metric": "foo", "value": "0.3", "timestamp": 2, "tags": {"a":"b"}},
{"metric": "bar.baz", "value": 0.34, "timestamp": 43, "tags": {"a":"b"}}]`, &Rows{
Rows: []Row{
{
Metric: "foo",
Value: 0.3,
Timestamp: 2,
Tags: []Tag{{
Key: "a",
Value: "b",
}},
},
{
Metric: "bar.baz",
Value: 0.34,
Timestamp: 43,
Tags: []Tag{{
Key: "a",
Value: "b",
}},
},
},
})
}

View File

@@ -0,0 +1,33 @@
package opentsdbhttp
import (
"fmt"
"testing"
"github.com/valyala/fastjson"
)
func BenchmarkRowsUnmarshal(b *testing.B) {
s := `[{"metric": "cpu.usage_user", "timestamp": 1234556768, "value": 1.23, "tags": {"a":"b", "x": "y"}},
{"metric": "cpu.usage_system", "timestamp": 1234556768, "value": 23.344, "tags": {"a":"b"}},
{"metric": "cpu.usage_iowait", "timestamp": 1234556769, "value":3.3443, "tags": {"a":"b"}},
{"metric": "cpu.usage_irq", "timestamp": 1234556768, "value": 0.34432, "tags": {"a":"b"}}
]
`
b.SetBytes(int64(len(s)))
b.ReportAllocs()
b.RunParallel(func(pb *testing.PB) {
var rows Rows
var p fastjson.Parser
for pb.Next() {
v, err := p.Parse(s)
if err != nil {
panic(fmt.Errorf("cannot parse %q: %s", s, err))
}
rows.Unmarshal(v)
if len(rows.Rows) != 4 {
panic(fmt.Errorf("unexpected number of rows unmarshaled; got %d; want 4", len(rows.Rows)))
}
}
})
}

View File

@@ -0,0 +1,155 @@
package opentsdbhttp
import (
"fmt"
"io"
"net/http"
"runtime"
"sync"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/common"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/concurrencylimiter"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/netstorage"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
"github.com/VictoriaMetrics/metrics"
"github.com/valyala/fastjson"
)
var (
rowsInserted = tenantmetrics.NewCounterMap(`vm_rows_inserted_total{type="opentsdb-http"}`)
rowsPerInsert = metrics.NewSummary(`vm_rows_per_insert{type="opentsdb-http"}`)
readCalls = metrics.NewCounter(`vm_read_calls_total{name="opentsdb-http"}`)
readErrors = metrics.NewCounter(`vm_read_errors_total{name="opentsdb-http"}`)
unmarshalErrors = metrics.NewCounter(`vm_unmarshal_errors_total{name="opentsdb-http"}`)
)
// insertHandler processes HTTP OpenTSDB put requests.
// See http://opentsdb.net/docs/build/html/api_http/put.html
func insertHandler(at *auth.Token, req *http.Request, maxSize int64) error {
return concurrencylimiter.Do(func() error {
return insertHandlerInternal(at, req, maxSize)
})
}
func insertHandlerInternal(at *auth.Token, req *http.Request, maxSize int64) error {
readCalls.Inc()
r := req.Body
if req.Header.Get("Content-Encoding") == "gzip" {
zr, err := common.GetGzipReader(r)
if err != nil {
readErrors.Inc()
return fmt.Errorf("cannot read gzipped http protocol data: %s", err)
}
defer common.PutGzipReader(zr)
r = zr
}
ctx := getPushCtx()
defer putPushCtx(ctx)
// Read the request into ctx.reqBuf
lr := io.LimitReader(r, maxSize+1)
reqLen, err := ctx.reqBuf.ReadFrom(lr)
if err != nil {
readErrors.Inc()
return fmt.Errorf("cannot read HTTP OpenTSDB request: %s", err)
}
if reqLen > maxSize {
readErrors.Inc()
return fmt.Errorf("too big HTTP OpenTSDB request; mustn't exceed %d bytes", maxSize)
}
// Unmarshal the request to ctx.Rows
p := parserPool.Get()
defer parserPool.Put(p)
v, err := p.ParseBytes(ctx.reqBuf.B)
if err != nil {
unmarshalErrors.Inc()
return fmt.Errorf("cannot parse HTTP OpenTSDB json: %s", err)
}
ctx.Rows.Unmarshal(v)
// Fill in missing timestamps
currentTimestamp := time.Now().Unix()
rows := ctx.Rows.Rows
for i := range rows {
r := &rows[i]
if r.Timestamp == 0 {
r.Timestamp = currentTimestamp
}
}
// Convert timestamps in seconds to milliseconds if needed.
// See http://opentsdb.net/docs/javadoc/net/opentsdb/core/Const.html#SECOND_MASK
for i := range rows {
r := &rows[i]
if r.Timestamp&secondMask == 0 {
r.Timestamp *= 1e3
}
}
// Insert ctx.Rows to db.
ic := &ctx.Common
ic.Reset()
for i := range rows {
r := &rows[i]
ic.Labels = ic.Labels[:0]
ic.AddLabel("", r.Metric)
for j := range r.Tags {
tag := &r.Tags[j]
ic.AddLabel(tag.Key, tag.Value)
}
if err := ic.WriteDataPoint(at, ic.Labels, r.Timestamp, r.Value); err != nil {
return err
}
}
rowsInserted.Get(at).Add(len(rows))
rowsPerInsert.Update(float64(len(rows)))
return ic.FlushBufs()
}
const secondMask int64 = 0x7FFFFFFF00000000
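A quick check of the secondMask heuristic: second-resolution timestamps fit into the lower 32 bits, so masking them yields zero, while millisecond timestamps set higher bits (a standalone illustration, not part of the diff):

fmt.Println(int64(1549891472)&secondMask == 0)    // true  -> seconds; scaled by 1e3
fmt.Println(int64(1549891472010)&secondMask == 0) // false -> already milliseconds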
var parserPool fastjson.ParserPool
type pushCtx struct {
Rows Rows
Common netstorage.InsertCtx
reqBuf bytesutil.ByteBuffer
}
func (ctx *pushCtx) reset() {
ctx.Rows.Reset()
ctx.Common.Reset()
ctx.reqBuf.Reset()
}
func getPushCtx() *pushCtx {
select {
case ctx := <-pushCtxPoolCh:
return ctx
default:
if v := pushCtxPool.Get(); v != nil {
return v.(*pushCtx)
}
return &pushCtx{}
}
}
func putPushCtx(ctx *pushCtx) {
ctx.reset()
select {
case pushCtxPoolCh <- ctx:
default:
pushCtxPool.Put(ctx)
}
}
var pushCtxPool sync.Pool
var pushCtxPoolCh = make(chan *pushCtx, runtime.GOMAXPROCS(-1))
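getPushCtx and putPushCtx layer two pools: a channel bounded at GOMAXPROCS contexts, whose contents survive garbage collection, and a sync.Pool fallback, which the GC may clear under memory pressure. Handlers use the pair symmetrically:

ctx := getPushCtx() // channel hit, sync.Pool hit, or a fresh pushCtx
defer putPushCtx(ctx)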

View File

@@ -0,0 +1,87 @@
package opentsdbhttp
import (
"context"
"net/http"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/metrics"
)
var (
writeRequests = metrics.NewCounter(`vm_http_requests_total{path="/insert/{}/api/put", protocol="opentsdb-http"}`)
writeErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/insert/{}/api/put", protocol="opentsdb-http"}`)
)
var (
httpServer *http.Server
httpAddr string
maxRequestSize int64
)
// Serve starts HTTP OpenTSDB server on the given addr.
func Serve(addr string, maxReqSize int64) {
logger.Infof("starting HTTP OpenTSDB server at %q", addr)
httpAddr = addr
maxRequestSize = maxReqSize
httpServer = &http.Server{
Addr: addr,
Handler: http.HandlerFunc(requestHandler),
ReadTimeout: 30 * time.Second,
WriteTimeout: 10 * time.Second,
}
go func() {
err := httpServer.ListenAndServe()
if err == http.ErrServerClosed {
return
}
if err != nil {
logger.Fatalf("error serving HTTP OpenTSDB: %s", err)
}
}()
}
// requestHandler handles HTTP OpenTSDB insert request.
func requestHandler(w http.ResponseWriter, r *http.Request) {
p, err := httpserver.ParsePath(r.URL.Path)
if err != nil {
httpserver.Errorf(w, "cannot parse path %q: %s", r.URL.Path, err)
return
}
if p.Prefix != "insert" {
// This path is not served here.
httpserver.Errorf(w, "unexpected path requested on HTTP OpenTSDB server: %q", r.URL.Path)
return
}
at, err := auth.NewToken(p.AuthToken)
if err != nil {
httpserver.Errorf(w, "auth error: %s", err)
return
}
switch p.Suffix {
case "api/put":
writeRequests.Inc()
if err := insertHandler(at, r, maxRequestSize); err != nil {
writeErrors.Inc()
httpserver.Errorf(w, "error in %q: %s", r.URL.Path, err)
return
}
w.WriteHeader(http.StatusNoContent)
default:
httpserver.Errorf(w, "unexpected path requested on HTTP OpenTSDB server: %q", r.URL.Path)
}
}
// Stop stops HTTP OpenTSDB server.
func Stop() {
logger.Infof("stopping HTTP OpenTSDB server at %q...", httpAddr)
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
if err := httpServer.Shutdown(ctx); err != nil {
logger.Fatalf("cannot close HTTP OpenTSDB server: %s", err)
}
}

View File

@@ -6,55 +6,67 @@ import (
"runtime"
"sync"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/common"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/concurrencylimiter"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/netstorage"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
"github.com/VictoriaMetrics/metrics"
)
var rowsInserted = metrics.NewCounter(`vm_rows_inserted_total{type="prometheus"}`)
var (
rowsInserted = tenantmetrics.NewCounterMap(`vm_rows_inserted_total{type="prometheus"}`)
rowsPerInsert = metrics.NewSummary(`vm_rows_per_insert{type="prometheus"}`)
)
// InsertHandler processes remote write for prometheus.
func InsertHandler(r *http.Request, maxSize int64) error {
func InsertHandler(at *auth.Token, r *http.Request, maxSize int64) error {
return concurrencylimiter.Do(func() error {
return insertHandlerInternal(r, maxSize)
return insertHandlerInternal(at, r, maxSize)
})
}
func insertHandlerInternal(r *http.Request, maxSize int64) error {
func insertHandlerInternal(at *auth.Token, r *http.Request, maxSize int64) error {
ctx := getPushCtx()
defer putPushCtx(ctx)
if err := ctx.Read(r, maxSize); err != nil {
return err
}
timeseries := ctx.req.Timeseries
rowsLen := 0
for i := range timeseries {
rowsLen += len(timeseries[i].Samples)
}
ic := &ctx.Common
ic.Reset(rowsLen)
ic.Reset()
timeseries := ctx.req.Timeseries
rowsTotal := 0
for i := range timeseries {
ts := &timeseries[i]
var metricNameRaw []byte
storageNodeIdx := ic.GetStorageNodeIdx(at, ts.Labels)
ic.MetricNameBuf = ic.MetricNameBuf[:0]
for i := range ts.Samples {
r := &ts.Samples[i]
metricNameRaw = ic.WriteDataPointExt(metricNameRaw, ts.Labels, r.Timestamp, r.Value)
if len(ic.MetricNameBuf) == 0 {
ic.MetricNameBuf = storage.MarshalMetricNameRaw(ic.MetricNameBuf[:0], at.AccountID, at.ProjectID, ts.Labels)
}
if err := ic.WriteDataPointExt(at, storageNodeIdx, ic.MetricNameBuf, r.Timestamp, r.Value); err != nil {
return err
}
}
rowsInserted.Add(len(ts.Samples))
rowsTotal += len(ts.Samples)
}
rowsInserted.Get(at).Add(rowsTotal)
rowsPerInsert.Update(float64(rowsTotal))
return ic.FlushBufs()
}
type pushCtx struct {
Common common.InsertCtx
Common netstorage.InsertCtx
req prompb.WriteRequest
reqBuf []byte
}
func (ctx *pushCtx) reset() {
ctx.Common.Reset(0)
ctx.Common.Reset()
ctx.req.Reset()
ctx.reqBuf = ctx.reqBuf[:0]
}

View File

@@ -0,0 +1,202 @@
package vmimport
import (
"fmt"
"strings"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/metrics"
"github.com/valyala/fastjson"
)
// Rows contains parsed rows from `/api/v1/import` request.
type Rows struct {
Rows []Row
tu tagsUnmarshaler
}
// Reset resets rs.
func (rs *Rows) Reset() {
for i := range rs.Rows {
rs.Rows[i].reset()
}
rs.Rows = rs.Rows[:0]
rs.tu.reset()
}
// Unmarshal unmarshals JSON lines from the `/api/v1/import` request body s.
//
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6
//
// s must remain unchanged while rs is in use.
func (rs *Rows) Unmarshal(s string) {
rs.tu.reset()
rs.Rows = unmarshalRows(rs.Rows[:0], s, &rs.tu)
}
// Row is a single row from `/api/v1/import` request.
type Row struct {
Tags []Tag
Values []float64
Timestamps []int64
}
func (r *Row) reset() {
r.Tags = nil
r.Values = r.Values[:0]
r.Timestamps = r.Timestamps[:0]
}
func (r *Row) unmarshal(s string, tu *tagsUnmarshaler) error {
r.reset()
v, err := tu.p.Parse(s)
if err != nil {
return fmt.Errorf("cannot parse json line: %s", err)
}
// Unmarshal tags
metric := v.GetObject("metric")
if metric == nil {
return fmt.Errorf("missing `metric` object")
}
tagsStart := len(tu.tagsPool)
if err := tu.unmarshalTags(metric); err != nil {
return fmt.Errorf("cannot unmarshal `metric`: %s", err)
}
tags := tu.tagsPool[tagsStart:]
r.Tags = tags[:len(tags):len(tags)]
if len(r.Tags) == 0 {
return fmt.Errorf("missing tags")
}
// Unmarshal values
values := v.GetArray("values")
if len(values) == 0 {
return fmt.Errorf("missing `values` array")
}
for i, v := range values {
f, err := v.Float64()
if err != nil {
return fmt.Errorf("cannot unmarshal value at position %d: %s", i, err)
}
r.Values = append(r.Values, f)
}
// Unmarshal timestamps
timestamps := v.GetArray("timestamps")
if len(timestamps) == 0 {
return fmt.Errorf("missing `timestamps` array")
}
for i, v := range timestamps {
ts, err := v.Int64()
if err != nil {
return fmt.Errorf("cannot unmarshal timestamp at position %d: %s", i, err)
}
r.Timestamps = append(r.Timestamps, ts)
}
if len(r.Timestamps) != len(r.Values) {
return fmt.Errorf("`timestamps` array size must match `values` array size; got %d; want %d", len(r.Timestamps), len(r.Values))
}
return nil
}
// Tag represents `/api/v1/import` tag.
type Tag struct {
Key []byte
Value []byte
}
func (tag *Tag) reset() {
// tag.Key and tag.Value point to tu.bytesPool, so there is no need in keeping these byte slices here.
tag.Key = nil
tag.Value = nil
}
type tagsUnmarshaler struct {
p fastjson.Parser
tagsPool []Tag
bytesPool []byte
err error
}
func (tu *tagsUnmarshaler) reset() {
for i := range tu.tagsPool {
tu.tagsPool[i].reset()
}
tu.tagsPool = tu.tagsPool[:0]
tu.bytesPool = tu.bytesPool[:0]
tu.err = nil
}
func (tu *tagsUnmarshaler) addTag() *Tag {
dst := tu.tagsPool
if cap(dst) > len(dst) {
dst = dst[:len(dst)+1]
} else {
dst = append(dst, Tag{})
}
tag := &dst[len(dst)-1]
tu.tagsPool = dst
return tag
}
func (tu *tagsUnmarshaler) addBytes(b []byte) []byte {
bytesPoolLen := len(tu.bytesPool)
tu.bytesPool = append(tu.bytesPool, b...)
bCopy := tu.bytesPool[bytesPoolLen:]
return bCopy[:len(bCopy):len(bCopy)]
}
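The full slice expression bCopy[:len(bCopy):len(bCopy)] caps the returned slice, so a later append to it reallocates instead of scribbling over neighboring tag data in bytesPool. A minimal illustration:

pool := make([]byte, 3, 8)
copy(pool, "abc")
b := pool[:3:3]    // len(b) == cap(b) == 3
b = append(b, 'x') // allocates a new array; pool's spare capacity stays untouched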
func (tu *tagsUnmarshaler) unmarshalTags(o *fastjson.Object) error {
tu.err = nil
o.Visit(func(key []byte, v *fastjson.Value) {
tag := tu.addTag()
tag.Key = tu.addBytes(key)
sb, err := v.StringBytes()
if err != nil && tu.err != nil {
tu.err = fmt.Errorf("cannot parse value for tag %q: %s", tag.Key, err)
}
tag.Value = tu.addBytes(sb)
})
return tu.err
}
func unmarshalRows(dst []Row, s string, tu *tagsUnmarshaler) []Row {
for len(s) > 0 {
n := strings.IndexByte(s, '\n')
if n < 0 {
// The last line.
return unmarshalRow(dst, s, tu)
}
dst = unmarshalRow(dst, s[:n], tu)
s = s[n+1:]
}
return dst
}
func unmarshalRow(dst []Row, s string, tu *tagsUnmarshaler) []Row {
if len(s) > 0 && s[len(s)-1] == '\r' {
s = s[:len(s)-1]
}
if len(s) == 0 {
return dst
}
if cap(dst) > len(dst) {
dst = dst[:len(dst)+1]
} else {
dst = append(dst, Row{})
}
r := &dst[len(dst)-1]
if err := r.unmarshal(s, tu); err != nil {
dst = dst[:len(dst)-1]
logger.Errorf("cannot unmarshal json line %q: %s; skipping it", s, err)
invalidLines.Inc()
}
return dst
}
var invalidLines = metrics.NewCounter(`vm_rows_invalid_total{type="vmimport"}`)
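For reference, a single well-formed /api/v1/import line looks as follows (a made-up sample in the same shape as the benchmark data below):

var rows Rows
rows.Unmarshal(`{"metric":{"__name__":"up","job":"node"},"values":[0,1],"timestamps":[1549891472010,1549891487724]}`)
// -> one Row with tags __name__=up and job=node plus two (value, timestamp) pairs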

View File

@@ -0,0 +1,216 @@
package vmimport
import (
"reflect"
"testing"
)
func TestRowsUnmarshalFailure(t *testing.T) {
f := func(s string) {
t.Helper()
var rows Rows
rows.Unmarshal(s)
if len(rows.Rows) != 0 {
t.Fatalf("expecting zero rows; got %d rows", len(rows.Rows))
}
// Try again
rows.Unmarshal(s)
if len(rows.Rows) != 0 {
t.Fatalf("expecting zero rows; got %d rows", len(rows.Rows))
}
}
// Invalid json line
f("")
f("\n")
f("foo\n")
f("123")
f("[1,3]")
f("{}")
f("[]")
f(`{"foo":"bar"}`)
// Invalid metric
f(`{"metric":123,"values":[1,2],"timestamps":[3,4]}`)
f(`{"metric":[123],"values":[1,2],"timestamps":[3,4]}`)
f(`{"metric":[],"values":[1,2],"timestamps":[3,4]}`)
f(`{"metric":{},"values":[1,2],"timestamps":[3,4]}`)
f(`{"metric":null,"values":[1,2],"timestamps":[3,4]}`)
f(`{"values":[1,2],"timestamps":[3,4]}`)
// Invalid values
f(`{"metric":{"foo":"bar"},"values":1,"timestamps":[3,4]}`)
f(`{"metric":{"foo":"bar"},"values":{"x":1},"timestamps":[3,4]}`)
f(`{"metric":{"foo":"bar"},"values":{"x":1},"timestamps":[3,4]}`)
f(`{"metric":{"foo":"bar"},"values":null,"timestamps":[3,4]}`)
f(`{"metric":{"foo":"bar"},"timestamps":[3,4]}`)
// Invalid timestamps
f(`{"metric":{"foo":"bar"},"values":[1,2],"timestamps":3}`)
f(`{"metric":{"foo":"bar"},"values":[1,2],"timestamps":false}`)
f(`{"metric":{"foo":"bar"},"values":[1,2],"timestamps":{}}`)
f(`{"metric":{"foo":"bar"},"values":[1,2]}`)
// values and timestamps count mismatch
f(`{"metric":{"foo":"bar"},"values":[],"timestamps":[]}`)
f(`{"metric":{"foo":"bar"},"values":[],"timestamps":[1]}`)
f(`{"metric":{"foo":"bar"},"values":[2],"timestamps":[]}`)
f(`{"metric":{"foo":"bar"},"values":[2],"timestamps":[3,4]}`)
f(`{"metric":{"foo":"bar"},"values":[2,3],"timestamps":[4]}`)
// Garbage after the line
f(`{"metric":{"foo":"bar"},"values":[2],"timestamps":[4]}{}`)
}
func TestRowsUnmarshalSuccess(t *testing.T) {
f := func(s string, rowsExpected *Rows) {
t.Helper()
var rows Rows
rows.Unmarshal(s)
if !reflect.DeepEqual(rows.Rows, rowsExpected.Rows) {
t.Fatalf("unexpected rows;\ngot\n%+v;\nwant\n%+v", rows.Rows, rowsExpected.Rows)
}
// Try unmarshaling again
rows.Unmarshal(s)
if !reflect.DeepEqual(rows.Rows, rowsExpected.Rows) {
t.Fatalf("unexpected rows;\ngot\n%+v;\nwant\n%+v", rows.Rows, rowsExpected.Rows)
}
rows.Reset()
if len(rows.Rows) != 0 {
t.Fatalf("non-empty rows after reset: %+v", rows.Rows)
}
}
// Empty line
f("", &Rows{})
f("\n\n", &Rows{})
f("\n\r\n", &Rows{})
// Single line with a single tag
f(`{"metric":{"foo":"bar"},"values":[1.23],"timestamps":[456]}`, &Rows{
Rows: []Row{{
Tags: []Tag{{
Key: []byte("foo"),
Value: []byte("bar"),
}},
Values: []float64{1.23},
Timestamps: []int64{456},
}},
})
// Line with multiple tags
f(`{"metric":{"foo":"bar","baz":"xx"},"values":[1.23, -3.21],"timestamps" : [456,789]}`, &Rows{
Rows: []Row{{
Tags: []Tag{
{
Key: []byte("foo"),
Value: []byte("bar"),
},
{
Key: []byte("baz"),
Value: []byte("xx"),
},
},
Values: []float64{1.23, -3.21},
Timestamps: []int64{456, 789},
}},
})
// Multiple lines
f(`{"metric":{"foo":"bar","baz":"xx"},"values":[1.23, -3.21],"timestamps" : [456,789]}
{"metric":{"__name__":"xx"},"values":[34],"timestamps" : [11]}
`, &Rows{
Rows: []Row{
{
Tags: []Tag{
{
Key: []byte("foo"),
Value: []byte("bar"),
},
{
Key: []byte("baz"),
Value: []byte("xx"),
},
},
Values: []float64{1.23, -3.21},
Timestamps: []int64{456, 789},
},
{
Tags: []Tag{
{
Key: []byte("__name__"),
Value: []byte("xx"),
},
},
Values: []float64{34},
Timestamps: []int64{11},
},
},
})
// Multiple lines with invalid line in the middle.
f(`{"metric":{"xfoo":"bar","baz":"xx"},"values":[1.232, -3.21],"timestamps" : [456,7890]}
garbage here
{"metric":{"__name__":"xxy"},"values":[34],"timestamps" : [111]}`, &Rows{
Rows: []Row{
{
Tags: []Tag{
{
Key: []byte("xfoo"),
Value: []byte("bar"),
},
{
Key: []byte("baz"),
Value: []byte("xx"),
},
},
Values: []float64{1.232, -3.21},
Timestamps: []int64{456, 7890},
},
{
Tags: []Tag{
{
Key: []byte("__name__"),
Value: []byte("xxy"),
},
},
Values: []float64{34},
Timestamps: []int64{111},
},
},
})
// No newline after the second line.
f(`{"metric":{"foo":"bar","baz":"xx"},"values":[1.23, -3.21],"timestamps" : [456,789]}
{"metric":{"__name__":"xx"},"values":[34],"timestamps" : [11]}`, &Rows{
Rows: []Row{
{
Tags: []Tag{
{
Key: []byte("foo"),
Value: []byte("bar"),
},
{
Key: []byte("baz"),
Value: []byte("xx"),
},
},
Values: []float64{1.23, -3.21},
Timestamps: []int64{456, 789},
},
{
Tags: []Tag{
{
Key: []byte("__name__"),
Value: []byte("xx"),
},
},
Values: []float64{34},
Timestamps: []int64{11},
},
},
})
}

View File

@@ -0,0 +1,25 @@
package vmimport
import (
"fmt"
"testing"
)
func BenchmarkRowsUnmarshal(b *testing.B) {
s := `{"metric":{"__name__":"up","job":"node_exporter","instance":"localhost:9100"},"values":[0,0,0],"timestamps":[1549891472010,1549891487724,1549891503438]}
{"metric":{"__name__":"up","job":"prometheus","instance":"localhost:9090"},"values":[1,1,1],"timestamps":[1549891461511,1549891476511,1549891491511]}
{"metric":{"__name__":"up","job":"node_exporter","instance":"foobar.com:9100"},"values":[0,0,0],"timestamps":[1549891472010,1549891487724,1549891503438]}
{"metric":{"__name__":"up","job":"prometheus","instance":"xxx.yyy.zzz:9090"},"values":[1,1,1],"timestamps":[1549891461511,1549891476511,1549891491511]}
`
b.SetBytes(int64(len(s)))
b.ReportAllocs()
b.RunParallel(func(pb *testing.PB) {
var rows Rows
for pb.Next() {
rows.Unmarshal(s)
if len(rows.Rows) != 4 {
panic(fmt.Errorf("unexpected number of rows parsed; got %d; want 4", len(rows.Rows)))
}
}
})
}


@@ -0,0 +1,159 @@
package vmimport
import (
"flag"
"fmt"
"io"
"net/http"
"runtime"
"sync"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/common"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/concurrencylimiter"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/netstorage"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
"github.com/VictoriaMetrics/metrics"
)
var maxLineLen = flag.Int("import.maxLineLen", 100*1024*1024, "The maximum length in bytes of a single line accepted by `/api/v1/import`")
var (
rowsInserted = tenantmetrics.NewCounterMap(`vm_rows_inserted_total{type="vmimport"}`)
rowsPerInsert = metrics.NewSummary(`vm_rows_per_insert{type="vmimport"}`)
)
// InsertHandler processes `/api/v1/import` request.
//
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6
func InsertHandler(at *auth.Token, req *http.Request) error {
return concurrencylimiter.Do(func() error {
return insertHandlerInternal(at, req)
})
}
func insertHandlerInternal(at *auth.Token, req *http.Request) error {
readCalls.Inc()
r := req.Body
if req.Header.Get("Content-Encoding") == "gzip" {
zr, err := common.GetGzipReader(r)
if err != nil {
return fmt.Errorf("cannot read gzipped vmimport data: %s", err)
}
defer common.PutGzipReader(zr)
r = zr
}
ctx := getPushCtx()
defer putPushCtx(ctx)
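// Stream the request body block by block, inserting rows as each block is parsed.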
for ctx.Read(r) {
if err := ctx.InsertRows(at); err != nil {
return err
}
}
return ctx.Error()
}
func (ctx *pushCtx) InsertRows(at *auth.Token) error {
rows := ctx.Rows.Rows
ic := &ctx.Common
ic.Reset()
rowsTotal := 0
for i := range rows {
r := &rows[i]
ic.Labels = ic.Labels[:0]
for j := range r.Tags {
tag := &r.Tags[j]
ic.AddLabelBytes(tag.Key, tag.Value)
}
ic.MetricNameBuf = storage.MarshalMetricNameRaw(ic.MetricNameBuf[:0], at.AccountID, at.ProjectID, ic.Labels)
storageNodeIdx := ic.GetStorageNodeIdx(at, ic.Labels)
values := r.Values
timestamps := r.Timestamps
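// Hint to the compiler that bounds checks in the loop below can be elided;
// a valid row has len(timestamps) == len(values).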
_ = timestamps[len(values)-1]
for j, value := range values {
timestamp := timestamps[j]
if err := ic.WriteDataPointExt(at, storageNodeIdx, ic.MetricNameBuf, timestamp, value); err != nil {
return err
}
}
rowsTotal += len(values)
}
rowsInserted.Get(at).Add(rowsTotal)
rowsPerInsert.Update(float64(rowsTotal))
return ic.FlushBufs()
}
func (ctx *pushCtx) Read(r io.Reader) bool {
if ctx.err != nil {
return false
}
ctx.reqBuf, ctx.tailBuf, ctx.err = common.ReadLinesBlockExt(r, ctx.reqBuf, ctx.tailBuf, *maxLineLen)
if ctx.err != nil {
if ctx.err != io.EOF {
readErrors.Inc()
ctx.err = fmt.Errorf("cannot read vmimport data: %s", ctx.err)
}
return false
}
ctx.Rows.Unmarshal(bytesutil.ToUnsafeString(ctx.reqBuf))
return true
}
var (
readCalls = metrics.NewCounter(`vm_read_calls_total{name="vmimport"}`)
readErrors = metrics.NewCounter(`vm_read_errors_total{name="vmimport"}`)
)
type pushCtx struct {
Rows Rows
Common netstorage.InsertCtx
reqBuf []byte
tailBuf []byte
err error
}
func (ctx *pushCtx) Error() error {
if ctx.err == io.EOF {
return nil
}
return ctx.err
}
func (ctx *pushCtx) reset() {
ctx.Rows.Reset()
ctx.Common.Reset()
ctx.reqBuf = ctx.reqBuf[:0]
ctx.tailBuf = ctx.tailBuf[:0]
ctx.err = nil
}
func getPushCtx() *pushCtx {
select {
case ctx := <-pushCtxPoolCh:
return ctx
default:
if v := pushCtxPool.Get(); v != nil {
return v.(*pushCtx)
}
return &pushCtx{}
}
}
func putPushCtx(ctx *pushCtx) {
ctx.reset()
select {
case pushCtxPoolCh <- ctx:
default:
pushCtxPool.Put(ctx)
}
}
var pushCtxPool sync.Pool
var pushCtxPoolCh = make(chan *pushCtx, runtime.GOMAXPROCS(-1))
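For context, a minimal client-side sketch of feeding this handler: it gzips two lines in the format parsed by `Rows.Unmarshal` above and POSTs them. The `localhost:8480` address and the `/insert/0/prometheus` prefix are assumptions based on the cluster URL layout, not something this diff defines:

```
package main

import (
	"bytes"
	"compress/gzip"
	"fmt"
	"net/http"
)

func main() {
	// Two lines in the /api/v1/import format parsed by Rows.Unmarshal above.
	lines := `{"metric":{"__name__":"up","job":"node_exporter"},"values":[0,1],"timestamps":[1549891472010,1549891487724]}
{"metric":{"__name__":"up","job":"prometheus"},"values":[1,1],"timestamps":[1549891461511,1549891476511]}
`
	// Gzip the payload so the handler's Content-Encoding branch is exercised.
	var buf bytes.Buffer
	zw := gzip.NewWriter(&buf)
	if _, err := zw.Write([]byte(lines)); err != nil {
		panic(err)
	}
	if err := zw.Close(); err != nil {
		panic(err)
	}
	// Hypothetical vminsert address and accountID 0; adjust to your setup.
	req, err := http.NewRequest("POST", "http://localhost:8480/insert/0/prometheus/api/v1/import", &buf)
	if err != nil {
		panic(err)
	}
	req.Header.Set("Content-Encoding", "gzip")
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	fmt.Println("status:", resp.Status)
}
```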

app/vmrestore/Makefile (37 lines, Normal file)

@@ -0,0 +1,37 @@
# All these commands must run from repository root.
vmrestore:
APP_NAME=vmrestore $(MAKE) app-local
vmrestore-prod:
APP_NAME=vmrestore $(MAKE) app-via-docker
package-vmrestore:
APP_NAME=vmrestore $(MAKE) package-via-docker
publish-vmrestore:
APP_NAME=vmrestore $(MAKE) publish-via-docker
vmrestore-arm:
CGO_ENABLED=0 GOOS=linux GOARCH=arm GO111MODULE=on go build -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/vmrestore-arm ./app/vmrestore
vmrestore-arm-prod:
APP_NAME=vmrestore APP_SUFFIX='-arm' DOCKER_OPTS='--env CGO_ENABLED=0 --env GOARCH=arm' $(MAKE) app-via-docker
vmrestore-arm64:
CGO_ENABLED=0 GOOS=linux GOARCH=arm64 GO111MODULE=on go build -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/vmrestore-arm64 ./app/vmrestore
vmrestore-arm64-prod:
APP_NAME=vmrestore APP_SUFFIX='-arm64' DOCKER_OPTS='--env CGO_ENABLED=0 --env GOARCH=arm64' $(MAKE) app-via-docker
vmrestore-386:
CGO_ENABLED=0 GOOS=linux GOARCH=386 GO111MODULE=on go build -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/vmrestore-386 ./app/vmrestore
vmrestore-386-prod:
APP_NAME=vmrestore APP_SUFFIX='-386' DOCKER_OPTS='--env CGO_ENABLED=0 --env GOARCH=386' $(MAKE) app-via-docker
vmrestore-pure:
APP_NAME=vmrestore $(MAKE) app-local-pure
vmrestore-pure-prod:
APP_NAME=vmrestore APP_SUFFIX='-pure' DOCKER_OPTS='--env CGO_ENABLED=0' $(MAKE) app-via-docker

app/vmrestore/README.md (86 lines, Normal file)

@@ -0,0 +1,86 @@
## vmrestore
`vmrestore` restores data from backups created by [vmbackup](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmbackup/README.md).
VictoriaMetrics `v1.29.0` and newer versions must be used for working with the restored data.
The restore process can be interrupted at any time. It automatically resumes from the interruption point
when `vmrestore` is restarted with the same args.
### Usage
VictoriaMetrics must be stopped during the restore process.
```
vmrestore -src=gcs://<bucket>/<path/to/backup> -storageDataPath=<local/path/to/restore>
```
* `<bucket>` is the [GCS bucket](https://cloud.google.com/storage/docs/creating-buckets) name.
* `<path/to/backup>` is the path to the backup made with [vmbackup](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmbackup/README.md) in the GCS bucket.
* `<local/path/to/restore>` is the path to the folder where data will be restored. This folder must be passed
to VictoriaMetrics via the `-storageDataPath` command-line flag after the restore process is complete.
The original `-storageDataPath` directory may contain old files. They will be substituted by the files from the backup.
### Troubleshooting
* If `vmrestore` eats all the network bandwidth, then set `-maxBytesPerSecond` to the desired value.
* If `vmrestore` has been interrupted due to a temporary error, then just restart it with the same args. It will resume the restore process.
### Advanced usage
Run `vmrestore -help` in order to see all the available options:
```
-concurrency int
The number of concurrent workers. Higher concurrency may reduce restore duration (default 10)
-configFilePath string
Path to file with S3 configs. Configs are loaded from default location if not set.
See https://docs.aws.amazon.com/general/latest/gr/aws-security-credentials.html
-configProfile string
Profile name for S3 configs (default "default")
-credsFilePath string
Path to file with GCS or S3 credentials. Credentials are loaded from default locations if not set.
See https://cloud.google.com/iam/docs/creating-managing-service-account-keys and https://docs.aws.amazon.com/general/latest/gr/aws-security-credentials.html
-customS3Endpoint string
Custom S3 endpoint for use with S3-compatible storages (e.g. MinIO). S3 is used if not set
-loggerLevel string
Minimum level of errors to log. Possible values: INFO, ERROR, FATAL, PANIC (default "INFO")
-maxBytesPerSecond int
The maximum download speed. There is no limit if it is set to 0
-memory.allowedPercent float
Allowed percent of system memory VictoriaMetrics caches may occupy (default 60)
-src string
Source path with backup on the remote storage. Example: gcs://bucket/path/to/backup/dir, s3://bucket/path/to/backup/dir or fs:///path/to/local/backup/dir
-storageDataPath string
Destination path where backup must be restored. VictoriaMetrics must be stopped when restoring from backup. -storageDataPath dir can be non-empty. In this case only missing data is downloaded from backup (default "victoria-metrics-data")
-version
Show VictoriaMetrics version
```
### How to build from sources
It is recommended to use [binary releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases) - see the `vmutils-*` archives there.
#### Development build
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.12.
2. Run `make vmrestore` from the root folder of the repository.
It builds the `vmrestore` binary and puts it into the `bin` folder.
#### Production build
1. [Install docker](https://docs.docker.com/install/).
2. Run `make vmrestore-prod` from the root folder of the repository.
It builds the `vmrestore-prod` binary and puts it into the `bin` folder.
#### Building docker images
Run `make package-vmrestore`. It builds the `victoriametrics/vmrestore:<PKG_TAG>` docker image locally.
`<PKG_TAG>` is an auto-generated image tag, which depends on the source code in the repository.
The `<PKG_TAG>` may be manually set via `PKG_TAG=foobar make package-vmrestore`.


@@ -0,0 +1,5 @@
FROM scratch
COPY --from=local/certs:1.0.3 /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ca-certificates.crt
COPY bin/vmrestore-prod .
EXPOSE 8428
ENTRYPOINT ["/vmrestore-prod"]

app/vmrestore/main.go (78 lines, Normal file)

@@ -0,0 +1,78 @@
package main
import (
"flag"
"fmt"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/backup/actions"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/backup/common"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/backup/fslocal"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
)
var (
src = flag.String("src", "", "Source path with backup on the remote storage. "+
"Example: gcs://bucket/path/to/backup/dir, s3://bucket/path/to/backup/dir or fs:///path/to/local/backup/dir")
storageDataPath = flag.String("storageDataPath", "victoria-metrics-data", "Destination path where backup must be restored. "+
"VictoriaMetrics must be stopped when restoring from backup. -storageDataPath dir can be non-empty. In this case only missing data is downloaded from backup")
concurrency = flag.Int("concurrency", 10, "The number of concurrent workers. Higher concurrency may reduce restore duration")
maxBytesPerSecond = flag.Int("maxBytesPerSecond", 0, "The maximum download speed. There is no limit if it is set to 0")
)
func main() {
flag.Usage = usage
flag.Parse()
buildinfo.Init()
srcFS, err := newSrcFS()
if err != nil {
logger.Fatalf("%s", err)
}
dstFS, err := newDstFS()
if err != nil {
logger.Fatalf("%s", err)
}
a := &actions.Restore{
Concurrency: *concurrency,
Src: srcFS,
Dst: dstFS,
}
if err := a.Run(); err != nil {
logger.Fatalf("cannot restore from backup: %s", err)
}
}
func usage() {
const s = `
vmrestore restores VictoriaMetrics data from backups made by vmbackup.
See the docs at https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmrestore/README.md .
`
f := flag.CommandLine.Output()
fmt.Fprintf(f, "%s\n", s)
flag.PrintDefaults()
}
func newDstFS() (*fslocal.FS, error) {
if len(*storageDataPath) == 0 {
return nil, fmt.Errorf("`-storageDataPath` cannot be empty")
}
fs := &fslocal.FS{
Dir: *storageDataPath,
MaxBytesPerSecond: *maxBytesPerSecond,
}
if err := fs.Init(); err != nil {
return nil, fmt.Errorf("cannot initialize local fs: %s", err)
}
return fs, nil
}
func newSrcFS() (common.RemoteFS, error) {
fs, err := actions.NewRemoteFS(*src)
if err != nil {
return nil, fmt.Errorf("cannot parse `-src`=%q: %s", *src, err)
}
return fs, nil
}

app/vmselect/Makefile (38 lines, Normal file)

@@ -0,0 +1,38 @@
# All these commands must run from repository root.
run-vmselect:
mkdir -p vmselect-cache
DOCKER_OPTS='-v $(shell pwd)/vmselect-cache:/cache' \
APP_NAME=vmselect \
ARGS='-storageNode=localhost:8401 -selectNode=localhost:8481 -cacheDataPath=/cache' \
$(MAKE) run-via-docker
vmselect:
APP_NAME=vmselect $(MAKE) app-local
vmselect-race:
APP_NAME=vmselect RACE=-race $(MAKE) app-local
vmselect-prod:
APP_NAME=vmselect $(MAKE) app-via-docker
vmselect-prod-race:
APP_NAME=vmselect RACE=-race $(MAKE) app-via-docker
vmselect-pure:
APP_NAME=vmselect $(MAKE) app-local-pure
vmselect-pure-prod:
APP_NAME=vmselect APP_SUFFIX='-pure' DOCKER_OPTS='--env CGO_ENABLED=0' $(MAKE) app-via-docker
package-vmselect:
APP_NAME=vmselect $(MAKE) package-via-docker
package-vmselect-race:
APP_NAME=vmselect RACE=-race $(MAKE) package-via-docker
publish-vmselect:
APP_NAME=vmselect $(MAKE) publish-via-docker
publish-vmselect-race:
APP_NAME=vmselect RACE=-race $(MAKE) publish-via-docker


@@ -1,2 +1,6 @@
`vmselect` performs the incoming queries and fetches the required data
from `vmstorage`.
`vmselect` performs the following tasks:
- Splits incoming selects to tasks for `vmstorage` nodes and issues these tasks
to all the `vmstorage` nodes in the cluster.
- Merges responses from all the `vmstorage` nodes and returns a single response.
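As an illustration of that split-and-merge flow, a self-contained toy sketch follows; the `Result` type and `queryNode` helper are hypothetical stand-ins, not the actual `netstorage` API:

```
package main

import (
	"fmt"
	"sort"
	"sync"
)

// Result is a simplified stand-in for a time series returned by one vmstorage node.
type Result struct {
	Name  string
	Value float64
}

// queryNode is a hypothetical per-node call; here it just fabricates data.
func queryNode(node, query string) ([]Result, error) {
	return []Result{{Name: query + "@" + node, Value: 1}}, nil
}

// scatterGather sends the same query to every vmstorage node concurrently
// and merges the per-node responses into a single sorted response.
func scatterGather(nodes []string, query string) ([]Result, error) {
	var (
		mu       sync.Mutex
		merged   []Result
		firstErr error
		wg       sync.WaitGroup
	)
	for _, node := range nodes {
		wg.Add(1)
		go func(node string) {
			defer wg.Done()
			rs, err := queryNode(node, query)
			mu.Lock()
			defer mu.Unlock()
			if err != nil {
				if firstErr == nil {
					firstErr = err
				}
				return
			}
			merged = append(merged, rs...)
		}(node)
	}
	wg.Wait()
	if firstErr != nil {
		return nil, firstErr
	}
	sort.Slice(merged, func(i, j int) bool { return merged[i].Name < merged[j].Name })
	return merged, nil
}

func main() {
	rs, err := scatterGather([]string{"vmstorage-1:8401", "vmstorage-2:8401"}, "up")
	if err != nil {
		panic(err)
	}
	fmt.Println(rs)
}
```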


@@ -0,0 +1,5 @@
FROM scratch
COPY --from=local/certs:1.0.3 /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ca-certificates.crt
COPY bin/vmselect-prod .
EXPOSE 8481
ENTRYPOINT ["/vmselect-prod"]


@@ -1,7 +1,8 @@
package vmselect
package main
import (
"flag"
"fmt"
"net/http"
"runtime"
"strings"
@@ -10,57 +11,150 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/netstorage"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/prometheus"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/promql"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmstorage"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timerpool"
"github.com/VictoriaMetrics/metrics"
)
var (
deleteAuthKey = flag.String("deleteAuthKey", "", "authKey for metrics' deletion via /api/v1/admin/tsdb/delete_series")
httpListenAddr = flag.String("httpListenAddr", ":8481", "Address to listen for http connections")
cacheDataPath = flag.String("cacheDataPath", "", "Path to directory for cache files. Cache isn't saved if empty")
maxConcurrentRequests = flag.Int("search.maxConcurrentRequests", runtime.GOMAXPROCS(-1)*2, "The maximum number of concurrent search requests. It shouldn't exceed 2*vCPUs for better performance. See also -search.maxQueueDuration")
maxQueueDuration = flag.Duration("search.maxQueueDuration", 10*time.Second, "The maximum time the request waits for execution when -search.maxConcurrentRequests limit is reached")
storageNodes = flagutil.NewArray("storageNode", "Addresses of vmstorage nodes; usage: -storageNode=vmstorage-host1:8401 -storageNode=vmstorage-host2:8401")
)
// Init initializes vmselect
func Init() {
tmpDirPath := *vmstorage.DataPath + "/tmp"
fs.RemoveDirContents(tmpDirPath)
netstorage.InitTmpBlocksDir(tmpDirPath)
promql.InitRollupResultCache(*vmstorage.DataPath + "/cache/rollupResult")
func main() {
flag.Parse()
buildinfo.Init()
logger.Init()
logger.Infof("starting netstorage at storageNodes %s", *storageNodes)
startTime := time.Now()
if len(*storageNodes) == 0 {
logger.Fatalf("missing -storageNode arg")
}
netstorage.InitStorageNodes(*storageNodes)
logger.Infof("started netstorage in %s", time.Since(startTime))
if len(*cacheDataPath) > 0 {
tmpDataPath := *cacheDataPath + "/tmp"
fs.RemoveDirContents(tmpDataPath)
netstorage.InitTmpBlocksDir(tmpDataPath)
promql.InitRollupResultCache(*cacheDataPath + "/rollupResult")
} else {
netstorage.InitTmpBlocksDir("")
promql.InitRollupResultCache("")
}
concurrencyCh = make(chan struct{}, *maxConcurrentRequests)
go func() {
httpserver.Serve(*httpListenAddr, requestHandler)
}()
sig := procutil.WaitForSigterm()
logger.Infof("service received signal %s", sig)
logger.Infof("gracefully shutting down the service at %q", *httpListenAddr)
startTime = time.Now()
if err := httpserver.Stop(*httpListenAddr); err != nil {
logger.Fatalf("cannot stop the service: %s", err)
}
logger.Infof("successfully shut down the service in %s", time.Since(startTime))
logger.Infof("shutting down neststorage...")
startTime = time.Now()
netstorage.Stop()
if len(*cacheDataPath) > 0 {
promql.StopRollupResultCache()
}
logger.Infof("successfully stopped netstorage in %s", time.Since(startTime))
fs.MustStopDirRemover()
logger.Infof("the vmselect has been stopped")
}
var concurrencyCh chan struct{}
// Stop stops vmselect
func Stop() {
promql.StopRollupResultCache()
}
var (
concurrencyLimitReached = metrics.NewCounter(`vm_concurrent_select_limit_reached_total`)
concurrencyLimitTimeout = metrics.NewCounter(`vm_concurrent_select_limit_timeout_total`)
// RequestHandler handles remote read API requests for Prometheus
func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
_ = metrics.NewGauge(`vm_concurrent_select_capacity`, func() float64 {
return float64(cap(concurrencyCh))
})
_ = metrics.NewGauge(`vm_concurrent_select_current`, func() float64 {
return float64(len(concurrencyCh))
})
)
func requestHandler(w http.ResponseWriter, r *http.Request) bool {
// Limit the number of concurrent queries.
// Sleep for a second until giving up. This should resolve short bursts in requests.
t := time.NewTimer(*maxQueueDuration)
select {
case concurrencyCh <- struct{}{}:
t.Stop()
defer func() { <-concurrencyCh }()
case <-t.C:
httpserver.Errorf(w, "cannot handle more than %d concurrent requests", cap(concurrencyCh))
default:
// Sleep for a while until giving up. This should resolve short bursts in requests.
concurrencyLimitReached.Inc()
t := timerpool.Get(*maxQueueDuration)
select {
case concurrencyCh <- struct{}{}:
timerpool.Put(t)
defer func() { <-concurrencyCh }()
case <-t.C:
timerpool.Put(t)
concurrencyLimitTimeout.Inc()
err := &httpserver.ErrorWithStatusCode{
Err: fmt.Errorf("cannot handle more than %d concurrent requests", cap(concurrencyCh)),
StatusCode: http.StatusServiceUnavailable,
}
httpserver.Errorf(w, "%s", err)
return true
}
}
path := r.URL.Path
if path == "/internal/resetRollupResultCache" {
promql.ResetRollupResultCache()
return true
}
path := strings.Replace(r.URL.Path, "//", "/", -1)
if strings.HasPrefix(path, "/api/v1/label/") {
s := r.URL.Path[len("/api/v1/label/"):]
p, err := httpserver.ParsePath(path)
if err != nil {
httpserver.Errorf(w, "cannot parse path %q: %s", path, err)
return true
}
at, err := auth.NewToken(p.AuthToken)
if err != nil {
httpserver.Errorf(w, "auth error: %s", err)
return true
}
switch p.Prefix {
case "select":
return selectHandler(w, r, p, at)
case "delete":
return deleteHandler(w, r, p, at)
default:
// This is not our link
return false
}
}
func selectHandler(w http.ResponseWriter, r *http.Request, p *httpserver.Path, at *auth.Token) bool {
if strings.HasPrefix(p.Suffix, "prometheus/api/v1/label/") {
s := p.Suffix[len("prometheus/api/v1/label/"):]
if strings.HasSuffix(s, "/values") {
labelValuesRequests.Inc()
labelName := s[:len(s)-len("/values")]
httpserver.EnableCORS(w, r)
if err := prometheus.LabelValuesHandler(labelName, w, r); err != nil {
if err := prometheus.LabelValuesHandler(at, labelName, w, r); err != nil {
labelValuesErrors.Inc()
sendPrometheusError(w, r, err)
return true
@@ -69,76 +163,99 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
}
}
switch path {
case "/api/v1/query":
switch p.Suffix {
case "prometheus/api/v1/query":
queryRequests.Inc()
httpserver.EnableCORS(w, r)
if err := prometheus.QueryHandler(w, r); err != nil {
if err := prometheus.QueryHandler(at, w, r); err != nil {
queryErrors.Inc()
sendPrometheusError(w, r, err)
return true
}
return true
case "/api/v1/query_range":
case "prometheus/api/v1/query_range":
queryRangeRequests.Inc()
httpserver.EnableCORS(w, r)
if err := prometheus.QueryRangeHandler(w, r); err != nil {
if err := prometheus.QueryRangeHandler(at, w, r); err != nil {
queryRangeErrors.Inc()
sendPrometheusError(w, r, err)
return true
}
return true
case "/api/v1/series":
case "prometheus/api/v1/series":
seriesRequests.Inc()
httpserver.EnableCORS(w, r)
if err := prometheus.SeriesHandler(w, r); err != nil {
if err := prometheus.SeriesHandler(at, w, r); err != nil {
seriesErrors.Inc()
sendPrometheusError(w, r, err)
return true
}
return true
case "/api/v1/series/count":
case "prometheus/api/v1/series/count":
seriesCountRequests.Inc()
httpserver.EnableCORS(w, r)
if err := prometheus.SeriesCountHandler(w, r); err != nil {
if err := prometheus.SeriesCountHandler(at, w, r); err != nil {
seriesCountErrors.Inc()
sendPrometheusError(w, r, err)
return true
}
return true
case "/api/v1/labels":
case "prometheus/api/v1/labels":
labelsRequests.Inc()
httpserver.EnableCORS(w, r)
if err := prometheus.LabelsHandler(w, r); err != nil {
if err := prometheus.LabelsHandler(at, w, r); err != nil {
labelsErrors.Inc()
sendPrometheusError(w, r, err)
return true
}
return true
case "/api/v1/export":
case "prometheus/api/v1/labels/count":
labelsCountRequests.Inc()
httpserver.EnableCORS(w, r)
if err := prometheus.LabelsCountHandler(at, w, r); err != nil {
labelsCountErrors.Inc()
sendPrometheusError(w, r, err)
return true
}
return true
case "prometheus/api/v1/export":
exportRequests.Inc()
if err := prometheus.ExportHandler(w, r); err != nil {
if err := prometheus.ExportHandler(at, w, r); err != nil {
exportErrors.Inc()
httpserver.Errorf(w, "error in %q: %s", r.URL.Path, err)
return true
}
return true
case "/federate":
case "prometheus/federate":
federateRequests.Inc()
if err := prometheus.FederateHandler(w, r); err != nil {
if err := prometheus.FederateHandler(at, w, r); err != nil {
federateErrors.Inc()
httpserver.Errorf(w, "error int %q: %s", r.URL.Path, err)
httpserver.Errorf(w, "error in %q: %s", r.URL.Path, err)
return true
}
return true
case "/api/v1/admin/tsdb/delete_series":
case "prometheus/api/v1/rules":
// Return a dummy placeholder
rulesRequests.Inc()
w.Header().Set("Content-Type", "application/json")
fmt.Fprintf(w, "%s", `{"status":"success","data":{"groups":[]}}`)
return true
case "prometheus/api/v1/alerts":
// Return a dummy placeholder
alertsRequests.Inc()
w.Header().Set("Content-Type", "application/json")
fmt.Fprintf(w, "%s", `{"status":"success","data":{"alerts":[]}}`)
return true
default:
return false
}
}
func deleteHandler(w http.ResponseWriter, r *http.Request, p *httpserver.Path, at *auth.Token) bool {
switch p.Suffix {
case "prometheus/api/v1/admin/tsdb/delete_series":
deleteRequests.Inc()
authKey := r.FormValue("authKey")
if authKey != *deleteAuthKey {
httpserver.Errorf(w, "invalid authKey %q. It must match the value from -deleteAuthKey command line flag", authKey)
return true
}
if err := prometheus.DeleteHandler(r); err != nil {
if err := prometheus.DeleteHandler(at, r); err != nil {
deleteErrors.Inc()
httpserver.Errorf(w, "error in %q: %s", r.URL.Path, err)
return true
@@ -154,36 +271,45 @@ func sendPrometheusError(w http.ResponseWriter, r *http.Request, err error) {
logger.Errorf("error in %q: %s", r.URL.Path, err)
w.Header().Set("Content-Type", "application/json")
statusCode := 422
statusCode := http.StatusUnprocessableEntity
if esc, ok := err.(*httpserver.ErrorWithStatusCode); ok {
statusCode = esc.StatusCode
}
w.WriteHeader(statusCode)
prometheus.WriteErrorResponse(w, statusCode, err)
}
var (
labelValuesRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/label/{}/values"}`)
labelValuesErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/api/v1/label/{}/values"}`)
labelValuesRequests = metrics.NewCounter(`vm_http_requests_total{path="/select/{}/prometheus/api/v1/label/{}/values"}`)
labelValuesErrors = metrics.NewCounter(`vm_http_request_errors_total{path="select/{}/prometheus/api/v1/label/{}/values"}`)
queryRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/query"}`)
queryErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/api/v1/query"}`)
queryRequests = metrics.NewCounter(`vm_http_requests_total{path="/select/{}/prometheus/api/v1/query"}`)
queryErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/select/{}/prometheus/api/v1/query"}`)
queryRangeRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/query_range"}`)
queryRangeErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/api/v1/query_range"}`)
queryRangeRequests = metrics.NewCounter(`vm_http_requests_total{path="/select/prometheus/api/v1/query_range"}`)
queryRangeErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/select/{}/prometheus/api/v1/query_range"}`)
seriesRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/series"}`)
seriesErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/api/v1/series"}`)
seriesRequests = metrics.NewCounter(`vm_http_requests_total{path="/select/{}/prometheus/api/v1/series"}`)
seriesErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/select/{}/prometheus/api/v1/series"}`)
seriesCountRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/series/count"}`)
seriesCountErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/api/v1/series/count"}`)
seriesCountRequests = metrics.NewCounter(`vm_http_requests_total{path="/select/{}/prometheus/api/v1/series/count"}`)
seriesCountErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/select/{}/prometheus/api/v1/series/count"}`)
labelsRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/labels"}`)
labelsErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/api/v1/labels"}`)
labelsRequests = metrics.NewCounter(`vm_http_requests_total{path="/select/{}/prometheus/api/v1/labels"}`)
labelsErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/select/{}/prometheus/api/v1/labels"}`)
deleteRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/admin/tsdb/delete_series"}`)
deleteErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/api/v1/admin/tsdb/delete_series"}`)
labelsCountRequests = metrics.NewCounter(`vm_http_requests_total{path="/select/{}/prometheus/api/v1/labels/count"}`)
labelsCountErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/select/{}/prometheus/api/v1/labels/count"}`)
exportRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/export"}`)
exportErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/api/v1/export"}`)
deleteRequests = metrics.NewCounter(`vm_http_requests_total{path="/delete/{}/prometheus/api/v1/admin/tsdb/delete_series"}`)
deleteErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/delete/{}/prometheus/api/v1/admin/tsdb/delete_series"}`)
federateRequests = metrics.NewCounter(`vm_http_requests_total{path="/federate"}`)
federateErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/federate"}`)
exportRequests = metrics.NewCounter(`vm_http_requests_total{path="/select/{}/prometheus/api/v1/export"}`)
exportErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/select/{}/prometheus/api/v1/export"}`)
federateRequests = metrics.NewCounter(`vm_http_requests_total{path="/select/{}/prometheus/federate"}`)
federateErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/select/{}/prometheus/federate"}`)
rulesRequests = metrics.NewCounter(`vm_http_requests_total{path="/select/{}/prometheus/api/v1/rules"}`)
alertsRequests = metrics.NewCounter(`vm_http_requests_total{path="/select/{}/prometheus/api/v1/alerts"}`)
)
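The request limiting in `requestHandler` above is a buffered channel used as a semaphore, with a fast-path acquire and a timed wait before rejecting. A minimal standalone sketch of the same pattern (using a plain `time.Timer` where the real code uses `timerpool`):

```
package main

import (
	"errors"
	"fmt"
	"time"
)

var errTooBusy = errors.New("too many concurrent requests")

// limiter bounds the number of requests executed at once. Requests above the
// limit wait up to maxQueueDuration for a free slot before giving up.
type limiter struct {
	ch               chan struct{}
	maxQueueDuration time.Duration
}

func newLimiter(maxConcurrent int, maxQueueDuration time.Duration) *limiter {
	return &limiter{
		ch:               make(chan struct{}, maxConcurrent),
		maxQueueDuration: maxQueueDuration,
	}
}

func (l *limiter) Do(f func()) error {
	select {
	case l.ch <- struct{}{}:
		// Fast path: a slot is immediately available.
	default:
		// Slow path: wait for a slot until the queue timeout fires.
		// This should absorb short bursts of requests.
		t := time.NewTimer(l.maxQueueDuration)
		select {
		case l.ch <- struct{}{}:
			t.Stop()
		case <-t.C:
			return errTooBusy
		}
	}
	defer func() { <-l.ch }()
	f()
	return nil
}

func main() {
	l := newLimiter(2, 100*time.Millisecond)
	err := l.Do(func() { fmt.Println("handled") })
	fmt.Println("err:", err)
}
```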


@@ -4,6 +4,6 @@ import (
"os"
)
func mustFadviseRandomRead(f *os.File) {
func mustFadviseSequentialRead(f *os.File) {
// Do nothing :)
}


@@ -0,0 +1,15 @@
package netstorage
import (
"os"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"golang.org/x/sys/unix"
)
func mustFadviseSequentialRead(f *os.File) {
fd := int(f.Fd())
if err := unix.Fadvise(int(fd), 0, 0, unix.FADV_SEQUENTIAL|unix.FADV_WILLNEED); err != nil {
logger.Panicf("FATAL: error returned from unix.Fadvise(SEQUENTIAL|WILLNEED): %s", err)
}
}


@@ -7,9 +7,9 @@ import (
"golang.org/x/sys/unix"
)
func mustFadviseRandomRead(f *os.File) {
func mustFadviseSequentialRead(f *os.File) {
fd := int(f.Fd())
if err := unix.Fadvise(int(fd), 0, 0, unix.FADV_RANDOM|unix.FADV_WILLNEED); err != nil {
logger.Panicf("FATAL: error returned from unix.Fadvise(RANDOM|WILLNEED): %s", err)
if err := unix.Fadvise(int(fd), 0, 0, unix.FADV_SEQUENTIAL|unix.FADV_WILLNEED); err != nil {
logger.Panicf("FATAL: error returned from unix.Fadvise(SEQUENTIAL|WILLNEED): %s", err)
}
}

File diff suppressed because it is too large


@@ -1,7 +1,6 @@
package netstorage
import (
"bufio"
"fmt"
"io/ioutil"
"os"
@@ -10,6 +9,7 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/memory"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
"github.com/VictoriaMetrics/metrics"
)
@@ -22,9 +22,7 @@ func InitTmpBlocksDir(tmpDirPath string) {
tmpDirPath = os.TempDir()
}
tmpBlocksDir = tmpDirPath + "/searchResults"
if err := os.RemoveAll(tmpBlocksDir); err != nil {
logger.Panicf("FATAL: cannot remove %q: %s", tmpBlocksDir, err)
}
fs.MustRemoveAll(tmpBlocksDir)
if err := fs.MkdirAllIfNotExist(tmpBlocksDir); err != nil {
logger.Panicf("FATAL: cannot create %q: %s", tmpBlocksDir, err)
}
@@ -32,13 +30,23 @@ func InitTmpBlocksDir(tmpDirPath string) {
var tmpBlocksDir string
const maxInmemoryTmpBlocksFile = 512 * 1024
func maxInmemoryTmpBlocksFile() int {
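// Allow each in-memory tmp blocks buffer up to ~0.1% of the allowed memory, with a 64KB floor.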
mem := memory.Allowed()
maxLen := mem / 1024
if maxLen < 64*1024 {
return 64 * 1024
}
return maxLen
}
var _ = metrics.NewGauge(`vm_tmp_blocks_max_inmemory_file_size_bytes`, func() float64 {
return float64(maxInmemoryTmpBlocksFile())
})
type tmpBlocksFile struct {
buf []byte
f *os.File
bw *bufio.Writer
f *os.File
offset uint64
}
@@ -46,7 +54,9 @@ type tmpBlocksFile struct {
func getTmpBlocksFile() *tmpBlocksFile {
v := tmpBlocksFilePool.Get()
if v == nil {
return &tmpBlocksFile{}
return &tmpBlocksFile{
buf: make([]byte, 0, maxInmemoryTmpBlocksFile()),
}
}
return v.(*tmpBlocksFile)
}
@@ -55,7 +65,6 @@ func putTmpBlocksFile(tbf *tmpBlocksFile) {
tbf.MustClose()
tbf.buf = tbf.buf[:0]
tbf.f = nil
tbf.bw = nil
tbf.offset = 0
tmpBlocksFilePool.Put(tbf)
}
@@ -71,51 +80,34 @@ func (addr tmpBlockAddr) String() string {
return fmt.Sprintf("offset %d, size %d", addr.offset, addr.size)
}
func getBufioWriter(f *os.File) *bufio.Writer {
v := bufioWriterPool.Get()
if v == nil {
return bufio.NewWriterSize(f, maxInmemoryTmpBlocksFile*2)
}
bw := v.(*bufio.Writer)
bw.Reset(f)
return bw
}
func putBufioWriter(bw *bufio.Writer) {
bufioWriterPool.Put(bw)
}
var bufioWriterPool sync.Pool
var tmpBlocksFilesCreated = metrics.NewCounter(`vm_tmp_blocks_files_created_total`)
// WriteBlock writes b to tbf.
// WriteBlockData writes b to tbf.
//
// It returns errors since the operation may fail on space shortage
// and this must be handled.
func (tbf *tmpBlocksFile) WriteBlock(b *storage.Block) (tmpBlockAddr, error) {
func (tbf *tmpBlocksFile) WriteBlockData(b []byte) (tmpBlockAddr, error) {
var addr tmpBlockAddr
addr.offset = tbf.offset
tbfBufLen := len(tbf.buf)
tbf.buf = storage.MarshalBlock(tbf.buf, b)
addr.size = len(tbf.buf) - tbfBufLen
addr.size = len(b)
tbf.offset += uint64(addr.size)
if tbf.offset <= maxInmemoryTmpBlocksFile {
if len(tbf.buf)+len(b) <= cap(tbf.buf) {
// Fast path - the data fits tbf.buf
tbf.buf = append(tbf.buf, b...)
return addr, nil
}
// Slow path: flush the data from tbf.buf to file.
if tbf.f == nil {
f, err := ioutil.TempFile(tmpBlocksDir, "")
if err != nil {
return addr, err
}
tbf.f = f
tbf.bw = getBufioWriter(f)
tmpBlocksFilesCreated.Inc()
}
_, err := tbf.bw.Write(tbf.buf)
tbf.buf = tbf.buf[:0]
_, err := tbf.f.Write(tbf.buf)
tbf.buf = append(tbf.buf[:0], b...)
if err != nil {
return addr, fmt.Errorf("cannot write block to %q: %s", tbf.f.Name(), err)
}
@@ -126,15 +118,18 @@ func (tbf *tmpBlocksFile) Finalize() error {
if tbf.f == nil {
return nil
}
err := tbf.bw.Flush()
putBufioWriter(tbf.bw)
tbf.bw = nil
if _, err := tbf.f.Write(tbf.buf); err != nil {
return fmt.Errorf("cannot flush the remaining %d bytes to tmpBlocksFile: %s", len(tbf.buf), err)
}
tbf.buf = tbf.buf[:0]
if _, err := tbf.f.Seek(0, 0); err != nil {
logger.Panicf("FATAL: cannot seek to the start of file: %s", err)
}
mustFadviseRandomRead(tbf.f)
return err
// Hint the OS that the file is read almost sequentially.
// This should reduce the number of disk seeks, which is important
// for HDDs.
mustFadviseSequentialRead(tbf.f)
return nil
}
func (tbf *tmpBlocksFile) MustReadBlockAt(dst *storage.Block, addr tmpBlockAddr) {
@@ -169,10 +164,6 @@ func (tbf *tmpBlocksFile) MustClose() {
if tbf.f == nil {
return
}
if tbf.bw != nil {
putBufioWriter(tbf.bw)
tbf.bw = nil
}
fname := tbf.f.Name()
// Remove the file at first, then close it.
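The buffering change above amounts to a spill-to-disk buffer: writes accumulate in a pre-sized in-memory slice and are flushed to a temp file only on overflow. A minimal standalone sketch of that fast/slow path, assuming nothing beyond the standard library:

```
package main

import (
	"fmt"
	"io/ioutil"
	"os"
)

// spillBuffer keeps writes in memory until they exceed cap(buf),
// then flushes the buffered bytes to a temp file and starts over.
type spillBuffer struct {
	buf    []byte
	f      *os.File
	offset uint64
}

func newSpillBuffer(maxInmemory int) *spillBuffer {
	return &spillBuffer{buf: make([]byte, 0, maxInmemory)}
}

func (sb *spillBuffer) Write(b []byte) error {
	sb.offset += uint64(len(b))
	if len(sb.buf)+len(b) <= cap(sb.buf) {
		// Fast path: the data still fits the in-memory buffer.
		sb.buf = append(sb.buf, b...)
		return nil
	}
	// Slow path: flush the accumulated buffer to a temp file.
	if sb.f == nil {
		f, err := ioutil.TempFile("", "spill")
		if err != nil {
			return err
		}
		sb.f = f
	}
	if _, err := sb.f.Write(sb.buf); err != nil {
		return err
	}
	sb.buf = append(sb.buf[:0], b...)
	return nil
}

func main() {
	sb := newSpillBuffer(8)
	for i := 0; i < 4; i++ {
		if err := sb.Write([]byte("abcd")); err != nil {
			panic(err)
		}
	}
	fmt.Printf("buffered=%d total=%d spilled=%v\n", len(sb.buf), sb.offset, sb.f != nil)
}
```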


@@ -30,7 +30,7 @@ func TestTmpBlocksFileSerial(t *testing.T) {
}
func TestTmpBlocksFileConcurrent(t *testing.T) {
concurrency := 4
concurrency := 3
ch := make(chan error, concurrency)
for i := 0; i < concurrency; i++ {
go func() {
@@ -69,7 +69,7 @@ func testTmpBlocksFile() error {
_, _, _ = b.MarshalData(0, 0)
return &b
}
for _, size := range []int{1024, 16 * 1024, maxInmemoryTmpBlocksFile / 2, 2 * maxInmemoryTmpBlocksFile} {
for _, size := range []int{1024, 16 * 1024, maxInmemoryTmpBlocksFile() / 2, 2 * maxInmemoryTmpBlocksFile()} {
err := func() error {
tbf := getTmpBlocksFile()
defer putTmpBlocksFile(tbf)
@@ -77,9 +77,12 @@ func testTmpBlocksFile() error {
// Write blocks until their summary size exceeds `size`.
var addrs []tmpBlockAddr
var blocks []*storage.Block
bb := tmpBufPool.Get()
defer tmpBufPool.Put(bb)
for tbf.offset < uint64(size) {
b := createBlock()
addr, err := tbf.WriteBlock(b)
bb.B = storage.MarshalBlock(bb.B[:0], b)
addr, err := tbf.WriteBlockData(bb.B)
if err != nil {
return fmt.Errorf("cannot write block at offset %d: %s", tbf.offset, err)
}
@@ -94,7 +97,7 @@ func testTmpBlocksFile() error {
}
// Read blocks in parallel and verify them
concurrency := 3
concurrency := 2
workCh := make(chan int)
doneCh := make(chan error)
for i := 0; i < concurrency; i++ {


@@ -13,7 +13,7 @@
{% for i, ts := range rs.Timestamps %}
{%z= bb.B %}{% space %}
{%f= rs.Values[i] %}{% space %}
{%d= int(ts) %}{% newline %}
{%dl= ts %}{% newline %}
{% endfor %}
{% code quicktemplate.ReleaseByteBuffer(bb) %}
{% endfunc %}
@@ -35,10 +35,10 @@
"timestamps":[
{% if len(rs.Timestamps) > 0 %}
{% code timestamps := rs.Timestamps %}
{%d= int(timestamps[0]) %}
{%dl= timestamps[0] %}
{% code timestamps = timestamps[1:] %}
{% for _, ts := range timestamps %}
,{%d= int(ts) %}
,{%dl= ts %}
{% endfor %}
{% endif %}
]


@@ -49,7 +49,7 @@ func StreamExportPrometheusLine(qw422016 *qt422016.Writer, rs *netstorage.Result
//line app/vmselect/prometheus/export.qtpl:15
qw422016.N().S(` `)
//line app/vmselect/prometheus/export.qtpl:16
qw422016.N().D(int(ts))
qw422016.N().DL(ts)
//line app/vmselect/prometheus/export.qtpl:16
qw422016.N().S(`
`)
@@ -129,7 +129,7 @@ func StreamExportJSONLine(qw422016 *qt422016.Writer, rs *netstorage.Result) {
timestamps := rs.Timestamps
//line app/vmselect/prometheus/export.qtpl:38
qw422016.N().D(int(timestamps[0]))
qw422016.N().DL(timestamps[0])
//line app/vmselect/prometheus/export.qtpl:39
timestamps = timestamps[1:]
@@ -138,7 +138,7 @@ func StreamExportJSONLine(qw422016 *qt422016.Writer, rs *netstorage.Result) {
//line app/vmselect/prometheus/export.qtpl:40
qw422016.N().S(`,`)
//line app/vmselect/prometheus/export.qtpl:41
qw422016.N().D(int(ts))
qw422016.N().DL(ts)
//line app/vmselect/prometheus/export.qtpl:42
}
//line app/vmselect/prometheus/export.qtpl:43


@@ -10,7 +10,7 @@
{% if len(rs.Timestamps) == 0 || len(rs.Values) == 0 %}{% return %}{% endif %}
{%= prometheusMetricName(&rs.MetricName) %}{% space %}
{%f= rs.Values[len(rs.Values)-1] %}{% space %}
{%d= int(rs.Timestamps[len(rs.Timestamps)-1]) %}{% newline %}
{%dl= rs.Timestamps[len(rs.Timestamps)-1] %}{% newline %}
{% endfunc %}
{% endstripspace %}


@@ -41,7 +41,7 @@ func StreamFederate(qw422016 *qt422016.Writer, rs *netstorage.Result) {
//line app/vmselect/prometheus/federate.qtpl:12
qw422016.N().S(` `)
//line app/vmselect/prometheus/federate.qtpl:13
qw422016.N().D(int(rs.Timestamps[len(rs.Timestamps)-1]))
qw422016.N().DL(rs.Timestamps[len(rs.Timestamps)-1])
//line app/vmselect/prometheus/federate.qtpl:13
qw422016.N().S(`
`)


@@ -0,0 +1,17 @@
{% import "github.com/VictoriaMetrics/VictoriaMetrics/lib/storage" %}
{% stripspace %}
LabelsCountResponse generates the response for /api/v1/labels/count .
{% func LabelsCountResponse(labelEntries []storage.TagEntry) %}
{
"status":"success",
"data":{
{% for i, e := range labelEntries %}
{%q= e.Key %}:{%d= len(e.Values) %}
{% if i+1 < len(labelEntries) %},{% endif %}
{% endfor %}
}
}
{% endfunc %}
{% endstripspace %}
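For illustration, a sketch of rendering this template with sample entries (assuming `storage.TagEntry` keeps its `Key string` / `Values []string` shape and the repo packages are importable):

```
package main

import (
	"fmt"

	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/prometheus"
	"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
)

func main() {
	entries := []storage.TagEntry{
		{Key: "job", Values: []string{"node_exporter", "prometheus"}},
		{Key: "instance", Values: []string{"localhost:9100"}},
	}
	// Renders: {"status":"success","data":{"job":2,"instance":1}}
	fmt.Println(prometheus.LabelsCountResponse(entries))
}
```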


@@ -0,0 +1,74 @@
// Code generated by qtc from "labels_count_response.qtpl". DO NOT EDIT.
// See https://github.com/valyala/quicktemplate for details.
//line app/vmselect/prometheus/labels_count_response.qtpl:1
package prometheus
//line app/vmselect/prometheus/labels_count_response.qtpl:1
import "github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
// LabelsCountResponse generates the response for /api/v1/labels/count .
//line app/vmselect/prometheus/labels_count_response.qtpl:5
import (
qtio422016 "io"
qt422016 "github.com/valyala/quicktemplate"
)
//line app/vmselect/prometheus/labels_count_response.qtpl:5
var (
_ = qtio422016.Copy
_ = qt422016.AcquireByteBuffer
)
//line app/vmselect/prometheus/labels_count_response.qtpl:5
func StreamLabelsCountResponse(qw422016 *qt422016.Writer, labelEntries []storage.TagEntry) {
//line app/vmselect/prometheus/labels_count_response.qtpl:5
qw422016.N().S(`{"status":"success","data":{`)
//line app/vmselect/prometheus/labels_count_response.qtpl:9
for i, e := range labelEntries {
//line app/vmselect/prometheus/labels_count_response.qtpl:10
qw422016.N().Q(e.Key)
//line app/vmselect/prometheus/labels_count_response.qtpl:10
qw422016.N().S(`:`)
//line app/vmselect/prometheus/labels_count_response.qtpl:10
qw422016.N().D(len(e.Values))
//line app/vmselect/prometheus/labels_count_response.qtpl:11
if i+1 < len(labelEntries) {
//line app/vmselect/prometheus/labels_count_response.qtpl:11
qw422016.N().S(`,`)
//line app/vmselect/prometheus/labels_count_response.qtpl:11
}
//line app/vmselect/prometheus/labels_count_response.qtpl:12
}
//line app/vmselect/prometheus/labels_count_response.qtpl:12
qw422016.N().S(`}}`)
//line app/vmselect/prometheus/labels_count_response.qtpl:15
}
//line app/vmselect/prometheus/labels_count_response.qtpl:15
func WriteLabelsCountResponse(qq422016 qtio422016.Writer, labelEntries []storage.TagEntry) {
//line app/vmselect/prometheus/labels_count_response.qtpl:15
qw422016 := qt422016.AcquireWriter(qq422016)
//line app/vmselect/prometheus/labels_count_response.qtpl:15
StreamLabelsCountResponse(qw422016, labelEntries)
//line app/vmselect/prometheus/labels_count_response.qtpl:15
qt422016.ReleaseWriter(qw422016)
//line app/vmselect/prometheus/labels_count_response.qtpl:15
}
//line app/vmselect/prometheus/labels_count_response.qtpl:15
func LabelsCountResponse(labelEntries []storage.TagEntry) string {
//line app/vmselect/prometheus/labels_count_response.qtpl:15
qb422016 := qt422016.AcquireByteBuffer()
//line app/vmselect/prometheus/labels_count_response.qtpl:15
WriteLabelsCountResponse(qb422016, labelEntries)
//line app/vmselect/prometheus/labels_count_response.qtpl:15
qs422016 := string(qb422016.B)
//line app/vmselect/prometheus/labels_count_response.qtpl:15
qt422016.ReleaseByteBuffer(qb422016)
//line app/vmselect/prometheus/labels_count_response.qtpl:15
return qs422016
//line app/vmselect/prometheus/labels_count_response.qtpl:15
}


@@ -6,40 +6,62 @@ import (
"math"
"net/http"
"runtime"
"sort"
"strconv"
"strings"
"sync"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/netstorage"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/promql"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
"github.com/VictoriaMetrics/metrics"
"github.com/valyala/quicktemplate"
)
var (
latencyOffset = flag.Duration("search.latencyOffset", time.Second*30, "The time when data points become visible in query results after the collection. "+
"Too small a value can result in incomplete last points for query results")
maxQueryDuration = flag.Duration("search.maxQueryDuration", time.Second*30, "The maximum time for search query execution")
maxQueryLen = flag.Int("search.maxQueryLen", 16*1024, "The maximum search query length in bytes")
maxLookback = flag.Duration("search.maxLookback", 0, "Synonym for `-search.lookback-delta` from Prometheus. "+
"The value is dynamically detected from the interval between time series datapoints if not set. It can be overridden on a per-query basis via the `max_lookback` arg")
denyPartialResponse = flag.Bool("search.denyPartialResponse", false, "Whether to deny partial responses when some of vmstorage nodes are unavailable. This prioritizes consistency over availability")
selectNodes = flagutil.NewArray("selectNode", "Addresses of vmselect nodes; usage: -selectNode=vmselect-host1:8481 -selectNode=vmselect-host2:8481")
)
// Default step used if not set.
const defaultStep = 5 * 60 * 1000
// Latency of the data processing pipeline, i.e. the time between data being ingested
// into the system and the time it becomes visible to search.
const latencyOffset = 60 * 1000
// FederateHandler implements /federate . See https://prometheus.io/docs/prometheus/latest/federation/
func FederateHandler(w http.ResponseWriter, r *http.Request) error {
func FederateHandler(at *auth.Token, w http.ResponseWriter, r *http.Request) error {
startTime := time.Now()
ct := currentTime()
if err := r.ParseForm(); err != nil {
return fmt.Errorf("cannot parse request form values: %s", err)
}
matches := r.Form["match[]"]
maxLookback := getDuration(r, "max_lookback", defaultStep)
start := getTime(r, "start", ct-maxLookback)
end := getTime(r, "end", ct)
if len(matches) == 0 {
return fmt.Errorf("missing `match[]` arg")
}
lookbackDelta, err := getMaxLookback(r)
if err != nil {
return err
}
if lookbackDelta <= 0 {
lookbackDelta = defaultStep
}
start, err := getTime(r, "start", ct-lookbackDelta)
if err != nil {
return err
}
end, err := getTime(r, "end", ct)
if err != nil {
return err
}
deadline := getDeadline(r)
if start >= end {
start = end - defaultStep
@@ -49,19 +71,24 @@ func FederateHandler(w http.ResponseWriter, r *http.Request) error {
return err
}
sq := &storage.SearchQuery{
AccountID: at.AccountID,
ProjectID: at.ProjectID,
MinTimestamp: start,
MaxTimestamp: end,
TagFilterss: tagFilterss,
}
rss, err := netstorage.ProcessSearchQuery(sq, deadline)
rss, isPartial, err := netstorage.ProcessSearchQuery(at, sq, true, deadline)
if err != nil {
return fmt.Errorf("cannot fetch data for %q: %s", sq, err)
}
if isPartial && getDenyPartialResponse(r) {
return fmt.Errorf("cannot return full response, since some of vmstorage nodes are unavailable")
}
resultsCh := make(chan *quicktemplate.ByteBuffer)
doneCh := make(chan error)
go func() {
err := rss.RunParallel(func(rs *netstorage.Result) {
err := rss.RunParallel(func(rs *netstorage.Result, workerID uint) {
bb := quicktemplate.AcquireByteBuffer()
WriteFederate(bb, rs)
resultsCh <- bb
@@ -87,7 +114,7 @@ func FederateHandler(w http.ResponseWriter, r *http.Request) error {
var federateDuration = metrics.NewSummary(`vm_request_duration_seconds{path="/federate"}`)
// ExportHandler exports data in raw format from /api/v1/export.
func ExportHandler(w http.ResponseWriter, r *http.Request) error {
func ExportHandler(at *auth.Token, w http.ResponseWriter, r *http.Request) error {
startTime := time.Now()
ct := currentTime()
if err := r.ParseForm(); err != nil {
@@ -97,16 +124,25 @@ func ExportHandler(w http.ResponseWriter, r *http.Request) error {
if len(matches) == 0 {
// Maintain backwards compatibility
match := r.FormValue("match")
if len(match) == 0 {
return fmt.Errorf("missing `match[]` arg")
}
matches = []string{match}
}
start := getTime(r, "start", 0)
end := getTime(r, "end", ct)
start, err := getTime(r, "start", 0)
if err != nil {
return err
}
end, err := getTime(r, "end", ct)
if err != nil {
return err
}
format := r.FormValue("format")
deadline := getDeadline(r)
if start >= end {
start = end - defaultStep
end = start + defaultStep
}
if err := exportHandler(w, matches, start, end, format, deadline); err != nil {
if err := exportHandler(at, w, matches, start, end, format, deadline); err != nil {
return err
}
exportDuration.UpdateDuration(startTime)
@@ -115,10 +151,10 @@ func ExportHandler(w http.ResponseWriter, r *http.Request) error {
var exportDuration = metrics.NewSummary(`vm_request_duration_seconds{path="/api/v1/export"}`)
func exportHandler(w http.ResponseWriter, matches []string, start, end int64, format string, deadline netstorage.Deadline) error {
func exportHandler(at *auth.Token, w http.ResponseWriter, matches []string, start, end int64, format string, deadline netstorage.Deadline) error {
writeResponseFunc := WriteExportStdResponse
writeLineFunc := WriteExportJSONLine
contentType := "application/json"
contentType := "application/stream+json"
if format == "prometheus" {
contentType = "text/plain"
writeLineFunc = WriteExportPrometheusLine
@@ -132,19 +168,25 @@ func exportHandler(w http.ResponseWriter, matches []string, start, end int64, fo
return err
}
sq := &storage.SearchQuery{
AccountID: at.AccountID,
ProjectID: at.ProjectID,
MinTimestamp: start,
MaxTimestamp: end,
TagFilterss: tagFilterss,
}
rss, err := netstorage.ProcessSearchQuery(sq, deadline)
rss, isPartial, err := netstorage.ProcessSearchQuery(at, sq, true, deadline)
if err != nil {
return fmt.Errorf("cannot fetch data for %q: %s", sq, err)
}
if isPartial {
rss.Cancel()
return fmt.Errorf("cannot return full response, since some of vmstorage nodes are unavailable")
}
resultsCh := make(chan *quicktemplate.ByteBuffer, runtime.GOMAXPROCS(-1))
doneCh := make(chan error)
go func() {
err := rss.RunParallel(func(rs *netstorage.Result) {
err := rss.RunParallel(func(rs *netstorage.Result, workerID uint) {
bb := quicktemplate.AcquireByteBuffer()
writeLineFunc(bb, rs)
resultsCh <- bb
@@ -156,6 +198,11 @@ func exportHandler(w http.ResponseWriter, matches []string, start, end int64, fo
w.Header().Set("Content-Type", contentType)
writeResponseFunc(w, resultsCh)
// Consume all the data from resultsCh in the event writeResponseFunc
// fails to consume all the data.
for bb := range resultsCh {
quicktemplate.ReleaseByteBuffer(bb)
}
err = <-doneCh
if err != nil {
return fmt.Errorf("error during data fetching: %s", err)
@@ -166,7 +213,7 @@ func exportHandler(w http.ResponseWriter, matches []string, start, end int64, fo
// DeleteHandler processes /api/v1/admin/tsdb/delete_series prometheus API request.
//
// See https://prometheus.io/docs/prometheus/latest/querying/api/#delete-series
func DeleteHandler(r *http.Request) error {
func DeleteHandler(at *auth.Token, r *http.Request) error {
startTime := time.Now()
if err := r.ParseForm(); err != nil {
return fmt.Errorf("cannot parse request form values: %s", err)
@@ -175,19 +222,28 @@ func DeleteHandler(r *http.Request) error {
return fmt.Errorf("start and end aren't supported. Remove these args from the query in order to delete all the matching metrics")
}
matches := r.Form["match[]"]
if len(matches) == 0 {
return fmt.Errorf("missing `match[]` arg")
}
deadline := getDeadline(r)
tagFilterss, err := getTagFilterssFromMatches(matches)
if err != nil {
return err
}
sq := &storage.SearchQuery{
AccountID: at.AccountID,
ProjectID: at.ProjectID,
TagFilterss: tagFilterss,
}
deletedCount, err := netstorage.DeleteSeries(sq)
deletedCount, err := netstorage.DeleteSeries(at, sq, deadline)
if err != nil {
return fmt.Errorf("cannot delete time series matching %q: %s", matches, err)
}
if deletedCount > 0 {
promql.ResetRollupResultCache()
// Reset rollup result cache on all the vmselect nodes,
// since the cache may contain deleted data.
// TODO: reset only cache for (account, project)
resetRollupResultCaches()
}
deleteDuration.UpdateDuration(startTime)
return nil
@@ -195,15 +251,81 @@ func DeleteHandler(r *http.Request) error {
var deleteDuration = metrics.NewSummary(`vm_request_duration_seconds{path="/api/v1/admin/tsdb/delete_series"}`)
func resetRollupResultCaches() {
if len(*selectNodes) == 0 {
logger.Panicf("BUG: missing -selectNode flag")
}
for _, selectNode := range *selectNodes {
callURL := fmt.Sprintf("http://%s/internal/resetRollupResultCache", selectNode)
resp, err := httpClient.Get(callURL)
if err != nil {
logger.Errorf("error when accessing %q: %s", callURL, err)
resetRollupResultCacheErrors.Inc()
continue
}
if resp.StatusCode != http.StatusOK {
_ = resp.Body.Close()
logger.Errorf("unexpected status code at %q; got %d; want %d", callURL, resp.StatusCode, http.StatusOK)
resetRollupResultCacheErrors.Inc()
continue
}
_ = resp.Body.Close()
}
resetRollupResultCacheCalls.Inc()
}
var (
resetRollupResultCacheErrors = metrics.NewCounter("vm_reset_rollup_result_cache_errors_total")
resetRollupResultCacheCalls = metrics.NewCounter("vm_reset_rollup_result_cache_calls_total")
)
var httpClient = &http.Client{
Timeout: time.Second * 5,
}
// LabelValuesHandler processes /api/v1/label/<labelName>/values request.
//
// See https://prometheus.io/docs/prometheus/latest/querying/api/#querying-label-values
func LabelValuesHandler(labelName string, w http.ResponseWriter, r *http.Request) error {
func LabelValuesHandler(at *auth.Token, labelName string, w http.ResponseWriter, r *http.Request) error {
startTime := time.Now()
deadline := getDeadline(r)
labelValues, err := netstorage.GetLabelValues(labelName, deadline)
if err != nil {
return fmt.Errorf(`cannot obtain label values for %q: %s`, labelName, err)
if err := r.ParseForm(); err != nil {
return fmt.Errorf("cannot parse form values: %s", err)
}
var labelValues []string
var isPartial bool
if len(r.Form["match[]"]) == 0 && len(r.Form["start"]) == 0 && len(r.Form["end"]) == 0 {
var err error
labelValues, isPartial, err = netstorage.GetLabelValues(at, labelName, deadline)
if err != nil {
return fmt.Errorf(`cannot obtain label values for %q: %s`, labelName, err)
}
} else {
// Extended functionality that allows filtering by label filters and time range
// i.e. /api/v1/label/foo/values?match[]=foobar{baz="abc"}&start=...&end=...
// is equivalent to `label_values(foobar{baz="abc"}, foo)` call on the selected
// time range in Grafana templating.
matches := r.Form["match[]"]
if len(matches) == 0 {
matches = []string{fmt.Sprintf("{%s!=''}", labelName)}
}
ct := currentTime()
end, err := getTime(r, "end", ct)
if err != nil {
return err
}
start, err := getTime(r, "start", end-defaultStep)
if err != nil {
return err
}
labelValues, isPartial, err = labelValuesWithMatches(at, labelName, matches, start, end, deadline)
if err != nil {
return fmt.Errorf("cannot obtain label values for %q, match[]=%q, start=%d, end=%d: %s", labelName, matches, start, end, err)
}
}
if isPartial && getDenyPartialResponse(r) {
return fmt.Errorf("cannot return full response, since some of vmstorage nodes are unavailable")
}
w.Header().Set("Content-Type", "application/json")
@@ -212,18 +334,87 @@ func LabelValuesHandler(labelName string, w http.ResponseWriter, r *http.Request
return nil
}
func labelValuesWithMatches(at *auth.Token, labelName string, matches []string, start, end int64, deadline netstorage.Deadline) ([]string, bool, error) {
if len(matches) == 0 {
logger.Panicf("BUG: matches must be non-empty")
}
tagFilterss, err := getTagFilterssFromMatches(matches)
if err != nil {
return nil, false, err
}
if start >= end {
end = start + defaultStep
}
sq := &storage.SearchQuery{
AccountID: at.AccountID,
ProjectID: at.ProjectID,
MinTimestamp: start,
MaxTimestamp: end,
TagFilterss: tagFilterss,
}
rss, isPartial, err := netstorage.ProcessSearchQuery(at, sq, false, deadline)
if err != nil {
return nil, false, fmt.Errorf("cannot fetch data for %q: %s", sq, err)
}
m := make(map[string]struct{})
var mLock sync.Mutex
err = rss.RunParallel(func(rs *netstorage.Result, workerID uint) {
labelValue := rs.MetricName.GetTagValue(labelName)
if len(labelValue) == 0 {
return
}
mLock.Lock()
m[string(labelValue)] = struct{}{}
mLock.Unlock()
})
if err != nil {
return nil, false, fmt.Errorf("error when data fetching: %s", err)
}
labelValues := make([]string, 0, len(m))
for labelValue := range m {
labelValues = append(labelValues, labelValue)
}
sort.Strings(labelValues)
return labelValues, isPartial, nil
}
var labelValuesDuration = metrics.NewSummary(`vm_request_duration_seconds{path="/api/v1/label/{}/values"}`)
// LabelsCountHandler processes /api/v1/labels/count request.
func LabelsCountHandler(at *auth.Token, w http.ResponseWriter, r *http.Request) error {
startTime := time.Now()
deadline := getDeadline(r)
labelEntries, isPartial, err := netstorage.GetLabelEntries(at, deadline)
if err != nil {
return fmt.Errorf(`cannot obtain label entries: %s`, err)
}
if isPartial && getDenyPartialResponse(r) {
return fmt.Errorf("cannot return full response, since some of vmstorage nodes are unavailable")
}
w.Header().Set("Content-Type", "application/json")
WriteLabelsCountResponse(w, labelEntries)
labelsCountDuration.UpdateDuration(startTime)
return nil
}
var labelsCountDuration = metrics.NewSummary(`vm_request_duration_seconds{path="/api/v1/labels/count"}`)
// LabelsHandler processes /api/v1/labels request.
//
// See https://prometheus.io/docs/prometheus/latest/querying/api/#getting-label-names
func LabelsHandler(w http.ResponseWriter, r *http.Request) error {
func LabelsHandler(at *auth.Token, w http.ResponseWriter, r *http.Request) error {
startTime := time.Now()
deadline := getDeadline(r)
labels, err := netstorage.GetLabels(deadline)
labels, isPartial, err := netstorage.GetLabels(at, deadline)
if err != nil {
return fmt.Errorf("cannot obtain labels: %s", err)
}
if isPartial && getDenyPartialResponse(r) {
return fmt.Errorf("cannot return full response, since some of vmstorage nodes are unavailable")
}
w.Header().Set("Content-Type", "application/json")
WriteLabelsResponse(w, labels)
@@ -234,13 +425,17 @@ func LabelsHandler(w http.ResponseWriter, r *http.Request) error {
var labelsDuration = metrics.NewSummary(`vm_request_duration_seconds{path="/api/v1/labels"}`)
// SeriesCountHandler processes /api/v1/series/count request.
func SeriesCountHandler(w http.ResponseWriter, r *http.Request) error {
func SeriesCountHandler(at *auth.Token, w http.ResponseWriter, r *http.Request) error {
startTime := time.Now()
deadline := getDeadline(r)
n, err := netstorage.GetSeriesCount(deadline)
n, isPartial, err := netstorage.GetSeriesCount(at, deadline)
if err != nil {
return fmt.Errorf("cannot obtain series count: %s", err)
}
if isPartial && getDenyPartialResponse(r) {
return fmt.Errorf("cannot return full response, since some of vmstorage nodes are unavailable")
}
w.Header().Set("Content-Type", "application/json")
WriteSeriesCountResponse(w, n)
seriesCountDuration.UpdateDuration(startTime)
@@ -252,7 +447,7 @@ var seriesCountDuration = metrics.NewSummary(`vm_request_duration_seconds{path="
// SeriesHandler processes /api/v1/series request.
//
// See https://prometheus.io/docs/prometheus/latest/querying/api/#finding-series-by-label-matchers
func SeriesHandler(w http.ResponseWriter, r *http.Request) error {
func SeriesHandler(at *auth.Token, w http.ResponseWriter, r *http.Request) error {
startTime := time.Now()
ct := currentTime()
@@ -260,8 +455,22 @@ func SeriesHandler(w http.ResponseWriter, r *http.Request) error {
return fmt.Errorf("cannot parse form values: %s", err)
}
matches := r.Form["match[]"]
start := getTime(r, "start", ct-defaultStep)
end := getTime(r, "end", ct)
if len(matches) == 0 {
return fmt.Errorf("missing `match[]` arg")
}
end, err := getTime(r, "end", ct)
if err != nil {
return err
}
// Do not set start to minTimeMsecs by default as Prometheus does,
// since this leads to fetching and scanning all the data from the storage,
// which can take a lot of time for big storages.
// It is better setting start as end-defaultStep by default.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/91
start, err := getTime(r, "start", end-defaultStep)
if err != nil {
return err
}
deadline := getDeadline(r)
tagFilterss, err := getTagFilterssFromMatches(matches)
@@ -269,22 +478,27 @@ func SeriesHandler(w http.ResponseWriter, r *http.Request) error {
return err
}
if start >= end {
start = end - defaultStep
end = start + defaultStep
}
sq := &storage.SearchQuery{
AccountID: at.AccountID,
ProjectID: at.ProjectID,
MinTimestamp: start,
MaxTimestamp: end,
TagFilterss: tagFilterss,
}
rss, err := netstorage.ProcessSearchQuery(sq, deadline)
rss, isPartial, err := netstorage.ProcessSearchQuery(at, sq, false, deadline)
if err != nil {
return fmt.Errorf("cannot fetch data for %q: %s", sq, err)
}
if isPartial && getDenyPartialResponse(r) {
return fmt.Errorf("cannot return full response, since some of vmstorage nodes are unavailable")
}
resultsCh := make(chan *quicktemplate.ByteBuffer)
doneCh := make(chan error)
go func() {
err := rss.RunParallel(func(rs *netstorage.Result) {
err := rss.RunParallel(func(rs *netstorage.Result, workerID uint) {
bb := quicktemplate.AcquireByteBuffer()
writemetricNameObject(bb, &rs.MetricName)
resultsCh <- bb
@@ -297,11 +511,10 @@ func SeriesHandler(w http.ResponseWriter, r *http.Request) error {
WriteSeriesResponse(w, resultsCh)
// Consume all the data from resultsCh in the event WriteSeriesResponse
// fail to consume all the data.
// fails to consume all the data.
for bb := range resultsCh {
quicktemplate.ReleaseByteBuffer(bb)
}
err = <-doneCh
if err != nil {
return fmt.Errorf("error during data fetching: %s", err)
@@ -315,20 +528,36 @@ var seriesDuration = metrics.NewSummary(`vm_request_duration_seconds{path="/api/
// QueryHandler processes /api/v1/query request.
//
// See https://prometheus.io/docs/prometheus/latest/querying/api/#instant-queries
func QueryHandler(w http.ResponseWriter, r *http.Request) error {
func QueryHandler(at *auth.Token, w http.ResponseWriter, r *http.Request) error {
startTime := time.Now()
ct := currentTime()
query := r.FormValue("query")
start := getTime(r, "time", ct)
step := getDuration(r, "step", latencyOffset)
if len(query) == 0 {
return fmt.Errorf("missing `query` arg")
}
start, err := getTime(r, "time", ct)
if err != nil {
return err
}
queryOffset := getLatencyOffsetMilliseconds()
step, err := getDuration(r, "step", queryOffset)
if err != nil {
return err
}
deadline := getDeadline(r)
lookbackDelta, err := getMaxLookback(r)
if err != nil {
return err
}
if len(query) > *maxQueryLen {
return fmt.Errorf(`too long query; got %d bytes; mustn't exceed %d bytes`, len(query), *maxQueryLen)
}
if ct-start < latencyOffset {
start -= latencyOffset
if !getBool(r, "nocache") && ct-start < queryOffset {
// Adjust start time only if `nocache` arg isn't set.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/241
start = ct - queryOffset
}
if childQuery, windowStr, offsetStr := promql.IsMetricSelectorWithRollup(query); childQuery != "" {
var window int64
@@ -350,7 +579,7 @@ func QueryHandler(w http.ResponseWriter, r *http.Request) error {
start -= offset
end := start
start = end - window
if err := exportHandler(w, []string{childQuery}, start, end, "promapi", deadline); err != nil {
if err := exportHandler(at, w, []string{childQuery}, start, end, "promapi", deadline); err != nil {
return err
}
queryDuration.UpdateDuration(startTime)
@@ -358,12 +587,16 @@ func QueryHandler(w http.ResponseWriter, r *http.Request) error {
}
ec := promql.EvalConfig{
Start: start,
End: start,
Step: step,
Deadline: deadline,
AuthToken: at,
Start: start,
End: start,
Step: step,
Deadline: deadline,
LookbackDelta: lookbackDelta,
DenyPartialResponse: getDenyPartialResponse(r),
}
result, err := promql.Exec(&ec, query)
result, err := promql.Exec(&ec, query, true)
if err != nil {
return fmt.Errorf("cannot execute %q: %s", query, err)
}
@@ -379,65 +612,123 @@ var queryDuration = metrics.NewSummary(`vm_request_duration_seconds{path="/api/v
// QueryRangeHandler processes /api/v1/query_range request.
//
// See https://prometheus.io/docs/prometheus/latest/querying/api/#range-queries
func QueryRangeHandler(w http.ResponseWriter, r *http.Request) error {
func QueryRangeHandler(at *auth.Token, w http.ResponseWriter, r *http.Request) error {
startTime := time.Now()
ct := currentTime()
query := r.FormValue("query")
start := getTime(r, "start", ct-defaultStep)
end := getTime(r, "end", ct)
step := getDuration(r, "step", defaultStep)
if len(query) == 0 {
return fmt.Errorf("missing `query` arg")
}
start, err := getTime(r, "start", ct-defaultStep)
if err != nil {
return err
}
end, err := getTime(r, "end", ct)
if err != nil {
return err
}
step, err := getDuration(r, "step", defaultStep)
if err != nil {
return err
}
deadline := getDeadline(r)
mayCache := !getBool(r, "nocache")
lookbackDelta, err := getMaxLookback(r)
if err != nil {
return err
}
// Validate input args.
if len(query) > *maxQueryLen {
return fmt.Errorf(`too long query; got %d bytes; mustn't exceed %d bytes`, len(query), *maxQueryLen)
}
if start > end {
start = end
end = start + defaultStep
}
if err := promql.ValidateMaxPointsPerTimeseries(start, end, step); err != nil {
return err
}
start, end = promql.AdjustStartEnd(start, end, step)
if mayCache {
start, end = promql.AdjustStartEnd(start, end, step)
}
ec := promql.EvalConfig{
Start: start,
End: end,
Step: step,
Deadline: deadline,
MayCache: mayCache,
AuthToken: at,
Start: start,
End: end,
Step: step,
Deadline: deadline,
MayCache: mayCache,
LookbackDelta: lookbackDelta,
DenyPartialResponse: getDenyPartialResponse(r),
}
result, err := promql.Exec(&ec, query)
result, err := promql.Exec(&ec, query, false)
if err != nil {
return fmt.Errorf("cannot execute %q: %s", query, err)
}
if ct-end < latencyOffset {
adjustLastPoints(result)
queryOffset := getLatencyOffsetMilliseconds()
if ct-end < queryOffset {
result = adjustLastPoints(result)
}
// Remove NaN values as Prometheus does.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/153
removeNaNValuesInplace(result)
w.Header().Set("Content-Type", "application/json")
WriteQueryRangeResponse(w, result)
queryRangeDuration.UpdateDuration(startTime)
return nil
}
func removeNaNValuesInplace(tss []netstorage.Result) {
for i := range tss {
ts := &tss[i]
hasNaNs := false
for _, v := range ts.Values {
if math.IsNaN(v) {
hasNaNs = true
break
}
}
if !hasNaNs {
// Fast path: nothing to remove.
continue
}
// Slow path: remove NaNs.
srcTimestamps := ts.Timestamps
dstValues := ts.Values[:0]
dstTimestamps := ts.Timestamps[:0]
for j, v := range ts.Values {
if math.IsNaN(v) {
continue
}
dstValues = append(dstValues, v)
dstTimestamps = append(dstTimestamps, srcTimestamps[j])
}
ts.Values = dstValues
ts.Timestamps = dstTimestamps
}
}
var queryRangeDuration = metrics.NewSummary(`vm_request_duration_seconds{path="/api/v1/query_range"}`)
// adjustLastPoints substitutes the last point values with the previous
// point values, since the last points may contain garbage.
func adjustLastPoints(tss []netstorage.Result) {
func adjustLastPoints(tss []netstorage.Result) []netstorage.Result {
if len(tss) == 0 {
return
return nil
}
// Search for the last non-NaN value across all the timeseries.
lastNonNaNIdx := -1
for i := range tss {
r := &tss[i]
j := len(r.Values) - 1
for j >= 0 && math.IsNaN(r.Values[j]) {
values := tss[i].Values
j := len(values) - 1
for j >= 0 && math.IsNaN(values[j]) {
j--
}
if j > lastNonNaNIdx {
@@ -446,75 +737,102 @@ func adjustLastPoints(tss []netstorage.Result) {
}
if lastNonNaNIdx == -1 {
// All timeseries contain only NaNs.
return
return nil
}
// Substitute last three values starting from lastNonNaNIdx
// Substitute the last two values starting from lastNonNaNIdx
// with the previous values for each timeseries.
for i := range tss {
r := &tss[i]
for j := 0; j < 3; j++ {
values := tss[i].Values
for j := 0; j < 2; j++ {
idx := lastNonNaNIdx + j
if idx <= 0 || idx >= len(r.Values) {
if idx <= 0 || idx >= len(values) || math.IsNaN(values[idx-1]) {
continue
}
r.Values[idx] = r.Values[idx-1]
values[idx] = values[idx-1]
}
}
return tss
}
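To make the substitution concrete, here is a standalone sketch (hypothetical helper name, plain float64 slices instead of netstorage.Result) of the adjustment applied above: with values [1 2 3 NaN NaN], the last non-NaN index is 2, so indexes 2 and 3 are overwritten with their predecessors.
package main

import (
	"fmt"
	"math"
)

func adjustLast(values []float64, lastNonNaNIdx int) {
	// Substitute the last two values starting from lastNonNaNIdx,
	// mirroring the loop in adjustLastPoints above.
	for j := 0; j < 2; j++ {
		idx := lastNonNaNIdx + j
		if idx <= 0 || idx >= len(values) || math.IsNaN(values[idx-1]) {
			continue
		}
		values[idx] = values[idx-1]
	}
}

func main() {
	nan := math.NaN()
	values := []float64{1, 2, 3, nan, nan}
	adjustLast(values, 2)
	fmt.Println(values) // [1 2 2 2 NaN]
}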
func getTime(r *http.Request, argKey string, defaultValue int64) int64 {
func getTime(r *http.Request, argKey string, defaultValue int64) (int64, error) {
argValue := r.FormValue(argKey)
if len(argValue) == 0 {
return defaultValue
return defaultValue, nil
}
secs, err := strconv.ParseFloat(argValue, 64)
if err != nil {
// Try parsing string format
t, err := time.Parse(time.RFC3339, argValue)
if err != nil {
return defaultValue
// Handle the Prometheus-provided minTime and maxTime.
// See https://github.com/prometheus/client_golang/issues/614
switch argValue {
case prometheusMinTimeFormatted:
return minTimeMsecs, nil
case prometheusMaxTimeFormatted:
return maxTimeMsecs, nil
}
return 0, fmt.Errorf("cannot parse %q=%q: %s", argKey, argValue, err)
}
secs = float64(t.UnixNano()) / 1e9
}
msecs := int64(secs * 1e3)
if msecs < minTimeMsecs || msecs > maxTimeMsecs {
return defaultValue
if msecs < minTimeMsecs {
msecs = 0
}
return msecs
if msecs > maxTimeMsecs {
msecs = maxTimeMsecs
}
return msecs, nil
}
var (
// These constants were obtained from https://github.com/prometheus/prometheus/blob/91d7175eaac18b00e370965f3a8186cc40bf9f55/web/api/v1/api.go#L442
// See https://github.com/prometheus/client_golang/issues/614 for details.
prometheusMinTimeFormatted = time.Unix(math.MinInt64/1000+62135596801, 0).UTC().Format(time.RFC3339Nano)
prometheusMaxTimeFormatted = time.Unix(math.MaxInt64/1000-62135596801, 999999999).UTC().Format(time.RFC3339Nano)
)
const (
// These values prevent from overflow when storing msec-precision time in int64.
minTimeMsecs = int64(-1<<63) / 1e6
minTimeMsecs = 0 // use 0 instead of `int64(-1<<63) / 1e6` because the storage engine doesn't actually support negative time
maxTimeMsecs = int64(1<<63-1) / 1e6
)
func getDuration(r *http.Request, argKey string, defaultValue int64) int64 {
func getDuration(r *http.Request, argKey string, defaultValue int64) (int64, error) {
argValue := r.FormValue(argKey)
if len(argValue) == 0 {
return defaultValue
return defaultValue, nil
}
secs, err := strconv.ParseFloat(argValue, 64)
if err != nil {
// Try parsing string format
d, err := time.ParseDuration(argValue)
if err != nil {
return defaultValue
return 0, fmt.Errorf("cannot parse %q=%q: %s", argKey, argValue, err)
}
secs = d.Seconds()
}
msecs := int64(secs * 1e3)
if msecs <= 0 || msecs > maxDurationMsecs {
return defaultValue
return 0, fmt.Errorf("%q=%dms is out of allowed range [%d ... %d]", argKey, msecs, 0, int64(maxDurationMsecs))
}
return msecs
return msecs, nil
}
const maxDurationMsecs = 100 * 365 * 24 * 3600 * 1000
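For illustration, a minimal standalone sketch of the two duration formats accepted above — plain seconds and Go duration strings — using nothing beyond the standard library:
package main

import (
	"fmt"
	"strconv"
	"time"
)

func parseDurationMsecs(s string) (int64, error) {
	secs, err := strconv.ParseFloat(s, 64)
	if err != nil {
		// Fall back to Go duration syntax such as "30s" or "5m".
		d, err := time.ParseDuration(s)
		if err != nil {
			return 0, fmt.Errorf("cannot parse %q: %s", s, err)
		}
		secs = d.Seconds()
	}
	return int64(secs * 1e3), nil
}

func main() {
	for _, s := range []string{"30", "1.5", "5m"} {
		ms, _ := parseDurationMsecs(s)
		fmt.Println(s, "->", ms, "ms") // 30000, 1500 and 300000 ms
	}
}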
func getMaxLookback(r *http.Request) (int64, error) {
d := int64(*maxLookback / time.Millisecond)
return getDuration(r, "max_lookback", d)
}
func getDeadline(r *http.Request) netstorage.Deadline {
d := getDuration(r, "timeout", 0)
d, err := getDuration(r, "timeout", 0)
if err != nil {
d = 0
}
dMax := int64(maxQueryDuration.Seconds() * 1e3)
if d <= 0 || d > dMax {
d = dMax
@@ -548,3 +866,18 @@ func getTagFilterssFromMatches(matches []string) ([][]storage.TagFilter, error)
}
return tagFilterss, nil
}
func getLatencyOffsetMilliseconds() int64 {
d := int64(*latencyOffset / time.Millisecond)
if d <= 1000 {
d = 1000
}
return d
}
func getDenyPartialResponse(r *http.Request) bool {
if *denyPartialResponse {
return true
}
return getBool(r, "deny_partial_response")
}

View File

@@ -0,0 +1,115 @@
package prometheus
import (
"fmt"
"math"
"net/http"
"net/url"
"reflect"
"testing"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/netstorage"
)
func TestRemoveNaNValuesInplace(t *testing.T) {
f := func(tss []netstorage.Result, tssExpected []netstorage.Result) {
t.Helper()
removeNaNValuesInplace(tss)
if !reflect.DeepEqual(tss, tssExpected) {
t.Fatalf("unexpected result; got %v; want %v", tss, tssExpected)
}
}
nan := math.NaN()
f(nil, nil)
f([]netstorage.Result{
{
Timestamps: []int64{100, 200, 300},
Values: []float64{1, 2, 3},
},
{
Timestamps: []int64{100, 200, 300, 400},
Values: []float64{nan, nan, 3, nan},
},
}, []netstorage.Result{
{
Timestamps: []int64{100, 200, 300},
Values: []float64{1, 2, 3},
},
{
Timestamps: []int64{300},
Values: []float64{3},
},
})
}
func TestGetTimeSuccess(t *testing.T) {
f := func(s string, timestampExpected int64) {
t.Helper()
urlStr := fmt.Sprintf("http://foo.bar/baz?s=%s", url.QueryEscape(s))
r, err := http.NewRequest("GET", urlStr, nil)
if err != nil {
t.Fatalf("unexpected error in NewRequest: %s", err)
}
// Verify defaultValue
ts, err := getTime(r, "foo", 123)
if err != nil {
t.Fatalf("unexpected error when obtaining default time from getTime(%q): %s", s, err)
}
if ts != 123 {
t.Fatalf("unexpected default value for getTime(%q); got %d; want %d", s, ts, 123)
}
// Verify timestampExpected
ts, err = getTime(r, "s", 123)
if err != nil {
t.Fatalf("unexpected error in getTime(%q): %s", s, err)
}
if ts != timestampExpected {
t.Fatalf("unexpected timestamp for getTime(%q); got %d; want %d", s, ts, timestampExpected)
}
}
f("2019-07-07T20:01:02Z", 1562529662000)
f("2019-07-07T20:47:40+03:00", 1562521660000)
f("-292273086-05-16T16:47:06Z", minTimeMsecs)
f("292277025-08-18T07:12:54.999999999Z", maxTimeMsecs)
f("1562529662.324", 1562529662324)
f("-9223372036.854", minTimeMsecs)
f("-9223372036.855", minTimeMsecs)
f("9223372036.855", maxTimeMsecs)
}
func TestGetTimeError(t *testing.T) {
f := func(s string) {
t.Helper()
urlStr := fmt.Sprintf("http://foo.bar/baz?s=%s", url.QueryEscape(s))
r, err := http.NewRequest("GET", urlStr, nil)
if err != nil {
t.Fatalf("unexpected error in NewRequest: %s", err)
}
// Verify defaultValue
ts, err := getTime(r, "foo", 123)
if err != nil {
t.Fatalf("unexpected error when obtaining default time from getTime(%q): %s", s, err)
}
if ts != 123 {
t.Fatalf("unexpected default value for getTime(%q); got %d; want %d", s, ts, 123)
}
// Verify timestampExpected
_, err = getTime(r, "s", 123)
if err == nil {
t.Fatalf("expecting non-nil error in getTime(%q)", s)
}
}
f("foo")
f("2019-07-07T20:01:02Zisdf")
f("2019-07-07T20:47:40+03:00123")
f("-292273086-05-16T16:47:07Z")
f("292277025-08-18T07:12:54.999999998Z")
}

View File

@@ -3,7 +3,7 @@ SeriesCountResponse generates response for /api/v1/series/count .
{% func SeriesCountResponse(n uint64) %}
{
"status":"success",
"data":[{%d int(n) %}]
"data":[{%dl int64(n) %}]
}
{% endfunc %}
{% endstripspace %}

View File

@@ -24,7 +24,7 @@ func StreamSeriesCountResponse(qw422016 *qt422016.Writer, n uint64) {
//line app/vmselect/prometheus/series_count_response.qtpl:3
qw422016.N().S(`{"status":"success","data":[`)
//line app/vmselect/prometheus/series_count_response.qtpl:6
qw422016.N().D(int(n))
qw422016.N().DL(int64(n))
//line app/vmselect/prometheus/series_count_response.qtpl:6
qw422016.N().S(`]}`)
//line app/vmselect/prometheus/series_count_response.qtpl:8

View File

@@ -6,6 +6,11 @@ import (
"sort"
"strconv"
"strings"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
"github.com/VictoriaMetrics/metrics"
"github.com/valyala/histogram"
)
var aggrFuncs = map[string]aggrFunc{
@@ -23,9 +28,20 @@ var aggrFuncs = map[string]aggrFunc{
"quantile": aggrFuncQuantile,
// Extended PromQL funcs
"median": aggrFuncMedian,
"limitk": aggrFuncLimitK,
"distinct": newAggrFunc(aggrFuncDistinct),
"median": aggrFuncMedian,
"limitk": aggrFuncLimitK,
"distinct": newAggrFunc(aggrFuncDistinct),
"sum2": newAggrFunc(aggrFuncSum2),
"geomean": newAggrFunc(aggrFuncGeomean),
"histogram": newAggrFunc(aggrFuncHistogram),
"topk_min": newAggrFuncRangeTopK(minValue, false),
"topk_max": newAggrFuncRangeTopK(maxValue, false),
"topk_avg": newAggrFuncRangeTopK(avgValue, false),
"topk_median": newAggrFuncRangeTopK(medianValue, false),
"bottomk_min": newAggrFuncRangeTopK(minValue, true),
"bottomk_max": newAggrFuncRangeTopK(maxValue, true),
"bottomk_avg": newAggrFuncRangeTopK(avgValue, true),
"bottomk_median": newAggrFuncRangeTopK(medianValue, true),
}
type aggrFunc func(afa *aggrFuncArg) ([]*timeseries, error)
@@ -65,33 +81,26 @@ func newAggrFunc(afe func(tss []*timeseries) []*timeseries) aggrFunc {
}
}
func removeGroupTags(metricName *storage.MetricName, modifier *modifierExpr) {
groupOp := strings.ToLower(modifier.Op)
switch groupOp {
case "", "by":
metricName.RemoveTagsOn(modifier.Args)
case "without":
metricName.RemoveTagsIgnoring(modifier.Args)
default:
logger.Panicf("BUG: unknown group modifier: %q", groupOp)
}
}
func aggrFuncExt(afe func(tss []*timeseries) []*timeseries, argOrig []*timeseries, modifier *modifierExpr, keepOriginal bool) ([]*timeseries, error) {
arg := copyTimeseriesMetricNames(argOrig)
// Filter out superfluous tags.
var groupTags []string
groupOp := "by"
if modifier.Op != "" {
groupTags = modifier.Args
groupOp = strings.ToLower(modifier.Op)
}
switch groupOp {
case "by":
for _, ts := range arg {
ts.MetricName.RemoveTagsOn(groupTags)
}
case "without":
for _, ts := range arg {
ts.MetricName.RemoveTagsIgnoring(groupTags)
}
default:
return nil, fmt.Errorf(`unknown modifier: %q`, groupOp)
}
// Perform grouping.
m := make(map[string][]*timeseries)
bb := bbPool.Get()
for i, ts := range arg {
removeGroupTags(&ts.MetricName, modifier)
bb.B = marshalMetricNameSorted(bb.B[:0], &ts.MetricName)
if keepOriginal {
ts = argOrig[i]
@@ -100,10 +109,18 @@ func aggrFuncExt(afe func(tss []*timeseries) []*timeseries, argOrig []*timeserie
}
bbPool.Put(bb)
srcTssCount := 0
dstTssCount := 0
rvs := make([]*timeseries, 0, len(m))
for _, tss := range m {
rv := afe(tss)
rvs = append(rvs, rv...)
srcTssCount += len(tss)
dstTssCount += len(rv)
if dstTssCount > 2000 && dstTssCount > 16*srcTssCount {
// This looks like count_values explosion.
return nil, fmt.Errorf(`too many timeseries after aggregation; got %d; want less than %d`, dstTssCount, 16*srcTssCount)
}
}
return rvs, nil
}
@@ -132,6 +149,84 @@ func aggrFuncSum(tss []*timeseries) []*timeseries {
return tss[:1]
}
func aggrFuncSum2(tss []*timeseries) []*timeseries {
dst := tss[0]
for i := range dst.Values {
sum2 := float64(0)
count := 0
for _, ts := range tss {
v := ts.Values[i]
if math.IsNaN(v) {
continue
}
sum2 += v * v
count++
}
if count == 0 {
sum2 = nan
}
dst.Values[i] = sum2
}
return tss[:1]
}
func aggrFuncGeomean(tss []*timeseries) []*timeseries {
if len(tss) == 1 {
// Fast path - nothing to geomean.
return tss
}
dst := tss[0]
for i := range dst.Values {
p := 1.0
count := 0
for _, ts := range tss {
v := ts.Values[i]
if math.IsNaN(v) {
continue
}
p *= v
count++
}
if count == 0 {
p = nan
}
dst.Values[i] = math.Pow(p, 1/float64(count))
}
return tss[:1]
}
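A quick sanity check of the formula used above: the geometric mean is the product of the non-NaN values raised to the power 1/count, e.g. geomean(2, 8) = (2*8)^(1/2) = 4. A minimal standalone program, with one NaN to show it is skipped:
package main

import (
	"fmt"
	"math"
)

func main() {
	values := []float64{2, 8, math.NaN()}
	p, count := 1.0, 0
	for _, v := range values {
		if math.IsNaN(v) {
			continue
		}
		p *= v
		count++
	}
	fmt.Println(math.Pow(p, 1/float64(count))) // 4
}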
func aggrFuncHistogram(tss []*timeseries) []*timeseries {
var h metrics.Histogram
m := make(map[string]*timeseries)
for i := range tss[0].Values {
h.Reset()
for _, ts := range tss {
v := ts.Values[i]
h.Update(v)
}
h.VisitNonZeroBuckets(func(vmrange string, count uint64) {
ts := m[vmrange]
if ts == nil {
ts = &timeseries{}
ts.CopyFromShallowTimestamps(tss[0])
ts.MetricName.RemoveTag("vmrange")
ts.MetricName.AddTag("vmrange", vmrange)
values := ts.Values
for k := range values {
values[k] = 0
}
m[vmrange] = ts
}
ts.Values[i] = float64(count)
})
}
rvs := make([]*timeseries, 0, len(m))
for _, ts := range m {
rvs = append(rvs, ts)
}
return vmrangeBucketsToLE(rvs)
}
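The bucketing primitive above comes from github.com/VictoriaMetrics/metrics; a minimal standalone usage sketch relying on the same Update/VisitNonZeroBuckets calls as the code above:
package main

import (
	"fmt"

	"github.com/VictoriaMetrics/metrics"
)

func main() {
	var h metrics.Histogram
	for _, v := range []float64{0.5, 0.6, 120} {
		h.Update(v)
	}
	// Each non-empty `vmrange` bucket is visited with its sample count.
	h.VisitNonZeroBuckets(func(vmrange string, count uint64) {
		fmt.Printf("vmrange=%s count=%d\n", vmrange, count)
	})
}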
func aggrFuncMin(tss []*timeseries) []*timeseries {
if len(tss) == 1 {
// Fast path - nothing to min.
@@ -260,7 +355,11 @@ func aggrFuncCount(tss []*timeseries) []*timeseries {
}
count++
}
dst.Values[i] = float64(count)
v := float64(count)
if count == 0 {
v = nan
}
dst.Values[i] = v
}
return tss[:1]
}
@@ -297,10 +396,32 @@ func aggrFuncCountValues(afa *aggrFuncArg) ([]*timeseries, error) {
if err != nil {
return nil, err
}
// Remove dstLabel from grouping like Prometheus does.
modifier := &afa.ae.Modifier
switch strings.ToLower(modifier.Op) {
case "without":
modifier.Args = append(modifier.Args, dstLabel)
case "by":
dstArgs := modifier.Args[:0]
for _, arg := range modifier.Args {
if arg == dstLabel {
continue
}
dstArgs = append(dstArgs, arg)
}
modifier.Args = dstArgs
default:
// Do nothing
}
afe := func(tss []*timeseries) []*timeseries {
m := make(map[float64]bool)
for _, ts := range tss {
for _, v := range ts.Values {
if math.IsNaN(v) {
continue
}
m[v] = true
}
}
@@ -313,7 +434,7 @@ func aggrFuncCountValues(afa *aggrFuncArg) ([]*timeseries, error) {
var rvs []*timeseries
for _, v := range values {
var dst timeseries
dst.CopyFrom(tss[0])
dst.CopyFromShallowTimestamps(tss[0])
dst.MetricName.RemoveTag(dstLabel)
dst.MetricName.AddTag(dstLabel, strconv.FormatFloat(v, 'g', -1, 64))
for i := range dst.Values {
@@ -347,37 +468,138 @@ func newAggrFuncTopK(isReverse bool) aggrFunc {
return nil, err
}
afe := func(tss []*timeseries) []*timeseries {
rvs := tss
for n := range rvs[0].Values {
sort.Slice(rvs, func(i, j int) bool {
a := rvs[i].Values[n]
b := rvs[j].Values[n]
cmp := lessWithNaNs(a, b)
for n := range tss[0].Values {
sort.Slice(tss, func(i, j int) bool {
a := tss[i].Values[n]
b := tss[j].Values[n]
if isReverse {
cmp = !cmp
a, b = b, a
}
return cmp
return lessWithNaNs(a, b)
})
if math.IsNaN(ks[n]) {
ks[n] = 0
}
k := int(ks[n])
if k < 0 {
k = 0
}
if k > len(rvs) {
k = len(rvs)
}
for _, ts := range rvs[:len(rvs)-k] {
ts.Values[n] = nan
}
fillNaNsAtIdx(n, ks[n], tss)
}
return rvs
return removeNaNs(tss)
}
return aggrFuncExt(afe, args[1], &afa.ae.Modifier, true)
}
}
type tsWithValue struct {
ts *timeseries
value float64
}
func newAggrFuncRangeTopK(f func(values []float64) float64, isReverse bool) aggrFunc {
return func(afa *aggrFuncArg) ([]*timeseries, error) {
args := afa.args
if err := expectTransformArgsNum(args, 2); err != nil {
return nil, err
}
ks, err := getScalar(args[0], 0)
if err != nil {
return nil, err
}
afe := func(tss []*timeseries) []*timeseries {
maxs := make([]tsWithValue, len(tss))
for i, ts := range tss {
value := f(ts.Values)
maxs[i] = tsWithValue{
ts: ts,
value: value,
}
}
sort.Slice(maxs, func(i, j int) bool {
a := maxs[i].value
b := maxs[j].value
if isReverse {
a, b = b, a
}
return lessWithNaNs(a, b)
})
for i := range maxs {
tss[i] = maxs[i].ts
}
for i, k := range ks {
fillNaNsAtIdx(i, k, tss)
}
return removeNaNs(tss)
}
return aggrFuncExt(afe, args[1], &afa.ae.Modifier, true)
}
}
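The selection idea — summarize every series over the whole range, sort by that summary, keep only k series — can be shown in isolation. A simplified standalone sketch with hypothetical types, omitting the NaN handling and tag grouping the real code performs:
package main

import (
	"fmt"
	"sort"
)

type series struct {
	name   string
	values []float64
}

func avg(values []float64) float64 {
	sum := 0.0
	for _, v := range values {
		sum += v
	}
	return sum / float64(len(values))
}

// topKAvg keeps the k series with the highest averages over the range.
func topKAvg(k int, tss []series) []series {
	sort.Slice(tss, func(i, j int) bool {
		return avg(tss[i].values) < avg(tss[j].values)
	})
	if k > len(tss) {
		k = len(tss)
	}
	return tss[len(tss)-k:]
}

func main() {
	tss := []series{
		{"a", []float64{1, 2}},
		{"b", []float64{10, 20}},
		{"c", []float64{5, 5}},
	}
	fmt.Println(topKAvg(2, tss)) // [{c [5 5]} {b [10 20]}]
}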
func fillNaNsAtIdx(idx int, k float64, tss []*timeseries) {
if math.IsNaN(k) {
k = 0
}
kn := int(k)
if kn < 0 {
kn = 0
}
if kn > len(tss) {
kn = len(tss)
}
for _, ts := range tss[:len(tss)-kn] {
ts.Values[idx] = nan
}
}
func minValue(values []float64) float64 {
if len(values) == 0 {
return nan
}
min := values[0]
for _, v := range values[1:] {
if v < min {
min = v
}
}
return min
}
func maxValue(values []float64) float64 {
if len(values) == 0 {
return nan
}
max := values[0]
for _, v := range values[1:] {
if v > max {
max = v
}
}
return max
}
func avgValue(values []float64) float64 {
sum := float64(0)
count := 0
for _, v := range values {
if math.IsNaN(v) {
continue
}
count++
sum += v
}
if count == 0 {
return nan
}
return sum / float64(count)
}
func medianValue(values []float64) float64 {
h := histogram.GetFast()
for _, v := range values {
if math.IsNaN(v) {
continue
}
h.Update(v)
}
value := h.Quantile(0.5)
histogram.PutFast(h)
return value
}
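medianValue relies on github.com/valyala/histogram; a minimal usage sketch of that API (the same GetFast/Update/Quantile/PutFast calls as above):
package main

import (
	"fmt"

	"github.com/valyala/histogram"
)

func main() {
	h := histogram.GetFast()
	defer histogram.PutFast(h)
	for _, v := range []float64{3, 1, 2} {
		h.Update(v)
	}
	fmt.Println(h.Quantile(0.5)) // ~2: the estimated median of 1, 2, 3
}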
func aggrFuncLimitK(afa *aggrFuncArg) ([]*timeseries, error) {
args := afa.args
if err := expectTransformArgsNum(args, 2); err != nil {
@@ -457,6 +679,7 @@ func newAggrQuantileFunc(phis []float64) func(tss []*timeseries) []*timeseries {
idx := int(math.Round(float64(len(tss)-1) * phi))
dst.Values[n] = tss[idx].Values[n]
}
tss[0] = dst
return tss[:1]
}
}

View File

@@ -0,0 +1,450 @@
package promql
import (
"math"
"strings"
"sync"
)
// callbacks for optimized incremental calculations for aggregate functions
// over rollups over metricExpr.
//
// These calculations save RAM for aggregates over a big number of time series.
var incrementalAggrFuncCallbacksMap = map[string]*incrementalAggrFuncCallbacks{
"sum": {
updateAggrFunc: updateAggrSum,
mergeAggrFunc: mergeAggrSum,
finalizeAggrFunc: finalizeAggrCommon,
},
"min": {
updateAggrFunc: updateAggrMin,
mergeAggrFunc: mergeAggrMin,
finalizeAggrFunc: finalizeAggrCommon,
},
"max": {
updateAggrFunc: updateAggrMax,
mergeAggrFunc: mergeAggrMax,
finalizeAggrFunc: finalizeAggrCommon,
},
"avg": {
updateAggrFunc: updateAggrAvg,
mergeAggrFunc: mergeAggrAvg,
finalizeAggrFunc: finalizeAggrAvg,
},
"count": {
updateAggrFunc: updateAggrCount,
mergeAggrFunc: mergeAggrCount,
finalizeAggrFunc: finalizeAggrCount,
},
"sum2": {
updateAggrFunc: updateAggrSum2,
mergeAggrFunc: mergeAggrSum2,
finalizeAggrFunc: finalizeAggrCommon,
},
"geomean": {
updateAggrFunc: updateAggrGeomean,
mergeAggrFunc: mergeAggrGeomean,
finalizeAggrFunc: finalizeAggrGeomean,
},
}
type incrementalAggrFuncContext struct {
ae *aggrFuncExpr
mLock sync.Mutex
m map[uint]map[string]*incrementalAggrContext
callbacks *incrementalAggrFuncCallbacks
}
func newIncrementalAggrFuncContext(ae *aggrFuncExpr, callbacks *incrementalAggrFuncCallbacks) *incrementalAggrFuncContext {
return &incrementalAggrFuncContext{
ae: ae,
m: make(map[uint]map[string]*incrementalAggrContext),
callbacks: callbacks,
}
}
func (iafc *incrementalAggrFuncContext) updateTimeseries(ts *timeseries, workerID uint) {
iafc.mLock.Lock()
m := iafc.m[workerID]
if m == nil {
m = make(map[string]*incrementalAggrContext, 1)
iafc.m[workerID] = m
}
iafc.mLock.Unlock()
removeGroupTags(&ts.MetricName, &iafc.ae.Modifier)
bb := bbPool.Get()
bb.B = marshalMetricNameSorted(bb.B[:0], &ts.MetricName)
iac := m[string(bb.B)]
if iac == nil {
tsAggr := &timeseries{
Values: make([]float64, len(ts.Values)),
Timestamps: ts.Timestamps,
denyReuse: true,
}
tsAggr.MetricName.CopyFrom(&ts.MetricName)
iac = &incrementalAggrContext{
ts: tsAggr,
values: make([]float64, len(ts.Values)),
}
m[string(bb.B)] = iac
}
bbPool.Put(bb)
iafc.callbacks.updateAggrFunc(iac, ts.Values)
}
func (iafc *incrementalAggrFuncContext) finalizeTimeseries() []*timeseries {
// There is no need for iafc.mLock.Lock here, since finalizeTimeseries must be called
// without concurrent goroutines touching iafc.
mGlobal := make(map[string]*incrementalAggrContext)
mergeAggrFunc := iafc.callbacks.mergeAggrFunc
for _, m := range iafc.m {
for k, iac := range m {
iacGlobal := mGlobal[k]
if iacGlobal == nil {
mGlobal[k] = iac
continue
}
mergeAggrFunc(iacGlobal, iac)
}
}
tss := make([]*timeseries, 0, len(mGlobal))
finalizeAggrFunc := iafc.callbacks.finalizeAggrFunc
for _, iac := range mGlobal {
finalizeAggrFunc(iac)
tss = append(tss, iac.ts)
}
return tss
}
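The map[uint]map[string]*incrementalAggrContext layout means the mutex guards only the lookup of the per-worker map; after that, each worker mutates its own inner map without contention. A generic standalone sketch of this sharding pattern (hypothetical counter, fixed worker pool):
package main

import (
	"fmt"
	"sync"
)

type shardedCounter struct {
	mu sync.Mutex
	m  map[uint]map[string]int // workerID -> key -> count
}

func (sc *shardedCounter) inc(key string, workerID uint) {
	sc.mu.Lock()
	m := sc.m[workerID]
	if m == nil {
		m = make(map[string]int)
		sc.m[workerID] = m
	}
	sc.mu.Unlock()
	// Only this worker touches m, so no lock is needed below.
	m[key]++
}

func (sc *shardedCounter) total(key string) int {
	// Call only after all workers are done, like finalizeTimeseries above.
	n := 0
	for _, m := range sc.m {
		n += m[key]
	}
	return n
}

func main() {
	sc := &shardedCounter{m: make(map[uint]map[string]int)}
	var wg sync.WaitGroup
	for w := uint(0); w < 3; w++ {
		wg.Add(1)
		go func(workerID uint) {
			defer wg.Done()
			for i := 0; i < 100; i++ {
				sc.inc("x", workerID)
			}
		}(w)
	}
	wg.Wait()
	fmt.Println(sc.total("x")) // 300
}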
type incrementalAggrFuncCallbacks struct {
updateAggrFunc func(iac *incrementalAggrContext, values []float64)
mergeAggrFunc func(dst, src *incrementalAggrContext)
finalizeAggrFunc func(iac *incrementalAggrContext)
}
func getIncrementalAggrFuncCallbacks(name string) *incrementalAggrFuncCallbacks {
name = strings.ToLower(name)
return incrementalAggrFuncCallbacksMap[name]
}
type incrementalAggrContext struct {
ts *timeseries
values []float64
}
func finalizeAggrCommon(iac *incrementalAggrContext) {
counts := iac.values
dstValues := iac.ts.Values
_ = dstValues[len(counts)-1]
for i, v := range counts {
if v == 0 {
dstValues[i] = nan
}
}
}
func updateAggrSum(iac *incrementalAggrContext, values []float64) {
dstValues := iac.ts.Values
dstCounts := iac.values
_ = dstValues[len(values)-1]
_ = dstCounts[len(values)-1]
for i, v := range values {
if math.IsNaN(v) {
continue
}
if dstCounts[i] == 0 {
dstValues[i] = v
dstCounts[i] = 1
continue
}
dstValues[i] += v
}
}
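The blank assignments such as _ = dstValues[len(values)-1] are bounds-check-elimination hints: a single up-front access proves to the Go compiler that the slice is long enough, so the indexed accesses inside the loop need no per-iteration bounds checks (and the function panics early if a slice is too short). A minimal illustration of the pattern:
// Hypothetical example: the one-time access up front lets the compiler
// drop bounds checks on dst[i] inside the loop.
func addInto(dst, src []float64) {
	_ = dst[len(src)-1] // panics early if dst is too short; enables BCE
	for i, v := range src {
		dst[i] += v
	}
}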
func mergeAggrSum(dst, src *incrementalAggrContext) {
srcValues := src.ts.Values
dstValues := dst.ts.Values
srcCounts := src.values
dstCounts := dst.values
_ = srcCounts[len(srcValues)-1]
_ = dstCounts[len(srcValues)-1]
_ = dstValues[len(srcValues)-1]
for i, v := range srcValues {
if srcCounts[i] == 0 {
continue
}
if dstCounts[i] == 0 {
dstValues[i] = v
dstCounts[i] = 1
continue
}
dstValues[i] += v
}
}
func updateAggrMin(iac *incrementalAggrContext, values []float64) {
dstValues := iac.ts.Values
dstCounts := iac.values
_ = dstValues[len(values)-1]
_ = dstCounts[len(values)-1]
for i, v := range values {
if math.IsNaN(v) {
continue
}
if dstCounts[i] == 0 {
dstValues[i] = v
dstCounts[i] = 1
continue
}
if v < dstValues[i] {
dstValues[i] = v
}
}
}
func mergeAggrMin(dst, src *incrementalAggrContext) {
srcValues := src.ts.Values
dstValues := dst.ts.Values
srcCounts := src.values
dstCounts := dst.values
_ = srcCounts[len(srcValues)-1]
_ = dstCounts[len(srcValues)-1]
_ = dstValues[len(srcValues)-1]
for i, v := range srcValues {
if srcCounts[i] == 0 {
continue
}
if dstCounts[i] == 0 {
dstValues[i] = v
dstCounts[i] = 1
continue
}
if v < dstValues[i] {
dstValues[i] = v
}
}
}
func updateAggrMax(iac *incrementalAggrContext, values []float64) {
dstValues := iac.ts.Values
dstCounts := iac.values
_ = dstValues[len(values)-1]
_ = dstCounts[len(values)-1]
for i, v := range values {
if math.IsNaN(v) {
continue
}
if dstCounts[i] == 0 {
dstValues[i] = v
dstCounts[i] = 1
continue
}
if v > dstValues[i] {
dstValues[i] = v
}
}
}
func mergeAggrMax(dst, src *incrementalAggrContext) {
srcValues := src.ts.Values
dstValues := dst.ts.Values
srcCounts := src.values
dstCounts := dst.values
_ = srcCounts[len(srcValues)-1]
_ = dstCounts[len(srcValues)-1]
_ = dstValues[len(srcValues)-1]
for i, v := range srcValues {
if srcCounts[i] == 0 {
continue
}
if dstCounts[i] == 0 {
dstValues[i] = v
dstCounts[i] = 1
continue
}
if v > dstValues[i] {
dstValues[i] = v
}
}
}
func updateAggrAvg(iac *incrementalAggrContext, values []float64) {
// Do not use `Rapid calculation methods` at https://en.wikipedia.org/wiki/Standard_deviation,
// since it is slower and has no obvious benefits in increased precision.
dstValues := iac.ts.Values
dstCounts := iac.values
_ = dstValues[len(values)-1]
_ = dstCounts[len(values)-1]
for i, v := range values {
if math.IsNaN(v) {
continue
}
if dstCounts[i] == 0 {
dstValues[i] = v
dstCounts[i] = 1
continue
}
dstValues[i] += v
dstCounts[i]++
}
}
func mergeAggrAvg(dst, src *incrementalAggrContext) {
srcValues := src.ts.Values
dstValues := dst.ts.Values
srcCounts := src.values
dstCounts := dst.values
_ = srcCounts[len(srcValues)-1]
_ = dstCounts[len(srcValues)-1]
_ = dstValues[len(srcValues)-1]
for i, v := range srcValues {
if srcCounts[i] == 0 {
continue
}
if dstCounts[i] == 0 {
dstValues[i] = v
dstCounts[i] = srcCounts[i]
continue
}
dstValues[i] += v
dstCounts[i] += srcCounts[i]
}
}
func finalizeAggrAvg(iac *incrementalAggrContext) {
dstValues := iac.ts.Values
counts := iac.values
_ = dstValues[len(counts)-1]
for i, v := range counts {
if v == 0 {
dstValues[i] = nan
continue
}
dstValues[i] /= v
}
}
func updateAggrCount(iac *incrementalAggrContext, values []float64) {
dstValues := iac.ts.Values
_ = dstValues[len(values)-1]
for i, v := range values {
if math.IsNaN(v) {
continue
}
dstValues[i]++
}
}
func mergeAggrCount(dst, src *incrementalAggrContext) {
srcValues := src.ts.Values
dstValues := dst.ts.Values
_ = dstValues[len(srcValues)-1]
for i, v := range srcValues {
dstValues[i] += v
}
}
func finalizeAggrCount(iac *incrementalAggrContext) {
dstValues := iac.ts.Values
for i, v := range dstValues {
if v == 0 {
dstValues[i] = nan
}
}
}
func updateAggrSum2(iac *incrementalAggrContext, values []float64) {
dstValues := iac.ts.Values
dstCounts := iac.values
_ = dstValues[len(values)-1]
_ = dstCounts[len(values)-1]
for i, v := range values {
if math.IsNaN(v) {
continue
}
if dstCounts[i] == 0 {
dstValues[i] = v * v
dstCounts[i] = 1
continue
}
dstValues[i] += v * v
}
}
func mergeAggrSum2(dst, src *incrementalAggrContext) {
srcValues := src.ts.Values
dstValues := dst.ts.Values
srcCounts := src.values
dstCounts := dst.values
_ = srcCounts[len(srcValues)-1]
_ = dstCounts[len(srcValues)-1]
_ = dstValues[len(srcValues)-1]
for i, v := range srcValues {
if srcCounts[i] == 0 {
continue
}
if dstCounts[i] == 0 {
dstValues[i] = v
dstCounts[i] = 1
continue
}
dstValues[i] += v
}
}
func updateAggrGeomean(iac *incrementalAggrContext, values []float64) {
dstValues := iac.ts.Values
dstCounts := iac.values
_ = dstValues[len(values)-1]
_ = dstCounts[len(values)-1]
for i, v := range values {
if math.IsNaN(v) {
continue
}
if dstCounts[i] == 0 {
dstValues[i] = v
dstCounts[i] = 1
continue
}
dstValues[i] *= v
dstCounts[i]++
}
}
func mergeAggrGeomean(dst, src *incrementalAggrContext) {
srcValues := src.ts.Values
dstValues := dst.ts.Values
srcCounts := src.values
dstCounts := dst.values
_ = srcCounts[len(srcValues)-1]
_ = dstCounts[len(srcValues)-1]
_ = dstValues[len(srcValues)-1]
for i, v := range srcValues {
if srcCounts[i] == 0 {
continue
}
if dstCounts[i] == 0 {
dstValues[i] = v
dstCounts[i] = srcCounts[i]
continue
}
dstValues[i] *= v
dstCounts[i] += srcCounts[i]
}
}
func finalizeAggrGeomean(iac *incrementalAggrContext) {
dstValues := iac.ts.Values
counts := iac.values
_ = dstValues[len(counts)-1]
for i, v := range counts {
if v == 0 {
dstValues[i] = nan
continue
}
dstValues[i] = math.Pow(dstValues[i], 1/v)
}
}

View File

@@ -0,0 +1,188 @@
package promql
import (
"fmt"
"math"
"reflect"
"runtime"
"sync"
"testing"
)
func TestIncrementalAggr(t *testing.T) {
defaultTimestamps := []int64{100e3, 200e3, 300e3, 400e3}
values := [][]float64{
{1, nan, 2, nan},
{3, nan, nan, 4},
{nan, nan, 5, 6},
{7, nan, 8, 9},
{4, nan, nan, nan},
{2, nan, 3, 2},
{0, nan, 1, 1},
}
tssSrc := make([]*timeseries, len(values))
for i, vs := range values {
ts := &timeseries{
Timestamps: defaultTimestamps,
Values: vs,
}
tssSrc[i] = ts
}
copyTimeseries := func(tssSrc []*timeseries) []*timeseries {
tssDst := make([]*timeseries, len(tssSrc))
for i, tsSrc := range tssSrc {
var tsDst timeseries
tsDst.CopyFromShallowTimestamps(tsSrc)
tssDst[i] = &tsDst
}
return tssDst
}
f := func(name string, valuesExpected []float64) {
t.Helper()
callbacks := getIncrementalAggrFuncCallbacks(name)
ae := &aggrFuncExpr{
Name: name,
}
tssExpected := []*timeseries{{
Timestamps: defaultTimestamps,
Values: valuesExpected,
}}
// run the test multiple times to make sure there are no side effects on concurrency
for i := 0; i < 10; i++ {
iafc := newIncrementalAggrFuncContext(ae, callbacks)
tssSrcCopy := copyTimeseries(tssSrc)
if err := testIncrementalParallelAggr(iafc, tssSrcCopy, tssExpected); err != nil {
t.Fatalf("unexpected error on iteration %d: %s", i, err)
}
}
}
t.Run("sum", func(t *testing.T) {
t.Parallel()
valuesExpected := []float64{17, nan, 19, 22}
f("sum", valuesExpected)
})
t.Run("min", func(t *testing.T) {
t.Parallel()
valuesExpected := []float64{0, nan, 1, 1}
f("min", valuesExpected)
})
t.Run("max", func(t *testing.T) {
t.Parallel()
valuesExpected := []float64{7, nan, 8, 9}
f("max", valuesExpected)
})
t.Run("avg", func(t *testing.T) {
t.Parallel()
valuesExpected := []float64{2.8333333333333335, nan, 3.8, 4.4}
f("avg", valuesExpected)
})
t.Run("count", func(t *testing.T) {
t.Parallel()
valuesExpected := []float64{6, nan, 5, 5}
f("count", valuesExpected)
})
t.Run("sum2", func(t *testing.T) {
t.Parallel()
valuesExpected := []float64{79, nan, 103, 138}
f("sum2", valuesExpected)
})
t.Run("geomean", func(t *testing.T) {
t.Parallel()
valuesExpected := []float64{0, nan, 2.9925557394776896, 3.365865436338599}
f("geomean", valuesExpected)
})
}
func testIncrementalParallelAggr(iafc *incrementalAggrFuncContext, tssSrc, tssExpected []*timeseries) error {
const workersCount = 3
tsCh := make(chan *timeseries)
var wg sync.WaitGroup
wg.Add(workersCount)
for i := 0; i < workersCount; i++ {
go func(workerID uint) {
defer wg.Done()
for ts := range tsCh {
runtime.Gosched() // allow other goroutines to perform the work
iafc.updateTimeseries(ts, workerID)
}
}(uint(i))
}
for _, ts := range tssSrc {
tsCh <- ts
}
close(tsCh)
wg.Wait()
tssActual := iafc.finalizeTimeseries()
if err := expectTimeseriesEqual(tssActual, tssExpected); err != nil {
return fmt.Errorf("%s; tssActual=%v, tssExpected=%v", err, tssActual, tssExpected)
}
return nil
}
func expectTimeseriesEqual(actual, expected []*timeseries) error {
if len(actual) != len(expected) {
return fmt.Errorf("unexpected number of time series; got %d; want %d", len(actual), len(expected))
}
mActual := timeseriesToMap(actual)
mExpected := timeseriesToMap(expected)
if len(mActual) != len(mExpected) {
return fmt.Errorf("unexpected number of time series after converting to map; got %d; want %d", len(mActual), len(mExpected))
}
for k, tsExpected := range mExpected {
tsActual := mActual[k]
if tsActual == nil {
return fmt.Errorf("missing time series for key=%q", k)
}
if err := expectTsEqual(tsActual, tsExpected); err != nil {
return err
}
}
return nil
}
func timeseriesToMap(tss []*timeseries) map[string]*timeseries {
m := make(map[string]*timeseries, len(tss))
for _, ts := range tss {
k := ts.MetricName.Marshal(nil)
m[string(k)] = ts
}
return m
}
func expectTsEqual(actual, expected *timeseries) error {
mnActual := actual.MetricName.Marshal(nil)
mnExpected := expected.MetricName.Marshal(nil)
if string(mnActual) != string(mnExpected) {
return fmt.Errorf("unexpected metric name; got %q; want %q", mnActual, mnExpected)
}
if !reflect.DeepEqual(actual.Timestamps, expected.Timestamps) {
return fmt.Errorf("unexpected timestamps; got %v; want %v", actual.Timestamps, expected.Timestamps)
}
if err := compareValues(actual.Values, expected.Values); err != nil {
return fmt.Errorf("%s; actual %v; expected %v", err, actual.Values, expected.Values)
}
return nil
}
func compareValues(vs1, vs2 []float64) error {
if len(vs1) != len(vs2) {
return fmt.Errorf("unexpected number of values; got %d; want %d", len(vs1), len(vs2))
}
for i, v1 := range vs1 {
v2 := vs2[i]
if math.IsNaN(v1) {
if !math.IsNaN(v2) {
return fmt.Errorf("unexpected value; got %v; want %v", v1, v2)
}
continue
}
eps := math.Abs(v1 - v2)
if eps > 1e-14 {
return fmt.Errorf("unexpected value; got %v; want %v", v1, v2)
}
}
return nil
}

View File

@@ -0,0 +1,5 @@
package promql
import "unsafe"
const maxByteSliceLen = 1<<(31+9*(unsafe.Sizeof(int(0))/8)) - 1
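Note on the arithmetic: unsafe.Sizeof(int(0)) is 8 on 64-bit platforms, so the constant evaluates to 1<<40 - 1 there; on 32-bit platforms the integer division yields 0 and the constant becomes 1<<31 - 1. This single expression replaces the two build-tagged files removed below.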

View File

@@ -1,3 +0,0 @@
package promql
const maxByteSliceLen = 1 << 40

View File

@@ -1,3 +0,0 @@
package promql
const maxByteSliceLen = 1<<31 - 1

View File

@@ -260,6 +260,9 @@ func newBinaryOpFunc(bf func(left, right float64, isBool bool) float64) binaryOp
dstValues[j] = bf(a, b, isBool)
}
}
// Optimization: remove time series containing only NaNs.
// This is quite common after applying filters like `q > 0`.
dst = removeNaNs(dst)
return dst, nil
}
}
@@ -271,7 +274,7 @@ func adjustBinaryOpTags(be *binaryOpExpr, left, right []*timeseries) ([]*timeser
rvsLeft := make([]*timeseries, len(right))
tsLeft := left[0]
for i, tsRight := range right {
tsRight.MetricName.ResetMetricGroup()
resetMetricGroupIfRequired(be, tsRight)
rvsLeft[i] = tsLeft
}
return rvsLeft, right, right, nil
@@ -281,7 +284,7 @@ func adjustBinaryOpTags(be *binaryOpExpr, left, right []*timeseries) ([]*timeser
rvsRight := make([]*timeseries, len(left))
tsRight := right[0]
for i, tsLeft := range left {
tsLeft.MetricName.ResetMetricGroup()
resetMetricGroupIfRequired(be, tsLeft)
rvsRight[i] = tsRight
}
return left, rvsRight, left, nil
@@ -289,19 +292,14 @@ func adjustBinaryOpTags(be *binaryOpExpr, left, right []*timeseries) ([]*timeser
}
// Slow path: `vector op vector` or `a op {on|ignoring} {group_left|group_right} b`
ensureOneX := func(side string, tss []*timeseries) error {
if len(tss) == 0 {
logger.Panicf("BUG: tss must contain at least one value")
}
if len(tss) == 1 {
return nil
}
return fmt.Errorf(`duplicate timeseries on the %s side of %q: %s %s`, side, be.Op, stringMetricTags(&tss[0].MetricName), be.GroupModifier.AppendString(nil))
}
var rvsLeft, rvsRight []*timeseries
mLeft, mRight := createTimeseriesMapByTagSet(be, left, right)
joinOp := strings.ToLower(be.JoinModifier.Op)
joinTags := be.JoinModifier.Args
groupOp := strings.ToLower(be.GroupModifier.Op)
if len(groupOp) == 0 {
groupOp = "ignoring"
}
groupTags := be.GroupModifier.Args
for k, tssLeft := range mLeft {
tssRight := mRight[k]
if len(tssRight) == 0 {
@@ -309,37 +307,38 @@ func adjustBinaryOpTags(be *binaryOpExpr, left, right []*timeseries) ([]*timeser
}
switch joinOp {
case "group_left":
if err := ensureOneX("right", tssRight); err != nil {
var err error
rvsLeft, rvsRight, err = groupJoin("right", be, rvsLeft, rvsRight, tssLeft, tssRight)
if err != nil {
return nil, nil, nil, err
}
src := tssRight[0]
for _, ts := range tssLeft {
ts.MetricName.AddMissingTags(joinTags, &src.MetricName)
rvsLeft = append(rvsLeft, ts)
rvsRight = append(rvsRight, src)
}
case "group_right":
if err := ensureOneX("left", tssLeft); err != nil {
var err error
rvsRight, rvsLeft, err = groupJoin("left", be, rvsRight, rvsLeft, tssRight, tssLeft)
if err != nil {
return nil, nil, nil, err
}
src := tssLeft[0]
for _, ts := range tssRight {
ts.MetricName.AddMissingTags(joinTags, &src.MetricName)
rvsLeft = append(rvsLeft, src)
rvsRight = append(rvsRight, ts)
}
case "":
if err := ensureOneX("left", tssLeft); err != nil {
if err := ensureSingleTimeseries("left", be, tssLeft); err != nil {
return nil, nil, nil, err
}
if err := ensureOneX("right", tssRight); err != nil {
if err := ensureSingleTimeseries("right", be, tssRight); err != nil {
return nil, nil, nil, err
}
tssLeft[0].MetricName.ResetMetricGroup()
rvsLeft = append(rvsLeft, tssLeft[0])
tsLeft := tssLeft[0]
resetMetricGroupIfRequired(be, tsLeft)
switch groupOp {
case "on":
tsLeft.MetricName.RemoveTagsOn(groupTags)
case "ignoring":
tsLeft.MetricName.RemoveTagsIgnoring(groupTags)
default:
logger.Panicf("BUG: unexpected binary op modifier %q", groupOp)
}
rvsLeft = append(rvsLeft, tsLeft)
rvsRight = append(rvsRight, tssRight[0])
default:
return nil, nil, nil, fmt.Errorf(`unexpected join modifier %q`, joinOp)
logger.Panicf("BUG: unexpected join modifier %q", joinOp)
}
}
dst := rvsLeft
@@ -349,6 +348,103 @@ func adjustBinaryOpTags(be *binaryOpExpr, left, right []*timeseries) ([]*timeser
return rvsLeft, rvsRight, dst, nil
}
func ensureSingleTimeseries(side string, be *binaryOpExpr, tss []*timeseries) error {
if len(tss) == 0 {
logger.Panicf("BUG: tss must contain at least one value")
}
for len(tss) > 1 {
if !mergeNonOverlappingTimeseries(tss[0], tss[len(tss)-1]) {
return fmt.Errorf(`duplicate time series on the %s side of %s %s: %s and %s`, side, be.Op, be.GroupModifier.AppendString(nil),
stringMetricTags(&tss[0].MetricName), stringMetricTags(&tss[len(tss)-1].MetricName))
}
tss = tss[:len(tss)-1]
}
return nil
}
func groupJoin(singleTimeseriesSide string, be *binaryOpExpr, rvsLeft, rvsRight, tssLeft, tssRight []*timeseries) ([]*timeseries, []*timeseries, error) {
joinTags := be.JoinModifier.Args
var m map[string]*timeseries
for _, tsLeft := range tssLeft {
resetMetricGroupIfRequired(be, tsLeft)
if len(tssRight) == 1 {
// Easy case - right part contains only a single matching time series.
tsLeft.MetricName.AddMissingTags(joinTags, &tssRight[0].MetricName)
rvsLeft = append(rvsLeft, tsLeft)
rvsRight = append(rvsRight, tssRight[0])
continue
}
// Hard case - right part contains multiple matching time series.
// Verify it doesn't result in duplicate MetricName values after adding missing tags.
if m == nil {
m = make(map[string]*timeseries, len(tssRight))
} else {
for k := range m {
delete(m, k)
}
}
bb := bbPool.Get()
for _, tsRight := range tssRight {
var tsCopy timeseries
tsCopy.CopyFromShallowTimestamps(tsLeft)
tsCopy.MetricName.AddMissingTags(joinTags, &tsRight.MetricName)
bb.B = marshalMetricTagsSorted(bb.B[:0], &tsCopy.MetricName)
if tsExisting := m[string(bb.B)]; tsExisting != nil {
// Try merging tsExisting with tsRight if they don't overlap.
if mergeNonOverlappingTimeseries(tsExisting, tsRight) {
continue
}
return nil, nil, fmt.Errorf("duplicate time series on the %s side of `%s %s %s`: %s and %s",
singleTimeseriesSide, be.Op, be.GroupModifier.AppendString(nil), be.JoinModifier.AppendString(nil),
stringMetricTags(&tsExisting.MetricName), stringMetricTags(&tsRight.MetricName))
}
m[string(bb.B)] = tsRight
rvsLeft = append(rvsLeft, &tsCopy)
rvsRight = append(rvsRight, tsRight)
}
bbPool.Put(bb)
}
return rvsLeft, rvsRight, nil
}
func mergeNonOverlappingTimeseries(dst, src *timeseries) bool {
// Verify whether the time series can be merged.
srcValues := src.Values
dstValues := dst.Values
_ = dstValues[len(srcValues)-1]
for i, v := range srcValues {
if math.IsNaN(v) {
continue
}
if !math.IsNaN(dstValues[i]) {
return false
}
}
// Time series can be merged. Merge them.
for i, v := range srcValues {
if math.IsNaN(v) {
continue
}
dstValues[i] = v
}
return true
}
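A worked standalone example (hypothetical values) of the merge rule above: two series merge only when their non-NaN points never collide, e.g. [1 NaN 3] and [NaN 2 NaN] merge into [1 2 3], while a second series with a non-NaN value at an already-filled index is refused.
package main

import (
	"fmt"
	"math"
)

func mergeNonOverlapping(dst, src []float64) bool {
	// First verify there is no index where both series have real values.
	for i, v := range src {
		if !math.IsNaN(v) && !math.IsNaN(dst[i]) {
			return false
		}
	}
	// No overlap - copy the non-NaN points into dst.
	for i, v := range src {
		if !math.IsNaN(v) {
			dst[i] = v
		}
	}
	return true
}

func main() {
	nan := math.NaN()
	dst := []float64{1, nan, 3}
	fmt.Println(mergeNonOverlapping(dst, []float64{nan, 2, nan}), dst) // true [1 2 3]
	fmt.Println(mergeNonOverlapping(dst, []float64{nan, 2, 4}))       // false: overlap at index 1
}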
func resetMetricGroupIfRequired(be *binaryOpExpr, ts *timeseries) {
if isBinaryOpCmp(be.Op) && !be.Bool {
// Do not reset MetricGroup for non-boolean `compare` binary ops like Prometheus does.
return
}
switch be.Op {
case "default", "if", "ifnot":
// Do not reset MetricGroup for these ops.
return
}
ts.MetricName.ResetMetricGroup()
}
func binaryOpPlus(left, right float64) float64 {
return left + right
}
@@ -395,10 +491,25 @@ func binaryOpIfnot(left, right float64) float64 {
}
func binaryOpEq(left, right float64) bool {
// Special handling for nan == nan.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/150 .
if math.IsNaN(left) {
return math.IsNaN(right)
}
return left == right
}
func binaryOpNeq(left, right float64) bool {
// Special handling for comparison with nan.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/150 .
if math.IsNaN(left) {
return !math.IsNaN(right)
}
if math.IsNaN(right) {
return true
}
return left != right
}
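A quick standalone check of these semantics against plain IEEE-754 comparisons:
package main

import (
	"fmt"
	"math"
)

func binaryOpEq(left, right float64) bool {
	if math.IsNaN(left) {
		return math.IsNaN(right)
	}
	return left == right
}

func main() {
	nan := math.NaN()
	fmt.Println(nan == nan)           // false: IEEE-754 says NaN never equals NaN
	fmt.Println(binaryOpEq(nan, nan)) // true: the filtering semantics above
}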

View File

@@ -8,6 +8,7 @@ import (
"sync"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/netstorage"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/memory"
@@ -16,10 +17,10 @@ import (
)
var (
maxPointsPerTimeseries = flag.Int("search.maxPointsPerTimeseries", 10e3, "The maximum points per a single timeseries returned from the search")
maxPointsPerTimeseries = flag.Int("search.maxPointsPerTimeseries", 30e3, "The maximum points per a single timeseries returned from the search")
)
// The minumum number of points per timeseries for enabling time rounding.
// The minimum number of points per timeseries for enabling time rounding.
// This improves cache hit ratio for frequently requested queries over
// big time ranges.
const minTimeseriesPointsForTimeRounding = 50
@@ -31,7 +32,7 @@ const minTimeseriesPointsForTimeRounding = 50
func ValidateMaxPointsPerTimeseries(start, end, step int64) error {
points := (end-start)/step + 1
if uint64(points) > uint64(*maxPointsPerTimeseries) {
return fmt.Errorf(`too many points for the given step=%d, start=%d and end=%d: %d; cannot exceed %d points`,
return fmt.Errorf(`too many points for the given step=%d, start=%d and end=%d: %d; cannot exceed -search.maxPointsPerTimeseries=%d`,
step, start, end, uint64(points), *maxPointsPerTimeseries)
}
return nil
@@ -62,14 +63,20 @@ func AdjustStartEnd(start, end, step int64) (int64, int64) {
// EvalConfig is the configuration required for query evaluation via Exec
type EvalConfig struct {
Start int64
End int64
Step int64
AuthToken *auth.Token
Start int64
End int64
Step int64
Deadline netstorage.Deadline
MayCache bool
// LookbackDelta is analog to `-query.lookback-delta` from Prometheus.
LookbackDelta int64
DenyPartialResponse bool
timestamps []int64
timestampsOnce sync.Once
}
@@ -77,11 +84,14 @@ type EvalConfig struct {
// newEvalConfig returns new EvalConfig copy from src.
func newEvalConfig(src *EvalConfig) *EvalConfig {
var ec EvalConfig
ec.AuthToken = src.AuthToken
ec.Start = src.Start
ec.End = src.End
ec.Step = src.Step
ec.Deadline = src.Deadline
ec.MayCache = src.MayCache
ec.LookbackDelta = src.LookbackDelta
ec.DenyPartialResponse = src.DenyPartialResponse
// do not copy src.timestamps - they must be generated again.
return &ec
@@ -145,14 +155,14 @@ func evalExpr(ec *EvalConfig, e expr) ([]*timeseries, error) {
re := &rollupExpr{
Expr: me,
}
rv, err := evalRollupFunc(ec, "default_rollup", rollupDefault, re)
rv, err := evalRollupFunc(ec, "default_rollup", rollupDefault, re, nil)
if err != nil {
return nil, fmt.Errorf(`cannot evaluate %q: %s`, me.AppendString(nil), err)
}
return rv, nil
}
if re, ok := e.(*rollupExpr); ok {
rv, err := evalRollupFunc(ec, "default_rollup", rollupDefault, re)
rv, err := evalRollupFunc(ec, "default_rollup", rollupDefault, re, nil)
if err != nil {
return nil, fmt.Errorf(`cannot evaluate %q: %s`, re.AppendString(nil), err)
}
@@ -188,13 +198,30 @@ func evalExpr(ec *EvalConfig, e expr) ([]*timeseries, error) {
if err != nil {
return nil, err
}
rv, err := evalRollupFunc(ec, fe.Name, rf, re)
rv, err := evalRollupFunc(ec, fe.Name, rf, re, nil)
if err != nil {
return nil, fmt.Errorf(`cannot evaluate %q: %s`, fe.AppendString(nil), err)
}
return rv, nil
}
if ae, ok := e.(*aggrFuncExpr); ok {
if callbacks := getIncrementalAggrFuncCallbacks(ae.Name); callbacks != nil {
fe, nrf := tryGetArgRollupFuncWithMetricExpr(ae)
if fe != nil {
// There is an optimized path for calculating aggrFuncExpr over rollupFunc over metricExpr.
// The optimized path saves RAM for aggregates over a big number of time series.
args, re, err := evalRollupFuncArgs(ec, fe)
if err != nil {
return nil, err
}
rf, err := nrf(args)
if err != nil {
return nil, err
}
iafc := newIncrementalAggrFuncContext(ae, callbacks)
return evalRollupFunc(ec, fe.Name, rf, re, iafc)
}
}
args, err := evalExprs(ec, ae.Args)
if err != nil {
return nil, err
@@ -249,6 +276,71 @@ func evalExpr(ec *EvalConfig, e expr) ([]*timeseries, error) {
return nil, fmt.Errorf("unexpected expression %q", e.AppendString(nil))
}
func tryGetArgRollupFuncWithMetricExpr(ae *aggrFuncExpr) (*funcExpr, newRollupFunc) {
if len(ae.Args) != 1 {
return nil, nil
}
e := ae.Args[0]
// Make sure e contains one of the following:
// - metricExpr
// - metricExpr[d]
// - rollupFunc(metricExpr)
// - rollupFunc(metricExpr[d])
if me, ok := e.(*metricExpr); ok {
// e = metricExpr
if me.IsEmpty() {
return nil, nil
}
fe := &funcExpr{
Name: "default_rollup",
Args: []expr{me},
}
nrf := getRollupFunc(fe.Name)
return fe, nrf
}
if re, ok := e.(*rollupExpr); ok {
if me, ok := re.Expr.(*metricExpr); !ok || me.IsEmpty() || re.ForSubquery() {
return nil, nil
}
// e = metricExpr[d]
fe := &funcExpr{
Name: "default_rollup",
Args: []expr{re},
}
nrf := getRollupFunc(fe.Name)
return fe, nrf
}
fe, ok := e.(*funcExpr)
if !ok {
return nil, nil
}
nrf := getRollupFunc(fe.Name)
if nrf == nil {
return nil, nil
}
rollupArgIdx := getRollupArgIdx(fe.Name)
arg := fe.Args[rollupArgIdx]
if me, ok := arg.(*metricExpr); ok {
if me.IsEmpty() {
return nil, nil
}
// e = rollupFunc(metricExpr)
return &funcExpr{
Name: fe.Name,
Args: []expr{me},
}, nrf
}
if re, ok := arg.(*rollupExpr); ok {
if me, ok := re.Expr.(*metricExpr); !ok || me.IsEmpty() || re.ForSubquery() {
return nil, nil
}
// e = rollupFunc(metricExpr[d])
return fe, nrf
}
return nil, nil
}
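For illustration (a hedged reading of the cases above, not an exhaustive list): queries such as sum(http_requests_total) and sum(rate(http_requests_total[5m])) match the optimized shapes, while a subquery such as sum(rate(http_requests_total[5m:1m])) makes re.ForSubquery() return true and falls back to the generic aggregation path.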
func evalExprs(ec *EvalConfig, es []expr) ([][]*timeseries, error) {
var rvs [][]*timeseries
for _, e := range es {
@@ -288,8 +380,8 @@ func getRollupExprArg(arg expr) *rollupExpr {
Expr: arg,
}
}
if len(re.Step) == 0 && !re.InheritStep {
// Return standard rollup if it doesn't set step.
if !re.ForSubquery() {
// Return standard rollup if it doesn't contain subquery.
return re
}
me, ok := re.Expr.(*metricExpr)
@@ -308,7 +400,7 @@ func getRollupExprArg(arg expr) *rollupExpr {
return &reNew
}
func evalRollupFunc(ec *EvalConfig, name string, rf rollupFunc, re *rollupExpr) ([]*timeseries, error) {
func evalRollupFunc(ec *EvalConfig, name string, rf rollupFunc, re *rollupExpr, iafc *incrementalAggrFuncContext) ([]*timeseries, error) {
ecNew := ec
var offset int64
if len(re.Offset) > 0 {
@@ -325,19 +417,11 @@ func evalRollupFunc(ec *EvalConfig, name string, rf rollupFunc, re *rollupExpr)
var rvs []*timeseries
var err error
if me, ok := re.Expr.(*metricExpr); ok {
if me.IsEmpty() {
rvs = evalNumber(ecNew, nan)
} else {
var window int64
if len(re.Window) > 0 {
window, err = DurationValue(re.Window, ec.Step)
if err != nil {
return nil, err
}
}
rvs, err = evalRollupFuncWithMetricExpr(ecNew, name, rf, me, window)
}
rvs, err = evalRollupFuncWithMetricExpr(ecNew, name, rf, me, iafc, re.Window)
} else {
if iafc != nil {
logger.Panicf("BUG: iafc must be nil for rollup %q over subquery %q", name, re.AppendString(nil))
}
rvs, err = evalRollupFuncWithSubquery(ecNew, name, rf, re)
}
if err != nil {
@@ -379,8 +463,7 @@ func evalRollupFuncWithSubquery(ec *EvalConfig, name string, rf rollupFunc, re *
}
ecSQ := newEvalConfig(ec)
ecSQ.Start -= window + maxSilenceInterval
ecSQ.End += step
ecSQ.Start -= window + maxSilenceInterval + step
ecSQ.Step = step
if err := ValidateMaxPointsPerTimeseries(ecSQ.Start, ecSQ.End, ecSQ.Step); err != nil {
return nil, err
@@ -392,33 +475,22 @@ func evalRollupFuncWithSubquery(ec *EvalConfig, name string, rf rollupFunc, re *
}
sharedTimestamps := getTimestamps(ec.Start, ec.End, ec.Step)
preFunc, rcs := getRollupConfigs(name, rf, ec.Start, ec.End, ec.Step, window, sharedTimestamps)
preFunc, rcs := getRollupConfigs(name, rf, ec.Start, ec.End, ec.Step, window, ec.LookbackDelta, sharedTimestamps)
tss := make([]*timeseries, 0, len(tssSQ)*len(rcs))
var tssLock sync.Mutex
removeMetricGroup := !rollupFuncsKeepMetricGroup[name]
doParallel(tssSQ, func(tsSQ *timeseries, values []float64, timestamps []int64) ([]float64, []int64) {
values, timestamps = removeNanValues(values[:0], timestamps[:0], tsSQ.Values, tsSQ.Timestamps)
preFunc(values, timestamps)
for _, rc := range rcs {
var ts timeseries
ts.MetricName.CopyFrom(&tsSQ.MetricName)
if len(rc.TagValue) > 0 {
ts.MetricName.AddTag("rollup", rc.TagValue)
}
ts.Values = rc.Do(ts.Values[:0], values, timestamps)
ts.Timestamps = sharedTimestamps
ts.denyReuse = true
doRollupForTimeseries(rc, &ts, &tsSQ.MetricName, values, timestamps, sharedTimestamps, removeMetricGroup)
tssLock.Lock()
tss = append(tss, &ts)
tssLock.Unlock()
}
return values, timestamps
})
if !rollupFuncsKeepMetricGroup[name] {
tss = copyTimeseriesMetricNames(tss)
for _, ts := range tss {
ts.MetricName.ResetMetricGroup()
}
}
return tss, nil
}
@@ -472,31 +544,27 @@ func removeNanValues(dstValues []float64, dstTimestamps []int64, values []float6
return dstValues, dstTimestamps
}
func getMaxPointsPerRollup() int {
maxPointsPerRollupOnce.Do(func() {
n := memory.Allowed() / 16 / 8
if n <= 16 {
n = 16
}
maxPointsPerRollup = n
})
return maxPointsPerRollup
}
var (
maxPointsPerRollup int
maxPointsPerRollupOnce sync.Once
)
var (
rollupResultCacheFullHits = metrics.NewCounter(`vm_rollup_result_cache_full_hits_total`)
rollupResultCachePartialHits = metrics.NewCounter(`vm_rollup_result_cache_partial_hits_total`)
rollupResultCacheMiss = metrics.NewCounter(`vm_rollup_result_cache_miss_total`)
)
func evalRollupFuncWithMetricExpr(ec *EvalConfig, name string, rf rollupFunc, me *metricExpr, window int64) ([]*timeseries, error) {
func evalRollupFuncWithMetricExpr(ec *EvalConfig, name string, rf rollupFunc, me *metricExpr, iafc *incrementalAggrFuncContext, windowStr string) ([]*timeseries, error) {
if me.IsEmpty() {
return evalNumber(ec, nan), nil
}
var window int64
if len(windowStr) > 0 {
var err error
window, err = DurationValue(windowStr, ec.Step)
if err != nil {
return nil, err
}
}
// Search for partial results in cache.
tssCached, start := rollupResultCacheV.Get(name, ec, me, window)
tssCached, start := rollupResultCacheV.Get(name, ec, me, iafc, window)
if start > ec.End {
// The result is fully cached.
rollupResultCacheFullHits.Inc()
@@ -510,14 +578,19 @@ func evalRollupFuncWithMetricExpr(ec *EvalConfig, name string, rf rollupFunc, me
// Fetch the remaining part of the result.
sq := &storage.SearchQuery{
AccountID: ec.AuthToken.AccountID,
ProjectID: ec.AuthToken.ProjectID,
MinTimestamp: start - window - maxSilenceInterval,
MaxTimestamp: ec.End + ec.Step,
TagFilterss: [][]storage.TagFilter{me.TagFilters},
}
rss, err := netstorage.ProcessSearchQuery(sq, ec.Deadline)
rss, isPartial, err := netstorage.ProcessSearchQuery(ec.AuthToken, sq, true, ec.Deadline)
if err != nil {
return nil, err
}
if isPartial && ec.DenyPartialResponse {
return nil, fmt.Errorf("cannot return full response, since some of vmstorage nodes are unavailable")
}
rssLen := rss.Len()
if rssLen == 0 {
rss.Cancel()
@@ -528,33 +601,96 @@ func evalRollupFuncWithMetricExpr(ec *EvalConfig, name string, rf rollupFunc, me
return tss, nil
}
sharedTimestamps := getTimestamps(start, ec.End, ec.Step)
preFunc, rcs := getRollupConfigs(name, rf, start, ec.End, ec.Step, window, sharedTimestamps)
preFunc, rcs := getRollupConfigs(name, rf, start, ec.End, ec.Step, window, ec.LookbackDelta, sharedTimestamps)
// Verify timeseries fit available memory after the rollup.
// Take into account points from tssCached.
pointsPerTimeseries := 1 + (ec.End-ec.Start)/ec.Step
if uint64(pointsPerTimeseries) > uint64(getMaxPointsPerRollup()/rssLen/len(rcs)) {
rss.Cancel()
return nil, fmt.Errorf("cannot process more than %d data points for %d time series with %d points in each time series; "+
"possible solutions are: reducing the number of matching time series; switching to node with more RAM; increasing `step` query arg (%gs)",
getMaxPointsPerRollup(), rssLen*len(rcs), pointsPerTimeseries, float64(ec.Step)/1e3)
timeseriesLen := rssLen
if iafc != nil {
// Incremental aggregates only require holding GOMAXPROCS timeseries in memory.
timeseriesLen = runtime.GOMAXPROCS(-1)
if iafc.ae.Modifier.Op != "" {
// Increase the number of timeseries for non-empty group list: `aggr() by (something)`,
// since each group can have its own set of time series in memory.
// Assume the number of such groups is lower than 100 :)
timeseriesLen *= 100
}
}
rollupPoints := mulNoOverflow(pointsPerTimeseries, int64(timeseriesLen*len(rcs)))
rollupMemorySize := mulNoOverflow(rollupPoints, 16)
rml := getRollupMemoryLimiter()
if !rml.Get(uint64(rollupMemorySize)) {
rss.Cancel()
return nil, fmt.Errorf("not enough memory for processing %d data points across %d time series with %d points in each time series; "+
"possible solutions are: reducing the number of matching time series; switching to node with more RAM; "+
"increasing -memory.allowedPercent; increasing `step` query arg (%gs)",
rollupPoints, rssLen*len(rcs), pointsPerTimeseries, float64(ec.Step)/1e3)
}
defer rml.Put(uint64(rollupMemorySize))
// Evaluate rollup
tss := make([]*timeseries, 0, rssLen*len(rcs))
removeMetricGroup := !rollupFuncsKeepMetricGroup[name]
var tss []*timeseries
if iafc != nil {
tss, err = evalRollupWithIncrementalAggregate(iafc, rss, rcs, preFunc, sharedTimestamps, removeMetricGroup)
} else {
tss, err = evalRollupNoIncrementalAggregate(rss, rcs, preFunc, sharedTimestamps, removeMetricGroup)
}
if err != nil {
return nil, err
}
tss = mergeTimeseries(tssCached, tss, start, ec)
if !isPartial {
rollupResultCacheV.Put(name, ec, me, iafc, window, tss)
}
return tss, nil
}
var (
rollupMemoryLimiter memoryLimiter
rollupMemoryLimiterOnce sync.Once
)
func getRollupMemoryLimiter() *memoryLimiter {
rollupMemoryLimiterOnce.Do(func() {
rollupMemoryLimiter.MaxSize = uint64(memory.Allowed()) / 4
})
return &rollupMemoryLimiter
}
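The limiter caps concurrent rollups at a quarter of the allowed memory, and a reservation that doesn't fit fails immediately rather than blocking, which is why the caller above surfaces an error suggesting a cheaper query. A sketch of the reserve/release pattern, assuming the memoryLimiter type shown later in this diff (doRollupWithBudget is a hypothetical helper):

// doRollupWithBudget reserves an estimated byte budget before a rollup
// and releases it afterwards, using the same 16-byte-per-point estimate
// as rollupMemorySize above.
func doRollupWithBudget(rml *memoryLimiter, points int64) error {
	sizeBytes := uint64(points) * 16 // value + timestamp per point
	if !rml.Get(sizeBytes) {
		return fmt.Errorf("not enough memory for %d rollup points", points)
	}
	defer rml.Put(sizeBytes) // release the reservation when done
	// ... perform the rollup while the budget is held ...
	return nil
}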
func evalRollupWithIncrementalAggregate(iafc *incrementalAggrFuncContext, rss *netstorage.Results, rcs []*rollupConfig,
preFunc func(values []float64, timestamps []int64), sharedTimestamps []int64, removeMetricGroup bool) ([]*timeseries, error) {
err := rss.RunParallel(func(rs *netstorage.Result, workerID uint) {
preFunc(rs.Values, rs.Timestamps)
ts := getTimeseries()
defer putTimeseries(ts)
for _, rc := range rcs {
ts.Reset()
doRollupForTimeseries(rc, ts, &rs.MetricName, rs.Values, rs.Timestamps, sharedTimestamps, removeMetricGroup)
iafc.updateTimeseries(ts, workerID)
// ts.Timestamps points to sharedTimestamps. Zero it, so the pooled ts can be re-used.
ts.Timestamps = nil
ts.denyReuse = false
}
})
if err != nil {
return nil, err
}
tss := iafc.finalizeTimeseries()
return tss, nil
}
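The incremental path never materializes all series at once: each RunParallel worker rolls up one series into a reusable buffer and immediately folds it into per-worker aggregate state keyed by workerID, and finalizeTimeseries merges the partials. A simplified sketch of the idea, reduced to a plain sum (the real incrementalAggrFuncContext supports the full aggregate set; partial, fold and finalize are hypothetical names):

// partial holds one worker's running aggregate, so workers never contend.
type partial struct{ sums []float64 }

// fold adds one rolled-up series into the worker's partial state.
func fold(partials []partial, workerID uint, values []float64) {
	p := &partials[workerID]
	if p.sums == nil {
		p.sums = make([]float64, len(values))
	}
	for i, v := range values {
		p.sums[i] += v
	}
}

// finalize merges the per-worker partials into the final aggregate.
func finalize(partials []partial, numPoints int) []float64 {
	out := make([]float64, numPoints)
	for _, p := range partials {
		for i, s := range p.sums {
			out[i] += s
		}
	}
	return out
}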
func evalRollupNoIncrementalAggregate(rss *netstorage.Results, rcs []*rollupConfig,
preFunc func(values []float64, timestamps []int64), sharedTimestamps []int64, removeMetricGroup bool) ([]*timeseries, error) {
tss := make([]*timeseries, 0, rss.Len()*len(rcs))
var tssLock sync.Mutex
err = rss.RunParallel(func(rs *netstorage.Result) {
err := rss.RunParallel(func(rs *netstorage.Result, workerID uint) {
preFunc(rs.Values, rs.Timestamps)
for _, rc := range rcs {
var ts timeseries
ts.MetricName.CopyFrom(&rs.MetricName)
if len(rc.TagValue) > 0 {
ts.MetricName.AddTag("rollup", rc.TagValue)
}
ts.Values = rc.Do(ts.Values[:0], rs.Values, rs.Timestamps)
ts.Timestamps = sharedTimestamps
ts.denyReuse = true
doRollupForTimeseries(rc, &ts, &rs.MetricName, rs.Values, rs.Timestamps, sharedTimestamps, removeMetricGroup)
tssLock.Lock()
tss = append(tss, &ts)
tssLock.Unlock()
@@ -563,19 +699,25 @@ func evalRollupFuncWithMetricExpr(ec *EvalConfig, name string, rf rollupFunc, me
if err != nil {
return nil, err
}
if !rollupFuncsKeepMetricGroup[name] {
tss = copyTimeseriesMetricNames(tss)
for _, ts := range tss {
ts.MetricName.ResetMetricGroup()
}
}
tss = mergeTimeseries(tssCached, tss, start, ec)
rollupResultCacheV.Put(name, ec, me, window, tss)
return tss, nil
}
func getRollupConfigs(name string, rf rollupFunc, start, end, step, window int64, sharedTimestamps []int64) (func(values []float64, timestamps []int64), []*rollupConfig) {
func doRollupForTimeseries(rc *rollupConfig, tsDst *timeseries, mnSrc *storage.MetricName, valuesSrc []float64, timestampsSrc []int64,
sharedTimestamps []int64, removeMetricGroup bool) {
tsDst.MetricName.CopyFrom(mnSrc)
if len(rc.TagValue) > 0 {
tsDst.MetricName.AddTag("rollup", rc.TagValue)
}
if removeMetricGroup {
tsDst.MetricName.ResetMetricGroup()
}
tsDst.Values = rc.Do(tsDst.Values[:0], valuesSrc, timestampsSrc)
tsDst.Timestamps = sharedTimestamps
tsDst.denyReuse = true
}
func getRollupConfigs(name string, rf rollupFunc, start, end, step, window int64, lookbackDelta int64, sharedTimestamps []int64) (
func(values []float64, timestamps []int64), []*rollupConfig) {
preFunc := func(values []float64, timestamps []int64) {}
if rollupFuncsRemoveCounterResets[name] {
preFunc = func(values []float64, timestamps []int64) {
@@ -584,13 +726,15 @@ func getRollupConfigs(name string, rf rollupFunc, start, end, step, window int64
}
newRollupConfig := func(rf rollupFunc, tagValue string) *rollupConfig {
return &rollupConfig{
TagValue: tagValue,
Func: rf,
Start: start,
End: end,
Step: step,
Window: window,
Timestamps: sharedTimestamps,
TagValue: tagValue,
Func: rf,
Start: start,
End: end,
Step: step,
Window: window,
MayAdjustWindow: rollupFuncsMayAdjustWindow[name],
LookbackDelta: lookbackDelta,
Timestamps: sharedTimestamps,
}
}
appendRollupConfigs := func(dst []*rollupConfig) []*rollupConfig {
@@ -617,6 +761,11 @@ func getRollupConfigs(name string, rf rollupFunc, start, end, step, window int64
deltaValues(values)
}
rcs = appendRollupConfigs(rcs)
case "rollup_candlestick":
rcs = append(rcs, newRollupConfig(rollupFirst, "open"))
rcs = append(rcs, newRollupConfig(rollupLast, "close"))
rcs = append(rcs, newRollupConfig(rollupMin, "low"))
rcs = append(rcs, newRollupConfig(rollupMax, "high"))
default:
rcs = append(rcs, newRollupConfig(rf, ""))
}
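rollup_candlestick thus expands each input series into four output series tagged rollup="open", "close", "low" and "high" (the first, last, minimum and maximum value in each window), which maps directly onto OHLC candlestick bars.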
@@ -628,6 +777,8 @@ var bbPool bytesutil.ByteBufferPool
func evalNumber(ec *EvalConfig, n float64) []*timeseries {
var ts timeseries
ts.denyReuse = true
ts.MetricName.AccountID = ec.AuthToken.AccountID
ts.MetricName.ProjectID = ec.AuthToken.ProjectID
timestamps := ec.getSharedTimestamps()
values := make([]float64, len(timestamps))
for i := range timestamps {
@@ -653,3 +804,11 @@ func evalTime(ec *EvalConfig) []*timeseries {
}
return rv
}
func mulNoOverflow(a, b int64) int64 {
if math.MaxInt64/b < a {
// Overflow
return math.MaxInt64
}
return a * b
}
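mulNoOverflow saturates at math.MaxInt64 instead of wrapping, and its guard assumes b is positive (a zero b would make math.MaxInt64/b panic); both call sites above pass point counts of at least 1. A sketch with that precondition made explicit (mulSaturating is a hypothetical variant, not the library API):

// mulSaturating multiplies positive operands, saturating at MaxInt64.
func mulSaturating(a, b int64) int64 {
	if a <= 0 || b <= 0 {
		return 0 // assumption: callers only pass positive counts
	}
	if math.MaxInt64/b < a {
		return math.MaxInt64 // saturate instead of overflowing
	}
	return a * b
}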

View File

@@ -1,16 +1,23 @@
package promql
import (
"flag"
"fmt"
"math"
"sort"
"sync"
"sync/atomic"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/netstorage"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/metrics"
)
var logSlowQueryDuration = flag.Duration("search.logSlowQueryDuration", 5*time.Second, "Log queries with execution time exceeding this value. Zero disables slow query logging")
var slowQueries = metrics.NewCounter(`vm_slow_queries_total`)
// ExpandWithExprs expands WITH expressions inside q and returns the resulting
// PromQL without WITH expressions.
func ExpandWithExprs(q string) (string, error) {
@@ -22,8 +29,20 @@ func ExpandWithExprs(q string) (string, error) {
return string(buf), nil
}
// Exec executes q for the given ec until the deadline.
func Exec(ec *EvalConfig, q string) ([]netstorage.Result, error) {
// Exec executes q for the given ec.
func Exec(ec *EvalConfig, q string, isFirstPointOnly bool) ([]netstorage.Result, error) {
if *logSlowQueryDuration > 0 {
startTime := time.Now()
defer func() {
d := time.Since(startTime)
if d >= *logSlowQueryDuration {
logger.Infof("slow query according to -search.logSlowQueryDuration=%s: duration=%s, start=%d, end=%d, step=%d, accountID=%d, projectID=%d, query=%q",
*logSlowQueryDuration, d, ec.Start/1000, ec.End/1000, ec.Step/1000, ec.AuthToken.AccountID, ec.AuthToken.ProjectID, q)
slowQueries.Inc()
}
}()
}
ec.validate()
e, err := parsePromQLWithCache(q)
@@ -50,6 +69,14 @@ func Exec(ec *EvalConfig, q string) ([]netstorage.Result, error) {
}
ec.End -= ec.Step
if isFirstPointOnly {
// Remove all the points except the first one from every time series.
for _, ts := range rv {
ts.Values = ts.Values[:1]
ts.Timestamps = ts.Timestamps[:1]
}
}
maySort := maySortResults(e, rv)
result, err := timeseriesToResult(rv, maySort)
if err != nil {
@@ -78,14 +105,14 @@ func maySortResults(e expr, tss []*timeseries) bool {
func timeseriesToResult(tss []*timeseries, maySort bool) ([]netstorage.Result, error) {
tss = removeNaNs(tss)
result := make([]netstorage.Result, len(tss))
m := make(map[string]bool)
m := make(map[string]struct{}, len(tss))
bb := bbPool.Get()
for i, ts := range tss {
bb.B = marshalMetricNameSorted(bb.B[:0], &ts.MetricName)
if m[string(bb.B)] {
return nil, fmt.Errorf(`duplicate output timeseries: %s%s`, ts.MetricName.MetricGroup, stringMetricName(&ts.MetricName))
if _, ok := m[string(bb.B)]; ok {
return nil, fmt.Errorf(`duplicate output timeseries: %s`, stringMetricName(&ts.MetricName))
}
m[string(bb.B)] = true
m[string(bb.B)] = struct{}{}
rs := &result[i]
rs.MetricNameMarshaled = append(rs.MetricNameMarshaled[:0], bb.B...)
@@ -107,18 +134,23 @@ func timeseriesToResult(tss []*timeseries, maySort bool) ([]netstorage.Result, e
func removeNaNs(tss []*timeseries) []*timeseries {
rvs := tss[:0]
for _, ts := range tss {
nans := 0
allNans := true
for _, v := range ts.Values {
if math.IsNaN(v) {
nans++
if !math.IsNaN(v) {
allNans = false
break
}
}
if nans == len(ts.Values) {
if allNans {
// Skip timeseries with all NaNs.
continue
}
rvs = append(rvs, ts)
}
for i := len(rvs); i < len(tss); i++ {
// Zero unused time series, so GC could reclaim them.
tss[i] = nil
}
return rvs
}
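removeNaNs filters in place over tss[:0], so the tail of the original slice still holds pointers to the dropped series; the final loop nils them out so the garbage collector can reclaim them. The same idiom in isolation (filterInPlace is a hypothetical generic form):

// filterInPlace keeps items matching keep() without allocating, then
// zeroes the leftover tail so the GC can reclaim the dropped items.
func filterInPlace(items []*timeseries, keep func(*timeseries) bool) []*timeseries {
	dst := items[:0]
	for _, it := range items {
		if keep(it) {
			dst = append(dst, it)
		}
	}
	for i := len(dst); i < len(items); i++ {
		items[i] = nil
	}
	return dst
}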
@@ -162,11 +194,14 @@ type parseCacheValue struct {
}
type parseCache struct {
m map[string]*parseCacheValue
mu sync.RWMutex
// Move atomic counters to the top of struct for 8-byte alignment on 32-bit arch.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/212
requests uint64
misses uint64
m map[string]*parseCacheValue
mu sync.RWMutex
}
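The reordering matters because sync/atomic requires 64-bit operands to be 8-byte aligned on 32-bit platforms, and Go only guarantees that alignment for the first word of an allocated struct. A sketch of the safe layout with a hypothetical cacheStats type:

// cacheStats keeps its 64-bit atomics first so atomic.AddUint64 is safe
// on GOARCH=386 and arm.
type cacheStats struct {
	requests uint64 // updated atomically; must stay at offset 0
	misses   uint64
	mu       sync.RWMutex
	m        map[string]*parseCacheValue
}

func (cs *cacheStats) hit() uint64 {
	return atomic.AddUint64(&cs.requests, 1)
}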
func (pc *parseCache) Requests() uint64 {

File diff suppressed because it is too large

View File

@@ -4,6 +4,8 @@ import (
"fmt"
"strconv"
"strings"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
)
type lexer struct {
@@ -85,10 +87,7 @@ again:
goto tokenFoundLabel
}
if isIdentPrefix(s) {
token, err = scanIdent(s)
if err != nil {
return "", err
}
token = scanIdent(s)
goto tokenFoundLabel
}
if isStringPrefix(s) {
@@ -150,12 +149,6 @@ func scanString(s string) (string, error) {
}
func scanPositiveNumber(s string) (string, error) {
if strings.HasPrefix(s, "Inf") {
return "Inf", nil
}
if strings.HasPrefix(s, "NaN") {
return "NaN", nil
}
// Scan integer part. It may be empty if fractional part exists.
i := 0
for i < len(s) && isDecimalChar(s[i]) {
@@ -210,15 +203,103 @@ func scanPositiveNumber(s string) (string, error) {
return s[:j], nil
}
func scanIdent(s string) (string, error) {
if len(s) == 0 {
return "", fmt.Errorf("ident cannot be empty")
}
func scanIdent(s string) string {
i := 0
for i < len(s) && isIdentChar(s[i]) {
i++
for i < len(s) {
if isIdentChar(s[i]) {
i++
continue
}
if s[i] != '\\' {
break
}
// Do not verify the next char, since it is escaped.
i += 2
if i > len(s) {
i--
break
}
}
return s[:i], nil
if i == 0 {
logger.Panicf("BUG: scanIdent couldn't find a single ident char; make sure isIdentPrefix called before scanIdent")
}
return s[:i]
}
func unescapeIdent(s string) string {
n := strings.IndexByte(s, '\\')
if n < 0 {
return s
}
dst := make([]byte, 0, len(s))
for {
dst = append(dst, s[:n]...)
s = s[n+1:]
if len(s) == 0 {
return string(dst)
}
if s[0] == 'x' && len(s) >= 3 {
h1 := fromHex(s[1])
h2 := fromHex(s[2])
if h1 >= 0 && h2 >= 0 {
dst = append(dst, byte((h1<<4)|h2))
s = s[3:]
} else {
dst = append(dst, s[0])
s = s[1:]
}
} else {
dst = append(dst, s[0])
s = s[1:]
}
n = strings.IndexByte(s, '\\')
if n < 0 {
dst = append(dst, s...)
return string(dst)
}
}
}
func fromHex(ch byte) int {
if ch >= '0' && ch <= '9' {
return int(ch - '0')
}
if ch >= 'a' && ch <= 'f' {
return int((ch - 'a') + 10)
}
if ch >= 'A' && ch <= 'F' {
return int((ch - 'A') + 10)
}
return -1
}
func toHex(n byte) byte {
if n < 10 {
return '0' + n
}
return 'a' + (n - 10)
}
func appendEscapedIdent(dst, s []byte) []byte {
for i := 0; i < len(s); i++ {
ch := s[i]
if isIdentChar(ch) {
if i == 0 && !isFirstIdentChar(ch) {
// hex-encode the first char
dst = append(dst, '\\', 'x', toHex(ch>>4), toHex(ch&0xf))
} else {
dst = append(dst, ch)
}
} else if ch >= 0x20 && ch < 0x7f {
// Leave ASCII printable chars as is
dst = append(dst, '\\', ch)
} else {
// hex-encode non-printable chars
dst = append(dst, '\\', 'x', toHex(ch>>4), toHex(ch&0xf))
}
}
return dst
}
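appendEscapedIdent and unescapeIdent form a round trip: printable non-ident chars get a single-backslash escape, while everything else (including a non-ident first char) is hex-encoded as \xNN. A minimal sketch, assuming both helpers from this file are in scope:

// exampleIdentRoundTrip escapes an identifier for serialization and
// restores it; the intermediate form here is `foo\ bar\-baz`.
func exampleIdentRoundTrip() {
	src := []byte("foo bar-baz")
	escaped := appendEscapedIdent(nil, src)
	restored := unescapeIdent(string(escaped))
	fmt.Printf("%s -> %s -> %s\n", src, escaped, restored)
}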
func (lex *lexer) Prev() {
@@ -246,6 +327,14 @@ func scanTagFilterOpPrefix(s string) int {
return -1
}
func isInfOrNaN(s string) bool {
if len(s) != 3 {
return false
}
s = strings.ToLower(s)
return s == "inf" || s == "nan"
}
func isOffset(s string) bool {
s = strings.ToLower(s)
return s == "offset"
@@ -274,7 +363,7 @@ func isPositiveNumberPrefix(s string) bool {
// Check for .234 numbers
if s[0] != '.' || len(s) < 2 {
return strings.HasPrefix(s, "Inf") || strings.HasPrefix(s, "NaN")
return false
}
return isDecimalChar(s[1])
}
@@ -353,6 +442,10 @@ func isIdentPrefix(s string) bool {
if len(s) == 0 {
return false
}
if s[0] == '\\' {
// Assume this is an escape char for the next char.
return true
}
return isFirstIdentChar(s[0])
}
@@ -367,7 +460,7 @@ func isIdentChar(ch byte) bool {
if isFirstIdentChar(ch) {
return true
}
return isDecimalChar(ch) || ch == ':' || ch == '.'
return isDecimalChar(ch) || ch == '.'
}
func isSpaceChar(ch byte) bool {

View File

@@ -5,6 +5,57 @@ import (
"testing"
)
func TestUnescapeIdent(t *testing.T) {
f := func(s, resultExpected string) {
t.Helper()
result := unescapeIdent(s)
if result != resultExpected {
t.Fatalf("unexpected result for unescapeIdent(%q); got %q; want %q", s, result, resultExpected)
}
}
f("", "")
f("a", "a")
f("\\", "")
f(`\\`, `\`)
f(`\foo\-bar`, `foo-bar`)
f(`a\\\\b\"c\d`, `a\\b"cd`)
f(`foo.bar:baz_123`, `foo.bar:baz_123`)
f(`foo\ bar`, `foo bar`)
f(`\x21`, `!`)
f(`\xeDfoo\x2Fbar\-\xqw\x`, "\xedfoo\x2fbar-xqwx")
}
func TestAppendEscapedIdent(t *testing.T) {
f := func(s, resultExpected string) {
t.Helper()
result := appendEscapedIdent(nil, []byte(s))
if string(result) != resultExpected {
t.Fatalf("unexpected result for appendEscapedIdent(%q); got %q; want %q", s, result, resultExpected)
}
}
f(`a`, `a`)
f(`a.b:c_23`, `a.b:c_23`)
f(`a b-cd+dd\`, `a\ b\-cd\+dd\\`)
f("a\x1E\x20\xee", `a\x1e\ \xee`)
f("\x2e\x2e", `\x2e.`)
}
func TestScanIdent(t *testing.T) {
f := func(s, resultExpected string) {
t.Helper()
result := scanIdent(s)
if result != resultExpected {
t.Fatalf("unexpected result for scanIdent(%q): got %q; want %q", s, result, resultExpected)
}
}
f("a", "a")
f("foo.bar:baz_123", "foo.bar:baz_123")
f("a+b", "a")
f("foo()", "foo")
f(`a\-b+c`, `a\-b`)
f(`a\ b\\\ c\`, `a\ b\\\ c\`)
}
func TestLexerNextPrev(t *testing.T) {
var lex lexer
lex.Init("foo bar baz")

View File

@@ -0,0 +1,33 @@
package promql
import (
"sync"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
)
type memoryLimiter struct {
MaxSize uint64
mu sync.Mutex
usage uint64
}
func (ml *memoryLimiter) Get(n uint64) bool {
ml.mu.Lock()
ok := n <= ml.MaxSize && ml.MaxSize-n >= ml.usage
if ok {
ml.usage += n
}
ml.mu.Unlock()
return ok
}
func (ml *memoryLimiter) Put(n uint64) {
ml.mu.Lock()
if n > ml.usage {
logger.Panicf("BUG: n=%d cannot exceed %d", n, ml.usage)
}
ml.usage -= n
ml.mu.Unlock()
}
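Get and Put are mutex-protected, so independent queries can safely race on one shared budget: whichever reservations fit first win and the rest fail fast instead of blocking. A small concurrency sketch, assuming the memoryLimiter above plus the standard sync package (raceForBudget is a hypothetical helper):

// With MaxSize=100, at most two of these four 40-byte reservations
// succeed at any one time; the others degrade immediately.
func raceForBudget(ml *memoryLimiter) {
	var wg sync.WaitGroup
	for i := 0; i < 4; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			if ml.Get(40) {
				defer ml.Put(40)
				// ... work within the reservation ...
			}
		}()
	}
	wg.Wait()
}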

View File

@@ -0,0 +1,56 @@
package promql
import (
"testing"
)
func TestMemoryLimiter(t *testing.T) {
var ml memoryLimiter
ml.MaxSize = 100
// Allocate memory
if !ml.Get(10) {
t.Fatalf("cannot get 10 out of %d bytes", ml.MaxSize)
}
if ml.usage != 10 {
t.Fatalf("unexpected usage; got %d; want %d", ml.usage, 10)
}
if !ml.Get(20) {
t.Fatalf("cannot get 20 out of 90 bytes")
}
if ml.usage != 30 {
t.Fatalf("unexpected usage; got %d; want %d", ml.usage, 30)
}
if ml.Get(1000) {
t.Fatalf("unexpected get for 1000 bytes")
}
if ml.usage != 30 {
t.Fatalf("unexpected usage; got %d; want %d", ml.usage, 30)
}
if ml.Get(71) {
t.Fatalf("unexpected get for 71 bytes")
}
if ml.usage != 30 {
t.Fatalf("unexpected usage; got %d; want %d", ml.usage, 30)
}
if !ml.Get(70) {
t.Fatalf("cannot get 70 bytes")
}
if ml.usage != 100 {
t.Fatalf("unexpected usage; got %d; want %d", ml.usage, 100)
}
// Return memory back
ml.Put(10)
ml.Put(70)
if ml.usage != 20 {
t.Fatalf("unexpected usage; got %d; want %d", ml.usage, 20)
}
if !ml.Get(30) {
t.Fatalf("cannot get 30 bytes")
}
ml.Put(50)
if ml.usage != 0 {
t.Fatalf("unexpected usage; got %d; want %d", ml.usage, 0)
}
}

View File

@@ -6,7 +6,6 @@ import (
"strings"
"sync"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
)
@@ -19,12 +18,13 @@ func getDefaultWithArgExprs() []*withArgExpr {
// ttf - time to fuckup
`ttf(freev) = smooth_exponential(
clamp_max(clamp_min(freev, 0) / clamp_min(deriv(-freev), 0), 365*24*3600),
clamp_max(clamp_max(-freev, 0) / clamp_max(deriv_fast(freev), 0), 365*24*3600),
clamp_max(step()/300, 1)
)`,
`median_over_time(m) = quantile_over_time(0.5, m)`,
`range_median(q) = range_quantile(0.5, q)`,
`alias(q, name) = label_set(q, "__name__", name)`,
})
})
return defaultWithArgExprs
@@ -116,13 +116,17 @@ func removeParensExpr(e expr) expr {
return fe
}
if pe, ok := e.(*parensExpr); ok {
args := *pe
for i, arg := range args {
args[i] = removeParensExpr(arg)
}
if len(*pe) == 1 {
return removeParensExpr((*pe)[0])
return args[0]
}
// Treat parensExpr as a function with empty name, i.e. union()
fe := &funcExpr{
Name: "",
Args: *pe,
Args: args,
}
return fe
}
@@ -373,7 +377,7 @@ func (p *parser) parseSingleExpr() (expr, error) {
}
func (p *parser) parseSingleExprWithoutRollupSuffix() (expr, error) {
if isPositiveNumberPrefix(p.lex.Token) {
if isPositiveNumberPrefix(p.lex.Token) || isInfOrNaN(p.lex.Token) {
return p.parsePositiveNumberExpr()
}
if isStringPrefix(p.lex.Token) {
@@ -417,7 +421,7 @@ func (p *parser) parseSingleExprWithoutRollupSuffix() (expr, error) {
}
func (p *parser) parsePositiveNumberExpr() (*numberExpr, error) {
if !isPositiveNumberPrefix(p.lex.Token) {
if !isPositiveNumberPrefix(p.lex.Token) && !isInfOrNaN(p.lex.Token) {
return nil, fmt.Errorf(`positiveNumberExpr: unexpected token %q; want "number"`, p.lex.Token)
}
@@ -744,7 +748,7 @@ func expandWithExpr(was []*withArgExpr, e expr) (expr, error) {
if !t.HasNonEmptyMetricGroup() {
return t, nil
}
k := bytesutil.ToUnsafeString(t.TagFilters[0].Value)
k := string(appendEscapedIdent(nil, t.TagFilters[0].Value))
wa := getWithArgExpr(was, k)
if wa == nil {
return t, nil
@@ -811,7 +815,9 @@ func expandModifierArgs(was []*withArgExpr, args []string) ([]string, error) {
continue
}
if len(wa.Args) > 0 {
return nil, fmt.Errorf("cannot use func %q instead of %q in %s", wa.Name, arg, args)
// Template funcs cannot be used inside modifier list. Leave the arg as is.
dstArgs = append(dstArgs, arg)
continue
}
me, ok := wa.Expr.(*metricExpr)
if ok {
@@ -851,6 +857,10 @@ func expandModifierArgs(was []*withArgExpr, args []string) ([]string, error) {
func expandWithExprExt(was []*withArgExpr, wa *withArgExpr, args []expr) (expr, error) {
if len(wa.Args) != len(args) {
if args == nil {
// Just return metricExpr with the wa.Name name.
return newMetricExpr(wa.Name), nil
}
return nil, fmt.Errorf("invalid number of args for %q; got %d; want %d", wa.Name, len(args), len(wa.Args))
}
wasNew := make([]*withArgExpr, 0, len(was)+len(args))
@@ -869,6 +879,14 @@ func expandWithExprExt(was []*withArgExpr, wa *withArgExpr, args []expr) (expr,
return expandWithExpr(wasNew, wa.Expr)
}
func newMetricExpr(name string) *metricExpr {
return &metricExpr{
TagFilters: []storage.TagFilter{{
Value: []byte(name),
}},
}
}
func extractStringValue(token string) (string, error) {
if !isStringPrefix(token) {
return "", fmt.Errorf(`stringExpr must contain only string literals; got %q`, token)
@@ -1074,9 +1092,6 @@ func (p *parser) parseTagFilterExpr() (*tagFilterExpr, error) {
}
var tfe tagFilterExpr
tfe.Key = p.lex.Token
if tfe.Key == "__name__" {
tfe.Key = ""
}
if err := p.lex.Next(); err != nil {
return nil, err
}
@@ -1125,8 +1140,16 @@ func (tfe *tagFilterExpr) toTagFilter() (*storage.TagFilter, error) {
}
var tf storage.TagFilter
tf.Key = []byte(tfe.Key)
tf.Value = []byte(tfe.Value.S)
tf.Key = []byte(unescapeIdent(tfe.Key))
if len(tfe.Key) == 0 {
tf.Value = []byte(unescapeIdent(tfe.Value.S))
} else {
tf.Value = []byte(tfe.Value.S)
}
if string(tf.Key) == "__name__" {
// This is required for storage.Search
tf.Key = nil
}
tf.IsRegexp = tfe.IsRegexp
tf.IsNegative = tfe.IsNegative
if !tf.IsRegexp {
@@ -1507,7 +1530,7 @@ func (wa *withArgExpr) AppendString(dst []byte) []byte {
}
type rollupExpr struct {
// The expression for the rollup. Usually it is metricExpr, but may be arbitary expr
// The expression for the rollup. Usually it is metricExpr, but may be arbitrary expr
// if subquery is used. https://prometheus.io/blog/2019/01/28/subquery-support/
Expr expr
@@ -1531,6 +1554,10 @@ type rollupExpr struct {
InheritStep bool
}
func (re *rollupExpr) ForSubquery() bool {
return len(re.Step) > 0 || re.InheritStep
}
func (re *rollupExpr) AppendString(dst []byte) []byte {
needParens := func() bool {
if _, ok := re.Expr.(*rollupExpr); ok {
@@ -1585,7 +1612,7 @@ func (me *metricExpr) AppendString(dst []byte) []byte {
if len(tfs) > 0 {
tf := &tfs[0]
if len(tf.Key) == 0 && !tf.IsNegative && !tf.IsRegexp {
dst = append(dst, tf.Value...)
dst = appendEscapedIdent(dst, tf.Value)
tfs = tfs[1:]
}
}
@@ -1627,7 +1654,7 @@ func appendStringTagFilter(dst []byte, tf *storage.TagFilter) []byte {
if len(tf.Key) == 0 {
dst = append(dst, "__name__"...)
} else {
dst = append(dst, tf.Key...)
dst = appendEscapedIdent(dst, tf.Key)
}
var op string
if tf.IsNegative {

View File

@@ -118,6 +118,17 @@ func TestParsePromQLSuccess(t *testing.T) {
same("with")
same("WITH")
same("With")
same("alias")
same(`alias{foo="bar"}`)
same(`aLIas{alias="aa"}`)
another(`al\ias`, `alias`)
// identifiers with escape chars
same(`foo\ bar`)
same(`foo\-bar\{{baz\+bar="aa"}`)
another(`\x2E\x2ef\oo{b\xEF\ar="aa"}`, `\x2e.foo{b\xefar="aa"}`)
// Duplicate filters
same(`foo{__name__="bar"}`)
same(`foo{a="b", a="c", __name__="aaa", b="d"}`)
// Metric filters ending with comma
another(`m{foo="bar",}`, `m{foo="bar"}`)
// String concat in tag value
@@ -159,14 +170,34 @@ func TestParsePromQLSuccess(t *testing.T) {
another(`-.2`, `-0.2`)
another(`-.2E-2`, `-0.002`)
same(`NaN`)
another(`nan`, `NaN`)
another(`NAN`, `NaN`)
another(`nAN`, `NaN`)
another(`Inf`, `+Inf`)
another(`INF`, `+Inf`)
another(`inf`, `+Inf`)
another(`+Inf`, `+Inf`)
another(`-Inf`, `-Inf`)
another(`-inF`, `-Inf`)
// binaryOpExpr
another(`NaN + 2 *3 * Inf`, `NaN`)
another(`Inf - Inf`, `NaN`)
another(`Inf + Inf`, `+Inf`)
another(`nan == nan`, `NaN`)
another(`nan ==bool nan`, `1`)
another(`nan !=bool nan`, `0`)
another(`nan !=bool 2`, `1`)
another(`2 !=bool nan`, `1`)
another(`nan >bool nan`, `0`)
another(`nan <bool nan`, `0`)
another(`1 ==bool nan`, `0`)
another(`NaN !=bool 1`, `1`)
another(`inf >=bool 2`, `1`)
another(`-1 >bool -inf`, `1`)
another(`-1 <bool -inf`, `0`)
another(`nan + 2 *3 * inf`, `NaN`)
another(`INF - Inf`, `NaN`)
another(`Inf + inf`, `+Inf`)
another(`1/0`, `+Inf`)
another(`0/0`, `NaN`)
another(`-m`, `0 - m`)
same(`m + ignoring () n[5m]`)
another(`M + IGNORING () N[5m]`, `M + ignoring () N[5m]`)
@@ -221,6 +252,8 @@ func TestParsePromQLSuccess(t *testing.T) {
another(`(-foo + ((bar) / (baz))) + ((23))`, `((0 - foo) + (bar / baz)) + 23`)
another(`(FOO + ((Bar) / (baZ))) + ((23))`, `(FOO + (Bar / baZ)) + 23`)
same(`(foo, bar)`)
another(`((foo, bar),(baz))`, `((foo, bar), baz)`)
same(`(foo, (bar, baz), ((x, y), (z, y), xx))`)
another(`1+(foo, bar,)`, `1 + (foo, bar)`)
another(`((foo(bar,baz)), (1+(2)+(3,4)+()))`, `(foo(bar, baz), (3 + (3, 4)) + ())`)
same(`()`)
@@ -251,6 +284,8 @@ func TestParsePromQLSuccess(t *testing.T) {
same(`rate(rate(m[5m]))`)
same(`rate(rate(m[5m])[1h:])`)
same(`rate(rate(m[5m])[1h:3s])`)
// funcName with escape chars
same(`foo\(ba\-r()`)
// aggrFuncExpr
same(`sum(http_server_request) by ()`)
@@ -295,10 +330,14 @@ func TestParsePromQLSuccess(t *testing.T) {
another(`with (ct={job="test", i="bar"}) ct + {ct, x="d"} + foo{ct, ct} + ctx(1)`,
`(({job="test", i="bar"} + {job="test", i="bar", x="d"}) + foo{job="test", i="bar"}) + ctx(1)`)
another(`with (foo = bar) {__name__=~"foo"}`, `{__name__=~"foo"}`)
another(`with (foo = bar) {__name__="foo"}`, `bar`)
another(`with (foo = bar) foo{__name__="foo"}`, `bar`)
another(`with (foo = bar) {__name__="foo", x="y"}`, `bar{x="y"}`)
another(`with (foo(bar) = {__name__!="bar"}) foo(x)`, `{__name__!="bar"}`)
another(`with (foo(bar) = {__name__="bar"}) foo(x)`, `x`)
another(`with (foo(bar) = bar{__name__="bar"}) foo(x)`, `x`)
another(`with (foo\-bar(baz) = baz + baz) foo\-bar((x,y))`, `(x, y) + (x, y)`)
another(`with (foo\-bar(baz) = baz + baz) foo\-bar(x*y)`, `(x * y) + (x * y)`)
another(`with (foo\-bar(baz) = baz + baz) foo\-bar(x\*y)`, `x\*y + x\*y`)
another(`with (foo\-bar(b\ az) = b\ az + b\ az) foo\-bar(x\*y)`, `x\*y + x\*y`)
// override ttf to something new.
another(`with (ttf = a) ttf + b`, `a + b`)
// override ttf to ru
@@ -332,8 +371,11 @@ func TestParsePromQLSuccess(t *testing.T) {
another(`with (x="a", y=x) y+"bc"`, `"abc"`)
another(`with (x="a", y="b"+x) "we"+y+"z"+f()`, `"webaz" + f()`)
another(`with (f(x) = m{foo=x+"y", bar="y"+x, baz=x} + x) f("qwe")`, `m{foo="qwey", bar="yqwe", baz="qwe"} + "qwe"`)
another(`with (f(a)=a) f`, `f`)
another(`with (f\q(a)=a) f\q`, `fq`)
// Verify withExpr for aggr func modifiers
another(`with (f(x) = x, y = sum(m) by (f)) y`, `sum(m) by (f)`)
another(`with (f(x) = sum(m) by (x)) f(foo)`, `sum(m) by (foo)`)
another(`with (f(x) = sum(m) by (x)) f((foo, bar, foo))`, `sum(m) by (foo, bar)`)
another(`with (f(x) = sum(m) without (x,y)) f((a, b))`, `sum(m) without (a, b, y)`)
@@ -658,7 +700,7 @@ func TestParsePromQLError(t *testing.T) {
f(`with (x=m) f(b, a{x})`)
f(`with (x=m) sum(a{x})`)
f(`with (x=m) (a{x})`)
f(`with (f(a)=a) f`)
f(`with (f(a)=a) f(1, 2)`)
f(`with (f(x)=x{foo="bar"}) f(1)`)
f(`with (f(x)=x{foo="bar"}) f(m + n)`)
f(`with (f = with`)
@@ -668,8 +710,7 @@ func TestParsePromQLError(t *testing.T) {
f(`with (f(,)=x) x`)
f(`with (x(a) = {b="c"}) foo{x}`)
f(`with (f(x) = m{foo=xx}) f("qwe")`)
f(`a + with(f(x)=x) f`)
f(`with (f(x) = x, y = sum(m) by (f)) y`)
f(`a + with(f(x)=x) f(1,2)`)
f(`with (f(x) = sum(m) by (x)) f({foo="bar"})`)
f(`with (f(x) = sum(m) by (x)) f((xx(), {foo="bar"}))`)
f(`with (f(x) = m + on (x) n) f(xx())`)

View File

@@ -1,7 +1,6 @@
package promql
import (
"fmt"
"regexp"
"sync"
"sync/atomic"
@@ -10,12 +9,16 @@ import (
)
func compileRegexpAnchored(re string) (*regexp.Regexp, error) {
reAnchored := "^(?:" + re + ")$"
return compileRegexp(reAnchored)
}
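Wrapping the pattern in a non-capturing group before anchoring is what makes alternations behave: "foo|bar" becomes ^(?:foo|bar)$, which matches exactly foo or bar, whereas a naive ^foo|bar$ would also match strings like "foox" and "xbar".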
func compileRegexp(re string) (*regexp.Regexp, error) {
rcv := regexpCacheV.Get(re)
if rcv != nil {
return rcv.r, rcv.err
}
regexAnchored := fmt.Sprintf("^(?:%s)$", re)
r, err := regexp.Compile(regexAnchored)
r, err := regexp.Compile(re)
rcv = &regexpCacheValue{
r: r,
err: err,
@@ -48,11 +51,14 @@ type regexpCacheValue struct {
}
type regexpCache struct {
m map[string]*regexpCacheValue
mu sync.RWMutex
// Move atomic counters to the top of struct for 8-byte alignment on 32-bit arch.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/212
requests uint64
misses uint64
m map[string]*regexpCacheValue
mu sync.RWMutex
}
func (rc *regexpCache) Requests() uint64 {
@@ -77,7 +83,7 @@ func (rc *regexpCache) Get(regexp string) *regexpCacheValue {
rcv := rc.m[regexp]
rc.mu.RUnlock()
if rc == nil {
if rcv == nil {
atomic.AddUint64(&rc.misses, 1)
}
return rcv

View File

@@ -3,7 +3,6 @@ package promql
import (
"fmt"
"math"
"sort"
"strings"
"sync"
@@ -19,13 +18,14 @@ var rollupFuncs = map[string]newRollupFunc{
// See funcs accepting range-vector on https://prometheus.io/docs/prometheus/latest/querying/functions/ .
"changes": newRollupFuncOneArg(rollupChanges),
"delta": newRollupFuncOneArg(rollupDelta),
"deriv": newRollupFuncOneArg(rollupDeriv),
"deriv": newRollupFuncOneArg(rollupDerivSlow),
"deriv_fast": newRollupFuncOneArg(rollupDerivFast),
"holt_winters": newRollupHoltWinters,
"idelta": newRollupFuncOneArg(rollupIdelta),
"increase": newRollupFuncOneArg(rollupDelta), // + rollupFuncsRemoveCounterResets
"irate": newRollupFuncOneArg(rollupIderiv), // + rollupFuncsRemoveCounterResets
"increase": newRollupFuncOneArg(rollupIncrease), // + rollupFuncsRemoveCounterResets
"irate": newRollupFuncOneArg(rollupIderiv), // + rollupFuncsRemoveCounterResets
"predict_linear": newRollupPredictLinear,
"rate": newRollupFuncOneArg(rollupDeriv), // + rollupFuncsRemoveCounterResets
"rate": newRollupFuncOneArg(rollupDerivFast), // + rollupFuncsRemoveCounterResets
"resets": newRollupFuncOneArg(rollupResets),
"avg_over_time": newRollupFuncOneArg(rollupAvg),
"min_over_time": newRollupFuncOneArg(rollupMin),
@@ -37,16 +37,36 @@ var rollupFuncs = map[string]newRollupFunc{
"stdvar_over_time": newRollupFuncOneArg(rollupStdvar),
// Additional rollup funcs.
"first_over_time": newRollupFuncOneArg(rollupFirst),
"last_over_time": newRollupFuncOneArg(rollupLast),
"distinct_over_time": newRollupFuncOneArg(rollupDistinct),
"integrate": newRollupFuncOneArg(rollupIntegrate),
"ideriv": newRollupFuncOneArg(rollupIderiv),
"rollup": newRollupFuncOneArg(rollupFake),
"rollup_rate": newRollupFuncOneArg(rollupFake), // + rollupFuncsRemoveCounterResets
"rollup_deriv": newRollupFuncOneArg(rollupFake),
"rollup_delta": newRollupFuncOneArg(rollupFake),
"rollup_increase": newRollupFuncOneArg(rollupFake), // + rollupFuncsRemoveCounterResets
"sum2_over_time": newRollupFuncOneArg(rollupSum2),
"geomean_over_time": newRollupFuncOneArg(rollupGeomean),
"first_over_time": newRollupFuncOneArg(rollupFirst),
"last_over_time": newRollupFuncOneArg(rollupLast),
"distinct_over_time": newRollupFuncOneArg(rollupDistinct),
"increases_over_time": newRollupFuncOneArg(rollupIncreases),
"decreases_over_time": newRollupFuncOneArg(rollupDecreases),
"integrate": newRollupFuncOneArg(rollupIntegrate),
"ideriv": newRollupFuncOneArg(rollupIderiv),
"lifetime": newRollupFuncOneArg(rollupLifetime),
"lag": newRollupFuncOneArg(rollupLag),
"scrape_interval": newRollupFuncOneArg(rollupScrapeInterval),
"rollup": newRollupFuncOneArg(rollupFake),
"rollup_rate": newRollupFuncOneArg(rollupFake), // + rollupFuncsRemoveCounterResets
"rollup_deriv": newRollupFuncOneArg(rollupFake),
"rollup_delta": newRollupFuncOneArg(rollupFake),
"rollup_increase": newRollupFuncOneArg(rollupFake), // + rollupFuncsRemoveCounterResets
"rollup_candlestick": newRollupFuncOneArg(rollupFake),
}
var rollupFuncsMayAdjustWindow = map[string]bool{
"default_rollup": true,
"first_over_time": true,
"last_over_time": true,
"deriv": true,
"deriv_fast": true,
"irate": true,
"rate": true,
"lifetime": true,
"scrape_interval": true,
}
var rollupFuncsRemoveCounterResets = map[string]bool{
@@ -64,6 +84,7 @@ var rollupFuncsKeepMetricGroup = map[string]bool{
"max_over_time": true,
"quantile_over_time": true,
"rollup": true,
"geomean_over_time": true,
}
func getRollupArgIdx(funcName string) int {
@@ -92,8 +113,10 @@ type rollupFuncArg struct {
values []float64
timestamps []int64
idx int
step int64
currTimestamp int64
idx int
step int64
realPrevValue float64
}
func (rfa *rollupFuncArg) reset() {
@@ -101,8 +124,10 @@ func (rfa *rollupFuncArg) reset() {
rfa.prevTimestamp = 0
rfa.values = nil
rfa.timestamps = nil
rfa.currTimestamp = 0
rfa.idx = 0
rfa.step = 0
rfa.realPrevValue = nan
}
// rollupFunc must return rollup value for the given rfa.
@@ -120,7 +145,17 @@ type rollupConfig struct {
Step int64
Window int64
// Whether window may be adjusted to 2 x interval between data points.
// This is needed for functions which have dt in the denominator
// such as rate, deriv, etc.
// Without the adjustment their value would jump in unexpected directions
// when using window smaller than 2 x scrape_interval.
MayAdjustWindow bool
Timestamps []int64
// LookbackDelta is the analog of `-query.lookback-delta` from the Prometheus world.
LookbackDelta int64
}
var (
@@ -158,43 +193,48 @@ func (rc *rollupConfig) Do(dstValues []float64, values []float64, timestamps []i
dstValues = decimal.ExtendFloat64sCapacity(dstValues, len(rc.Timestamps))
maxPrevInterval := getMaxPrevInterval(timestamps)
if rc.LookbackDelta > 0 && maxPrevInterval > rc.LookbackDelta {
maxPrevInterval = rc.LookbackDelta
}
window := rc.Window
if window <= 0 {
window = rc.Step
}
if window < maxPrevInterval {
if rc.MayAdjustWindow && window < maxPrevInterval {
window = maxPrevInterval
}
rfa := getRollupFuncArg()
rfa.idx = 0
rfa.step = rc.Step
rfa.realPrevValue = nan
i := 0
j := 0
for _, ts := range rc.Timestamps {
tEnd := ts + rc.Step
ni := 0
nj := 0
for _, tEnd := range rc.Timestamps {
tStart := tEnd - window
n := sort.Search(len(timestamps)-i, func(n int) bool {
return timestamps[i+n] > tStart
})
i += n
ni = seekFirstTimestampIdxAfter(timestamps[i:], tStart, ni)
i += ni
if j < i {
j = i
}
n = sort.Search(len(timestamps)-j, func(n int) bool {
return timestamps[j+n] > tEnd
})
j += n
nj = seekFirstTimestampIdxAfter(timestamps[j:], tEnd, nj)
j += nj
rfa.prevValue = nan
rfa.prevTimestamp = tStart - maxPrevInterval
if i > 0 && timestamps[i-1] > rfa.prevTimestamp {
if i < len(timestamps) && i > 0 && timestamps[i-1] > rfa.prevTimestamp {
rfa.prevValue = values[i-1]
rfa.prevTimestamp = timestamps[i-1]
}
rfa.values = values[i:j]
rfa.timestamps = timestamps[i:j]
rfa.currTimestamp = tEnd
if i > 0 {
rfa.realPrevValue = values[i-1]
}
value := rc.Func(rfa)
rfa.idx++
dstValues = append(dstValues, value)
@@ -204,16 +244,98 @@ func (rc *rollupConfig) Do(dstValues []float64, values []float64, timestamps []i
return dstValues
}
func seekFirstTimestampIdxAfter(timestamps []int64, seekTimestamp int64, nHint int) int {
if len(timestamps) == 0 || timestamps[0] > seekTimestamp {
return 0
}
startIdx := nHint - 2
if startIdx < 0 {
startIdx = 0
}
if startIdx >= len(timestamps) {
startIdx = len(timestamps) - 1
}
endIdx := nHint + 2
if endIdx > len(timestamps) {
endIdx = len(timestamps)
}
if startIdx > 0 && timestamps[startIdx] <= seekTimestamp {
timestamps = timestamps[startIdx:]
endIdx -= startIdx
} else {
startIdx = 0
}
if endIdx < len(timestamps) && timestamps[endIdx] > seekTimestamp {
timestamps = timestamps[:endIdx]
}
if len(timestamps) < 16 {
// Fast path: the number of timestamps to search is small, so scan them all.
for i, timestamp := range timestamps {
if timestamp > seekTimestamp {
return startIdx + i
}
}
return startIdx + len(timestamps)
}
// Slow path: len(timestamps) is too big, so use binary search.
i := binarySearchInt64(timestamps, seekTimestamp+1)
return startIdx + int(i)
}
func binarySearchInt64(a []int64, v int64) uint {
// Copy-pasted sort.Search from https://golang.org/src/sort/search.go?s=2246:2286#L49
i, j := uint(0), uint(len(a))
for i < j {
h := (i + j) >> 1
if h < uint(len(a)) && a[h] < v {
i = h + 1
} else {
j = h
}
}
return i
}
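seekFirstTimestampIdxAfter exploits the monotonic walk in rollupConfig.Do: the offset found for the previous window (passed back as nHint) is usually within two elements of the next answer, so a tiny bounded scan replaces a full binary search on most steps. A sketch of the calling pattern (windowStarts is a hypothetical helper):

// windowStarts returns, for each window boundary, the absolute index of
// the first timestamp after it, reusing the previous offset as the hint.
func windowStarts(timestamps, boundaries []int64) []int {
	idxs := make([]int, 0, len(boundaries))
	i, hint := 0, 0
	for _, b := range boundaries {
		hint = seekFirstTimestampIdxAfter(timestamps[i:], b, hint)
		i += hint
		idxs = append(idxs, i)
	}
	return idxs
}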
func getMaxPrevInterval(timestamps []int64) int64 {
if len(timestamps) < 2 {
return int64(maxSilenceInterval)
}
d := (timestamps[len(timestamps)-1] - timestamps[0]) / int64(len(timestamps)-1)
if d <= 0 {
return 1
// Estimate scrape interval as 0.6 quantile for the first 100 intervals.
h := histogram.GetFast()
tsPrev := timestamps[0]
timestamps = timestamps[1:]
if len(timestamps) > 100 {
timestamps = timestamps[:100]
}
// Slightly increase d in order to handle possible jitter in scrape interval.
return d + (d / 16)
for _, ts := range timestamps {
h.Update(float64(ts - tsPrev))
tsPrev = ts
}
d := int64(h.Quantile(0.6))
histogram.PutFast(h)
if d <= 0 {
return int64(maxSilenceInterval)
}
// Increase d more for smaller scrape intervals in order to hide possible gaps
// when high jitter is present.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/139 .
if d <= 2*1000 {
return d + 4*d
}
if d <= 4*1000 {
return d + 2*d
}
if d <= 8*1000 {
return d + d
}
if d <= 16*1000 {
return d + d/2
}
if d <= 32*1000 {
return d + d/4
}
return d + d/8
}
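The staircase inflates the 0.6-quantile interval estimate more aggressively the smaller it is: a 1s scrape interval is widened 5x to 5s, a 3s interval 3x to 9s, while a 60s interval only grows by 1/8 to 67.5s. A compact restatement of the factors (inflate is a hypothetical helper mirroring the branches above):

// inflate restates the staircase above (d in milliseconds).
func inflate(d int64) int64 {
	switch {
	case d <= 2*1000:
		return d + 4*d // 5x for <=2s intervals
	case d <= 4*1000:
		return d + 2*d // 3x
	case d <= 8*1000:
		return d + d // 2x
	case d <= 16*1000:
		return d + d/2 // 1.5x
	case d <= 32*1000:
		return d + d/4 // 1.25x
	default:
		return d + d/8 // 1.125x for large intervals
	}
}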
func removeCounterResets(values []float64) {
@@ -246,12 +368,14 @@ func deltaValues(values []float64) {
if len(values) == 0 {
return
}
prevDelta := float64(0)
prevValue := values[0]
for i, v := range values[1:] {
values[i] = v - prevValue
prevDelta = v - prevValue
values[i] = prevDelta
prevValue = v
}
values[len(values)-1] = nan
values[len(values)-1] = prevDelta
}
func derivValues(values []float64, timestamps []int64) {
@@ -260,16 +384,23 @@ func derivValues(values []float64, timestamps []int64) {
if len(values) == 0 {
return
}
prevDeriv := float64(0)
prevValue := values[0]
prevTs := timestamps[0]
for i, v := range values[1:] {
ts := timestamps[i+1]
if ts == prevTs {
// Use the previous value for duplicate timestamps.
values[i] = prevDeriv
continue
}
dt := float64(ts-prevTs) * 1e-3
values[i] = (v - prevValue) / dt
prevDeriv = (v - prevValue) / dt
values[i] = prevDeriv
prevValue = v
prevTs = ts
}
values[len(values)-1] = nan
values[len(values)-1] = prevDeriv
}
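Both helpers now carry the last computed delta or derivative into the final slot instead of writing NaN, so the downstream rollup_delta and rollup_deriv results keep a value for the last point. A worked example for deltaValues, assuming it is in scope:

func exampleDeltaValues() {
	vals := []float64{1, 3, 6, 10}
	deltaValues(vals) // in-place
	fmt.Println(vals) // [2 3 4 4]: per-point deltas, last delta repeated
}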
type newRollupFunc func(args []interface{}) (rollupFunc, error)
@@ -296,11 +427,11 @@ func newRollupHoltWinters(args []interface{}) (rollupFunc, error) {
return nil, err
}
rf := func(rfa *rollupFuncArg) float64 {
// There is no need in handling NaNs here, since they must be cleanup up
// There is no need in handling NaNs here, since they must be cleaned up
// before calling rollup funcs.
values := rfa.values
if len(values) == 0 {
return nan
return rfa.prevValue
}
sf := sfs[rfa.idx]
if sf <= 0 || sf >= 1 {
@@ -342,41 +473,55 @@ func newRollupPredictLinear(args []interface{}) (rollupFunc, error) {
return nil, err
}
rf := func(rfa *rollupFuncArg) float64 {
// There is no need in handling NaNs here, since they must be cleanup up
// before calling rollup funcs.
values := rfa.values
timestamps := rfa.timestamps
if len(values) == 0 {
v, k := linearRegression(rfa)
if math.IsNaN(v) {
return nan
}
// See https://en.wikipedia.org/wiki/Simple_linear_regression#Numerical_example
// TODO: determine whether this shit really works.
tFirst := rfa.prevTimestamp
vSum := rfa.prevValue
if math.IsNaN(rfa.prevValue) {
tFirst = timestamps[0]
vSum = 0
}
tSum := float64(0)
tvSum := float64(0)
ttSum := float64(0)
for i, v := range values {
dt := float64(timestamps[i]-tFirst) * 1e-3
vSum += v
tSum += dt
tvSum += dt * v
ttSum += dt * dt
}
n := float64(len(values))
k := (n*tvSum - tSum*vSum) / (n*ttSum - tSum*tSum)
v := (vSum - k*tSum) / n
sec := secs[rfa.idx]
return v + k*sec
}
return rf, nil
}
func linearRegression(rfa *rollupFuncArg) (float64, float64) {
// There is no need in handling NaNs here, since they must be cleaned up
// before calling rollup funcs.
values := rfa.values
timestamps := rfa.timestamps
if len(values) == 0 {
return rfa.prevValue, 0
}
// See https://en.wikipedia.org/wiki/Simple_linear_regression#Numerical_example
tFirst := rfa.prevTimestamp
vSum := rfa.prevValue
tSum := float64(0)
tvSum := float64(0)
ttSum := float64(0)
n := 1.0
if math.IsNaN(rfa.prevValue) {
tFirst = timestamps[0]
vSum = 0
n = 0
}
for i, v := range values {
dt := float64(timestamps[i]-tFirst) * 1e-3
vSum += v
tSum += dt
tvSum += dt * v
ttSum += dt * dt
}
n += float64(len(values))
if n == 1 {
return vSum, 0
}
k := (n*tvSum - tSum*vSum) / (n*ttSum - tSum*tSum)
v := (vSum - k*tSum) / n
// Adjust v to the last timestamp on the given time range.
v += k * (float64(timestamps[len(timestamps)-1]-tFirst) * 1e-3)
return v, k
}
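linearRegression is now the shared core for deriv and predict_linear: it returns the fitted value adjusted to the last timestamp of the window together with the slope k in units per second, folding prevValue in as an extra sample when present. predict_linear then just projects forward, roughly like this sketch (predictAhead is a hypothetical name):

// predictAhead extrapolates sec seconds past the window end using the
// (value, slope) pair, as newRollupPredictLinear does per-point.
func predictAhead(rfa *rollupFuncArg, sec float64) float64 {
	v, k := linearRegression(rfa)
	if math.IsNaN(v) {
		return math.NaN()
	}
	return v + k*sec
}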
func newRollupQuantile(args []interface{}) (rollupFunc, error) {
if err := expectRollupArgsNum(args, 2); err != nil {
return nil, err
@@ -386,11 +531,15 @@ func newRollupQuantile(args []interface{}) (rollupFunc, error) {
return nil, err
}
rf := func(rfa *rollupFuncArg) float64 {
// There is no need in handling NaNs here, since they must be cleanup up
// There is no need in handling NaNs here, since they must be cleaned up
// before calling rollup funcs.
values := rfa.values
if len(values) == 0 {
return nan
return rfa.prevValue
}
if len(values) == 1 {
// Fast path - only a single value.
return values[0]
}
hf := histogram.GetFast()
for _, v := range values {
@@ -408,11 +557,11 @@ func rollupAvg(rfa *rollupFuncArg) float64 {
// Do not use `Rapid calculation methods` at https://en.wikipedia.org/wiki/Standard_deviation,
// since they are slower and bring no significant benefits in precision.
// There is no need in handling NaNs here, since they must be cleanup up
// There is no need in handling NaNs here, since they must be cleaned up
// before calling rollup funcs.
values := rfa.values
if len(values) == 0 {
return nan
return rfa.prevValue
}
var sum float64
for _, v := range values {
@@ -422,13 +571,16 @@ func rollupAvg(rfa *rollupFuncArg) float64 {
}
func rollupMin(rfa *rollupFuncArg) float64 {
// There is no need in handling NaNs here, since they must be cleanup up
// There is no need in handling NaNs here, since they must be cleaned up
// before calling rollup funcs.
minValue := rfa.prevValue
values := rfa.values
if len(values) == 0 {
return nan
if math.IsNaN(minValue) {
if len(values) == 0 {
return nan
}
minValue = values[0]
}
minValue := values[0]
for _, v := range values {
if v < minValue {
minValue = v
@@ -438,13 +590,16 @@ func rollupMin(rfa *rollupFuncArg) float64 {
}
func rollupMax(rfa *rollupFuncArg) float64 {
// There is no need in handling NaNs here, since they must be cleanup up
// There is no need in handling NaNs here, since they must be cleaned up
// before calling rollup funcs.
maxValue := rfa.prevValue
values := rfa.values
if len(values) == 0 {
return nan
if math.IsNaN(maxValue) {
if len(values) == 0 {
return nan
}
maxValue = values[0]
}
maxValue := values[0]
for _, v := range values {
if v > maxValue {
maxValue = v
@@ -454,11 +609,14 @@ func rollupMax(rfa *rollupFuncArg) float64 {
}
func rollupSum(rfa *rollupFuncArg) float64 {
// There is no need in handling NaNs here, since they must be cleanup up
// There is no need in handling NaNs here, since they must be cleaned up
// before calling rollup funcs.
values := rfa.values
if len(values) == 0 {
return nan
if math.IsNaN(rfa.prevValue) {
return nan
}
return 0
}
var sum float64
for _, v := range values {
@@ -467,12 +625,43 @@ func rollupSum(rfa *rollupFuncArg) float64 {
return sum
}
func rollupCount(rfa *rollupFuncArg) float64 {
// There is no need in handling NaNs here, since they must be cleanup up
func rollupSum2(rfa *rollupFuncArg) float64 {
// There is no need in handling NaNs here, since they must be cleaned up
// before calling rollup funcs.
values := rfa.values
if len(values) == 0 {
return nan
return rfa.prevValue * rfa.prevValue
}
var sum2 float64
for _, v := range values {
sum2 += v * v
}
return sum2
}
func rollupGeomean(rfa *rollupFuncArg) float64 {
// There is no need in handling NaNs here, since they must be cleaned up
// before calling rollup funcs.
values := rfa.values
if len(values) == 0 {
return rfa.prevValue
}
p := 1.0
for _, v := range values {
p *= v
}
return math.Pow(p, 1/float64(len(values)))
}
func rollupCount(rfa *rollupFuncArg) float64 {
// There is no need in handling NaNs here, since they must be cleaned up
// before calling rollup funcs.
values := rfa.values
if len(values) == 0 {
if math.IsNaN(rfa.prevValue) {
return nan
}
return 0
}
return float64(len(values))
}
@@ -485,11 +674,18 @@ func rollupStddev(rfa *rollupFuncArg) float64 {
func rollupStdvar(rfa *rollupFuncArg) float64 {
// See `Rapid calculation methods` at https://en.wikipedia.org/wiki/Standard_deviation
// There is no need in handling NaNs here, since they must be cleanup up
// There is no need in handling NaNs here, since they must be cleaned up
// before calling rollup funcs.
values := rfa.values
if len(values) == 0 {
return nan
if math.IsNaN(rfa.prevValue) {
return nan
}
return 0
}
if len(values) == 1 {
// Fast path.
return values[0]
}
var avg float64
var count float64
@@ -504,7 +700,15 @@ func rollupStdvar(rfa *rollupFuncArg) float64 {
}
func rollupDelta(rfa *rollupFuncArg) float64 {
// There is no need in handling NaNs here, since they must be cleanup up
return rollupDeltaInternal(rfa, false)
}
func rollupIncrease(rfa *rollupFuncArg) float64 {
return rollupDeltaInternal(rfa, true)
}
func rollupDeltaInternal(rfa *rollupFuncArg, canUseRealPrevValue bool) float64 {
// There is no need in handling NaNs here, since they must be cleaned up
// before calling rollup funcs.
values := rfa.values
prevValue := rfa.prevValue
@@ -512,43 +716,65 @@ func rollupDelta(rfa *rollupFuncArg) float64 {
if len(values) == 0 {
return nan
}
if len(values) == 1 {
if canUseRealPrevValue && !math.IsNaN(rfa.realPrevValue) {
// Fix against removeCounterResets.
return values[0] - rfa.realPrevValue
}
// Assume that the previous non-existing value was 0.
return values[0]
}
prevValue = values[0]
values = values[1:]
}
if len(values) == 0 {
return nan
// Assume that the value didn't change on the given interval.
return 0
}
return values[len(values)-1] - prevValue
}
func rollupIdelta(rfa *rollupFuncArg) float64 {
// There is no need in handling NaNs here, since they must be cleanup up
// There is no need in handling NaNs here, since they must be cleaned up
// before calling rollup funcs.
values := rfa.values
if len(values) == 0 {
return nan
if math.IsNaN(rfa.prevValue) {
return nan
}
// Assume that the value didn't change on the given interval.
return 0
}
lastValue := values[len(values)-1]
values = values[:len(values)-1]
if len(values) == 0 {
prevValue := rfa.prevValue
if math.IsNaN(prevValue) {
return nan
// Assume that the previous non-existing value was 0.
return lastValue
}
return lastValue - prevValue
}
return lastValue - values[len(values)-1]
}
func rollupDeriv(rfa *rollupFuncArg) float64 {
// There is no need in handling NaNs here, since they must be cleanup up
func rollupDerivSlow(rfa *rollupFuncArg) float64 {
// Use linear regression like Prometheus does.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/73
_, k := linearRegression(rfa)
return k
}
func rollupDerivFast(rfa *rollupFuncArg) float64 {
// There is no need in handling NaNs here, since they must be cleaned up
// before calling rollup funcs.
values := rfa.values
timestamps := rfa.timestamps
prevValue := rfa.prevValue
prevTimestamp := rfa.prevTimestamp
if math.IsNaN(prevValue) {
if len(values) == 0 {
if len(values) < 2 {
// It is impossible to calculate derivative on 0 or 1 values.
return nan
}
prevValue = values[0]
@@ -557,7 +783,8 @@ func rollupDeriv(rfa *rollupFuncArg) float64 {
timestamps = timestamps[1:]
}
if len(values) == 0 {
return nan
// Assume that the value didn't change on the given interval.
return 0
}
vEnd := values[len(values)-1]
tEnd := timestamps[len(timestamps)-1]
@@ -567,43 +794,97 @@ func rollupDeriv(rfa *rollupFuncArg) float64 {
}
func rollupIderiv(rfa *rollupFuncArg) float64 {
// There is no need in handling NaNs here, since they must be cleanup up
// There is no need in handling NaNs here, since they must be cleaned up
// before calling rollup funcs.
values := rfa.values
timestamps := rfa.timestamps
if len(values) == 0 {
return nan
if len(values) < 2 {
if len(values) == 0 || math.IsNaN(rfa.prevValue) {
// It is impossible to calculate derivative on 0 or 1 values.
return nan
}
return (values[0] - rfa.prevValue) / (float64(timestamps[0]-rfa.prevTimestamp) * 1e-3)
}
vEnd := values[len(values)-1]
tEnd := timestamps[len(timestamps)-1]
values = values[:len(values)-1]
timestamps = timestamps[:len(timestamps)-1]
prevValue := rfa.prevValue
prevTimestamp := rfa.prevTimestamp
if len(values) == 0 {
if math.IsNaN(prevValue) {
// Skip data points with duplicate timestamps.
for len(timestamps) > 0 && timestamps[len(timestamps)-1] >= tEnd {
timestamps = timestamps[:len(timestamps)-1]
}
var tStart int64
var vStart float64
if len(timestamps) == 0 {
if math.IsNaN(rfa.prevValue) {
return 0
}
tStart = rfa.prevTimestamp
vStart = rfa.prevValue
} else {
tStart = timestamps[len(timestamps)-1]
vStart = values[len(timestamps)-1]
}
dv := vEnd - vStart
dt := tEnd - tStart
return dv / (float64(dt) * 1e-3)
}
func rollupLifetime(rfa *rollupFuncArg) float64 {
// Calculate the duration between the first and the last data points.
timestamps := rfa.timestamps
if math.IsNaN(rfa.prevValue) {
if len(timestamps) < 2 {
return nan
}
} else {
prevValue = values[len(values)-1]
prevTimestamp = timestamps[len(timestamps)-1]
return float64(timestamps[len(timestamps)-1]-timestamps[0]) * 1e-3
}
dv := vEnd - prevValue
dt := tEnd - prevTimestamp
return dv / (float64(dt) / 1000)
if len(timestamps) == 0 {
return nan
}
return float64(timestamps[len(timestamps)-1]-rfa.prevTimestamp) * 1e-3
}
func rollupLag(rfa *rollupFuncArg) float64 {
// Calculate the duration between the current timestamp and the last data point.
timestamps := rfa.timestamps
if len(timestamps) == 0 {
if math.IsNaN(rfa.prevValue) {
return nan
}
return float64(rfa.currTimestamp-rfa.prevTimestamp) * 1e-3
}
return float64(rfa.currTimestamp-timestamps[len(timestamps)-1]) * 1e-3
}
func rollupScrapeInterval(rfa *rollupFuncArg) float64 {
// Calculate the average interval between data points.
timestamps := rfa.timestamps
if math.IsNaN(rfa.prevValue) {
if len(timestamps) < 2 {
return nan
}
return float64(timestamps[len(timestamps)-1]-timestamps[0]) * 1e-3 / float64(len(timestamps)-1)
}
if len(timestamps) == 0 {
return nan
}
return (float64(timestamps[len(timestamps)-1]-rfa.prevTimestamp) * 1e-3) / float64(len(timestamps))
}
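The three functions complement each other. For timestamps [0s, 10s, 20s] with the current window ending at 25s and no earlier point: lifetime returns 20 (first to last sample), lag returns 5 (window end to last sample), and scrape_interval returns 10 (the average gap, 20s over 2 intervals).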
func rollupChanges(rfa *rollupFuncArg) float64 {
// There is no need in handling NaNs here, since they must be cleanup up
// There is no need in handling NaNs here, since they must be cleaned up
// before calling rollup funcs.
values := rfa.values
if len(values) == 0 {
return nan
}
n := 0
prevValue := rfa.prevValue
n := 0
if math.IsNaN(prevValue) {
if len(values) == 0 {
return nan
}
prevValue = values[0]
values = values[1:]
n++
}
for _, v := range values {
if v != prevValue {
@@ -614,12 +895,15 @@ func rollupChanges(rfa *rollupFuncArg) float64 {
return float64(n)
}
func rollupResets(rfa *rollupFuncArg) float64 {
// There is no need in handling NaNs here, since they must be cleanup up
func rollupIncreases(rfa *rollupFuncArg) float64 {
// There is no need in handling NaNs here, since they must be cleaned up
// before calling rollup funcs.
values := rfa.values
if len(values) == 0 {
return nan
if math.IsNaN(rfa.prevValue) {
return nan
}
return 0
}
prevValue := rfa.prevValue
if math.IsNaN(prevValue) {
@@ -627,7 +911,38 @@ func rollupResets(rfa *rollupFuncArg) float64 {
values = values[1:]
}
if len(values) == 0 {
return nan
return 0
}
n := 0
for _, v := range values {
if v > prevValue {
n++
}
prevValue = v
}
return float64(n)
}
// `decreases_over_time` logic is the same as `resets` logic.
var rollupDecreases = rollupResets
func rollupResets(rfa *rollupFuncArg) float64 {
// There is no need in handling NaNs here, since they must be cleaned up
// before calling rollup funcs.
values := rfa.values
if len(values) == 0 {
if math.IsNaN(rfa.prevValue) {
return nan
}
return 0
}
prevValue := rfa.prevValue
if math.IsNaN(prevValue) {
prevValue = values[0]
values = values[1:]
}
if len(values) == 0 {
return 0
}
n := 0
for _, v := range values {
@@ -646,7 +961,7 @@ func rollupFirst(rfa *rollupFuncArg) float64 {
return v
}
// There is no need in handling NaNs here, since they must be cleanup up
// There is no need in handling NaNs here, since they must be cleaned up
// before calling rollup funcs.
values := rfa.values
if len(values) == 0 {
@@ -655,24 +970,27 @@ func rollupFirst(rfa *rollupFuncArg) float64 {
return values[0]
}
var rollupDefault = rollupFirst
var rollupDefault = rollupLast
func rollupLast(rfa *rollupFuncArg) float64 {
// There is no need in handling NaNs here, since they must be cleanup up
// There is no need in handling NaNs here, since they must be cleaned up
// before calling rollup funcs.
values := rfa.values
if len(values) == 0 {
return nan
return rfa.prevValue
}
return values[len(values)-1]
}
func rollupDistinct(rfa *rollupFuncArg) float64 {
// There is no need in handling NaNs here, since they must be cleanup up
// There is no need in handling NaNs here, since they must be cleaned up
// before calling rollup funcs.
values := rfa.values
if len(values) == 0 {
return nan
if math.IsNaN(rfa.prevValue) {
return nan
}
return 0
}
m := make(map[float64]struct{})
for _, v := range values {
@@ -684,12 +1002,15 @@ func rollupDistinct(rfa *rollupFuncArg) float64 {
func rollupIntegrate(rfa *rollupFuncArg) float64 {
prevTimestamp := rfa.prevTimestamp
// There is no need in handling NaNs here, since they must be cleanup up
// There is no need in handling NaNs here, since they must be cleaned up
// before calling rollup funcs.
values := rfa.values
timestamps := rfa.timestamps
if len(values) == 0 {
return nan
if math.IsNaN(rfa.prevValue) {
return nan
}
return 0
}
prevValue := rfa.prevValue
if math.IsNaN(prevValue) {
@@ -699,7 +1020,7 @@ func rollupIntegrate(rfa *rollupFuncArg) float64 {
timestamps = timestamps[1:]
}
if len(values) == 0 {
return nan
return 0
}
var sum float64
@@ -707,6 +1028,8 @@ func rollupIntegrate(rfa *rollupFuncArg) float64 {
timestamp := timestamps[i]
dt := float64(timestamp-prevTimestamp) * 1e-3
sum += 0.5 * (v + prevValue) * dt
prevTimestamp = timestamp
prevValue = v
}
return sum
}
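With the prevTimestamp/prevValue updates in place, rollupIntegrate is a proper trapezoidal rule. For samples (0s, 1), (10s, 3), (20s, 5) preceded by a point at (-10s, 1), the integral is 0.5*(1+1)*10 + 0.5*(1+3)*10 + 0.5*(3+5)*10 = 10 + 20 + 40 = 70; previously every term was anchored to the stale initial endpoint.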

View File

@@ -2,21 +2,26 @@ package promql
import (
"crypto/rand"
"flag"
"fmt"
"runtime"
"sync"
"sync/atomic"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/memory"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/workingsetcache"
"github.com/VictoriaMetrics/fastcache"
"github.com/VictoriaMetrics/metrics"
)
var disableCache = flag.Bool("search.disableCache", false, "Whether to disable response caching. This may be useful during data backfilling")
var rollupResultCacheV = &rollupResultCache{
fastcache.New(1024 * 1024), // This is a cache for testing.
c: workingsetcache.New(1024*1024, time.Hour), // This is a cache for testing.
}
var rollupResultCachePath string
@@ -37,16 +42,23 @@ var (
)
// InitRollupResultCache initializes the rollupResult cache
//
// If cachePath is empty, then the cache isn't stored on persistent disk.
func InitRollupResultCache(cachePath string) {
rollupResultCachePath = cachePath
startTime := time.Now()
var c *fastcache.Cache
cacheSize := getRollupResultCacheSize()
var c *workingsetcache.Cache
if len(rollupResultCachePath) > 0 {
logger.Infof("loading rollupResult cache from %q...", rollupResultCachePath)
c = fastcache.LoadFromFileOrNew(rollupResultCachePath, getRollupResultCacheSize())
c = workingsetcache.Load(rollupResultCachePath, cacheSize, time.Hour)
} else {
c = fastcache.New(getRollupResultCacheSize())
c = workingsetcache.New(cacheSize, time.Hour)
}
if *disableCache {
c.Reset()
}
stats := &fastcache.Stats{}
var statsLock sync.Mutex
var statsLastUpdate time.Time
@@ -64,7 +76,7 @@ func InitRollupResultCache(cachePath string) {
return stats
}
if len(rollupResultCachePath) > 0 {
logger.Infof("loaded rollupResult cache from %q in %s; entriesCount: %d, bytesSize: %d",
logger.Infof("loaded rollupResult cache from %q in %s; entriesCount: %d, sizeBytes: %d",
rollupResultCachePath, time.Since(startTime), fcs().EntriesCount, fcs().BytesSize)
}
@@ -89,25 +101,28 @@ func InitRollupResultCache(cachePath string) {
// StopRollupResultCache closes the rollupResult cache.
func StopRollupResultCache() {
if len(rollupResultCachePath) == 0 {
rollupResultCacheV.c.Reset()
rollupResultCacheV.c.Stop()
rollupResultCacheV.c = nil
return
}
gomaxprocs := runtime.GOMAXPROCS(-1)
logger.Infof("saving rollupResult cache to %q...", rollupResultCachePath)
startTime := time.Now()
if err := rollupResultCacheV.c.SaveToFileConcurrent(rollupResultCachePath, gomaxprocs); err != nil {
if err := rollupResultCacheV.c.Save(rollupResultCachePath); err != nil {
logger.Errorf("cannot close rollupResult cache at %q: %s", rollupResultCachePath, err)
} else {
var fcs fastcache.Stats
rollupResultCacheV.c.UpdateStats(&fcs)
rollupResultCacheV.c.Reset()
logger.Infof("saved rollupResult cache to %q in %s; entriesCount: %d, bytesSize: %d",
rollupResultCachePath, time.Since(startTime), fcs.EntriesCount, fcs.BytesSize)
return
}
var fcs fastcache.Stats
rollupResultCacheV.c.UpdateStats(&fcs)
rollupResultCacheV.c.Stop()
rollupResultCacheV.c = nil
logger.Infof("saved rollupResult cache to %q in %s; entriesCount: %d, sizeBytes: %d",
rollupResultCachePath, time.Since(startTime), fcs.EntriesCount, fcs.BytesSize)
}
// TODO: convert this cache to distributed cache shared among vmselect
// instances in the cluster.
type rollupResultCache struct {
c *fastcache.Cache
c *workingsetcache.Cache
}
var rollupResultCacheResets = metrics.NewCounter(`vm_cache_resets_total{type="promql/rollupResult"}`)
@@ -118,8 +133,8 @@ func ResetRollupResultCache() {
rollupResultCacheV.c.Reset()
}
func (rrc *rollupResultCache) Get(funcName string, ec *EvalConfig, me *metricExpr, window int64) (tss []*timeseries, newStart int64) {
if !ec.mayCache() {
func (rrc *rollupResultCache) Get(funcName string, ec *EvalConfig, me *metricExpr, iafc *incrementalAggrFuncContext, window int64) (tss []*timeseries, newStart int64) {
if *disableCache || !ec.mayCache() {
return nil, ec.Start
}
@@ -127,7 +142,7 @@ func (rrc *rollupResultCache) Get(funcName string, ec *EvalConfig, me *metricExp
bb := bbPool.Get()
defer bbPool.Put(bb)
bb.B = marshalRollupResultCacheKey(bb.B[:0], funcName, me, window, ec.Step)
bb.B = marshalRollupResultCacheKey(bb.B[:0], funcName, ec.AuthToken, me, iafc, window, ec.Step)
metainfoBuf := rrc.c.Get(nil, bb.B)
if len(metainfoBuf) == 0 {
return nil, ec.Start
@@ -141,15 +156,23 @@ func (rrc *rollupResultCache) Get(funcName string, ec *EvalConfig, me *metricExp
return nil, ec.Start
}
bb.B = key.Marshal(bb.B[:0])
resultBuf := rrc.c.GetBig(nil, bb.B)
if len(resultBuf) == 0 {
compressedResultBuf := resultBufPool.Get()
defer resultBufPool.Put(compressedResultBuf)
compressedResultBuf.B = rrc.c.GetBig(compressedResultBuf.B[:0], bb.B)
if len(compressedResultBuf.B) == 0 {
mi.RemoveKey(key)
metainfoBuf = mi.Marshal(metainfoBuf[:0])
bb.B = marshalRollupResultCacheKey(bb.B[:0], funcName, me, window, ec.Step)
bb.B = marshalRollupResultCacheKey(bb.B[:0], funcName, ec.AuthToken, me, iafc, window, ec.Step)
rrc.c.Set(bb.B, metainfoBuf)
return nil, ec.Start
}
tss, err := unmarshalTimeseriesFast(resultBuf)
// Decompress into newly allocated byte slice, since tss returned from unmarshalTimeseriesFast
// refers to the byte slice, so it cannot be returned to the resultBufPool.
resultBuf, err := encoding.DecompressZSTD(nil, compressedResultBuf.B)
if err != nil {
logger.Panicf("BUG: cannot decompress resultBuf from rollupResultCache: %s; it looks like it was improperly saved", err)
}
tss, err = unmarshalTimeseriesFast(resultBuf)
if err != nil {
logger.Panicf("BUG: cannot unmarshal timeseries from rollupResultCache: %s; it looks like it was improperly saved", err)
}
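So the Put/Get pair now stores compressed payloads: marshal, compress with zstd level 1, SetBig on write; GetBig, decompress into a freshly allocated slice, unmarshal on read. A minimal round-trip sketch using the same lib/encoding helpers (assuming only the signatures visible in this diff):

    // cacheRoundTrip sketches the compress-on-write / decompress-on-read flow.
    func cacheRoundTrip(payload []byte) ([]byte, error) {
    	// Write path: level 1 favors speed over compression ratio for hot cache entries.
    	compressed := encoding.CompressZSTDLevel(nil, payload, 1)
    	// Read path: decompress into a new slice, because the timeseries
    	// unmarshaled from it keep aliasing the buffer, so the buffer can
    	// never be returned to a pool.
    	return encoding.DecompressZSTD(nil, compressed)
    }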
@@ -189,8 +212,10 @@ func (rrc *rollupResultCache) Get(funcName string, ec *EvalConfig, me *metricExp
return tss, newStart
}
func (rrc *rollupResultCache) Put(funcName string, ec *EvalConfig, me *metricExpr, window int64, tss []*timeseries) {
if len(tss) == 0 || !ec.mayCache() {
var resultBufPool bytesutil.ByteBufferPool
func (rrc *rollupResultCache) Put(funcName string, ec *EvalConfig, me *metricExpr, iafc *incrementalAggrFuncContext, window int64, tss []*timeseries) {
if *disableCache || len(tss) == 0 || !ec.mayCache() {
return
}
@@ -220,11 +245,16 @@ func (rrc *rollupResultCache) Put(funcName string, ec *EvalConfig, me *metricExp
// Store tss in the cache.
maxMarshaledSize := getRollupResultCacheSize() / 4
tssMarshaled := marshalTimeseriesFast(tss, maxMarshaledSize, ec.Step)
if tssMarshaled == nil {
resultBuf := resultBufPool.Get()
defer resultBufPool.Put(resultBuf)
resultBuf.B = marshalTimeseriesFast(resultBuf.B[:0], tss, maxMarshaledSize, ec.Step)
if len(resultBuf.B) == 0 {
tooBigRollupResults.Inc()
return
}
compressedResultBuf := resultBufPool.Get()
defer resultBufPool.Put(compressedResultBuf)
compressedResultBuf.B = encoding.CompressZSTDLevel(compressedResultBuf.B[:0], resultBuf.B, 1)
bb := bbPool.Get()
defer bbPool.Put(bb)
@@ -233,9 +263,9 @@ func (rrc *rollupResultCache) Put(funcName string, ec *EvalConfig, me *metricExp
key.prefix = rollupResultCacheKeyPrefix
key.suffix = atomic.AddUint64(&rollupResultCacheKeySuffix, 1)
bb.B = key.Marshal(bb.B[:0])
rrc.c.SetBig(bb.B, tssMarshaled)
rrc.c.SetBig(bb.B, compressedResultBuf.B)
bb.B = marshalRollupResultCacheKey(bb.B[:0], funcName, me, window, ec.Step)
bb.B = marshalRollupResultCacheKey(bb.B[:0], funcName, ec.AuthToken, me, iafc, window, ec.Step)
metainfoBuf := rrc.c.Get(nil, bb.B)
var mi rollupResultCacheMetainfo
if len(metainfoBuf) > 0 {
@@ -263,10 +293,18 @@ var (
var tooBigRollupResults = metrics.NewCounter("vm_too_big_rollup_results_total")
// Increment this value every time the format of the cache changes.
const rollupResultCacheVersion = 4
const rollupResultCacheVersion = 6
func marshalRollupResultCacheKey(dst []byte, funcName string, me *metricExpr, window, step int64) []byte {
func marshalRollupResultCacheKey(dst []byte, funcName string, at *auth.Token, me *metricExpr, iafc *incrementalAggrFuncContext, window, step int64) []byte {
dst = append(dst, rollupResultCacheVersion)
if iafc == nil {
dst = append(dst, 0)
} else {
dst = append(dst, 1)
dst = iafc.ae.AppendString(dst)
}
dst = encoding.MarshalUint32(dst, at.AccountID)
dst = encoding.MarshalUint32(dst, at.ProjectID)
dst = encoding.MarshalUint64(dst, uint64(len(funcName)))
dst = append(dst, funcName...)
dst = encoding.MarshalInt64(dst, window)
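Piecing the version-6 key together from this hunk (step is presumably marshaled right after window, matching the signature), the layout appears to be:

    key = [version:1][iafc flag:1][iafc.ae string, if flag==1]
          [AccountID:4][ProjectID:4][len(funcName):8][funcName][window:8][step:8]

Bumping rollupResultCacheVersion from 4 to 6 invalidates all previously cached entries, which is required here since old keys lack the auth token and iafc fields.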

View File

@@ -3,6 +3,7 @@ package promql
import (
"testing"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
)
@@ -15,6 +16,11 @@ func TestRollupResultCache(t *testing.T) {
End: 2000,
Step: 200,
AuthToken: &auth.Token{
AccountID: 333,
ProjectID: 843,
},
MayCache: true,
}
me := &metricExpr{
@@ -23,10 +29,15 @@ func TestRollupResultCache(t *testing.T) {
Value: []byte("xxx"),
}},
}
iafc := &incrementalAggrFuncContext{
ae: &aggrFuncExpr{
Name: "foobar",
},
}
// Try obtaining an empty value.
t.Run("empty", func(t *testing.T) {
tss, newStart := rollupResultCacheV.Get(funcName, ec, me, window)
tss, newStart := rollupResultCacheV.Get(funcName, ec, me, nil, window)
if newStart != ec.Start {
t.Fatalf("unexpected newStart; got %d; want %d", newStart, ec.Start)
}
@@ -36,21 +47,42 @@ func TestRollupResultCache(t *testing.T) {
})
// Store timeseries overlapping with start
t.Run("start-overlap", func(t *testing.T) {
t.Run("start-overlap-no-iafc", func(t *testing.T) {
ResetRollupResultCache()
tss := []*timeseries{
&timeseries{
{
Timestamps: []int64{800, 1000, 1200},
Values: []float64{0, 1, 2},
},
}
rollupResultCacheV.Put(funcName, ec, me, window, tss)
tss, newStart := rollupResultCacheV.Get(funcName, ec, me, window)
rollupResultCacheV.Put(funcName, ec, me, nil, window, tss)
tss, newStart := rollupResultCacheV.Get(funcName, ec, me, nil, window)
if newStart != 1400 {
t.Fatalf("unexpected newStart; got %d; want %d", newStart, 1400)
}
tssExpected := []*timeseries{
&timeseries{
{
Timestamps: []int64{1000, 1200},
Values: []float64{1, 2},
},
}
testTimeseriesEqual(t, tss, tssExpected)
})
t.Run("start-overlap-with-iafc", func(t *testing.T) {
ResetRollupResultCache()
tss := []*timeseries{
{
Timestamps: []int64{800, 1000, 1200},
Values: []float64{0, 1, 2},
},
}
rollupResultCacheV.Put(funcName, ec, me, iafc, window, tss)
tss, newStart := rollupResultCacheV.Get(funcName, ec, me, iafc, window)
if newStart != 1400 {
t.Fatalf("unexpected newStart; got %d; want %d", newStart, 1400)
}
tssExpected := []*timeseries{
{
Timestamps: []int64{1000, 1200},
Values: []float64{1, 2},
},
@@ -62,13 +94,13 @@ func TestRollupResultCache(t *testing.T) {
t.Run("end-overlap", func(t *testing.T) {
ResetRollupResultCache()
tss := []*timeseries{
&timeseries{
{
Timestamps: []int64{1800, 2000, 2200, 2400},
Values: []float64{333, 0, 1, 2},
},
}
rollupResultCacheV.Put(funcName, ec, me, window, tss)
tss, newStart := rollupResultCacheV.Get(funcName, ec, me, window)
rollupResultCacheV.Put(funcName, ec, me, nil, window, tss)
tss, newStart := rollupResultCacheV.Get(funcName, ec, me, nil, window)
if newStart != 1000 {
t.Fatalf("unexpected newStart; got %d; want %d", newStart, 1000)
}
@@ -81,13 +113,13 @@ func TestRollupResultCache(t *testing.T) {
t.Run("full-cover", func(t *testing.T) {
ResetRollupResultCache()
tss := []*timeseries{
&timeseries{
{
Timestamps: []int64{1200, 1400, 1600},
Values: []float64{0, 1, 2},
},
}
rollupResultCacheV.Put(funcName, ec, me, window, tss)
tss, newStart := rollupResultCacheV.Get(funcName, ec, me, window)
rollupResultCacheV.Put(funcName, ec, me, nil, window, tss)
tss, newStart := rollupResultCacheV.Get(funcName, ec, me, nil, window)
if newStart != 1000 {
t.Fatalf("unexpected newStart; got %d; want %d", newStart, 1000)
}
@@ -100,13 +132,13 @@ func TestRollupResultCache(t *testing.T) {
t.Run("before-start", func(t *testing.T) {
ResetRollupResultCache()
tss := []*timeseries{
&timeseries{
{
Timestamps: []int64{200, 400, 600},
Values: []float64{0, 1, 2},
},
}
rollupResultCacheV.Put(funcName, ec, me, window, tss)
tss, newStart := rollupResultCacheV.Get(funcName, ec, me, window)
rollupResultCacheV.Put(funcName, ec, me, nil, window, tss)
tss, newStart := rollupResultCacheV.Get(funcName, ec, me, nil, window)
if newStart != 1000 {
t.Fatalf("unexpected newStart; got %d; want %d", newStart, 1000)
}
@@ -119,13 +151,13 @@ func TestRollupResultCache(t *testing.T) {
t.Run("after-end", func(t *testing.T) {
ResetRollupResultCache()
tss := []*timeseries{
&timeseries{
{
Timestamps: []int64{2200, 2400, 2600},
Values: []float64{0, 1, 2},
},
}
rollupResultCacheV.Put(funcName, ec, me, window, tss)
tss, newStart := rollupResultCacheV.Get(funcName, ec, me, window)
rollupResultCacheV.Put(funcName, ec, me, nil, window, tss)
tss, newStart := rollupResultCacheV.Get(funcName, ec, me, nil, window)
if newStart != 1000 {
t.Fatalf("unexpected newStart; got %d; want %d", newStart, 1000)
}
@@ -138,18 +170,18 @@ func TestRollupResultCache(t *testing.T) {
t.Run("bigger-than-start-end", func(t *testing.T) {
ResetRollupResultCache()
tss := []*timeseries{
&timeseries{
{
Timestamps: []int64{800, 1000, 1200, 1400, 1600, 1800, 2000, 2200},
Values: []float64{0, 1, 2, 3, 4, 5, 6, 7},
},
}
rollupResultCacheV.Put(funcName, ec, me, window, tss)
tss, newStart := rollupResultCacheV.Get(funcName, ec, me, window)
rollupResultCacheV.Put(funcName, ec, me, nil, window, tss)
tss, newStart := rollupResultCacheV.Get(funcName, ec, me, nil, window)
if newStart != 2200 {
t.Fatalf("unexpected newStart; got %d; want %d", newStart, 2200)
}
tssExpected := []*timeseries{
&timeseries{
{
Timestamps: []int64{1000, 1200, 1400, 1600, 1800, 2000},
Values: []float64{1, 2, 3, 4, 5, 6},
},
@@ -161,18 +193,18 @@ func TestRollupResultCache(t *testing.T) {
t.Run("start-end-match", func(t *testing.T) {
ResetRollupResultCache()
tss := []*timeseries{
&timeseries{
{
Timestamps: []int64{1000, 1200, 1400, 1600, 1800, 2000},
Values: []float64{1, 2, 3, 4, 5, 6},
},
}
rollupResultCacheV.Put(funcName, ec, me, window, tss)
tss, newStart := rollupResultCacheV.Get(funcName, ec, me, window)
rollupResultCacheV.Put(funcName, ec, me, nil, window, tss)
tss, newStart := rollupResultCacheV.Get(funcName, ec, me, nil, window)
if newStart != 2200 {
t.Fatalf("unexpected newStart; got %d; want %d", newStart, 2200)
}
tssExpected := []*timeseries{
&timeseries{
{
Timestamps: []int64{1000, 1200, 1400, 1600, 1800, 2000},
Values: []float64{1, 2, 3, 4, 5, 6},
},
@@ -191,8 +223,8 @@ func TestRollupResultCache(t *testing.T) {
}
tss = append(tss, ts)
}
rollupResultCacheV.Put(funcName, ec, me, window, tss)
tssResult, newStart := rollupResultCacheV.Get(funcName, ec, me, window)
rollupResultCacheV.Put(funcName, ec, me, nil, window, tss)
tssResult, newStart := rollupResultCacheV.Get(funcName, ec, me, nil, window)
if newStart != 2200 {
t.Fatalf("unexpected newStart; got %d; want %d", newStart, 2200)
}
@@ -203,32 +235,32 @@ func TestRollupResultCache(t *testing.T) {
t.Run("multi-timeseries", func(t *testing.T) {
ResetRollupResultCache()
tss1 := []*timeseries{
&timeseries{
{
Timestamps: []int64{800, 1000, 1200},
Values: []float64{0, 1, 2},
},
}
tss2 := []*timeseries{
&timeseries{
{
Timestamps: []int64{1800, 2000, 2200, 2400},
Values: []float64{333, 0, 1, 2},
},
}
tss3 := []*timeseries{
&timeseries{
{
Timestamps: []int64{1200, 1400, 1600},
Values: []float64{0, 1, 2},
},
}
rollupResultCacheV.Put(funcName, ec, me, window, tss1)
rollupResultCacheV.Put(funcName, ec, me, window, tss2)
rollupResultCacheV.Put(funcName, ec, me, window, tss3)
tss, newStart := rollupResultCacheV.Get(funcName, ec, me, window)
rollupResultCacheV.Put(funcName, ec, me, nil, window, tss1)
rollupResultCacheV.Put(funcName, ec, me, nil, window, tss2)
rollupResultCacheV.Put(funcName, ec, me, nil, window, tss3)
tss, newStart := rollupResultCacheV.Get(funcName, ec, me, nil, window)
if newStart != 1400 {
t.Fatalf("unexpected newStart; got %d; want %d", newStart, 1400)
}
tssExpected := []*timeseries{
&timeseries{
{
Timestamps: []int64{1000, 1200},
Values: []float64{1, 2},
},
@@ -249,14 +281,14 @@ func TestMergeTimeseries(t *testing.T) {
t.Run("bStart=ec.Start", func(t *testing.T) {
a := []*timeseries{}
b := []*timeseries{
&timeseries{
{
Timestamps: []int64{1000, 1200, 1400, 1600, 1800, 2000},
Values: []float64{1, 2, 3, 4, 5, 6},
},
}
tss := mergeTimeseries(a, b, 1000, ec)
tssExpected := []*timeseries{
&timeseries{
{
Timestamps: []int64{1000, 1200, 1400, 1600, 1800, 2000},
Values: []float64{1, 2, 3, 4, 5, 6},
},
@@ -266,14 +298,14 @@ func TestMergeTimeseries(t *testing.T) {
t.Run("a-empty", func(t *testing.T) {
a := []*timeseries{}
b := []*timeseries{
&timeseries{
{
Timestamps: []int64{1400, 1600, 1800, 2000},
Values: []float64{3, 4, 5, 6},
},
}
tss := mergeTimeseries(a, b, bStart, ec)
tssExpected := []*timeseries{
&timeseries{
{
Timestamps: []int64{1000, 1200, 1400, 1600, 1800, 2000},
Values: []float64{nan, nan, 3, 4, 5, 6},
},
@@ -282,7 +314,7 @@ func TestMergeTimeseries(t *testing.T) {
})
t.Run("b-empty", func(t *testing.T) {
a := []*timeseries{
&timeseries{
{
Timestamps: []int64{1000, 1200},
Values: []float64{2, 1},
},
@@ -290,7 +322,7 @@ func TestMergeTimeseries(t *testing.T) {
b := []*timeseries{}
tss := mergeTimeseries(a, b, bStart, ec)
tssExpected := []*timeseries{
&timeseries{
{
Timestamps: []int64{1000, 1200, 1400, 1600, 1800, 2000},
Values: []float64{2, 1, nan, nan, nan, nan},
},
@@ -299,20 +331,20 @@ func TestMergeTimeseries(t *testing.T) {
})
t.Run("non-empty", func(t *testing.T) {
a := []*timeseries{
&timeseries{
{
Timestamps: []int64{1000, 1200},
Values: []float64{2, 1},
},
}
b := []*timeseries{
&timeseries{
{
Timestamps: []int64{1400, 1600, 1800, 2000},
Values: []float64{3, 4, 5, 6},
},
}
tss := mergeTimeseries(a, b, bStart, ec)
tssExpected := []*timeseries{
&timeseries{
{
Timestamps: []int64{1000, 1200, 1400, 1600, 1800, 2000},
Values: []float64{2, 1, 3, 4, 5, 6},
},
@@ -321,14 +353,14 @@ func TestMergeTimeseries(t *testing.T) {
})
t.Run("non-empty-distinct-metric-names", func(t *testing.T) {
a := []*timeseries{
&timeseries{
{
Timestamps: []int64{1000, 1200},
Values: []float64{2, 1},
},
}
a[0].MetricName.MetricGroup = []byte("bar")
b := []*timeseries{
&timeseries{
{
Timestamps: []int64{1400, 1600, 1800, 2000},
Values: []float64{3, 4, 5, 6},
},
@@ -336,14 +368,14 @@ func TestMergeTimeseries(t *testing.T) {
b[0].MetricName.MetricGroup = []byte("foo")
tss := mergeTimeseries(a, b, bStart, ec)
tssExpected := []*timeseries{
&timeseries{
{
MetricName: storage.MetricName{
MetricGroup: []byte("foo"),
},
Timestamps: []int64{1000, 1200, 1400, 1600, 1800, 2000},
Values: []float64{nan, nan, 3, 4, 5, 6},
},
&timeseries{
{
MetricName: storage.MetricName{
MetricGroup: []byte("bar"),
},
@@ -362,7 +394,7 @@ func testTimeseriesEqual(t *testing.T, tss, tssExpected []*timeseries) {
}
for i, ts := range tss {
tsExpected := tssExpected[i]
testMetricNamesEqual(t, &ts.MetricName, &tsExpected.MetricName)
testMetricNamesEqual(t, &ts.MetricName, &tsExpected.MetricName, i)
testRowsEqual(t, ts.Values, ts.Timestamps, tsExpected.Values, tsExpected.Timestamps)
}
}

View File

@@ -10,6 +10,79 @@ var (
testTimestamps = []int64{5, 15, 24, 36, 49, 60, 78, 80, 97, 115, 120, 130}
)
func TestRollupIderivDuplicateTimestamps(t *testing.T) {
rfa := &rollupFuncArg{
values: []float64{1, 2, 3, 4, 5},
timestamps: []int64{100, 100, 200, 300, 300},
}
n := rollupIderiv(rfa)
if n != 20 {
t.Fatalf("unexpected value; got %v; want %v", n, 20)
}
rfa = &rollupFuncArg{
values: []float64{1, 2, 3, 4, 5},
timestamps: []int64{100, 100, 300, 300, 300},
}
n = rollupIderiv(rfa)
if n != 15 {
t.Fatalf("unexpected value; got %v; want %v", n, 15)
}
rfa = &rollupFuncArg{
prevValue: nan,
values: []float64{},
timestamps: []int64{},
}
n = rollupIderiv(rfa)
if !math.IsNaN(n) {
t.Fatalf("unexpected value; got %v; want %v", n, nan)
}
rfa = &rollupFuncArg{
prevValue: nan,
values: []float64{15},
timestamps: []int64{100},
}
n = rollupIderiv(rfa)
if !math.IsNaN(n) {
t.Fatalf("unexpected value; got %v; want %v", n, nan)
}
rfa = &rollupFuncArg{
prevTimestamp: 90,
prevValue: 10,
values: []float64{15},
timestamps: []int64{100},
}
n = rollupIderiv(rfa)
if n != 500 {
t.Fatalf("unexpected value; got %v; want %v", n, 0.5)
}
rfa = &rollupFuncArg{
prevTimestamp: 100,
prevValue: 10,
values: []float64{15},
timestamps: []int64{100},
}
n = rollupIderiv(rfa)
if n != inf {
t.Fatalf("unexpected value; got %v; want %v", n, inf)
}
rfa = &rollupFuncArg{
prevTimestamp: 100,
prevValue: 10,
values: []float64{15, 20},
timestamps: []int64{100, 100},
}
n = rollupIderiv(rfa)
if n != inf {
t.Fatalf("unexpected value; got %v; want %v", n, inf)
}
}
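These cases pin down ideriv's slope arithmetic: it divides the delta between the last two samples with distinct timestamps (falling back to prevValue/prevTimestamp when the tail timestamps repeat) by the time delta in seconds. For instance, prevValue=10 at t=90ms and value=15 at t=100ms give (15-10)/0.01 = 500, and coinciding timestamps make the denominator zero, hence inf.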
func TestRemoveCounterResets(t *testing.T) {
removeCounterResets(nil)
@@ -38,19 +111,19 @@ func TestDeltaValues(t *testing.T) {
values := []float64{123}
deltaValues(values)
valuesExpected := []float64{nan}
valuesExpected := []float64{0}
testRowsEqual(t, values, testTimestamps[:1], valuesExpected, testTimestamps[:1])
values = append([]float64{}, testValues...)
deltaValues(values)
valuesExpected = []float64{-89, 10, -23, 33, -20, 65, -87, 32, -12, 2, 0, nan}
valuesExpected = []float64{-89, 10, -23, 33, -20, 65, -87, 32, -12, 2, 0, 0}
testRowsEqual(t, values, testTimestamps, valuesExpected, testTimestamps)
// remove counter resets
values = append([]float64{}, testValues...)
removeCounterResets(values)
deltaValues(values)
valuesExpected = []float64{34, 10, 21, 33, 34, 65, 12, 32, 32, 2, 0, nan}
valuesExpected = []float64{34, 10, 21, 33, 34, 65, 12, 32, 32, 2, 0, 0}
testRowsEqual(t, values, testTimestamps, valuesExpected, testTimestamps)
}
@@ -59,13 +132,13 @@ func TestDerivValues(t *testing.T) {
values := []float64{123}
derivValues(values, testTimestamps[:1])
valuesExpected := []float64{nan}
valuesExpected := []float64{0}
testRowsEqual(t, values, testTimestamps[:1], valuesExpected, testTimestamps[:1])
values = append([]float64{}, testValues...)
derivValues(values, testTimestamps)
valuesExpected = []float64{-8900, 1111.111111111111, -1916.6666666666665, 2538.461538461538, -1818.1818181818182, 3611.111111111111,
-43500, 1882.3529411764705, -666.6666666666666, 400, 0, nan}
-43500, 1882.3529411764705, -666.6666666666666, 400, 0, 0}
testRowsEqual(t, values, testTimestamps, valuesExpected, testTimestamps)
// remove counter resets
@@ -73,8 +146,15 @@ func TestDerivValues(t *testing.T) {
removeCounterResets(values)
derivValues(values, testTimestamps)
valuesExpected = []float64{3400, 1111.111111111111, 1750, 2538.461538461538, 3090.909090909091, 3611.111111111111,
6000, 1882.3529411764705, 1777.7777777777776, 400, 0, nan}
6000, 1882.3529411764705, 1777.7777777777776, 400, 0, 0}
testRowsEqual(t, values, testTimestamps, valuesExpected, testTimestamps)
// duplicate timestamps
values = []float64{1, 2, 3, 4, 5, 6, 7}
timestamps := []int64{100, 100, 200, 200, 300, 400, 400}
derivValues(values, timestamps)
valuesExpected = []float64{0, 20, 20, 20, 10, 10, 10}
testRowsEqual(t, values, timestamps, valuesExpected, timestamps)
}
func testRollupFunc(t *testing.T, funcName string, args []interface{}, meExpected *metricExpr, vExpected float64) {
@@ -102,7 +182,8 @@ func testRollupFunc(t *testing.T, funcName string, args []interface{}, meExpecte
t.Fatalf("unexpected value; got %v; want %v", v, vExpected)
}
} else {
if v != vExpected {
eps := math.Abs(v - vExpected)
if eps > 1e-14 {
t.Fatalf("unexpected value; got %v; want %v", v, vExpected)
}
}
@@ -143,10 +224,10 @@ func TestRollupPredictLinear(t *testing.T) {
testRollupFunc(t, "predict_linear", args, &me, vExpected)
}
f(0e-3, 63.739757761102624)
f(50e-3, 50.39682764539959)
f(100e-3, 37.053897529696556)
f(200e-3, 10.368037298290488)
f(0e-3, 30.382432471845043)
f(50e-3, 17.03950235614201)
f(100e-3, 3.696572240438975)
f(200e-3, -22.989287990967092)
}
func TestRollupHoltWinters(t *testing.T) {
@@ -189,10 +270,11 @@ func TestRollupNewRollupFuncSuccess(t *testing.T) {
testRollupFunc(t, funcName, args, &me, vExpected)
}
f("default_rollup", 123)
f("changes", 10)
f("default_rollup", 34)
f("changes", 11)
f("delta", -89)
f("deriv", -712)
f("deriv", -266.85860231406065)
f("deriv_fast", -712)
f("idelta", 0)
f("increase", 275)
f("irate", 0)
@@ -202,12 +284,18 @@ func TestRollupNewRollupFuncSuccess(t *testing.T) {
f("min_over_time", 12)
f("max_over_time", 123)
f("sum_over_time", 565)
f("sum2_over_time", 37951)
f("geomean_over_time", 39.33466603189148)
f("count_over_time", 12)
f("stddev_over_time", 30.752935722554287)
f("stdvar_over_time", 945.7430555555555)
f("first_over_time", 123)
f("last_over_time", 34)
f("integrate", 61.0275)
f("integrate", 5.4705)
f("distinct_over_time", 8)
f("ideriv", 0)
f("decreases_over_time", 5)
f("increases_over_time", 5)
}
func TestRollupNewRollupFuncError(t *testing.T) {
@@ -259,7 +347,7 @@ func TestRollupNoWindowNoPoints(t *testing.T) {
}
rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
values := rc.Do(nil, testValues, testTimestamps)
valuesExpected := []float64{nan, nan, nan, nan, 123}
valuesExpected := []float64{nan, nan, nan, nan, nan}
timestampsExpected := []int64{0, 1, 2, 3, 4}
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
})
@@ -267,14 +355,14 @@ func TestRollupNoWindowNoPoints(t *testing.T) {
rc := rollupConfig{
Func: rollupDelta,
Start: 120,
End: 144,
End: 148,
Step: 4,
Window: 0,
}
rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
values := rc.Do(nil, testValues, testTimestamps)
valuesExpected := []float64{2, 2, 0, 0, 0, nan, nan}
timestampsExpected := []int64{120, 124, 128, 132, 136, 140, 144}
valuesExpected := []float64{2, 0, 0, 0, nan, nan, nan, nan}
timestampsExpected := []int64{120, 124, 128, 132, 136, 140, 144, 148}
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
})
}
@@ -290,22 +378,22 @@ func TestRollupWindowNoPoints(t *testing.T) {
}
rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
values := rc.Do(nil, testValues, testTimestamps)
valuesExpected := []float64{nan, nan, nan, nan, 123}
valuesExpected := []float64{nan, nan, nan, nan, nan}
timestampsExpected := []int64{0, 1, 2, 3, 4}
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
})
t.Run("afterEnd", func(t *testing.T) {
rc := rollupConfig{
Func: rollupFirst,
Start: 141,
End: 171,
Start: 161,
End: 191,
Step: 10,
Window: 3,
}
rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
values := rc.Do(nil, testValues, testTimestamps)
valuesExpected := []float64{34, nan, nan, nan}
timestampsExpected := []int64{141, 151, 161, 171}
valuesExpected := []float64{nan, nan, nan, nan}
timestampsExpected := []int64{161, 171, 181, 191}
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
})
}
@@ -315,14 +403,14 @@ func TestRollupNoWindowPartialPoints(t *testing.T) {
rc := rollupConfig{
Func: rollupFirst,
Start: 0,
End: 20,
End: 25,
Step: 5,
Window: 0,
}
rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
values := rc.Do(nil, testValues, testTimestamps)
valuesExpected := []float64{123, 123, 123, 123, 123}
timestampsExpected := []int64{0, 5, 10, 15, 20}
valuesExpected := []float64{nan, 123, 123, 123, 34, 34}
timestampsExpected := []int64{0, 5, 10, 15, 20, 25}
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
})
t.Run("afterEnd", func(t *testing.T) {
@@ -335,7 +423,7 @@ func TestRollupNoWindowPartialPoints(t *testing.T) {
}
rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
values := rc.Do(nil, testValues, testTimestamps)
valuesExpected := []float64{44, 34, 34, nan}
valuesExpected := []float64{12, 44, 34, nan}
timestampsExpected := []int64{100, 120, 140, 160}
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
})
@@ -349,7 +437,7 @@ func TestRollupNoWindowPartialPoints(t *testing.T) {
}
rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
values := rc.Do(nil, testValues, testTimestamps)
valuesExpected := []float64{nan, 123, 54, 44, nan}
valuesExpected := []float64{nan, nan, 123, 54, 44}
timestampsExpected := []int64{-50, 0, 50, 100, 150}
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
})
@@ -366,7 +454,7 @@ func TestRollupWindowPartialPoints(t *testing.T) {
}
rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
values := rc.Do(nil, testValues, testTimestamps)
valuesExpected := []float64{123, 123, 34, 34, 44}
valuesExpected := []float64{nan, 123, 123, 34, 34}
timestampsExpected := []int64{0, 5, 10, 15, 20}
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
})
@@ -380,7 +468,7 @@ func TestRollupWindowPartialPoints(t *testing.T) {
}
rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
values := rc.Do(nil, testValues, testTimestamps)
valuesExpected := []float64{34, 34, nan, nan}
valuesExpected := []float64{44, 34, 34, nan}
timestampsExpected := []int64{100, 120, 140, 160}
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
})
@@ -394,12 +482,57 @@ func TestRollupWindowPartialPoints(t *testing.T) {
}
rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
values := rc.Do(nil, testValues, testTimestamps)
valuesExpected := []float64{54, 44, nan, nan}
valuesExpected := []float64{nan, 54, 44, nan}
timestampsExpected := []int64{0, 50, 100, 150}
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
})
}
func TestRollupFuncsLookbackDelta(t *testing.T) {
t.Run("1", func(t *testing.T) {
rc := rollupConfig{
Func: rollupFirst,
Start: 80,
End: 140,
Step: 10,
LookbackDelta: 1,
}
rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
values := rc.Do(nil, testValues, testTimestamps)
valuesExpected := []float64{99, 12, 44, nan, 32, 34, nan}
timestampsExpected := []int64{80, 90, 100, 110, 120, 130, 140}
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
})
t.Run("7", func(t *testing.T) {
rc := rollupConfig{
Func: rollupFirst,
Start: 80,
End: 140,
Step: 10,
LookbackDelta: 7,
}
rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
values := rc.Do(nil, testValues, testTimestamps)
valuesExpected := []float64{99, 12, 44, 44, 32, 34, nan}
timestampsExpected := []int64{80, 90, 100, 110, 120, 130, 140}
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
})
t.Run("0", func(t *testing.T) {
rc := rollupConfig{
Func: rollupFirst,
Start: 80,
End: 140,
Step: 10,
LookbackDelta: 0,
}
rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
values := rc.Do(nil, testValues, testTimestamps)
valuesExpected := []float64{34, 12, 12, 44, 44, 34, nan}
timestampsExpected := []int64{80, 90, 100, 110, 120, 130, 140}
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
})
}
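Taken together, the three cases suggest LookbackDelta bounds how far back rollupFirst may reach for data when no explicit Window is given: the smaller the delta, the more output timestamps are left as nan, while 0 disables the bound entirely. The exact window arithmetic lives in rc.Do and is not shown in this diff.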
func TestRollupFuncsNoWindow(t *testing.T) {
t.Run("first", func(t *testing.T) {
rc := rollupConfig{
@@ -411,7 +544,7 @@ func TestRollupFuncsNoWindow(t *testing.T) {
}
rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
values := rc.Do(nil, testValues, testTimestamps)
valuesExpected := []float64{123, 21, 12, 34, nan}
valuesExpected := []float64{nan, 123, 21, 12, 34}
timestampsExpected := []int64{0, 40, 80, 120, 160}
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
})
@@ -425,7 +558,7 @@ func TestRollupFuncsNoWindow(t *testing.T) {
}
rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
values := rc.Do(nil, testValues, testTimestamps)
valuesExpected := []float64{4, 4, 3, 1, nan}
valuesExpected := []float64{nan, 4, 4, 3, 1}
timestampsExpected := []int64{0, 40, 80, 120, 160}
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
})
@@ -439,7 +572,7 @@ func TestRollupFuncsNoWindow(t *testing.T) {
}
rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
values := rc.Do(nil, testValues, testTimestamps)
valuesExpected := []float64{21, 12, 32, 34, nan}
valuesExpected := []float64{nan, 21, 12, 12, 34}
timestampsExpected := []int64{0, 40, 80, 120, 160}
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
})
@@ -453,7 +586,7 @@ func TestRollupFuncsNoWindow(t *testing.T) {
}
rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
values := rc.Do(nil, testValues, testTimestamps)
valuesExpected := []float64{123, 99, 44, 34, nan}
valuesExpected := []float64{nan, 123, 99, 44, 34}
timestampsExpected := []int64{0, 40, 80, 120, 160}
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
})
@@ -467,7 +600,7 @@ func TestRollupFuncsNoWindow(t *testing.T) {
}
rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
values := rc.Do(nil, testValues, testTimestamps)
valuesExpected := []float64{222, 199, 110, 34, nan}
valuesExpected := []float64{nan, 222, 199, 110, 34}
timestampsExpected := []int64{0, 40, 80, 120, 160}
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
})
@@ -481,7 +614,7 @@ func TestRollupFuncsNoWindow(t *testing.T) {
}
rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
values := rc.Do(nil, testValues, testTimestamps)
valuesExpected := []float64{-102, -9, 22, 0, nan}
valuesExpected := []float64{nan, -102, -9, 22, 0}
timestampsExpected := []int64{0, 40, 80, 120, 160}
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
})
@@ -495,10 +628,80 @@ func TestRollupFuncsNoWindow(t *testing.T) {
}
rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
values := rc.Do(nil, testValues, testTimestamps)
valuesExpected := []float64{33, -87, 0, nan}
valuesExpected := []float64{123, 33, -87, 0}
timestampsExpected := []int64{10, 50, 90, 130}
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
})
t.Run("lag", func(t *testing.T) {
rc := rollupConfig{
Func: rollupLag,
Start: 0,
End: 160,
Step: 40,
Window: 0,
}
rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
values := rc.Do(nil, testValues, testTimestamps)
valuesExpected := []float64{nan, 0.004, 0, 0, 0.03}
timestampsExpected := []int64{0, 40, 80, 120, 160}
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
})
t.Run("lifetime_1", func(t *testing.T) {
rc := rollupConfig{
Func: rollupLifetime,
Start: 0,
End: 160,
Step: 40,
Window: 0,
}
rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
values := rc.Do(nil, testValues, testTimestamps)
valuesExpected := []float64{nan, 0.031, 0.044, 0.04, 0.01}
timestampsExpected := []int64{0, 40, 80, 120, 160}
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
})
t.Run("lifetime_2", func(t *testing.T) {
rc := rollupConfig{
Func: rollupLifetime,
Start: 0,
End: 160,
Step: 40,
Window: 200,
}
rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
values := rc.Do(nil, testValues, testTimestamps)
valuesExpected := []float64{nan, 0.031, 0.075, 0.115, 0.125}
timestampsExpected := []int64{0, 40, 80, 120, 160}
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
})
t.Run("scrape_interval_1", func(t *testing.T) {
rc := rollupConfig{
Func: rollupScrapeInterval,
Start: 0,
End: 160,
Step: 40,
Window: 0,
}
rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
values := rc.Do(nil, testValues, testTimestamps)
valuesExpected := []float64{nan, 0.010333333333333333, 0.011, 0.013333333333333334, 0.01}
timestampsExpected := []int64{0, 40, 80, 120, 160}
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
})
t.Run("scrape_interval_2", func(t *testing.T) {
rc := rollupConfig{
Func: rollupScrapeInterval,
Start: 0,
End: 160,
Step: 40,
Window: 80,
}
rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
values := rc.Do(nil, testValues, testTimestamps)
valuesExpected := []float64{nan, 0.010333333333333333, 0.010714285714285714, 0.012, 0.0125}
timestampsExpected := []int64{0, 40, 80, 120, 160}
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
})
t.Run("changes", func(t *testing.T) {
rc := rollupConfig{
Func: rollupChanges,
@@ -509,10 +712,24 @@ func TestRollupFuncsNoWindow(t *testing.T) {
}
rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
values := rc.Do(nil, testValues, testTimestamps)
valuesExpected := []float64{3, 4, 3, 0, nan}
valuesExpected := []float64{nan, 4, 4, 3, 0}
timestampsExpected := []int64{0, 40, 80, 120, 160}
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
})
t.Run("changes_small_window", func(t *testing.T) {
rc := rollupConfig{
Func: rollupChanges,
Start: 0,
End: 45,
Step: 9,
Window: 9,
}
rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
values := rc.Do(nil, testValues, testTimestamps)
valuesExpected := []float64{nan, 1, 1, 1, 1, 0}
timestampsExpected := []int64{0, 9, 18, 27, 36, 45}
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
})
t.Run("resets", func(t *testing.T) {
rc := rollupConfig{
Func: rollupResets,
@@ -523,7 +740,7 @@ func TestRollupFuncsNoWindow(t *testing.T) {
}
rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
values := rc.Do(nil, testValues, testTimestamps)
valuesExpected := []float64{2, 2, 1, 0, nan}
valuesExpected := []float64{nan, 2, 2, 1, 0}
timestampsExpected := []int64{0, 40, 80, 120, 160}
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
})
@@ -537,13 +754,13 @@ func TestRollupFuncsNoWindow(t *testing.T) {
}
rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
values := rc.Do(nil, testValues, testTimestamps)
valuesExpected := []float64{55.5, 49.75, 36.666666666666664, 34, nan}
valuesExpected := []float64{nan, 55.5, 49.75, 36.666666666666664, 34}
timestampsExpected := []int64{0, 40, 80, 120, 160}
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
})
t.Run("deriv", func(t *testing.T) {
rc := rollupConfig{
Func: rollupDeriv,
Func: rollupDerivSlow,
Start: 0,
End: 160,
Step: 40,
@@ -551,7 +768,7 @@ func TestRollupFuncsNoWindow(t *testing.T) {
}
rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
values := rc.Do(nil, testValues, testTimestamps)
valuesExpected := []float64{-3290.3225806451615, -204.54545454545456, 550, 0, nan}
valuesExpected := []float64{0, -2879.310344827587, 558.0608793686592, 422.84569138276544, 0}
timestampsExpected := []int64{0, 40, 80, 120, 160}
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
})
@@ -565,7 +782,7 @@ func TestRollupFuncsNoWindow(t *testing.T) {
}
rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
values := rc.Do(nil, testValues, testTimestamps)
valuesExpected := []float64{-1916.6666666666665, -43500, 400, 0, nan}
valuesExpected := []float64{nan, -1916.6666666666665, -43500, 400, 0}
timestampsExpected := []int64{0, 40, 80, 120, 160}
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
})
@@ -579,7 +796,7 @@ func TestRollupFuncsNoWindow(t *testing.T) {
}
rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
values := rc.Do(nil, testValues, testTimestamps)
valuesExpected := []float64{39.81519810323691, 32.080952292598795, 5.2493385826745405, 0, nan}
valuesExpected := []float64{nan, 39.81519810323691, 32.080952292598795, 5.2493385826745405, 5.830951894845301}
timestampsExpected := []int64{0, 40, 80, 120, 160}
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
})
@@ -593,11 +810,11 @@ func TestRollupFuncsNoWindow(t *testing.T) {
}
rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
values := rc.Do(nil, testValues, testTimestamps)
valuesExpected := []float64{4.6035, 4.3934999999999995, 2.166, 0.34, nan}
valuesExpected := []float64{nan, 1.526, 2.2795, 1.325, 0.34}
timestampsExpected := []int64{0, 40, 80, 120, 160}
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
})
t.Run("distinct", func(t *testing.T) {
t.Run("distinct_over_time_1", func(t *testing.T) {
rc := rollupConfig{
Func: rollupDistinct,
Start: 0,
@@ -607,10 +824,45 @@ func TestRollupFuncsNoWindow(t *testing.T) {
}
rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
values := rc.Do(nil, testValues, testTimestamps)
valuesExpected := []float64{4, 4, 3, 1, nan}
valuesExpected := []float64{nan, 4, 4, 3, 1}
timestampsExpected := []int64{0, 40, 80, 120, 160}
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
})
t.Run("distinct_over_time_2", func(t *testing.T) {
rc := rollupConfig{
Func: rollupDistinct,
Start: 0,
End: 160,
Step: 40,
Window: 80,
}
rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
values := rc.Do(nil, testValues, testTimestamps)
valuesExpected := []float64{nan, 4, 7, 6, 3}
timestampsExpected := []int64{0, 40, 80, 120, 160}
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
})
}
func TestRollupBigNumberOfValues(t *testing.T) {
const srcValuesCount = 1e4
rc := rollupConfig{
Func: rollupDefault,
End: srcValuesCount,
Step: srcValuesCount / 5,
Window: srcValuesCount / 4,
}
rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
srcValues := make([]float64, srcValuesCount)
srcTimestamps := make([]int64, srcValuesCount)
for i := 0; i < srcValuesCount; i++ {
srcValues[i] = float64(i)
srcTimestamps[i] = int64(i / 2)
}
values := rc.Do(nil, srcValues, srcTimestamps)
valuesExpected := []float64{1, 4001, 8001, 9999, nan, nan}
timestampsExpected := []int64{0, 2000, 4000, 6000, 8000, 10000}
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
}
func testRowsEqual(t *testing.T, values []float64, timestamps []int64, valuesExpected []float64, timestampsExpected []int64) {
@@ -641,7 +893,7 @@ func testRowsEqual(t *testing.T, values []float64, timestamps []int64, valuesExp
}
continue
}
if v != vExpected {
if math.Abs(v-vExpected) > 1e-15 {
t.Fatalf("unexpected value at values[%d]; got %f; want %f\nvalues=\n%v\nvaluesExpected=\n%v",
i, v, vExpected, values, valuesExpected)
}

View File

@@ -4,6 +4,7 @@ import (
"fmt"
"sort"
"strconv"
"sync"
"unsafe"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
@@ -38,11 +39,13 @@ func (ts *timeseries) String() string {
return fmt.Sprintf("MetricName=%s, Values=%g, Timestamps=%d", &ts.MetricName, ts.Values, ts.Timestamps)
}
func (ts *timeseries) CopyFrom(src *timeseries) {
func (ts *timeseries) CopyFromShallowTimestamps(src *timeseries) {
ts.Reset()
ts.MetricName.CopyFrom(&src.MetricName)
ts.Values = append(ts.Values[:0], src.Values...)
ts.Timestamps = append(ts.Timestamps[:0], src.Timestamps...)
ts.Timestamps = src.Timestamps
ts.denyReuse = true
}
func (ts *timeseries) CopyFromMetricNames(src *timeseries) {
@@ -59,7 +62,21 @@ func (ts *timeseries) CopyShallow(src *timeseries) {
ts.denyReuse = true
}
func marshalTimeseriesFast(tss []*timeseries, maxSize int, step int64) []byte {
func getTimeseries() *timeseries {
if v := timeseriesPool.Get(); v != nil {
return v.(*timeseries)
}
return &timeseries{}
}
func putTimeseries(ts *timeseries) {
ts.Reset()
timeseriesPool.Put(ts)
}
var timeseriesPool sync.Pool
func marshalTimeseriesFast(dst []byte, tss []*timeseries, maxSize int, step int64) []byte {
if len(tss) == 0 {
logger.Panicf("BUG: tss cannot be empty")
}
@@ -75,13 +92,13 @@ func marshalTimeseriesFast(tss []*timeseries, maxSize int, step int64) []byte {
if size > maxSize {
// Do not marshal tss, since it would occupy too much space
return nil
return dst
}
// Allocate the buffer for the marshaled tss before its marshaling.
// This should reduce memory fragmentation and memory usage.
dst := make([]byte, 0, size)
dst = marshalFastTimestamps(dst, tss[0].Timestamps)
dst = bytesutil.Resize(dst, size)
dst = marshalFastTimestamps(dst[:0], tss[0].Timestamps)
for _, ts := range tss {
dst = ts.marshalFastNoTimestamps(dst)
}
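marshalTimeseriesFast now follows the append-to-dst convention used throughout the codebase, so callers can reuse pooled buffers; an empty result replaces the former nil to signal "too big". A typical call under that convention (mirroring the Put path shown earlier):

    buf := resultBufPool.Get()
    defer resultBufPool.Put(buf)
    buf.B = marshalTimeseriesFast(buf.B[:0], tss, maxSize, step)
    if len(buf.B) == 0 {
    	// payload exceeded maxSize and was skipped
    }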

View File

@@ -74,7 +74,7 @@ func TestTimeseriesMarshalUnmarshalFast(t *testing.T) {
tssOrig = append(tssOrig, &ts)
}
buf := marshalTimeseriesFast(tssOrig, 1e6, 123)
buf := marshalTimeseriesFast(nil, tssOrig, 1e6, 123)
tssGot, err := unmarshalTimeseriesFast(buf)
if err != nil {
t.Fatalf("error in unmarshalTimeseriesFast: %s", err)

View File

@@ -4,6 +4,7 @@ import (
"fmt"
"math"
"math/rand"
"regexp"
"sort"
"strconv"
"strings"
@@ -61,6 +62,8 @@ var transformFuncs = map[string]transformFunc{
"label_keep": transformLabelKeep,
"label_copy": transformLabelCopy,
"label_move": transformLabelMove,
"label_transform": transformLabelTransform,
"label_value": transformLabelValue,
"union": transformUnion,
"": transformUnion, // empty func is a synonim to union
"keep_last_value": transformKeepLastValue,
@@ -88,6 +91,7 @@ var transformFuncs = map[string]transformFunc{
"cos": newTransformFuncOneArg(transformCos),
"asin": newTransformFuncOneArg(transformAsin),
"acos": newTransformFuncOneArg(transformAcos),
"prometheus_buckets": transformPrometheusBuckets,
}
func getTransformFunc(s string) transformFunc {
@@ -123,7 +127,8 @@ func newTransformFuncOneArg(tf func(v float64) float64) transformFunc {
}
func doTransformValues(arg []*timeseries, tf func(values []float64), fe *funcExpr) ([]*timeseries, error) {
keepMetricGroup := transformFuncsKeepMetricGroup[fe.Name]
name := strings.ToLower(fe.Name)
keepMetricGroup := transformFuncsKeepMetricGroup[name]
for _, ts := range arg {
if !keepMetricGroup {
ts.MetricName.ResetMetricGroup()
@@ -268,6 +273,131 @@ func transformFloor(v float64) float64 {
return math.Floor(v)
}
func transformPrometheusBuckets(tfa *transformFuncArg) ([]*timeseries, error) {
args := tfa.args
if err := expectTransformArgsNum(args, 1); err != nil {
return nil, err
}
rvs := vmrangeBucketsToLE(args[0])
return rvs, nil
}
func vmrangeBucketsToLE(tss []*timeseries) []*timeseries {
rvs := make([]*timeseries, 0, len(tss))
// Group timeseries by MetricGroup+tags excluding `vmrange` tag.
type x struct {
startStr string
endStr string
start float64
end float64
ts *timeseries
}
m := make(map[string][]x)
bb := bbPool.Get()
defer bbPool.Put(bb)
for _, ts := range tss {
vmrange := ts.MetricName.GetTagValue("vmrange")
if len(vmrange) == 0 {
if le := ts.MetricName.GetTagValue("le"); len(le) > 0 {
// Keep Prometheus-compatible buckets.
rvs = append(rvs, ts)
}
continue
}
n := strings.Index(bytesutil.ToUnsafeString(vmrange), "...")
if n < 0 {
continue
}
startStr := string(vmrange[:n])
start, err := strconv.ParseFloat(startStr, 64)
if err != nil {
continue
}
endStr := string(vmrange[n+len("..."):])
end, err := strconv.ParseFloat(endStr, 64)
if err != nil {
continue
}
ts.MetricName.RemoveTag("le")
ts.MetricName.RemoveTag("vmrange")
bb.B = marshalMetricNameSorted(bb.B[:0], &ts.MetricName)
m[string(bb.B)] = append(m[string(bb.B)], x{
startStr: startStr,
endStr: endStr,
start: start,
end: end,
ts: ts,
})
}
// Convert `vmrange` label in each group of time series to `le` label.
copyTS := func(src *timeseries, leStr string) *timeseries {
var ts timeseries
ts.CopyFromShallowTimestamps(src)
values := ts.Values
for i := range values {
values[i] = 0
}
ts.MetricName.RemoveTag("le")
ts.MetricName.AddTag("le", leStr)
return &ts
}
isZeroTS := func(ts *timeseries) bool {
for _, v := range ts.Values {
if v > 0 {
return false
}
}
return true
}
for _, xss := range m {
sort.Slice(xss, func(i, j int) bool { return xss[i].end < xss[j].end })
xssNew := make([]x, 0, len(xss)+2)
var xsPrev x
for _, xs := range xss {
ts := xs.ts
if isZeroTS(ts) {
// Skip time series with zeros. They are substituted by xssNew below.
continue
}
if xs.start != xsPrev.end {
xssNew = append(xssNew, x{
endStr: xs.startStr,
end: xs.start,
ts: copyTS(ts, xs.startStr),
})
}
ts.MetricName.AddTag("le", xs.endStr)
xssNew = append(xssNew, xs)
xsPrev = xs
}
if !math.IsInf(xsPrev.end, 1) {
xssNew = append(xssNew, x{
endStr: "+Inf",
end: math.Inf(1),
ts: copyTS(xsPrev.ts, "+Inf"),
})
}
xss = xssNew
for i := range xss[0].ts.Values {
count := float64(0)
for _, xs := range xss {
ts := xs.ts
v := ts.Values[i]
if !math.IsNaN(v) && v > 0 {
count += v
}
ts.Values[i] = count
}
}
for _, xs := range xss {
rvs = append(rvs, xs.ts)
}
}
return rvs
}
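Concretely: suppose a group has vmrange="1...2" with value 5 and vmrange="2...4" with value 3 at some timestamp. The loop above emits a synthetic zero bucket for the missing lower bound, cumulates the counts, and caps the group with +Inf, so the output buckets would be:

    le="1"    0   (synthetic lower bound)
    le="2"    5
    le="4"    8   (cumulative 5+3)
    le="+Inf" 8   (synthetic upper bound)

which is exactly the cumulative shape histogram_quantile expects.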
func transformHistogramQuantile(tfa *transformFuncArg) ([]*timeseries, error) {
args := tfa.args
if err := expectTransformArgsNum(args, 2); err != nil {
@@ -278,6 +408,9 @@ func transformHistogramQuantile(tfa *transformFuncArg) ([]*timeseries, error) {
return nil, err
}
// Convert buckets with `vmrange` labels to buckets with `le` labels.
tss := vmrangeBucketsToLE(args[1])
// Group metrics by all tags excluding "le"
type x struct {
le float64
@@ -285,7 +418,7 @@ func transformHistogramQuantile(tfa *transformFuncArg) ([]*timeseries, error) {
}
m := make(map[string][]x)
bb := bbPool.Get()
for _, ts := range args[1] {
for _, ts := range tss {
tagValue := ts.MetricName.GetTagValue("le")
if len(tagValue) == 0 {
continue
@@ -294,48 +427,81 @@ func transformHistogramQuantile(tfa *transformFuncArg) ([]*timeseries, error) {
if err != nil {
continue
}
var dst timeseries
dst.CopyFrom(ts)
dst.MetricName.ResetMetricGroup()
dst.MetricName.RemoveTag("le")
bb.B = marshalMetricTagsSorted(bb.B[:0], &dst.MetricName)
ts.MetricName.ResetMetricGroup()
ts.MetricName.RemoveTag("le")
bb.B = marshalMetricTagsSorted(bb.B[:0], &ts.MetricName)
m[string(bb.B)] = append(m[string(bb.B)], x{
le: le,
ts: &dst,
ts: ts,
})
}
bbPool.Put(bb)
// Calculate quantile for each group in m
lastNonInf := func(xss []x) float64 {
for len(xss) > 0 && math.IsInf(xss[len(xss)-1].le, 0) {
lastNonInf := func(i int, xss []x) float64 {
for len(xss) > 0 {
xsLast := xss[len(xss)-1]
v := xsLast.ts.Values[i]
if v == 0 {
return nan
}
if !math.IsNaN(v) && !math.IsInf(xsLast.le, 0) {
return xsLast.le
}
xss = xss[:len(xss)-1]
}
if len(xss) == 0 {
return nan
}
return xss[len(xss)-1].le
return nan
}
quantile := func(i int, phis []float64, xss []x) float64 {
vPrev := float64(0)
lePrev := float64(0)
phi := phis[i]
if math.IsNaN(phi) {
return nan
}
// Fix broken buckets.
// They are already sorted by le, so their values must be in ascending order,
// since the next bucket value includes all the previous buckets.
vPrev := float64(0)
for _, xs := range xss {
v := xs.ts.Values[i]
if v < vPrev {
xs.ts.Values[i] = vPrev
} else if !math.IsNaN(v) {
vPrev = v
}
}
vLast := nan
for len(xss) > 0 {
vLast = xss[len(xss)-1].ts.Values[i]
if !math.IsNaN(vLast) {
break
}
xss = xss[:len(xss)-1]
}
if vLast == 0 || math.IsNaN(vLast) {
return nan
}
if phi < 0 {
return -inf
}
if phi > 1 {
return inf
}
vReq := xss[len(xss)-1].ts.Values[i] * phi
vReq := vLast * phi
vPrev = 0
lePrev := float64(0)
for _, xs := range xss {
v := xs.ts.Values[i]
if math.IsNaN(v) {
// Skip NaNs - they may appear if the selected time range
// contains multiple different bucket sets.
continue
}
le := xs.le
if v <= vPrev {
v = vPrev
le = lePrev
if v <= 0 {
// Skip zero buckets.
lePrev = le
continue
}
if v < vReq {
vPrev = v
@@ -343,16 +509,16 @@ func transformHistogramQuantile(tfa *transformFuncArg) ([]*timeseries, error) {
continue
}
if math.IsInf(le, 0) {
return lastNonInf(xss)
return lastNonInf(i, xss)
}
if v == vPrev {
return lePrev
}
return lePrev + (le-lePrev)*(vReq-vPrev)/(v-vPrev)
}
return lastNonInf(xss)
return lastNonInf(i, xss)
}
var rvs []*timeseries
rvs := make([]*timeseries, 0, len(m))
for _, xss := range m {
sort.Slice(xss, func(i, j int) bool {
return xss[i].le < xss[j].le
@@ -363,7 +529,6 @@ func transformHistogramQuantile(tfa *transformFuncArg) ([]*timeseries, error) {
}
rvs = append(rvs, dst)
}
return rvs, nil
}
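The interpolation itself is the standard histogram_quantile formula: find the bucket where the cumulative count crosses vReq = vLast*phi and interpolate linearly between bucket bounds. With cumulative counts {10, 30, 40} at le {1, 2, +Inf} and phi=0.5: vLast=40, vReq=20, the crossing bucket is le=2 (vPrev=10 at lePrev=1), so the result is 1 + (2-1)*(20-10)/(30-10) = 1.5. The new guards return nan when the last bucket is zero or NaN and clamp phi<0 / phi>1 to -inf / inf.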
@@ -394,13 +559,6 @@ func runningAvg(a, b float64, idx int) float64 {
return a + (b-a)/float64(idx+1)
}
func keepLastValue(a, b float64, idx int) float64 {
if math.IsNaN(b) {
return a
}
return b
}
func skipLeadingNaNs(values []float64) []float64 {
i := 0
for i < len(values) && math.IsNaN(values[i]) {
@@ -641,9 +799,7 @@ func transformUnion(tfa *transformFuncArg) ([]*timeseries, error) {
continue
}
m[string(bb.B)] = true
var dst timeseries
dst.CopyFrom(ts)
rvs = append(rvs, &dst)
rvs = append(rvs, ts)
}
}
bbPool.Put(bb)
@@ -816,6 +972,31 @@ func transformLabelJoin(tfa *transformFuncArg) ([]*timeseries, error) {
return rvs, nil
}
func transformLabelTransform(tfa *transformFuncArg) ([]*timeseries, error) {
args := tfa.args
if err := expectTransformArgsNum(args, 4); err != nil {
return nil, err
}
label, err := getString(args[1], 1)
if err != nil {
return nil, err
}
regex, err := getString(args[2], 2)
if err != nil {
return nil, err
}
replacement, err := getString(args[3], 3)
if err != nil {
return nil, err
}
r, err := compileRegexp(regex)
if err != nil {
return nil, fmt.Errorf(`cannot compile regex %q: %s`, regex, err)
}
return labelReplace(args[0], label, r, label, replacement)
}
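In query terms this is an in-place regex rewrite of a single label, i.e. label_replace with source and destination set to the same label. A hypothetical invocation:

    label_transform(up, "instance", ":[0-9]+$", "")

would strip a trailing port from every instance label value.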
func transformLabelReplace(tfa *transformFuncArg) ([]*timeseries, error) {
args := tfa.args
if err := expectTransformArgsNum(args, 5); err != nil {
@@ -842,11 +1023,12 @@ func transformLabelReplace(tfa *transformFuncArg) ([]*timeseries, error) {
if err != nil {
return nil, fmt.Errorf(`cannot compile regex %q: %s`, regex, err)
}
return labelReplace(args[0], srcLabel, r, dstLabel, replacement)
}
func labelReplace(tss []*timeseries, srcLabel string, r *regexp.Regexp, dstLabel, replacement string) ([]*timeseries, error) {
replacementBytes := []byte(replacement)
rvs := args[0]
for _, ts := range rvs {
for _, ts := range tss {
mn := &ts.MetricName
dstValue := getDstValue(mn, dstLabel)
srcValue := mn.GetTagValue(srcLabel)
@@ -856,6 +1038,33 @@ func transformLabelReplace(tfa *transformFuncArg) ([]*timeseries, error) {
mn.RemoveTag(dstLabel)
}
}
return tss, nil
}
func transformLabelValue(tfa *transformFuncArg) ([]*timeseries, error) {
args := tfa.args
if err := expectTransformArgsNum(args, 2); err != nil {
return nil, err
}
labelName, err := getString(args[1], 1)
if err != nil {
return nil, fmt.Errorf("cannot get label name: %s", err)
}
rvs := args[0]
for _, ts := range rvs {
ts.MetricName.ResetMetricGroup()
labelValue := ts.MetricName.GetTagValue(labelName)
v, err := strconv.ParseFloat(string(labelValue), 64)
if err != nil {
v = nan
}
values := ts.Values
for i := range values {
values[i] = v
}
}
// Do not remove timeseries with only NaN values, so `default` can be applied to them:
// label_value(q, "label") default 123
return rvs, nil
}
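So label_value lifts a numeric label into the sample values, producing NaN when the label is missing or non-numeric; keeping the all-NaN series alive is what makes the documented pattern work. A hypothetical query:

    label_value(foo, "ratio") default 0

evaluates a series carrying ratio="0.95" to 0.95 at every point, and series without the label to 0.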
@@ -1052,7 +1261,10 @@ func transformTimestamp(tfa *transformFuncArg) ([]*timeseries, error) {
ts.MetricName.ResetMetricGroup()
values := ts.Values
for i, t := range ts.Timestamps {
values[i] = float64(t) / 1e3
v := values[i]
if !math.IsNaN(v) {
values[i] = float64(t) / 1e3
}
}
}
return rvs, nil

app/vmstorage/Makefile (new file, 38 lines)
View File

@@ -0,0 +1,38 @@
# All these commands must run from repository root.
run-vmstorage:
mkdir -p vmstorage-data
DOCKER_OPTS='-v $(shell pwd)/vmstorage-data:/vmstorage-data' \
APP_NAME=vmstorage \
ARGS='-retentionPeriod=12' \
$(MAKE) run-via-docker
vmstorage:
APP_NAME=vmstorage $(MAKE) app-local
vmstorage-race:
APP_NAME=vmstorage RACE=-race $(MAKE) app-local
vmstorage-prod:
APP_NAME=vmstorage $(MAKE) app-via-docker
vmstorage-prod-race:
APP_NAME=vmstorage RACE=-race $(MAKE) app-via-docker
vmstorage-pure:
APP_NAME=vmstorage $(MAKE) app-local-pure
vmstorage-pure-prod:
APP_NAME=vmstorage APP_SUFFIX='-pure' DOCKER_OPTS='--env CGO_ENABLED=0' $(MAKE) app-via-docker
package-vmstorage:
APP_NAME=vmstorage $(MAKE) package-via-docker
package-vmstorage-race:
APP_NAME=vmstorage RACE=-race $(MAKE) package-via-docker
publish-vmstorage:
APP_NAME=vmstorage $(MAKE) publish-via-docker
publish-vmstorage-race:
APP_NAME=vmstorage RACE=-race $(MAKE) publish-via-docker
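Judging by the targets above: make vmstorage builds a local binary, make vmstorage-prod builds the production binary via Docker, and make run-vmstorage runs it with ./vmstorage-data bind-mounted and a 12-month retention; all of them must be invoked from the repository root.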

View File

@@ -1,5 +1,5 @@
`vmstorage` performs the following tasks:
- Accepts inserts from `vminsert` and stores them to local storage.
- Accepts inserts from `vminsert` nodes and stores them to local storage.
- Performs select requests from `vmselect`.
- Performs select requests from `vmselect` nodes.

View File

@@ -0,0 +1,7 @@
FROM scratch
COPY --from=local/certs:1.0.3 /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ca-certificates.crt
COPY bin/vmstorage-prod .
EXPOSE 8482
EXPOSE 8400
EXPOSE 8401
ENTRYPOINT ["/vmstorage-prod"]
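Given the exposed ports (8482 for HTTP, 8400/8401 for vminsert/vmselect connections, matching the flag defaults in main.go below), running the image might look like this (image name and paths are illustrative):

    docker run -p 8482:8482 -p 8400:8400 -p 8401:8401 \
        -v /path/to/vmstorage-data:/vmstorage-data \
        <vmstorage-image> -storageDataPath=/vmstorage-data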

View File

@@ -1,4 +1,4 @@
package vmstorage
package main
import (
"flag"
@@ -8,122 +8,94 @@ import (
"sync"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmstorage/transport"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/syncwg"
"github.com/VictoriaMetrics/metrics"
)
var (
httpListenAddr = flag.String("httpListenAddr", ":8482", "Address to listen for http connections")
retentionPeriod = flag.Int("retentionPeriod", 1, "Retention period in months")
storageDataPath = flag.String("storageDataPath", "vmstorage-data", "Path to storage data")
+ vminsertAddr = flag.String("vminsertAddr", ":8400", "TCP address to accept connections from vminsert services")
+ vmselectAddr = flag.String("vmselectAddr", ":8401", "TCP address to accept connections from vmselect services")
snapshotAuthKey = flag.String("snapshotAuthKey", "", "authKey, which must be passed in query string to /snapshot* pages")
precisionBits = flag.Int("precisionBits", 64, "The number of precision bits to store per value. Lower precision bits improve data compression at the cost of precision loss")
- // DataPath is a path to storage data.
- DataPath = flag.String("storageDataPath", "victoria-metrics-data", "Path to storage data")
+ storageDataPath = flag.String("storageDataPath", "victoria-metrics-data", "Path to storage data")
bigMergeConcurrency = flag.Int("bigMergeConcurrency", 0, "The maximum number of CPU cores to use for big merges. Default value is used if set to 0")
smallMergeConcurrency = flag.Int("smallMergeConcurrency", 0, "The maximum number of CPU cores to use for small merges. Default value is used if set to 0")
)
- // Init initializes vmstorage.
- func Init() {
- if err := encoding.CheckPrecisionBits(uint8(*precisionBits)); err != nil {
- logger.Fatalf("invalid `-precisionBits`: %s", err)
- }
- logger.Infof("opening storage at %q with retention period %d months", *DataPath, *retentionPeriod)
+ func main() {
+ flag.Parse()
+ buildinfo.Init()
+ logger.Init()
+ storage.SetBigMergeWorkersCount(*bigMergeConcurrency)
+ storage.SetSmallMergeWorkersCount(*smallMergeConcurrency)
+ logger.Infof("opening storage at %q with retention period %d months", *storageDataPath, *retentionPeriod)
startTime := time.Now()
- strg, err := storage.OpenStorage(*DataPath, *retentionPeriod)
+ strg, err := storage.OpenStorage(*storageDataPath, *retentionPeriod)
if err != nil {
- logger.Fatalf("cannot open a storage at %s with retention period %d months: %s", *DataPath, *retentionPeriod, err)
+ logger.Fatalf("cannot open a storage at %s with retention period %d months: %s", *storageDataPath, *retentionPeriod, err)
}
- Storage = strg
var m storage.Metrics
- Storage.UpdateMetrics(&m)
+ strg.UpdateMetrics(&m)
tm := &m.TableMetrics
partsCount := tm.SmallPartsCount + tm.BigPartsCount
blocksCount := tm.SmallBlocksCount + tm.BigBlocksCount
rowsCount := tm.SmallRowsCount + tm.BigRowsCount
- logger.Infof("successfully opened storage %q in %s; partsCount: %d; blocksCount: %d; rowsCount: %d",
- *DataPath, time.Since(startTime), partsCount, blocksCount, rowsCount)
+ sizeBytes := tm.SmallSizeBytes + tm.BigSizeBytes
+ logger.Infof("successfully opened storage %q in %s; partsCount: %d; blocksCount: %d; rowsCount: %d; sizeBytes: %d",
+ *storageDataPath, time.Since(startTime), partsCount, blocksCount, rowsCount, sizeBytes)
- registerStorageMetrics(Storage)
- }
+ registerStorageMetrics(strg)
- // Storage is a storage.
- //
- // Every storage call must be wrapped into WG.Add(1) ... WG.Done()
- // for proper graceful shutdown when Stop is called.
- var Storage *storage.Storage
+ srv, err := transport.NewServer(*vminsertAddr, *vmselectAddr, strg)
+ if err != nil {
+ logger.Fatalf("cannot create a server with vminsertAddr=%s, vmselectAddr=%s: %s", *vminsertAddr, *vmselectAddr, err)
+ }
- // WG must be incremented before Storage call.
- //
- // Use syncwg instead of sync, since Add is called from concurrent goroutines.
- var WG syncwg.WaitGroup
+ go srv.RunVMInsert()
+ go srv.RunVMSelect()
- // AddRows adds mrs to the storage.
- func AddRows(mrs []storage.MetricRow) error {
- WG.Add(1)
- err := Storage.AddRows(mrs, uint8(*precisionBits))
- WG.Done()
- return err
- }
+ requestHandler := newRequestHandler(strg)
+ go func() {
+ httpserver.Serve(*httpListenAddr, requestHandler)
+ }()
- // DeleteMetrics deletes metrics matching tfss.
- //
- // Returns the number of deleted metrics.
- func DeleteMetrics(tfss []*storage.TagFilters) (int, error) {
- WG.Add(1)
- n, err := Storage.DeleteMetrics(tfss)
- WG.Done()
- return n, err
- }
+ sig := procutil.WaitForSigterm()
+ logger.Infof("service received signal %s", sig)
- // SearchTagKeys searches for tag keys
- func SearchTagKeys(maxTagKeys int) ([]string, error) {
- WG.Add(1)
- keys, err := Storage.SearchTagKeys(maxTagKeys)
- WG.Done()
- return keys, err
- }
+ logger.Infof("gracefully shutting down the service")
+ startTime = time.Now()
+ srv.MustClose()
+ logger.Infof("successfully shut down the service in %s", time.Since(startTime))
- // SearchTagValues searches for tag values for the given tagKey
- func SearchTagValues(tagKey []byte, maxTagValues int) ([]string, error) {
- WG.Add(1)
- values, err := Storage.SearchTagValues(tagKey, maxTagValues)
- WG.Done()
- return values, err
- }
- // GetSeriesCount returns the number of time series in the storage.
- func GetSeriesCount() (uint64, error) {
- WG.Add(1)
- n, err := Storage.GetSeriesCount()
- WG.Done()
- return n, err
- }
- // Stop stops the vmstorage
- func Stop() {
- logger.Infof("gracefully closing the storage at %s", *DataPath)
- startTime := time.Now()
- WG.WaitAndBlock()
- Storage.MustClose()
+ logger.Infof("gracefully closing the storage at %s", *storageDataPath)
+ startTime = time.Now()
+ strg.MustClose()
logger.Infof("successfully closed the storage in %s", time.Since(startTime))
- logger.Infof("the storage has been stopped")
+ fs.MustStopDirRemover()
+ logger.Infof("the vmstorage has been stopped")
}
- // RequestHandler is a storage request handler.
- func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
- path := r.URL.Path
- prometheusCompatibleResponse := false
- if path == "/api/v1/admin/tsdb/snapshot" {
- // Handle Prometheus API - https://prometheus.io/docs/prometheus/latest/querying/api/#snapshot .
- prometheusCompatibleResponse = true
- path = "/snapshot/create"
- }
+ func newRequestHandler(strg *storage.Storage) httpserver.RequestHandler {
+ return func(w http.ResponseWriter, r *http.Request) bool {
+ return requestHandler(w, r, strg)
+ }
+ }
+ func requestHandler(w http.ResponseWriter, r *http.Request, strg *storage.Storage) bool {
+ path := r.URL.Path
if !strings.HasPrefix(path, "/snapshot") {
return false
}
@@ -137,22 +109,18 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
switch path {
case "/create":
w.Header().Set("Content-Type", "application/json")
- snapshotPath, err := Storage.CreateSnapshot()
+ snapshotPath, err := strg.CreateSnapshot()
if err != nil {
msg := fmt.Sprintf("cannot create snapshot: %s", err)
logger.Errorf("%s", msg)
fmt.Fprintf(w, `{"status":"error","msg":%q}`, msg)
return true
}
- if prometheusCompatibleResponse {
- fmt.Fprintf(w, `{"status":"success","data":{"name":%q}}`, snapshotPath)
- } else {
- fmt.Fprintf(w, `{"status":"ok","snapshot":%q}`, snapshotPath)
- }
+ fmt.Fprintf(w, `{"status":"ok","snapshot":%q}`, snapshotPath)
return true
case "/list":
w.Header().Set("Content-Type", "application/json")
- snapshots, err := Storage.ListSnapshots()
+ snapshots, err := strg.ListSnapshots()
if err != nil {
msg := fmt.Sprintf("cannot list snapshots: %s", err)
logger.Errorf("%s", msg)
@@ -171,7 +139,7 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
case "/delete":
w.Header().Set("Content-Type", "application/json")
snapshotName := r.FormValue("snapshot")
- if err := Storage.DeleteSnapshot(snapshotName); err != nil {
+ if err := strg.DeleteSnapshot(snapshotName); err != nil {
msg := fmt.Sprintf("cannot delete snapshot %q: %s", snapshotName, err)
logger.Errorf("%s", msg)
fmt.Fprintf(w, `{"status":"error","msg":%q}`, msg)
@@ -181,7 +149,7 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
return true
case "/delete_all":
w.Header().Set("Content-Type", "application/json")
- snapshots, err := Storage.ListSnapshots()
+ snapshots, err := strg.ListSnapshots()
if err != nil {
msg := fmt.Sprintf("cannot list snapshots: %s", err)
logger.Errorf("%s", msg)
@@ -189,7 +157,7 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
return true
}
for _, snapshotName := range snapshots {
- if err := Storage.DeleteSnapshot(snapshotName); err != nil {
+ if err := strg.DeleteSnapshot(snapshotName); err != nil {
msg := fmt.Sprintf("cannot delete snapshot %q: %s", snapshotName, err)
logger.Errorf("%s", msg)
fmt.Fprintf(w, `{"status":"error","msg":%q}`, msg)
@@ -282,9 +250,30 @@ func registerStorageMetrics(strg *storage.Storage) {
return float64(idbm().PartsRefCount)
})
metrics.NewGauge(`vm_new_timeseries_created_total`, func() float64 {
return float64(idbm().NewTimeseriesCreated)
})
metrics.NewGauge(`vm_missing_tsids_for_metric_id_total`, func() float64 {
return float64(idbm().MissingTSIDsForMetricID)
})
+ metrics.NewGauge(`vm_recent_hour_metric_ids_search_calls_total`, func() float64 {
+ return float64(idbm().RecentHourMetricIDsSearchCalls)
+ })
+ metrics.NewGauge(`vm_recent_hour_metric_ids_search_hits_total`, func() float64 {
+ return float64(idbm().RecentHourMetricIDsSearchHits)
+ })
+ metrics.NewGauge(`vm_date_metric_ids_search_calls_total`, func() float64 {
+ return float64(idbm().DateMetricIDsSearchCalls)
+ })
+ metrics.NewGauge(`vm_date_metric_ids_search_hits_total`, func() float64 {
+ return float64(idbm().DateMetricIDsSearchHits)
+ })
+ metrics.NewGauge(`vm_index_blocks_with_metric_ids_processed_total`, func() float64 {
+ return float64(idbm().IndexBlocksWithMetricIDsProcessed)
+ })
+ metrics.NewGauge(`vm_index_blocks_with_metric_ids_incorrect_order_total`, func() float64 {
+ return float64(idbm().IndexBlocksWithMetricIDsIncorrectOrder)
+ })
metrics.NewGauge(`vm_assisted_merges_total{type="storage/small"}`, func() float64 {
return float64(tm().SmallAssistedMerges)
@@ -320,6 +309,39 @@ func registerStorageMetrics(strg *storage.Storage) {
return float64(idbm().BlocksCount)
})
+ metrics.NewGauge(`vm_data_size_bytes{type="storage/big"}`, func() float64 {
+ return float64(tm().BigSizeBytes)
+ })
+ metrics.NewGauge(`vm_data_size_bytes{type="storage/small"}`, func() float64 {
+ return float64(tm().SmallSizeBytes)
+ })
+ metrics.NewGauge(`vm_data_size_bytes{type="indexdb"}`, func() float64 {
+ return float64(idbm().SizeBytes)
+ })
+ metrics.NewGauge(`vm_rows_ignored_total{reason="big_timestamp"}`, func() float64 {
+ return float64(m().TooBigTimestampRows)
+ })
+ metrics.NewGauge(`vm_rows_ignored_total{reason="small_timestamp"}`, func() float64 {
+ return float64(m().TooSmallTimestampRows)
+ })
+ metrics.NewGauge(`vm_concurrent_addrows_limit_reached_total`, func() float64 {
+ return float64(m().AddRowsConcurrencyLimitReached)
+ })
+ metrics.NewGauge(`vm_concurrent_addrows_limit_timeout_total`, func() float64 {
+ return float64(m().AddRowsConcurrencyLimitTimeout)
+ })
+ metrics.NewGauge(`vm_concurrent_addrows_dropped_rows_total`, func() float64 {
+ return float64(m().AddRowsConcurrencyDroppedRows)
+ })
+ metrics.NewGauge(`vm_concurrent_addrows_capacity`, func() float64 {
+ return float64(m().AddRowsConcurrencyCapacity)
+ })
+ metrics.NewGauge(`vm_concurrent_addrows_current`, func() float64 {
+ return float64(m().AddRowsConcurrencyCurrent)
+ })
metrics.NewGauge(`vm_rows{type="storage/big"}`, func() float64 {
return float64(tm().BigRowsCount)
})
@@ -330,6 +352,24 @@ func registerStorageMetrics(strg *storage.Storage) {
return float64(idbm().ItemsCount)
})
+ metrics.NewGauge(`vm_date_range_search_calls_total`, func() float64 {
+ return float64(idbm().DateRangeSearchCalls)
+ })
+ metrics.NewGauge(`vm_date_range_hits_total`, func() float64 {
+ return float64(idbm().DateRangeSearchHits)
+ })
+ metrics.NewGauge(`vm_missing_metric_names_for_metric_id_total`, func() float64 {
+ return float64(idbm().MissingMetricNamesForMetricID)
+ })
+ metrics.NewGauge(`vm_date_metric_id_cache_syncs_total`, func() float64 {
+ return float64(m().DateMetricIDCacheSyncsCount)
+ })
+ metrics.NewGauge(`vm_date_metric_id_cache_resets_total`, func() float64 {
+ return float64(m().DateMetricIDCacheResetsCount)
+ })
metrics.NewGauge(`vm_cache_entries{type="storage/tsid"}`, func() float64 {
return float64(m().TSIDCacheSize)
})
@@ -342,6 +382,9 @@ func registerStorageMetrics(strg *storage.Storage) {
metrics.NewGauge(`vm_cache_entries{type="storage/date_metricID"}`, func() float64 {
return float64(m().DateMetricIDCacheSize)
})
metrics.NewGauge(`vm_cache_entries{type="storage/hour_metric_ids"}`, func() float64 {
return float64(m().HourMetricIDCacheSize)
})
metrics.NewGauge(`vm_cache_entries{type="storage/bigIndexBlocks"}`, func() float64 {
return float64(tm().BigIndexBlocksCacheSize)
})
@@ -357,24 +400,33 @@ func registerStorageMetrics(strg *storage.Storage) {
metrics.NewGauge(`vm_cache_entries{type="indexdb/tagFilters"}`, func() float64 {
return float64(idbm().TagCacheSize)
})
metrics.NewGauge(`vm_cache_entries{type="indexdb/uselessTagFilters"}`, func() float64 {
return float64(idbm().UselessTagFiltersCacheSize)
})
metrics.NewGauge(`vm_cache_entries{type="storage/regexps"}`, func() float64 {
return float64(storage.RegexpCacheSize())
})
metrics.NewGauge(`vm_cache_size_bytes{type="storage/tsid"}`, func() float64 {
- return float64(m().TSIDCacheBytesSize)
+ return float64(m().TSIDCacheSizeBytes)
})
metrics.NewGauge(`vm_cache_size_bytes{type="storage/metricIDs"}`, func() float64 {
- return float64(m().MetricIDCacheBytesSize)
+ return float64(m().MetricIDCacheSizeBytes)
})
metrics.NewGauge(`vm_cache_size_bytes{type="storage/metricName"}`, func() float64 {
- return float64(m().MetricNameCacheBytesSize)
+ return float64(m().MetricNameCacheSizeBytes)
})
metrics.NewGauge(`vm_cache_size_bytes{type="storage/date_metricID"}`, func() float64 {
- return float64(m().DateMetricIDCacheBytesSize)
+ return float64(m().DateMetricIDCacheSizeBytes)
})
metrics.NewGauge(`vm_cache_size_bytes{type="storage/hour_metric_ids"}`, func() float64 {
return float64(m().HourMetricIDCacheSizeBytes)
})
metrics.NewGauge(`vm_cache_size_bytes{type="indexdb/tagFilters"}`, func() float64 {
- return float64(idbm().TagCacheBytesSize)
+ return float64(idbm().TagCacheSizeBytes)
})
metrics.NewGauge(`vm_cache_size_bytes{type="indexdb/uselessTagFilters"}`, func() float64 {
return float64(idbm().UselessTagFiltersCacheSizeBytes)
})
metrics.NewGauge(`vm_cache_requests_total{type="storage/tsid"}`, func() float64 {
@@ -386,9 +438,6 @@ func registerStorageMetrics(strg *storage.Storage) {
metrics.NewGauge(`vm_cache_requests_total{type="storage/metricName"}`, func() float64 {
return float64(m().MetricNameCacheRequests)
})
metrics.NewGauge(`vm_cache_requests_total{type="storage/date_metricID"}`, func() float64 {
return float64(m().DateMetricIDCacheRequests)
})
metrics.NewGauge(`vm_cache_requests_total{type="storage/bigIndexBlocks"}`, func() float64 {
return float64(tm().BigIndexBlocksCacheRequests)
})
@@ -404,6 +453,9 @@ func registerStorageMetrics(strg *storage.Storage) {
metrics.NewGauge(`vm_cache_requests_total{type="indexdb/tagFilters"}`, func() float64 {
return float64(idbm().TagCacheRequests)
})
metrics.NewGauge(`vm_cache_requests_total{type="indexdb/uselessTagFilters"}`, func() float64 {
return float64(idbm().UselessTagFiltersCacheRequests)
})
metrics.NewGauge(`vm_cache_requests_total{type="storage/regexps"}`, func() float64 {
return float64(storage.RegexpCacheRequests())
})
@@ -417,9 +469,6 @@ func registerStorageMetrics(strg *storage.Storage) {
metrics.NewGauge(`vm_cache_misses_total{type="storage/metricName"}`, func() float64 {
return float64(m().MetricNameCacheMisses)
})
metrics.NewGauge(`vm_cache_misses_total{type="storage/date_metricID"}`, func() float64 {
return float64(m().DateMetricIDCacheMisses)
})
metrics.NewGauge(`vm_cache_misses_total{type="storage/bigIndexBlocks"}`, func() float64 {
return float64(tm().BigIndexBlocksCacheMisses)
})
@@ -435,6 +484,9 @@ func registerStorageMetrics(strg *storage.Storage) {
metrics.NewGauge(`vm_cache_misses_total{type="indexdb/tagFilters"}`, func() float64 {
return float64(idbm().TagCacheMisses)
})
metrics.NewGauge(`vm_cache_misses_total{type="indexdb/uselessTagFilters"}`, func() float64 {
return float64(idbm().UselessTagFiltersCacheMisses)
})
metrics.NewGauge(`vm_cache_misses_total{type="storage/regexps"}`, func() float64 {
return float64(storage.RegexpCacheMisses())
})
@@ -449,7 +501,4 @@ func registerStorageMetrics(strg *storage.Storage) {
metrics.NewGauge(`vm_cache_collisions_total{type="storage/metricName"}`, func() float64 {
return float64(m().MetricNameCacheCollisions)
})
metrics.NewGauge(`vm_cache_collisions_total{type="storage/date_metricID"}`, func() float64 {
return float64(m().DateMetricIDCacheCollisions)
})
}
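Every gauge above uses the callback form from github.com/VictoriaMetrics/metrics: the function runs at scrape time, so exported values always reflect the live storage.Metrics counters without a separate update loop. A standalone sketch of the same pattern (the metric name, counter and listen address here are illustrative, not taken from vmstorage):

package main

import (
	"net/http"
	"sync/atomic"

	"github.com/VictoriaMetrics/metrics"
)

var rowsAdded uint64

func main() {
	// The callback is evaluated on every scrape, exactly like the
	// registerStorageMetrics gauges evaluate m(), tm() and idbm().
	metrics.NewGauge(`example_rows_added_total`, func() float64 {
		return float64(atomic.LoadUint64(&rowsAdded))
	})
	http.HandleFunc("/metrics", func(w http.ResponseWriter, r *http.Request) {
		metrics.WritePrometheus(w, false)
	})
	_ = http.ListenAndServe(":8080", nil)
}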


@@ -0,0 +1,811 @@
package transport
import (
"flag"
"fmt"
"io"
"net"
"sync"
"sync/atomic"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/consts"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/handshake"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
"github.com/VictoriaMetrics/metrics"
)
var (
maxTagKeysPerSearch = flag.Int("search.maxTagKeys", 100e3, "The maximum number of tag keys returned per search")
maxTagValuesPerSearch = flag.Int("search.maxTagValues", 100e3, "The maximum number of tag values returned per search")
maxMetricsPerSearch = flag.Int("search.maxUniqueTimeseries", 300e3, "The maximum number of unique time series each search can scan")
precisionBits = flag.Int("precisionBits", 64, "The number of precision bits to store per value. Lower precision bits improve data compression at the cost of precision loss")
disableRPCCompression = flag.Bool(`rpc.disableCompression`, false, "Disable compression of RPC traffic. This reduces CPU usage at the cost of higher network bandwidth usage")
)
// Server processes connections from vminsert and vmselect.
type Server struct {
storage *storage.Storage
vminsertLN net.Listener
vmselectLN net.Listener
vminsertWG sync.WaitGroup
vmselectWG sync.WaitGroup
vminsertConnsMap connsMap
vmselectConnsMap connsMap
stopFlag uint64
}
type connsMap struct {
mu sync.Mutex
m map[net.Conn]struct{}
}
func (cm *connsMap) Init() {
cm.m = make(map[net.Conn]struct{})
}
func (cm *connsMap) Add(c net.Conn) {
cm.mu.Lock()
cm.m[c] = struct{}{}
cm.mu.Unlock()
}
func (cm *connsMap) Delete(c net.Conn) {
cm.mu.Lock()
delete(cm.m, c)
cm.mu.Unlock()
}
func (cm *connsMap) CloseAll() {
cm.mu.Lock()
for c := range cm.m {
_ = c.Close()
}
cm.mu.Unlock()
}
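Tracking every accepted connection in connsMap is what makes graceful shutdown possible: CloseAll closes each conn out from under its reader goroutine, so a blocked Read returns an error and the goroutine can exit. A self-contained sketch of that close-unblocks-read behaviour using net.Pipe:

package main

import (
	"fmt"
	"net"
)

func main() {
	client, server := net.Pipe()
	defer client.Close()

	done := make(chan error, 1)
	go func() {
		buf := make([]byte, 1)
		_, err := server.Read(buf) // blocks until data arrives or the conn is closed
		done <- err
	}()

	// This is what connsMap.CloseAll() does for every tracked conn:
	// closing the conn forces the pending Read to return an error.
	_ = server.Close()
	fmt.Println("read unblocked with:", <-done)
}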
// NewServer returns new Server.
func NewServer(vminsertAddr, vmselectAddr string, storage *storage.Storage) (*Server, error) {
vminsertLN, err := netutil.NewTCPListener("vminsert", vminsertAddr)
if err != nil {
return nil, fmt.Errorf("unable to listen vminsertAddr %s: %s", vminsertAddr, err)
}
vmselectLN, err := netutil.NewTCPListener("vmselect", vmselectAddr)
if err != nil {
return nil, fmt.Errorf("unable to listen vmselectAddr %s: %s", vmselectAddr, err)
}
if err := encoding.CheckPrecisionBits(uint8(*precisionBits)); err != nil {
return nil, fmt.Errorf("invalid -precisionBits: %s", err)
}
// Set network-level write timeouts to reasonable values in order to protect
// from broken networks.
// Do not set read timeouts, since they are managed separately -
// search for SetReadDeadline in this file.
vminsertLN.WriteTimeout = time.Minute
vmselectLN.WriteTimeout = time.Minute
s := &Server{
storage: storage,
vminsertLN: vminsertLN,
vmselectLN: vmselectLN,
}
s.vminsertConnsMap.Init()
s.vmselectConnsMap.Init()
return s, nil
}
// RunVMInsert runs a server accepting connections from vminsert.
func (s *Server) RunVMInsert() {
logger.Infof("accepting vminsert conns at %s", s.vminsertLN.Addr())
for {
c, err := s.vminsertLN.Accept()
if err != nil {
if pe, ok := err.(net.Error); ok && pe.Temporary() {
continue
}
if s.isStopping() {
return
}
logger.Panicf("FATAL: cannot process vminsert conns at %s: %s", s.vminsertLN.Addr(), err)
}
logger.Infof("accepted vminsert conn from %s", c.RemoteAddr())
vminsertConns.Inc()
s.vminsertConnsMap.Add(c)
s.vminsertWG.Add(1)
go func() {
defer func() {
s.vminsertConnsMap.Delete(c)
vminsertConns.Dec()
s.vminsertWG.Done()
}()
// There is no need for response compression, since
// vmstorage doesn't send anything back to vminsert.
compressionLevel := 0
bc, err := handshake.VMInsertServer(c, compressionLevel)
if err != nil {
if s.isStopping() {
// c is closed inside Server.MustClose
return
}
logger.Errorf("cannot perform vminsert handshake with client %q: %s", c.RemoteAddr(), err)
_ = c.Close()
return
}
defer func() {
if !s.isStopping() {
logger.Infof("closing vminsert conn from %s", c.RemoteAddr())
}
_ = bc.Close()
}()
logger.Infof("processing vminsert conn from %s", c.RemoteAddr())
if err := s.processVMInsertConn(bc); err != nil {
if s.isStopping() {
return
}
vminsertConnErrors.Inc()
logger.Errorf("cannot process vminsert conn from %s: %s", c.RemoteAddr(), err)
}
}()
}
}
var (
vminsertConns = metrics.NewCounter("vm_vminsert_conns")
vminsertConnErrors = metrics.NewCounter("vm_vminsert_conn_errors_total")
)
// RunVMSelect runs a server accepting connections from vmselect.
func (s *Server) RunVMSelect() {
logger.Infof("accepting vmselect conns at %s", s.vmselectLN.Addr())
for {
c, err := s.vmselectLN.Accept()
if err != nil {
if pe, ok := err.(net.Error); ok && pe.Temporary() {
continue
}
if s.isStopping() {
return
}
logger.Panicf("FATAL: cannot process vmselect conns at %s: %s", s.vmselectLN.Addr(), err)
}
logger.Infof("accepted vmselect conn from %s", c.RemoteAddr())
vmselectConns.Inc()
s.vmselectConnsMap.Add(c)
s.vmselectWG.Add(1)
go func() {
defer func() {
s.vmselectConnsMap.Delete(c)
vmselectConns.Dec()
s.vmselectWG.Done()
}()
// Compress responses to vmselect even if they already contain compressed blocks.
// Responses contain uncompressed metric names, which should compress well
// when the response contains a high number of time series.
// Additionally, recently added metric blocks are usually uncompressed, so the compression
// should save network bandwidth.
compressionLevel := 1
if *disableRPCCompression {
compressionLevel = 0
}
bc, err := handshake.VMSelectServer(c, compressionLevel)
if err != nil {
if s.isStopping() {
// c is closed inside Server.MustClose
return
}
logger.Errorf("cannot perform vmselect handshake with client %q: %s", c.RemoteAddr(), err)
_ = c.Close()
return
}
defer func() {
if !s.isStopping() {
logger.Infof("closing vmselect conn from %s", c.RemoteAddr())
}
_ = bc.Close()
}()
logger.Infof("processing vmselect conn from %s", c.RemoteAddr())
if err := s.processVMSelectConn(bc); err != nil {
if s.isStopping() {
return
}
vmselectConnErrors.Inc()
logger.Errorf("cannot process vmselect conn %s: %s", c.RemoteAddr(), err)
}
}()
}
}
var (
vmselectConns = metrics.NewCounter("vm_vmselect_conns")
vmselectConnErrors = metrics.NewCounter("vm_vmselect_conn_errors_total")
)
// MustClose gracefully closes the server,
// so it no longer touches s.storage after returning.
func (s *Server) MustClose() {
// Mark the server as stopping.
s.setIsStopping()
// Stop accepting new connections from vminsert and vmselect.
if err := s.vminsertLN.Close(); err != nil {
logger.Panicf("FATAL: cannot close vminsert listener: %s", err)
}
if err := s.vmselectLN.Close(); err != nil {
logger.Panicf("FATAL: cannot close vmselect listener: %s", err)
}
// Close existing connections from vminsert, so the goroutines
// processing these connections are finished.
s.vminsertConnsMap.CloseAll()
// Close existing connections from vmselect, so the goroutines
// processing these connections are finished.
s.vmselectConnsMap.CloseAll()
// Wait until all the goroutines processing vminsert and vmselect conns
// are finished.
s.vminsertWG.Wait()
s.vmselectWG.Wait()
}
func (s *Server) setIsStopping() {
atomic.StoreUint64(&s.stopFlag, 1)
}
func (s *Server) isStopping() bool {
return atomic.LoadUint64(&s.stopFlag) != 0
}
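setIsStopping and isStopping implement a common accept-loop shutdown idiom: set the flag first, then close the listener, so the error returned by the blocked Accept can be recognized as an expected shutdown rather than a fatal failure. A distilled, runnable sketch of the same pattern (all names here are illustrative):

package main

import (
	"fmt"
	"net"
	"sync/atomic"
	"time"
)

type server struct {
	ln       net.Listener
	stopFlag uint64
}

func (s *server) isStopping() bool { return atomic.LoadUint64(&s.stopFlag) != 0 }

func (s *server) run() {
	for {
		c, err := s.ln.Accept()
		if err != nil {
			if s.isStopping() {
				return // expected: the listener was closed by stop()
			}
			panic(err) // unexpected accept error
		}
		_ = c.Close()
	}
}

func (s *server) stop() {
	atomic.StoreUint64(&s.stopFlag, 1) // set the flag BEFORE closing the listener
	_ = s.ln.Close()                   // unblocks the pending Accept
}

func main() {
	ln, err := net.Listen("tcp", "127.0.0.1:0")
	if err != nil {
		panic(err)
	}
	s := &server{ln: ln}
	done := make(chan struct{})
	go func() { s.run(); close(done) }()
	time.Sleep(10 * time.Millisecond)
	s.stop()
	<-done
	fmt.Println("accept loop stopped cleanly")
}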
func (s *Server) processVMInsertConn(r io.Reader) error {
sizeBuf := make([]byte, 8)
var buf []byte
var mrs []storage.MetricRow
for {
if _, err := io.ReadFull(r, sizeBuf); err != nil {
if err == io.EOF {
// Remote end gracefully closed the connection.
return nil
}
return fmt.Errorf("cannot read packet size: %s", err)
}
packetSize := encoding.UnmarshalUint64(sizeBuf)
if packetSize > consts.MaxInsertPacketSize {
return fmt.Errorf("too big packet size: %d; shouldn't exceed %d", packetSize, consts.MaxInsertPacketSize)
}
buf = bytesutil.Resize(buf, int(packetSize))
if n, err := io.ReadFull(r, buf); err != nil {
return fmt.Errorf("cannot read packet with size %d: %s; read only %d bytes", packetSize, err, n)
}
vminsertPacketsRead.Inc()
// Read metric rows from the packet.
mrs = mrs[:0]
tail := buf
for len(tail) > 0 {
if len(mrs) < cap(mrs) {
mrs = mrs[:len(mrs)+1]
} else {
mrs = append(mrs, storage.MetricRow{})
}
mr := &mrs[len(mrs)-1]
var err error
tail, err = mr.Unmarshal(tail)
if err != nil {
return fmt.Errorf("cannot unmarshal MetricRow: %s", err)
}
}
vminsertMetricsRead.Add(len(mrs))
if err := s.storage.AddRows(mrs, uint8(*precisionBits)); err != nil {
return fmt.Errorf("cannot store metrics: %s", err)
}
}
}
var (
vminsertPacketsRead = metrics.NewCounter("vm_vminsert_packets_read_total")
vminsertMetricsRead = metrics.NewCounter("vm_vminsert_metrics_read_total")
)
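processVMInsertConn implies a very simple wire format: each packet is an 8-byte size header followed by concatenated marshaled MetricRows. A hedged client-side framing sketch; it reuses encoding.MarshalUint64 from the same repository, assumes rowsBuf already holds marshaled rows, and omits the handshake that the real vminsert client performs first:

package client

import (
	"fmt"
	"io"

	"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
)

// writePacket frames one insert packet exactly as processVMInsertConn
// reads it: an 8-byte size header followed by the payload bytes.
func writePacket(w io.Writer, rowsBuf []byte) error {
	sizeBuf := encoding.MarshalUint64(nil, uint64(len(rowsBuf)))
	if _, err := w.Write(sizeBuf); err != nil {
		return fmt.Errorf("cannot write packet size: %s", err)
	}
	if _, err := w.Write(rowsBuf); err != nil {
		return fmt.Errorf("cannot write %d payload bytes: %s", len(rowsBuf), err)
	}
	return nil
}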
func (s *Server) processVMSelectConn(bc *handshake.BufferedConn) error {
ctx := &vmselectRequestCtx{
bc: bc,
sizeBuf: make([]byte, 8),
}
for {
if err := s.processVMSelectRequest(ctx); err != nil {
if err == io.EOF {
// Remote client gracefully closed the connection.
return nil
}
return fmt.Errorf("cannot process vmselect request: %s", err)
}
if err := bc.Flush(); err != nil {
return fmt.Errorf("cannot flush compressed buffers: %s", err)
}
}
}
type vmselectRequestCtx struct {
bc *handshake.BufferedConn
sizeBuf []byte
dataBuf []byte
sq storage.SearchQuery
tfss []*storage.TagFilters
sr storage.Search
}
func (ctx *vmselectRequestCtx) readUint32() (uint32, error) {
ctx.sizeBuf = bytesutil.Resize(ctx.sizeBuf, 4)
if _, err := io.ReadFull(ctx.bc, ctx.sizeBuf); err != nil {
if err == io.EOF {
return 0, err
}
return 0, fmt.Errorf("cannot read uint32: %s", err)
}
n := encoding.UnmarshalUint32(ctx.sizeBuf)
return n, nil
}
func (ctx *vmselectRequestCtx) readDataBufBytes(maxDataSize int) error {
ctx.sizeBuf = bytesutil.Resize(ctx.sizeBuf, 8)
if _, err := io.ReadFull(ctx.bc, ctx.sizeBuf); err != nil {
if err == io.EOF {
return err
}
return fmt.Errorf("cannot read data size: %s", err)
}
dataSize := encoding.UnmarshalUint64(ctx.sizeBuf)
if dataSize > uint64(maxDataSize) {
return fmt.Errorf("too big data size: %d; it mustn't exceed %d bytes", dataSize, maxDataSize)
}
ctx.dataBuf = bytesutil.Resize(ctx.dataBuf, int(dataSize))
if dataSize == 0 {
return nil
}
if n, err := io.ReadFull(ctx.bc, ctx.dataBuf); err != nil {
return fmt.Errorf("cannot read data with size %d: %s; read only %d bytes", dataSize, err, n)
}
return nil
}
func (ctx *vmselectRequestCtx) readBool() (bool, error) {
ctx.dataBuf = bytesutil.Resize(ctx.dataBuf, 1)
if _, err := io.ReadFull(ctx.bc, ctx.dataBuf); err != nil {
if err == io.EOF {
return false, err
}
return false, fmt.Errorf("cannot read bool: %s", err)
}
v := ctx.dataBuf[0] != 0
return v, nil
}
func (ctx *vmselectRequestCtx) writeDataBufBytes() error {
if err := ctx.writeUint64(uint64(len(ctx.dataBuf))); err != nil {
return fmt.Errorf("cannot write data size: %s", err)
}
if len(ctx.dataBuf) == 0 {
return nil
}
if _, err := ctx.bc.Write(ctx.dataBuf); err != nil {
return fmt.Errorf("cannot write data with size %d: %s", len(ctx.dataBuf), err)
}
return nil
}
func (ctx *vmselectRequestCtx) writeString(s string) error {
ctx.dataBuf = append(ctx.dataBuf[:0], s...)
return ctx.writeDataBufBytes()
}
func (ctx *vmselectRequestCtx) writeUint64(n uint64) error {
ctx.sizeBuf = encoding.MarshalUint64(ctx.sizeBuf[:0], n)
if _, err := ctx.bc.Write(ctx.sizeBuf); err != nil {
return fmt.Errorf("cannot write uint64 %d: %s", n, err)
}
return nil
}
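writeDataBufBytes, writeString and writeUint64 fix the response encoding: every payload is a uint64 length followed by that many bytes. The matching client-side decoder would look like the following sketch (the maxSize guard is added by analogy with readDataBufBytes; this is illustrative, not the actual vmselect code):

package client

import (
	"fmt"
	"io"

	"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
)

// readString decodes one length-prefixed string produced by writeString.
func readString(r io.Reader, maxSize uint64) (string, error) {
	var sizeBuf [8]byte
	if _, err := io.ReadFull(r, sizeBuf[:]); err != nil {
		return "", fmt.Errorf("cannot read string size: %s", err)
	}
	size := encoding.UnmarshalUint64(sizeBuf[:])
	if size > maxSize {
		return "", fmt.Errorf("too big string size: %d; mustn't exceed %d", size, maxSize)
	}
	buf := make([]byte, size)
	if _, err := io.ReadFull(r, buf); err != nil {
		return "", fmt.Errorf("cannot read string with size %d: %s", size, err)
	}
	return string(buf), nil
}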
const maxRPCNameSize = 128
var zeroTime time.Time
func (s *Server) processVMSelectRequest(ctx *vmselectRequestCtx) error {
// Read rpcName
// Do not set a deadline on reading rpcName, since the next request
// may arrive after a long idle period.
if err := ctx.readDataBufBytes(maxRPCNameSize); err != nil {
if err == io.EOF {
// Remote client gracefully closed the connection.
return err
}
return fmt.Errorf("cannot read rpcName: %s", err)
}
// Limit the time required for reading request args.
if err := ctx.bc.SetReadDeadline(time.Now().Add(5 * time.Second)); err != nil {
return fmt.Errorf("cannot set read deadline for reading request args: %s", err)
}
defer func() {
_ = ctx.bc.SetReadDeadline(zeroTime)
}()
switch string(ctx.dataBuf) {
case "search_v3":
return s.processVMSelectSearchQuery(ctx)
case "labelValues":
return s.processVMSelectLabelValues(ctx)
case "labelEntries":
return s.processVMSelectLabelEntries(ctx)
case "labels":
return s.processVMSelectLabels(ctx)
case "seriesCount":
return s.processVMSelectSeriesCount(ctx)
case "deleteMetrics_v2":
return s.processVMSelectDeleteMetrics(ctx)
default:
return fmt.Errorf("unsupported rpcName: %q", ctx.dataBuf)
}
}
const maxTagFiltersSize = 64 * 1024
func (s *Server) processVMSelectDeleteMetrics(ctx *vmselectRequestCtx) error {
vmselectDeleteMetricsRequests.Inc()
// Read request
if err := ctx.readDataBufBytes(maxTagFiltersSize); err != nil {
return fmt.Errorf("cannot read labelName: %s", err)
}
tail, err := ctx.sq.Unmarshal(ctx.dataBuf)
if err != nil {
return fmt.Errorf("cannot unmarshal SearchQuery: %s", err)
}
if len(tail) > 0 {
return fmt.Errorf("unexpected non-zero tail left after unmarshaling SearchQuery: (len=%d) %q", len(tail), tail)
}
// Setup ctx.tfss
if err := ctx.setupTfss(); err != nil {
// Send the error message to vmselect.
errMsg := err.Error()
if err := ctx.writeString(errMsg); err != nil {
return fmt.Errorf("cannot send error message: %s", err)
}
return nil
}
// Delete the given metrics.
deletedCount, err := s.storage.DeleteMetrics(ctx.tfss)
if err != nil {
if err := ctx.writeString(err.Error()); err != nil {
return fmt.Errorf("cannot send error message: %s", err)
}
return nil
}
// Send an empty error message to vmselect.
if err := ctx.writeString(""); err != nil {
return fmt.Errorf("cannot send empty error message: %s", err)
}
// Send deletedCount to vmselect.
if err := ctx.writeUint64(uint64(deletedCount)); err != nil {
return fmt.Errorf("cannot send deletedCount=%d: %s", deletedCount, err)
}
return nil
}
func (s *Server) processVMSelectLabels(ctx *vmselectRequestCtx) error {
vmselectLabelsRequests.Inc()
// Read request
accountID, err := ctx.readUint32()
if err != nil {
return fmt.Errorf("cannot read accountID: %s", err)
}
projectID, err := ctx.readUint32()
if err != nil {
return fmt.Errorf("cannot read projectID: %s", err)
}
// Search for tag keys
labels, err := s.storage.SearchTagKeys(accountID, projectID, *maxTagKeysPerSearch)
if err != nil {
// Send the error message to vmselect.
errMsg := fmt.Sprintf("error during labels search: %s", err)
if err := ctx.writeString(errMsg); err != nil {
return fmt.Errorf("cannot send error message: %s", err)
}
return nil
}
// Send an empty error message to vmselect.
if err := ctx.writeString(""); err != nil {
return fmt.Errorf("cannot send empty error message: %s", err)
}
// Send labels to vmselect
for _, label := range labels {
if len(label) == 0 {
// Do this substitution in order to prevent clashing with 'end of response' marker.
label = "__name__"
}
if err := ctx.writeString(label); err != nil {
return fmt.Errorf("cannot write label %q: %s", label, err)
}
}
// Send 'end of response' marker
if err := ctx.writeString(""); err != nil {
return fmt.Errorf("cannot send 'end of response' marker")
}
return nil
}
const maxLabelValueSize = 16 * 1024
func (s *Server) processVMSelectLabelValues(ctx *vmselectRequestCtx) error {
vmselectLabelValuesRequests.Inc()
// Read request
accountID, err := ctx.readUint32()
if err != nil {
return fmt.Errorf("cannot read accountID: %s", err)
}
projectID, err := ctx.readUint32()
if err != nil {
return fmt.Errorf("cannot read projectID: %s", err)
}
if err := ctx.readDataBufBytes(maxLabelValueSize); err != nil {
return fmt.Errorf("cannot read labelName: %s", err)
}
labelName := ctx.dataBuf
// Search for tag values
labelValues, err := s.storage.SearchTagValues(accountID, projectID, labelName, *maxTagValuesPerSearch)
if err != nil {
// Send the error message to vmselect.
errMsg := fmt.Sprintf("error during label values search for labelName=%q: %s", labelName, err)
if err := ctx.writeString(errMsg); err != nil {
return fmt.Errorf("cannot send error message: %s", err)
}
return nil
}
// Send an empty error message to vmselect.
if err := ctx.writeString(""); err != nil {
return fmt.Errorf("cannot send empty error message: %s", err)
}
return writeLabelValues(ctx, labelValues)
}
func writeLabelValues(ctx *vmselectRequestCtx, labelValues []string) error {
for _, labelValue := range labelValues {
if len(labelValue) == 0 {
// Skip empty label values, since they make no sense for Prometheus.
continue
}
if err := ctx.writeString(labelValue); err != nil {
return fmt.Errorf("cannot write labelValue %q: %s", labelValue, err)
}
}
// Send 'end of label values' marker
if err := ctx.writeString(""); err != nil {
return fmt.Errorf("cannot send 'end of response' marker")
}
return nil
}
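Because an empty string doubles as the 'end of response' marker, a client drains a label values response by reading strings until it receives an empty one; the first string on the wire is the error message, which is empty on success. A sketch reusing the readString helper from the previous example:

// readLabelValues drains one labelValues response: first the error string
// (empty on success), then values until the empty end-of-response marker.
func readLabelValues(r io.Reader) ([]string, error) {
	errMsg, err := readString(r, 16*1024) // matches maxLabelValueSize
	if err != nil {
		return nil, err
	}
	if errMsg != "" {
		return nil, fmt.Errorf("remote error: %s", errMsg)
	}
	var values []string
	for {
		v, err := readString(r, 16*1024)
		if err != nil {
			return nil, err
		}
		if v == "" {
			return values, nil // end-of-response marker
		}
		values = append(values, v)
	}
}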
func (s *Server) processVMSelectLabelEntries(ctx *vmselectRequestCtx) error {
vmselectLabelEntriesRequests.Inc()
// Read request
accountID, err := ctx.readUint32()
if err != nil {
return fmt.Errorf("cannot read accountID: %s", err)
}
projectID, err := ctx.readUint32()
if err != nil {
return fmt.Errorf("cannot read projectID: %s", err)
}
// Perform the request
labelEntries, err := s.storage.SearchTagEntries(accountID, projectID, *maxTagKeysPerSearch, *maxTagValuesPerSearch)
if err != nil {
// Send the error message to vmselect.
errMsg := fmt.Sprintf("error during label entries search: %s", err)
if err := ctx.writeString(errMsg); err != nil {
return fmt.Errorf("cannot send error message: %s", err)
}
return nil
}
// Send an empty error message to vmselect.
if err := ctx.writeString(""); err != nil {
return fmt.Errorf("cannot send empty error message: %s", err)
}
// Send labelEntries to vmselect
for i := range labelEntries {
e := &labelEntries[i]
label := e.Key
if label == "" {
// Do this substitution in order to prevent clashing with 'end of response' marker.
label = "__name__"
}
if err := ctx.writeString(label); err != nil {
return fmt.Errorf("cannot write label %q: %s", label, err)
}
if err := writeLabelValues(ctx, e.Values); err != nil {
return fmt.Errorf("cannot write label values for %q: %s", label, err)
}
}
// Send 'end of response' marker
if err := ctx.writeString(""); err != nil {
return fmt.Errorf("cannot send 'end of response' marker")
}
return nil
}
func (s *Server) processVMSelectSeriesCount(ctx *vmselectRequestCtx) error {
vmselectSeriesCountRequests.Inc()
// Read request
accountID, err := ctx.readUint32()
if err != nil {
return fmt.Errorf("cannot read accountID: %s", err)
}
projectID, err := ctx.readUint32()
if err != nil {
return fmt.Errorf("cannot read projectID: %s", err)
}
// Execute the request
n, err := s.storage.GetSeriesCount(accountID, projectID)
if err != nil {
// Send the error message to vmselect.
errMsg := fmt.Sprintf("error during obtaining series count: %s", err)
if err := ctx.writeString(errMsg); err != nil {
return fmt.Errorf("cannot send error message: %s", err)
}
return nil
}
// Send an empty error message to vmselect.
if err := ctx.writeString(""); err != nil {
return fmt.Errorf("cannot send empty error message: %s", err)
}
// Send series count to vmselect.
if err := ctx.writeUint64(n); err != nil {
return fmt.Errorf("cannot write series count to vmselect: %s", err)
}
return nil
}
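Symmetrically, the request side of seriesCount is just the length-prefixed rpcName followed by two raw uint32 values, matching readDataBufBytes and readUint32 above. A hedged client-side sketch in the same vein as the earlier client helpers (handshake omitted; byte order is whatever lib/encoding uses on both ends):

// writeSeriesCountRequest encodes a seriesCount request the way the
// server reads it: length-prefixed rpcName, then accountID and projectID.
func writeSeriesCountRequest(w io.Writer, accountID, projectID uint32) error {
	const rpcName = "seriesCount"
	buf := encoding.MarshalUint64(nil, uint64(len(rpcName)))
	buf = append(buf, rpcName...)
	buf = encoding.MarshalUint32(buf, accountID)
	buf = encoding.MarshalUint32(buf, projectID)
	_, err := w.Write(buf)
	return err
}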
// maxSearchQuerySize is the maximum size of SearchQuery packet in bytes.
const maxSearchQuerySize = 1024 * 1024
func (s *Server) processVMSelectSearchQuery(ctx *vmselectRequestCtx) error {
vmselectSearchQueryRequests.Inc()
// Read search query.
if err := ctx.readDataBufBytes(maxSearchQuerySize); err != nil {
return fmt.Errorf("cannot read searchQuery: %s", err)
}
tail, err := ctx.sq.Unmarshal(ctx.dataBuf)
if err != nil {
return fmt.Errorf("cannot unmarshal SearchQuery: %s", err)
}
if len(tail) > 0 {
return fmt.Errorf("unexpected non-zero tail left after unmarshaling SearchQuery: (len=%d) %q", len(tail), tail)
}
fetchData, err := ctx.readBool()
if err != nil {
return fmt.Errorf("cannot read `fetchData` bool: %s", err)
}
// Setup search.
if err := ctx.setupTfss(); err != nil {
// Send the error message to vmselect.
errMsg := err.Error()
if err := ctx.writeString(errMsg); err != nil {
return fmt.Errorf("cannot send error message: %s", err)
}
return nil
}
tr := storage.TimeRange{
MinTimestamp: ctx.sq.MinTimestamp,
MaxTimestamp: ctx.sq.MaxTimestamp,
}
ctx.sr.Init(s.storage, ctx.tfss, tr, fetchData, *maxMetricsPerSearch)
defer ctx.sr.MustClose()
if err := ctx.sr.Error(); err != nil {
// Send the error message to vmselect.
errMsg := fmt.Sprintf("search error: %s", err)
if err := ctx.writeString(errMsg); err != nil {
return fmt.Errorf("cannot send error message: %s", err)
}
return nil
}
// Send empty error message to vmselect.
if err := ctx.writeString(""); err != nil {
return fmt.Errorf("cannot send empty error message: %s", err)
}
// Send found blocks to vmselect.
for ctx.sr.NextMetricBlock() {
mb := ctx.sr.MetricBlock
vmselectMetricBlocksRead.Inc()
vmselectMetricRowsRead.Add(mb.Block.RowsCount())
ctx.dataBuf = mb.Marshal(ctx.dataBuf[:0])
if err := ctx.writeDataBufBytes(); err != nil {
return fmt.Errorf("cannot send MetricBlock: %s", err)
}
}
if err := ctx.sr.Error(); err != nil {
return fmt.Errorf("search error: %s", err)
}
// Send 'end of response' marker
if err := ctx.writeString(""); err != nil {
return fmt.Errorf("cannot send 'end of response' marker")
}
return nil
}
var (
vmselectDeleteMetricsRequests = metrics.NewCounter("vm_vmselect_delete_metrics_requests_total")
vmselectLabelsRequests = metrics.NewCounter("vm_vmselect_labels_requests_total")
vmselectLabelValuesRequests = metrics.NewCounter("vm_vmselect_label_values_requests_total")
vmselectLabelEntriesRequests = metrics.NewCounter("vm_vmselect_label_entries_requests_total")
vmselectSeriesCountRequests = metrics.NewCounter("vm_vmselect_series_count_requests_total")
vmselectSearchQueryRequests = metrics.NewCounter("vm_vmselect_search_query_requests_total")
vmselectMetricBlocksRead = metrics.NewCounter("vm_vmselect_metric_blocks_read_total")
vmselectMetricRowsRead = metrics.NewCounter("vm_vmselect_metric_rows_read_total")
)
func (ctx *vmselectRequestCtx) setupTfss() error {
tfss := ctx.tfss[:0]
for _, tagFilters := range ctx.sq.TagFilterss {
if len(tfss) < cap(tfss) {
tfss = tfss[:len(tfss)+1]
} else {
tfss = append(tfss, &storage.TagFilters{})
}
tfs := tfss[len(tfss)-1]
tfs.Reset(ctx.sq.AccountID, ctx.sq.ProjectID)
for i := range tagFilters {
tf := &tagFilters[i]
if err := tfs.Add(tf.Key, tf.Value, tf.IsNegative, tf.IsRegexp); err != nil {
return fmt.Errorf("cannot parse tag filter %s: %s", tf, err)
}
}
}
ctx.tfss = tfss
return nil
}
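setupTfss converts the deserialized SearchQuery filters into storage.TagFilters using just two methods, Reset and Add. Building a filter set directly follows the same calls; a small sketch (the tenant IDs and the job=~"node.*" filter are illustrative):

package main

import (
	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
	"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
)

func main() {
	// Reset binds the filter set to an (accountID, projectID) tenant;
	// Add appends one filter as (key, value, isNegative, isRegexp).
	tfs := &storage.TagFilters{}
	tfs.Reset(0, 0)
	if err := tfs.Add([]byte("job"), []byte("node.*"), false, true); err != nil {
		logger.Fatalf("cannot add tag filter: %s", err)
	}
}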

File diff suppressed because it is too large

Some files were not shown because too many files have changed in this diff