lib/fs: add MustStopDirRemover for waiting until pending directories are removed on graceful shutdown

This patch is mainly required for laggy NFS. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/162
app/vmselect/promql: ignore grouping by destination label in count_values, since such a grouping is performed automatically
2026-05-17 08:36:55 +03:00 · 2019-09-05 11:17:17 +03:00 · 2019-09-04 19:59:02 +03:00 · 2019-09-04 18:13:51 +03:00 · 2019-09-04 17:17:52 +03:00 · 2019-09-04 16:34:29 +03:00
435 changed files with 46272 additions and 5861 deletions
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -0,0 +1,38 @@
+name: main
+on:
+  - push
+jobs:
+  build:
+    name: Build
+    runs-on: ubuntu-latest
+    steps:
+      - name: Setup Go
+        uses: actions/setup-go@v1
+        with:
+          go-version: 1.12
+        id: go
+      - name: Code checkout
+        uses: actions/checkout@v1
+      - name: Dependencies
+        env:
+          GO111MODULE: off
+        run: |
+          go get -v golang.org/x/lint/golint
+          go get -u github.com/kisielk/errcheck
+      - name: Build
+        env:
+          GO111MODULE: on
+        run: |
+            export PATH=$PATH:$(go env GOPATH)/bin # temporary fix. See https://github.com/actions/setup-go/issues/14
+            make check-all
+            git diff --exit-code
+            make test-full
+            make test-pure
+            make vminsert vmselect vmstorage
+            make vminsert-pure vmselect-pure vmstorage-pure
+            GOOS=freebsd go build -mod=vendor ./app/vminsert
+            GOOS=freebsd go build -mod=vendor ./app/vmselect
+            GOOS=freebsd go build -mod=vendor ./app/vmstorage
+            GOOS=darwin go build -mod=vendor ./app/vminsert
+            GOOS=darwin go build -mod=vendor ./app/vmselect
+            GOOS=darwin go build -mod=vendor ./app/vmstorage
--- a/.gitignore
+++ b/.gitignore
@@ -9,3 +9,10 @@
 /victoria-metrics-data
 /vmstorage-data
 /vmselect-cache
+.DS_Store
+
+
+### terraform
+terraform.tfstate
+terraform.tfstate.*
+.terraform/
--- a/74
+++ b/74
@@ -1,7 +1,7 @@
 PKG_PREFIX := github.com/VictoriaMetrics/VictoriaMetrics

 BUILDINFO_TAG ?= $(shell echo $$(git describe --long --all | tr '/' '-')$$( \
-	      git diff-index --quiet HEAD -- || echo '-dirty-'$$(git diff-index -u HEAD | sha1sum | grep -oP '^.{8}')))
+	      git diff-index --quiet HEAD -- || echo '-dirty-'$$(git diff-index -u HEAD | openssl sha1 | cut -c 10-17)))

 PKG_TAG ?= $(shell git tag -l --points-at HEAD)
 ifeq ($(PKG_TAG),)
@@ -11,24 +11,45 @@ endif
 GO_BUILDINFO = -X '$(PKG_PREFIX)/lib/buildinfo.Version=$(APP_NAME)-$(shell date -u +'%Y%m%d-%H%M%S')-$(BUILDINFO_TAG)'

 all: \
-	victoria-metrics-prod
+	vminsert \
+	vmselect \
+	vmstorage
+
+all-pure: \
+	vminsert-pure \
+	vmselect-pure \
+	vmstorage-pure

 include app/*/Makefile
 include deployment/*/Makefile
+include deployment/*/helm/Makefile

 clean:
 	rm -rf bin/*

-release: victoria-metrics-prod
-	cd bin && tar czf victoria-metrics-$(PKG_TAG).tar.gz victoria-metrics-prod
+publish: \
+	publish-vmstorage \
+	publish-vmselect \
+	publish-vminsert
+
+package: \
+	package-vmstorage \
+	package-vmselect \
+	package-vminsert
+
+release: \
+	vminsert-prod \
+	vmselect-prod \
+	vmstorage-prod
+	cd bin && tar czf victoria-metrics-$(PKG_TAG).tar.gz vminsert-prod vmselect-prod vmstorage-prod

 fmt:
-	go fmt $(PKG_PREFIX)/lib/...
-	go fmt $(PKG_PREFIX)/app/...
+	GO111MODULE=on gofmt -l -w -s ./lib
+	GO111MODULE=on gofmt -l -w -s ./app

 vet:
-	go vet $(PKG_PREFIX)/lib/...
-	go vet $(PKG_PREFIX)/app/...
+	GO111MODULE=on go vet -mod=vendor ./lib/...
+	GO111MODULE=on go vet -mod=vendor ./app/...

 lint: install-golint
 	golint lib/...
@@ -46,19 +67,46 @@ errcheck: install-errcheck
 install-errcheck:
 	which errcheck || GO111MODULE=off go get -u github.com/kisielk/errcheck

+check-all: fmt vet lint errcheck golangci-lint
+
 test:
-	go test $(PKG_PREFIX)/lib/...
+	GO111MODULE=on go test -mod=vendor ./lib/... ./app/...
+
+test-pure:
+	GO111MODULE=on CGO_ENABLED=0 go test -mod=vendor ./lib/... ./app/...
+
+test-full:
+	GO111MODULE=on go test -mod=vendor -coverprofile=coverage.txt -covermode=atomic ./lib/... ./app/...

 benchmark:
-	go test -bench=. $(PKG_PREFIX)/lib/...
+	GO111MODULE=on go test -mod=vendor -bench=. ./lib/...
+	GO111MODULE=on go test -mod=vendor -bench=. ./app/...
+
+benchmark-pure:
+	GO111MODULE=on CGO_ENABLED=0 go test -mod=vendor -bench=. ./lib/...
+	GO111MODULE=on CGO_ENABLED=0 go test -mod=vendor -bench=. ./app/...

 vendor-update:
-	go get -u
-	go mod tidy
-	go mod vendor
+	GO111MODULE=on go get -u ./lib/...
+	GO111MODULE=on go get -u ./app/...
+	GO111MODULE=on go mod tidy
+	GO111MODULE=on go mod vendor
+
+app-local:
+	CGO_ENABLED=1 GO111MODULE=on go build $(RACE) -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/$(APP_NAME)$(RACE) $(PKG_PREFIX)/app/$(APP_NAME)
+
+app-local-pure:
+	CGO_ENABLED=0 GO111MODULE=on go build $(RACE) -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/$(APP_NAME)-pure$(RACE) $(PKG_PREFIX)/app/$(APP_NAME)

 quicktemplate-gen: install-qtc
 	qtc

 install-qtc:
 	which qtc || GO111MODULE=off go get -u github.com/valyala/quicktemplate/qtc
+
+
+golangci-lint: install-golangci-lint
+	golangci-lint run --exclude '(SA4003|SA1019):' -D errcheck
+
+install-golangci-lint:
+	which golangci-lint || GO111MODULE=off go get -u github.com/golangci/golangci-lint/cmd/golangci-lint
--- a/README.md
+++ b/README.md
@@ -1,386 +1,213 @@
-<img  text-align="center" alt="Victoria Metrics" src="logo.png">
+<img alt="Victoria Metrics" src="logo.png">

-## Single-node VictoriaMetrics
+# Cluster version of VictoriaMetrics

-[![Latest Release](https://img.shields.io/github/release/VictoriaMetrics/VictoriaMetrics.svg?style=flat-square)](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest)
+VictoriaMetrics is a fast, cost-effective and scalable time series database. It can be used as a long-term remote storage for Prometheus.

-VictoriaMetrics is a long-term remote storage for Prometheus.
-It is available in [binary releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases),
-[docker images](https://hub.docker.com/r/valyala/victoria-metrics/) and
-in [source code](https://github.com/VictoriaMetrics/VictoriaMetrics).
-
-Cluster version is available [here](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/cluster).
+It is recommended to use the [single-node version](https://github.com/VictoriaMetrics/VictoriaMetrics) instead of the cluster version
+for ingestion rates lower than 10 million data points per second.
+The single-node version [scales perfectly](https://medium.com/@valyala/measuring-vertical-scalability-for-time-series-databases-in-google-cloud-92550d78d8ae)
+with the number of CPU cores, RAM and available storage space.
+The single-node version is easier to configure and operate compared to the cluster version, so think twice before choosing the cluster version.


 ## Prominent features

-* Supports [Prometheus querying API](https://prometheus.io/docs/prometheus/latest/querying/api/), so it can be used as Prometheus drop-in replacement in Grafana.
-  Additionally, VictoriaMetrics extends PromQL with opt-in [useful features](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/ExtendedPromQL).
-* High performance and good scalability for both [inserts](https://medium.com/@valyala/high-cardinality-tsdb-benchmarks-victoriametrics-vs-timescaledb-vs-influxdb-13e6ee64dd6b)
-  and [selects](https://medium.com/@valyala/when-size-matters-benchmarking-victoriametrics-vs-timescale-and-influxdb-6035811952d4).
-  [Outperforms InfluxDB and TimescaleDB by up to 20x](https://medium.com/@valyala/measuring-vertical-scalability-for-time-series-databases-in-google-cloud-92550d78d8ae).
-* [Uses 10x less RAM than InfluxDB](https://medium.com/@valyala/insert-benchmarks-with-inch-influxdb-vs-victoriametrics-e31a41ae2893) when working with millions of unique time series (aka high cardinality).
-* High data compression, so [up to 70x more data points](https://medium.com/@valyala/when-size-matters-benchmarking-victoriametrics-vs-timescale-and-influxdb-6035811952d4)
-  may be crammed into a limited storage comparing to TimescaleDB.
-* Optimized for storage with high-latency IO and low iops (HDD and network storage in AWS, Google Cloud, Microsoft Azure, etc). See [graphs from these benchmarks](https://medium.com/@valyala/high-cardinality-tsdb-benchmarks-victoriametrics-vs-timescaledb-vs-influxdb-13e6ee64dd6b).
-* A single-node VictoriaMetrics may substitute moderately sized clusters built with competing solutions such as Thanos, Uber M3, Cortex, InfluxDB or TimescaleDB.
-  See [vertical scalability benchmarks](https://medium.com/@valyala/measuring-vertical-scalability-for-time-series-databases-in-google-cloud-92550d78d8ae).
-* Easy operation:
-  * VictoriaMetrics consists of a single executable without external dependencies.
-  * All the configuration is done via explicit command-line flags with reasonable defaults.
-  * All the data is stored in a single directory pointed by `-storageDataPath` flag.
-  * Easy backups from [instant snapshots](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282).
-* Storage is protected from corruption on unclean shutdown (i.e. hardware reset or `kill -9`) thanks to [the storage architecture](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282).
-* Supports metrics' ingestion and backfilling via the following protocols:
-  * [Prometheus remote write API](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#remote_write)
-  * [InfluxDB line protocol](https://docs.influxdata.com/influxdb/v1.7/write_protocols/line_protocol_tutorial/)
-  * [Graphite plaintext protocol](https://graphite.readthedocs.io/en/latest/feeding-carbon.html) with [tags](https://graphite.readthedocs.io/en/latest/tags.html#carbon)
-    if `-graphiteListenAddr` is set.
-  * [OpenTSDB put message](http://opentsdb.net/docs/build/html/api_telnet/put.html) if `-opentsdbListenAddr` is set.
-* Ideally works with big amounts of time series data from IoT sensors, connected car sensors and industrial sensors.
-* Has open source [cluster version](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/cluster).
+- Supports all the features of [single-node version](https://github.com/VictoriaMetrics/VictoriaMetrics).
+- Scales horizontally to multiple nodes.
+- Supports multiple independent namespaces for time series data (aka multi-tenancy).


-## Operation
+## Architecture overview
+
+VictoriaMetrics cluster consists of the following services:
+
+- `vmstorage` - stores the data
+- `vminsert` - proxies the ingested data to `vmstorage` shards using consistent hashing
+- `vmselect` - performs incoming queries using the data from `vmstorage`
+
+Each service may scale independently and may run on the most suitable hardware.
+
+<img src="https://docs.google.com/drawings/d/e/2PACX-1vTvk2raU9kFgZ84oF-OKolrGwHaePhHRsZEcfQ1I_EC5AB_XPWwB392XshxPramLJ8E4bqptTnFn5LL/pub?w=1104&amp;h=746">


-### Table of contents
+## Building from sources

-* [How to build from sources](#how-to-build-from-sources)
-* [How to start VictoriaMetrics](#how-to-start-victoriametrics)
-* [Prometheus setup](#prometheus-setup)
-* [Grafana setup](#grafana-setup)
-* [How to send data from InfluxDB-compatible agents such as Telegraf](#how-to-send-data-from-influxdb-compatible-agents-such-as-telegraf)
-* [How to send data from Graphite-compatible agents such as StatsD](#how-to-send-data-from-graphite-compatible-agents-such-as-statsd)
-* [How to send data from OpenTSDB-compatible agents](#how-to-send-data-from-opentsdb-compatible-agents)
-* [How to apply new config / ugrade VictoriaMetrics](#how-to-apply-new-config--upgrade-victoriametrics)
-* [How to work with snapshots](#how-to-work-with-snapshots)
-* [How to delete time series](#how-to-delete-time-series)
-* [How to export time series](#how-to-export-time-series)
-* [Federation](#federation)
-* [Capacity planning](#capacity-planning)
-* [High Availability](#high-availability)
-* [Multiple retentions](#multiple-retentions)
-* [Scalability and cluster version](#scalability-and-cluster-version)
-* [Security](#security)
-* [Tuning](#tuning)
-* [Monitoring](#monitoring)
-* [Troubleshooting](#troubleshooting)
-* [Community and contributions](#community-and-contributions)
-* [Reporting bugs](#reporting-bugs)
+Source code for cluster version is available at [cluster branch](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/cluster).


-### How to build from sources
+### Development Builds

-We recommend using either [binary releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases) or
-[docker images](https://hub.docker.com/r/valyala/victoria-metrics/) instead of building VictoriaMetrics
-from sources. Building from sources is reasonable when developing an additional features specific
-to your needs.
+1. [Install go](https://golang.org/doc/install). The minimum supported version is Go 1.12.
+2. Run `make` from the repository root. It should build `vmstorage`, `vmselect`
+   and `vminsert` binaries and put them into the `bin` folder.


-#### Development build
+### Production builds

-1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.12.
-2. Run `go build ./app/victoria-metrics` from the root folder of the repository.
-   It will build `victoria-metrics` binary in the root folder of the repository.
+There is no need to install Go on the host system, since binaries are built
+inside [the official docker container for Go](https://hub.docker.com/_/golang).
+This makes builds reproducible.
+So [install docker](https://docs.docker.com/install/) and run the following command:

-#### Production build
+```
+make vminsert-prod vmselect-prod vmstorage-prod
+```

-1. [Install docker](https://docs.docker.com/install/).
-2. Run `make victoria-metrics-prod` from the root folder of the respository.
-   It will build `victoria-metrics-prod` binary and put it into the `bin` folder.
+Production binaries are built into statically linked binaries for `GOARCH=amd64`, `GOOS=linux`.
+They are put into `bin` folder with `-prod` suffixes:
+```
+$ make vminsert-prod vmselect-prod vmstorage-prod
+$ ls -1 bin
+vminsert-prod
+vmselect-prod
+vmstorage-prod
+```

-#### Building docker images
+### Building docker images
+
+Run `make package`. It will build the following docker images locally:
+
+* `victoriametrics/vminsert:<PKG_TAG>`
+* `victoriametrics/vmselect:<PKG_TAG>`
+* `victoriametrics/vmstorage:<PKG_TAG>`

-Run `make package-victoria-metrics`. It will build `valyala/victoria-metrics:<PKG_TAG>` docker image locally.
 `<PKG_TAG>` is auto-generated image tag, which depends on source code in the repository.
 The `<PKG_TAG>` may be manually set via `PKG_TAG=foobar make package`.



-### How to start VictoriaMetrics
+## Operation

-Just start VictoriaMetrics executable or docker image with the desired command-line flags.
+### Cluster setup

-The following command line flags are used the most:
+A minimal cluster must contain the following nodes:

-* `-storageDataPath` - path to data directory. VictoriaMetrics stores all the data in this directory.
-* `-retentionPeriod` - retention period in months for the data. Older data is automatically deleted.
-* `-httpListenAddr` - TCP address to listen to for http requests. By default it listens port `8428` on all the network interfaces.
-* `-graphiteListenAddr` - TCP and UDP address to listen to for Graphite data. By default it is disabled.
-* `-opentsdbListenAddr` - TCP and UDP address to listen to for OpenTSDB data. By default it is disabled.
+* a single `vmstorage` node with `-retentionPeriod` and `-storageDataPath` flags
+* a single `vminsert` node with `-storageNode=<vmstorage_host>:8400`
+* a single `vmselect` node with `-storageNode=<vmstorage_host>:8401`

-Pass `-help` to see all the available flags with description and default values.
+It is recommended to run at least two nodes for each service
+for high availability purposes.

+An http load balancer must be put in front of `vminsert` and `vmselect` nodes:
+- requests starting with `/insert` must be routed to port `8480` on `vminsert` nodes.
+- requests starting with `/select` must be routed to port `8481` on `vmselect` nodes.

-### Prometheus setup
+Ports may be altered by setting `-httpListenAddr` on the corresponding nodes.

-Add the following lines to Prometheus config file (it is usually located at `/etc/prometheus/prometheus.yml`):

-```yml
-remote_write:
-  - url: http://<victoriametrics-addr>:8428/api/v1/write
-    queue_config:
-      max_samples_per_send: 10000
-```
+### URL format

-Substitute `<victoriametrics-addr>` with the hostname or IP address of VictoriaMetrics.
-Then apply the new config via the following command:
+* URLs for data ingestion: `/insert/<accountID>/<suffix>`, where:
+  - `<accountID>` is an arbitrary number identifying namespace for data ingestion (aka tenant)
+  - `<suffix>` may have the following values:
+     - `prometheus` - for inserting data with [Prometheus remote write API](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#remote_write)
+     - `influx/write` or `influx/api/v2/write` - for inserting data with [Influx line protocol](https://docs.influxdata.com/influxdb/v1.7/write_protocols/line_protocol_tutorial/)

-```
-kill -HUP `pidof prometheus`
-```
+* URLs for querying: `/select/<accountID>/prometheus/<suffix>`, where:
+  - `<accountID>` is an arbitrary number identifying data namespace for the query (aka tenant)
+  - `<suffix>` may have the following values:
+    - `api/v1/query` - performs [PromQL instant query](https://prometheus.io/docs/prometheus/latest/querying/api/#instant-queries)
+    - `api/v1/query_range` - performs [PromQL range query](https://prometheus.io/docs/prometheus/latest/querying/api/#range-queries)
+    - `api/v1/series` - performs [series query](https://prometheus.io/docs/prometheus/latest/querying/api/#finding-series-by-label-matchers)
+    - `api/v1/labels` - returns a [list of label names](https://prometheus.io/docs/prometheus/latest/querying/api/#getting-label-names)
+    - `api/v1/label/<label_name>/values` - returns values for the given `<label_name>` according to [the API](https://prometheus.io/docs/prometheus/latest/querying/api/#querying-label-values)
+    - `federate` - returns [federated metrics](https://prometheus.io/docs/prometheus/latest/federation/)
+    - `api/v1/export` - exports raw data. See [this article](https://medium.com/@valyala/analyzing-prometheus-data-with-external-tools-5f3e5e147639) for details

-Prometheus writes incoming data to local storage and to remote storage in parallel.
-This means the data remains available in local storage for `--storage.tsdb.retention.time` duration
-if remote storage stops working.
+* `vmstorage` nodes provide the following HTTP endpoints on `8482` port:
+  - `/snapshot/create` - create [instant snapshot](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282),
+    which can be used for backups in background. Snapshots are created in `<storageDataPath>/snapshots` folder, where `<storageDataPath>` is the corresponding
+    command-line flag value.
+  - `/snapshot/list` - list available snapshots.
+  - `/snapshot/delete?snapshot=<id>` - delete the given snapshot.
+  - `/snapshot/delete_all` - delete all the snapshots.

-If you plan sending data to VictoriaMetrics from multiple Prometheus instances, then add the following lines into `global` section
-of [Prometheus config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#configuration-file):
+  Snapshots may be created independently on each `vmstorage` node. There is no need to synchronize snapshot creation
+  across `vmstorage` nodes.

-```yml
-global:
-  external_labels:
-    datacenter: dc-123
-```

-This instructs Prometheus to add `datacenter=dc-123` label to each time series sent to remote storage.
-The label name may be arbitrary - `datacenter` is just an example. The label value must be unique
-across Prometheus instances, so time series may be filtered and grouped by this label.
+### Cluster resizing

+* `vminsert` and `vmselect` nodes are stateless and may be added / removed at any time.
+  Do not forget to update the list of these nodes on the http load balancer.
+* `vmstorage` nodes own the ingested data, so they cannot be removed without data loss.

-### Grafana setup
+Steps to add `vmstorage` node:

-Create [Prometheus datasource](http://docs.grafana.org/features/datasources/prometheus/) in Grafana with the following Url:
+1. Start new `vmstorage` node with the same `-retentionPeriod` as existing nodes in the cluster.
+2. Gradually restart all the `vmselect` nodes with new `-storageNode` arg containing `<new_vmstorage_host>:8401`.
+3. Gradually restart all the `vminsert` nodes with new `-storageNode` arg containing `<new_vmstorage_host>:8400`.

-```
-http://<victoriametrics-addr>:8428
-```

-Substitute `<victoriametrics-addr>` with the hostname or IP address of VictoriaMetrics.
+### Cluster availability

-Then build graphs with the created datasource using [Prometheus query language](https://prometheus.io/docs/prometheus/latest/querying/basics/).
-VictoriaMetrics supports native PromQL and [extends it with useful features](ExtendedPromQL).
+* HTTP load balancer must stop routing requests to unavailable `vminsert` and `vmselect` nodes.
+* The cluster remains available if at least a single `vmstorage` node exists:

+  - `vminsert` re-routes incoming data from unavailable `vmstorage` nodes to healthy `vmstorage` nodes
+  - `vmselect` continues serving partial responses if at least a single `vmstorage` node is available.

-### How to send data from InfluxDB-compatible agents such as [Telegraf](https://www.influxdata.com/time-series-platform/telegraf/)?

-Just use `http://<victoriametric-addr>:8428` url instead of InfluxDB url in agents' configs.
-For instance, put the following lines into `Telegraf` config, so it sends data to VictoriaMetrics instead of InfluxDB:
+### Updating / reconfiguring cluster nodes

-```
-[[outputs.influxdb]]
-  urls = ["http://<victoriametrics-addr>:8428"]
-```
+All the node types - `vminsert`, `vmselect` and `vmstorage` - may be updated via graceful shutdown.
+Send `SIGINT` signal to the corresponding process, wait until it finishes and then start new version
+with new configs.

-Do not forget substituting `<victoriametrics-addr>` with the real address where VictoriaMetrics runs.
+Cluster should remain in working state if at least a single node of each type remains available during
+the update process. See [cluster availability](#cluster-availability) section for details.

-VictoriaMetrics maps Influx data using the following rules:
-* [`db` query arg](https://docs.influxdata.com/influxdb/v1.7/tools/api/#write-http-endpoint) is mapped into `db` label value
-* Field names are mapped to time series names prefixed by `{measurement}.` value
-* Field values are mapped to time series values
-* Tags are mapped to Prometheus labels as-is

+### Helm

-### How to send data from Graphite-compatible agents such as [StatsD](https://github.com/etsy/statsd)?
+Helm chart simplifies managing cluster version of VictoriaMetrics in Kubernetes.
+It is available in the `deployment/k8s/helm/victoria-metrics` folder.

-1) Enable Graphite receiver in VictoriaMetrics by setting `-graphiteListenAddr` command line flag. For instance,
-the following command will enable Graphite receiver in VictoriaMetrics on TCP and UDP port `2003`:
+1. Install Cluster: `helm install -n <NAME> deployment/k8s/helm/victoria-metrics` or `ENV=<NAME> make helm-install`.
+2. Upgrade Cluster: `helm upgrade <NAME> deployment/k8s/helm/victoria-metrics` or `ENV=<NAME> make helm-upgrade`.
+3. Delete Cluster: `helm del --purge <NAME>` or `ENV=<NAME> make helm-delete`.

-```
-/path/to/victoria-metrics-prod ... -graphiteListenAddr=:2003
-```
+Upgrade follows the [cluster resizing](#cluster-resizing) procedure under the hood.

-2) Use the configured address in Graphite-compatible agents. For instance, set `graphiteHost`
-to the VictoriaMetrics host in `StatsD` configs.

+### Replication and data safety

-### How to send data from OpenTSDB-compatible agents?
+VictoriaMetrics offloads replication to the underlying storage pointed by `-storageDataPath`.
+It is recommended to store data on [Google Compute Engine persistent disks](https://cloud.google.com/compute/docs/disks/#pdspecs),
+since they are protected from data loss and data corruption. They also provide consistently high performance
+and [may be resized](https://cloud.google.com/compute/docs/disks/add-persistent-disk) without downtime.
+HDD-based persistent disks should be enough for the majority of use cases.

-1) Enable OpenTSDB receiver in VictoriaMetrics by setting `-opentsdbListenAddr` command line flag. For instance,
-the following command will enable OpenTSDB receiver in VictoriaMetrics on TCP and UDP port `4242`:
+It is recommended to use durable replicated persistent volumes in Kubernetes.

-```
-/path/to/victoria-metrics-prod ... -opentsdbListenAddr=:4242
-```

-2) Send data to the given address from OpenTSDB-compatible agents.
+### Backups

+It is recommended to perform periodic backups from [instant snapshots](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282)
+for protecting from user errors such as accidental data deletion.

-### How to apply new config / upgrade VictoriaMetrics?
+The following steps must be performed for each `vmstorage` node for creating a backup:

-VictoriaMetrics must be restarted in order to upgrade or apply new config:
+1. Create an instant snapshot by navigating to `/snapshot/create` HTTP handler. It will create snapshot and return its name.
+2. Archive the created snapshot from `<-storageDataPath>/snapshots/<snapshot_name>` folder using any suitable tool that follows symlinks. For instance,
+   `cp -L`, `rsync -L` or `scp -r`. The archival process doesn't interfere with `vmstorage` work, so it may be performed at any suitable time.
+   Incremental backups are possible with `rsync --delete`, which should [remove extraneous files from backup dir](https://askubuntu.com/questions/476041/how-do-i-make-rsync-delete-files-that-have-been-deleted-from-the-source-folder).
+3. Delete unused snapshots via `/snapshot/delete?snapshot=<snapshot_name>` or `/snapshot/delete_all` in order to free up occupied storage space.

-1) Send `SIGINT` signal to VictoriaMetrics process in order to gracefully stop it.
-2) Wait until the process stops. This can take a few seconds.
-3) Start the upgraded VictoriaMetrics with new config.
+There is no need to synchronize backups among all the `vmstorage` nodes.

+Restoring from backup:

-### How to work with snapshots?
-
-Navigate to `http://<victoriametrics-addr>:8428/snapshot/create` in order to create an instant snapshot.
-The page will return the following JSON response:
-
-```
-{"status":"ok","snapshot":"<snapshot-name>"}
-```
-
-Snapshots are created under `<-storageDataPath>/snapshots` directory, where `<-storageDataPath>`
-is the command-line flag value. Snapshots can be archived to backup storage via `rsync -L`, `scp -r`
-or any similar tool that follows symlinks during copying.
-
-The `http://<victoriametrics-addr>:8428/snapshot/list` page contains the list of available snapshots.
-
-Navigate to `http://<victoriametrics-addr>:8428/snapshot/delete?snapshot=<snapshot-name>` in order
-to delete `<snapshot-name>` snapshot.
-
-Navigate to `http://<victoriametrics-addr>:8428/snapshot/delete_all` in order to delete all the snapshots.
-
-
-### How to delete time series?
-
-Send a request to `http://<victoriametrics-addr>:8428/api/v1/admin/tsdb/delete_series?match[]=<timeseries_selector_for_delete>`,
-where `<timeseries_selector_for_delete>` may contain any [time series selector](https://prometheus.io/docs/prometheus/latest/querying/basics/#time-series-selectors)
-for metrics to delete. After that all the time series matching the given selector are deleted. Storage space for
-the deleted time series isn't freed instantly - it is freed during subsequent merges of data files.
-
-
-### How to export time series?
-
-Send a request to `http://<victoriametrics-addr>:8428/api/v1/export?match[]=<timeseries_selector_for_export>`,
-where `<timeseries_selector_for_export>` may contain any [time series selector](https://prometheus.io/docs/prometheus/latest/querying/basics/#time-series-selectors)
-for metrics to export. The response would contain all the data for the selected time series in [JSON streaming format](https://en.wikipedia.org/wiki/JSON_streaming#Line-delimited_JSON).
-Each JSON line would contain data for a single time series. An example output:
-
-```
-{"metric":{"__name__":"up","job":"node_exporter","instance":"localhost:9100"},"values":[0,0,0],"timestamps":[1549891472010,1549891487724,1549891503438]}
-{"metric":{"__name__":"up","job":"prometheus","instance":"localhost:9090"},"values":[1,1,1],"timestamps":[1549891461511,1549891476511,1549891491511]}
-```
-
-Optional `start` and `end` args may be added to the request in order to limit the time frame for the exported data. These args may contain either
-unix timestamp in seconds or [RFC3339](https://www.ietf.org/rfc/rfc3339.txt) values.
-
-
-### Federation
-
-VictoriaMetrics exports [Prometheus-compatible federation data](https://prometheus.io/docs/prometheus/latest/federation/)
-at `http://<victoriametrics-addr>:8428/federate?match[]=<timeseries_selector_for_federation>`.
-
-Optional `start` and `end` args may be added to the request in order to scrape the last point for each selected time series on the `[start ... end]` interval.
-`start` and `end` may contain either unix timestamp in seconds or [RFC3339](https://www.ietf.org/rfc/rfc3339.txt) values. By default the last point
-on the interval `[now - max_lookback ... now]` is scraped for each time series. Default value for `max_lookback` is `5m` (5 minutes), but can be overriden.
-For instance, `/federate?match[]=up&max_lookback=1h` would return last points on the `[now - 1h ... now]` interval. This may be useful for time series federation
-with scrape intervals exceeding `5m`.
-
-
-### Capacity planning
-
-Rough estimation of the required resources:
-
-* RAM size: less than 1KB per active time series. So, ~1GB of RAM is required for 1M active time series.
-  Time series is considered active if new data points have been added to it recently or if it has been recently queried.
-  VictoriaMetrics stores various caches in RAM. Memory size for these caches may be limited with `-memory.allowedPercent` flag.
-* CPU cores: a CPU core per 300K inserted data points per second. So, ~4 CPU cores are required for processing
-  the insert stream of 1M data points per second.
-  If you see lower numbers per CPU core, then it is likely active time series info doesn't fit caches,
-  so you need more RAM for lowering CPU usage.
-* Storage size: less than a byte per data point on average. So, ~260GB is required for storing a month-long insert stream
-  of 100K data points per second.
-  The actual storage size heavily depends on data randomness (entropy). Higher randomness means higher storage size requirements.
-
-
-### High availability
-
-1) Install multiple VictoriaMetrics instances in distinct datacenters.
-2) Add addresses of these instances to `remote_write` section in Prometheus config:
-
-```yml
-remote_write:
-  - url: http://<victoriametrics-addr-1>:8428/api/v1/write
-    queue_config:
-      max_samples_per_send: 10000
-  # ...
-  - url: http://<victoriametrics-addr-N>:8428/api/v1/write
-    queue_config:
-      max_samples_per_send: 10000
-```
-
-3) Apply the updated config:
-
-```
-kill -HUP `pidof prometheus`
-```
-
-4) Now Prometheus should write data into all the configured `remote_write` urls in parallel.
-5) Set up [Promxy](https://github.com/jacksontj/promxy) in front of all the VictoriaMetrics replicas.
-6) Set up Prometheus datasource in Grafana that points to Promxy.
-
-
-### Multiple retentions
-
-Just start multiple VictoriaMetrics instances with distinct values for the following flags:
-
-* `-retentionPeriod`
-* `-storageDataPath`, so the data for each retention period is saved in a separate directory
-* `-httpListenAddr`, so clients may reach VictoriaMetrics instance with proper retention
-
-
-### Scalability and cluster version
-
-Though single-node VictoriaMetrics cannot scale to multiple nodes, it is optimized for resource usage - storage size / bandwidth / IOPS, RAM, CPU.
-This means that a single-node VictoriaMetrics may scale vertically and substitute moderately sized cluster built with competing solutions
-such as Thanos, Uber M3, InfluxDB or TimescaleDB.
-
-So try single-node VictoriaMetrics at first and then [switch to cluster version](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/cluster) if you still need
-horizontally scalable long-term remote storage for really large Prometheus deployments.
-[Contact us](mailto:info@victoriametrics.com) for paid support.
-
-
-### Security
-
-Do not forget protecting sensitive endpoints in VictoriaMetrics when exposing it to untrusted networks such as internet.
-Consider setting the following command-line flags:
-
-* `-tls`, `-tlsCertFile` and `-tlsKeyFile` for switching from HTTP to HTTPS.
-* `-httpAuth.username` and `-httpAuth.password` for protecting all the HTTP endpoints
-  with [HTTP Basic Authentication](https://en.wikipedia.org/wiki/Basic_access_authentication).
-* `-deleteAuthKey` for protecting `/api/v1/admin/tsdb/delete_series` endpoint. See [how to delete time series](#how-to-delete-time-series).
-* `-snapshotAuthKey` for protecting `/snapshot*` endpoints. See [how to work with snapshots](#how-to-work-with-snapshots).
-
-Explicitly set internal network interface for TCP and UDP ports for data ingestion with Graphite and OpenTSDB formats.
-For example, substitute `-graphiteListenAddr=:2003` with `-graphiteListenAddr=<internal_iface_ip>:2003`.
-
-
-### Tuning
-
-* There is no need in VictoriaMetrics tuning, since it uses reasonable defaults for command-line flags,
-  which are automatically adjusted for the available CPU and RAM resources.
-* There is no need in Operating System tuning, since VictoriaMetrics is optimized for default OS settings.
-  The only option is increasing the limit on [the number open files in the OS](https://medium.com/@muhammadtriwibowo/set-permanently-ulimit-n-open-files-in-ubuntu-4d61064429a),
-  so Prometheus instances could establish more connections to VictoriaMetrics.
-
-
-### Monitoring
-
-VictoriaMetrics exports internal metrics in Prometheus format on the `/metrics` page.
-Add this page to Prometheus' scrape config in order to collect VictoriaMetrics metrics.
-There is [an official Grafana dashboard for single-node VictoriaMetrics](https://grafana.com/dashboards/10229).
-
-
-### Troubleshooting
-
-* If VictoriaMetrics works slowly and eats more than a CPU core per 100K ingested data points per second,
-  then it is likely you have too many active time series for the current amount of RAM.
-  It is recommended increasing the amount of RAM on the node with VictoriaMetrics in order to improve
-  ingestion performance.
-  Another option is to increase `-memory.allowedPercent` command-line flag value. Be careful with this
-  option, since too big value for `-memory.allowedPercent` may result in high I/O usage.
+1. Stop `vmstorage` node with `kill -INT`.
+2. Delete all the contents of the directory pointed to by the `-storageDataPath` command-line flag.
+3. Copy all the contents of the backup directory to `-storageDataPath` directory.
+4. Start `vmstorage` node.


 ## Community and contributions

-Feel free asking any questions regarding VictoriaMetrics [here](https://groups.google.com/forum/#!forum/victorametrics-users).
-
 We are open to third-party pull requests provided they follow [KISS design principle](https://en.wikipedia.org/wiki/KISS_principle):

 - Prefer simple code and architecture.
@@ -392,6 +219,17 @@ We are open to third-party pull requests provided they follow [KISS design princ

 Adhering `KISS` principle simplifies the resulting code and architecture, so it can be reviewed, understood and verified by many people.

+Due to `KISS`, the cluster version of VictoriaMetrics lacks the following "features" popular in the distributed computing world:
+
+- Fragile gossip protocols. See [failed attempt in Thanos](https://github.com/improbable-eng/thanos/blob/030bc345c12c446962225221795f4973848caab5/docs/proposals/completed/201809_gossip-removal.md).
+- Hard-to-understand-and-implement-properly [Paxos protocols](https://www.quora.com/In-distributed-systems-what-is-a-simple-explanation-of-the-Paxos-algorithm).
+- Complex replication schemes, which may go nuts in unforeseen edge cases. The replication is offloaded to the underlying durable replicated storage
+  such as [persistent disks in Google Compute Engine](https://cloud.google.com/compute/docs/disks/#pdspecs).
+- Automatic data reshuffling between storage nodes, which may hurt cluster performance and availability.
+- Automatic cluster resizing, which may cost you a lot of money if improperly configured.
+- Automatic discovering and addition of new nodes in the cluster, which may mix data between dev and prod clusters :)
+- Automatic leader election, which may result in split brain disaster on network errors.
+

 ## Reporting bugs

--- a/app/victoria-metrics/Makefile
+++ b/app/victoria-metrics/Makefile
@@ -1,21 +0,0 @@
-# All these commands must run from repository root.
-
-victoria-metrics-prod:
-	APP_NAME=victoria-metrics $(MAKE) app-via-docker
-
-package-victoria-metrics:
-	APP_NAME=victoria-metrics \
-	$(MAKE) package-via-docker
-
-publish-victoria-metrics:
-	APP_NAME=victoria-metrics $(MAKE) publish-via-docker
-
-run-victoria-metrics:
-	mkdir -p victoria-metrics-data
-	DOCKER_OPTS='-v $(shell pwd)/victoria-metrics-data:/victoria-metrics-data -p 8428:8428 -p 2003:2003 -p 2003:2003/udp' \
-	APP_NAME=victoria-metrics \
-	ARGS='-graphiteListenAddr=:2003 -opentsdbListenAddr=:4242 -retentionPeriod=12 -search.maxUniqueTimeseries=1000000 -search.maxQueryDuration=10m' \
-	$(MAKE) run-via-docker
-
-victoria-metrics-arm:
-	CC=arm-linux-gnueabi-gcc CGO_ENABLED=1 GOARCH=arm GO111MODULE=on go build -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/victoria-metrics-arm ./app/victoria-metrics
--- a/app/victoria-metrics/main.go
+++ b/app/victoria-metrics/main.go
@@ -1,60 +0,0 @@
-package main
-
-import (
-	"flag"
-	"net/http"
-	"time"
-
-	"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert"
-	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect"
-	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmstorage"
-	"github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
-	"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
-	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
-	"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
-)
-
-var httpListenAddr = flag.String("httpListenAddr", ":8428", "TCP address to listen for http connections")
-
-func main() {
-	flag.Parse()
-	buildinfo.Init()
-	logger.Init()
-	logger.Infof("starting VictoraMetrics at %q...", *httpListenAddr)
-	startTime := time.Now()
-	vmstorage.Init()
-	vmselect.Init()
-	vminsert.Init()
-
-	go httpserver.Serve(*httpListenAddr, requestHandler)
-	logger.Infof("started VictoriaMetrics in %s", time.Since(startTime))
-
-	sig := procutil.WaitForSigterm()
-	logger.Infof("received signal %s", sig)
-
-	logger.Infof("gracefully shutting down webservice at %q", *httpListenAddr)
-	startTime = time.Now()
-	if err := httpserver.Stop(*httpListenAddr); err != nil {
-		logger.Fatalf("cannot stop the webservice: %s", err)
-	}
-	vminsert.Stop()
-	logger.Infof("successfully shut down the webservice in %s", time.Since(startTime))
-
-	vmstorage.Stop()
-	vmselect.Stop()
-
-	logger.Infof("the VictoriaMetrics has been stopped in %s", time.Since(startTime))
-}
-
-func requestHandler(w http.ResponseWriter, r *http.Request) bool {
-	if vminsert.RequestHandler(w, r) {
-		return true
-	}
-	if vmselect.RequestHandler(w, r) {
-		return true
-	}
-	if vmstorage.RequestHandler(w, r) {
-		return true
-	}
-	return false
-}
--- a/app/vminsert/Makefile
+++ b/app/vminsert/Makefile
@@ -0,0 +1,36 @@
+# All these commands must run from repository root.
+
+run-vminsert:
+	APP_NAME=vminsert \
+	ARGS='-storageNode=localhost:8400' \
+	$(MAKE) run-via-docker
+
+vminsert:
+	APP_NAME=vminsert $(MAKE) app-local
+
+vminsert-race:
+	APP_NAME=vminsert RACE=-race $(MAKE) app-local
+
+vminsert-prod:
+	APP_NAME=vminsert $(MAKE) app-via-docker
+
+vminsert-prod-race:
+	APP_NAME=vminsert RACE=-race $(MAKE) app-via-docker
+
+vminsert-pure:
+	APP_NAME=vminsert $(MAKE) app-local-pure
+
+vminsert-pure-prod:
+	APP_NAME=vminsert APP_SUFFIX='-pure' DOCKER_OPTS='--env CGO_ENABLED=0' $(MAKE) app-via-docker
+
+package-vminsert:
+	APP_NAME=vminsert $(MAKE) package-via-docker
+
+package-vminsert-race:
+	APP_NAME=vminsert RACE=-race $(MAKE) package-via-docker
+
+publish-vminsert:
+	APP_NAME=vminsert $(MAKE) publish-via-docker
+
+publish-vminsert-race:
+	APP_NAME=vminsert RACE=-race $(MAKE) publish-via-docker
--- a/app/vminsert/README.md
+++ b/app/vminsert/README.md
@@ -1 +1 @@
-`vminsert` routes the ingested data to `vmstorage`.
+`vminsert` routes the ingested data to `vmstorage` nodes.
--- a/app/vminsert/common/gzip_reader.go
+++ b/app/vminsert/common/gzip_reader.go
@@ -0,0 +1,30 @@
+package common
+
+import (
+	"compress/gzip"
+	"io"
+	"sync"
+)
+
+// GetGzipReader returns new gzip reader from the pool.
+//
+// Return the gzip reader back to the pool with PutGzipReader when it is no longer needed.
+func GetGzipReader(r io.Reader) (*gzip.Reader, error) {
+	v := gzipReaderPool.Get()
+	if v == nil {
+		return gzip.NewReader(r)
+	}
+	zr := v.(*gzip.Reader)
+	if err := zr.Reset(r); err != nil {
+		return nil, err
+	}
+	return zr, nil
+}
+
+// PutGzipReader returns back gzip reader obtained via GetGzipReader.
+func PutGzipReader(zr *gzip.Reader) {
+	_ = zr.Close()
+	gzipReaderPool.Put(zr)
+}
+
+var gzipReaderPool sync.Pool
--- a/app/vminsert/common/insert_ctx.go
+++ b/app/vminsert/common/insert_ctx.go
@@ -1,106 +0,0 @@
-package common
-
-import (
-	"fmt"
-
-	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmstorage"
-	"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
-	"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
-	"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
-)
-
-// InsertCtx contains common bits for data points insertion.
-type InsertCtx struct {
-	Labels []prompb.Label
-
-	mrs            []storage.MetricRow
-	metricNamesBuf []byte
-}
-
-// Reset resets ctx for future fill with rowsLen rows.
-func (ctx *InsertCtx) Reset(rowsLen int) {
-	for _, label := range ctx.Labels {
-		label.Name = nil
-		label.Value = nil
-	}
-	ctx.Labels = ctx.Labels[:0]
-
-	for i := range ctx.mrs {
-		mr := &ctx.mrs[i]
-		mr.MetricNameRaw = nil
-	}
-	ctx.mrs = ctx.mrs[:0]
-
-	if n := rowsLen - cap(ctx.mrs); n > 0 {
-		ctx.mrs = append(ctx.mrs[:cap(ctx.mrs)], make([]storage.MetricRow, n)...)
-	}
-	ctx.mrs = ctx.mrs[:rowsLen]
-	ctx.metricNamesBuf = ctx.metricNamesBuf[:0]
-}
-
-func (ctx *InsertCtx) marshalMetricNameRaw(prefix []byte, labels []prompb.Label) []byte {
-	start := len(ctx.metricNamesBuf)
-	ctx.metricNamesBuf = append(ctx.metricNamesBuf, prefix...)
-	ctx.metricNamesBuf = storage.MarshalMetricNameRaw(ctx.metricNamesBuf, labels)
-	metricNameRaw := ctx.metricNamesBuf[start:]
-	return metricNameRaw[:len(metricNameRaw):len(metricNameRaw)]
-}
-
-// WriteDataPoint writes (timestamp, value) with the given prefix and lables into ctx buffer.
-func (ctx *InsertCtx) WriteDataPoint(prefix []byte, labels []prompb.Label, timestamp int64, value float64) {
-	metricNameRaw := ctx.marshalMetricNameRaw(prefix, labels)
-	ctx.addRow(metricNameRaw, timestamp, value)
-}
-
-// WriteDataPointExt writes (timestamp, value) with the given metricNameRaw and labels into ctx buffer.
-//
-// It returns metricNameRaw for the given labels if len(metricNameRaw) == 0.
-func (ctx *InsertCtx) WriteDataPointExt(metricNameRaw []byte, labels []prompb.Label, timestamp int64, value float64) []byte {
-	if len(metricNameRaw) == 0 {
-		metricNameRaw = ctx.marshalMetricNameRaw(nil, labels)
-	}
-	ctx.addRow(metricNameRaw, timestamp, value)
-	return metricNameRaw
-}
-
-func (ctx *InsertCtx) addRow(metricNameRaw []byte, timestamp int64, value float64) {
-	mrs := ctx.mrs
-	if cap(mrs) > len(mrs) {
-		mrs = mrs[:len(mrs)+1]
-	} else {
-		mrs = append(mrs, storage.MetricRow{})
-	}
-	mr := &mrs[len(mrs)-1]
-	ctx.mrs = mrs
-	mr.MetricNameRaw = metricNameRaw
-	mr.Timestamp = timestamp
-	mr.Value = value
-}
-
-// AddLabel adds (name, value) label to ctx.Labels.
-//
-// name and value must exist until ctx.Labels is used.
-func (ctx *InsertCtx) AddLabel(name, value string) {
-	labels := ctx.Labels
-	if cap(labels) > len(labels) {
-		labels = labels[:len(labels)+1]
-	} else {
-		labels = append(labels, prompb.Label{})
-	}
-	label := &labels[len(labels)-1]
-
-	// Do not copy name and value contents for performance reasons.
-	// This reduces GC overhead on the number of objects and allocations.
-	label.Name = bytesutil.ToUnsafeBytes(name)
-	label.Value = bytesutil.ToUnsafeBytes(value)
-
-	ctx.Labels = labels
-}
-
-// FlushBufs flushes buffered rows to the underlying storage.
-func (ctx *InsertCtx) FlushBufs() error {
-	if err := vmstorage.AddRows(ctx.mrs); err != nil {
-		return fmt.Errorf("cannot store metrics: %s", err)
-	}
-	return nil
-}
--- a/app/vminsert/common/lines_reader.go
+++ b/app/vminsert/common/lines_reader.go
@@ -0,0 +1,68 @@
+package common
+
+import (
+	"bytes"
+	"fmt"
+	"io"
+
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
+)
+
+// The maximum size of a single line returned by ReadLinesBlock.
+const maxLineSize = 256 * 1024
+
+// Default size in bytes of a single block returned by ReadLinesBlock.
+const defaultBlockSize = 64 * 1024
+
+// ReadLinesBlock reads a block of lines delimited by '\n' from tailBuf and r into dstBuf.
+//
+// Trailing chars after the last newline are put into tailBuf.
+//
+// Returns (dstBuf, tailBuf).
+func ReadLinesBlock(r io.Reader, dstBuf, tailBuf []byte) ([]byte, []byte, error) {
+	if cap(dstBuf) < defaultBlockSize {
+		dstBuf = bytesutil.Resize(dstBuf, defaultBlockSize)
+	}
+	dstBuf = append(dstBuf[:0], tailBuf...)
+	tailBuf = tailBuf[:0]
+again:
+	n, err := r.Read(dstBuf[len(dstBuf):cap(dstBuf)])
+	// Check for error only if zero bytes read from r, i.e. no forward progress made.
+	// Otherwise process the read data.
+	if n == 0 {
+		if err == nil {
+			return dstBuf, tailBuf, fmt.Errorf("no forward progress made")
+		}
+		if err == io.EOF && len(dstBuf) > 0 {
+			// Missing newline in the end of stream. This is OK,
+			// so suppress io.EOF for now. It will be returned during the next
+			// call to ReadLinesBlock.
+			// This fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/60 .
+			return dstBuf, tailBuf, nil
+		}
+		return dstBuf, tailBuf, err
+	}
+	dstBuf = dstBuf[:len(dstBuf)+n]
+
+	// Search for the last newline in dstBuf and put the rest into tailBuf.
+	nn := bytes.LastIndexByte(dstBuf[len(dstBuf)-n:], '\n')
+	if nn < 0 {
+		// Haven't found even a single complete line yet.
+		if len(dstBuf) > maxLineSize {
+			return dstBuf, tailBuf, fmt.Errorf("too long line: more than %d bytes", maxLineSize)
+		}
+		if cap(dstBuf) < 2*len(dstBuf) {
+			// Increase dstBuf capacity, so more data could be read into it.
+			dstBufLen := len(dstBuf)
+			dstBuf = bytesutil.Resize(dstBuf, 2*cap(dstBuf))
+			dstBuf = dstBuf[:dstBufLen]
+		}
+		goto again
+	}
+
+	// Found at least a single line. Return it.
+	nn += len(dstBuf) - n
+	tailBuf = append(tailBuf[:0], dstBuf[nn+1:]...)
+	dstBuf = dstBuf[:nn]
+	return dstBuf, tailBuf, nil
+}
--- a/app/vminsert/common/lines_reader_test.go
+++ b/app/vminsert/common/lines_reader_test.go
@@ -0,0 +1,213 @@
+package common
+
+import (
+	"bytes"
+	"fmt"
+	"io"
+	"reflect"
+	"testing"
+)
+
+func TestReadLinesBlockFailure(t *testing.T) {
+	f := func(s string) {
+		t.Helper()
+		r := bytes.NewBufferString(s)
+		if _, _, err := ReadLinesBlock(r, nil, nil); err == nil {
+			t.Fatalf("expecting non-nil error")
+		}
+		sbr := &singleByteReader{
+			b: []byte(s),
+		}
+		if _, _, err := ReadLinesBlock(sbr, nil, nil); err == nil {
+			t.Fatalf("expecting non-nil error")
+		}
+		fr := &failureReader{}
+		if _, _, err := ReadLinesBlock(fr, nil, nil); err == nil {
+			t.Fatalf("expecting non-nil error")
+		}
+	}
+
+	// empty string
+	f("")
+
+	// too long string
+	b := make([]byte, maxLineSize+1)
+	f(string(b))
+}
+
+type failureReader struct{}
+
+func (fr *failureReader) Read(p []byte) (int, error) {
+	return 0, fmt.Errorf("some error")
+}
+
+func TestReadLinesBlockMultiLinesSingleByteReader(t *testing.T) {
+	f := func(s string, linesExpected []string) {
+		t.Helper()
+
+		r := &singleByteReader{
+			b: []byte(s),
+		}
+		var err error
+		var dstBuf, tailBuf []byte
+		var lines []string
+		for {
+			dstBuf, tailBuf, err = ReadLinesBlock(r, dstBuf, tailBuf)
+			if err != nil {
+				if err == io.EOF {
+					break
+				}
+				t.Fatalf("unexpected error in ReadLinesBlock(%q): %s", s, err)
+			}
+			lines = append(lines, string(dstBuf))
+		}
+		if !reflect.DeepEqual(lines, linesExpected) {
+			t.Fatalf("unexpected lines after reading %q: got %q; want %q", s, lines, linesExpected)
+		}
+	}
+
+	f("", nil)
+	f("foo", []string{"foo"})
+	f("foo\n", []string{"foo"})
+	f("foo\nbar", []string{"foo", "bar"})
+	f("\nfoo\nbar", []string{"", "foo", "bar"})
+	f("\nfoo\nbar\n", []string{"", "foo", "bar"})
+	f("\nfoo\nbar\n\n", []string{"", "foo", "bar", ""})
+}
+
+func TestReadLinesBlockMultiLinesBytesBuffer(t *testing.T) {
+	f := func(s string, linesExpected []string) {
+		t.Helper()
+
+		r := bytes.NewBufferString(s)
+		var err error
+		var dstBuf, tailBuf []byte
+		var lines []string
+		for {
+			dstBuf, tailBuf, err = ReadLinesBlock(r, dstBuf, tailBuf)
+			if err != nil {
+				if err == io.EOF {
+					break
+				}
+				t.Fatalf("unexpected error in ReadLinesBlock(%q): %s", s, err)
+			}
+			lines = append(lines, string(dstBuf))
+		}
+		if !reflect.DeepEqual(lines, linesExpected) {
+			t.Fatalf("unexpected lines after reading %q: got %q; want %q", s, lines, linesExpected)
+		}
+	}
+
+	f("", nil)
+	f("foo", []string{"foo"})
+	f("foo\n", []string{"foo"})
+	f("foo\nbar", []string{"foo", "bar"})
+	f("\nfoo\nbar", []string{"\nfoo", "bar"})
+	f("\nfoo\nbar\n", []string{"\nfoo\nbar"})
+	f("\nfoo\nbar\n\n", []string{"\nfoo\nbar\n"})
+}
+
+func TestReadLinesBlockSuccessSingleByteReader(t *testing.T) {
+	f := func(s, dstBufExpected, tailBufExpected string) {
+		t.Helper()
+
+		r := &singleByteReader{
+			b: []byte(s),
+		}
+		dstBuf, tailBuf, err := ReadLinesBlock(r, nil, nil)
+		if err != nil {
+			t.Fatalf("unexpected error: %s", err)
+		}
+		if string(dstBuf) != dstBufExpected {
+			t.Fatalf("unexpected dstBuf; got %q; want %q; tailBuf=%q", dstBuf, dstBufExpected, tailBuf)
+		}
+		if string(tailBuf) != tailBufExpected {
+			t.Fatalf("unexpected tailBuf; got %q; want %q; dstBuf=%q", tailBuf, tailBufExpected, dstBuf)
+		}
+
+		// Verify the same with non-empty dstBuf and tailBuf
+		r = &singleByteReader{
+			b: []byte(s),
+		}
+		dstBuf, tailBuf, err = ReadLinesBlock(r, dstBuf, tailBuf[:0])
+		if err != nil {
+			t.Fatalf("non-empty bufs: unexpected error: %s", err)
+		}
+		if string(dstBuf) != dstBufExpected {
+			t.Fatalf("non-empty bufs: unexpected dstBuf; got %q; want %q; tailBuf=%q", dstBuf, dstBufExpected, tailBuf)
+		}
+		if string(tailBuf) != tailBufExpected {
+			t.Fatalf("non-empty bufs: unexpected tailBuf; got %q; want %q; dstBuf=%q", tailBuf, tailBufExpected, dstBuf)
+		}
+	}
+
+	f("\n", "", "")
+	f("foo\n", "foo", "")
+	f("\nfoo", "", "")
+	f("foo\nbar", "foo", "")
+	f("foo\nbar\nbaz", "foo", "")
+	f("foo", "foo", "")
+
+	// The maximum line size
+	b := make([]byte, maxLineSize+10)
+	b[maxLineSize] = '\n'
+	f(string(b), string(b[:maxLineSize]), "")
+}
+
+func TestReadLinesBlockSuccessBytesBuffer(t *testing.T) {
+	f := func(s, dstBufExpected, tailBufExpected string) {
+		t.Helper()
+
+		r := bytes.NewBufferString(s)
+		dstBuf, tailBuf, err := ReadLinesBlock(r, nil, nil)
+		if err != nil {
+			t.Fatalf("unexpected error: %s", err)
+		}
+		if string(dstBuf) != dstBufExpected {
+			t.Fatalf("unexpected dstBuf; got %q; want %q; tailBuf=%q", dstBuf, dstBufExpected, tailBuf)
+		}
+		if string(tailBuf) != tailBufExpected {
+			t.Fatalf("unexpected tailBuf; got %q; want %q; dstBuf=%q", tailBuf, tailBufExpected, dstBuf)
+		}
+
+		// Verify the same with non-empty dstBuf and tailBuf
+		r = bytes.NewBufferString(s)
+		dstBuf, tailBuf, err = ReadLinesBlock(r, dstBuf, tailBuf[:0])
+		if err != nil {
+			t.Fatalf("non-empty bufs: unexpected error: %s", err)
+		}
+		if string(dstBuf) != dstBufExpected {
+			t.Fatalf("non-empty bufs: unexpected dstBuf; got %q; want %q; tailBuf=%q", dstBuf, dstBufExpected, tailBuf)
+		}
+		if string(tailBuf) != tailBufExpected {
+			t.Fatalf("non-empty bufs: unexpected tailBuf; got %q; want %q; dstBuf=%q", tailBuf, tailBufExpected, dstBuf)
+		}
+	}
+
+	f("\n", "", "")
+	f("foo\n", "foo", "")
+	f("\nfoo", "", "foo")
+	f("foo\nbar", "foo", "bar")
+	f("foo\nbar\nbaz", "foo\nbar", "baz")
+
+	// The maximum line size
+	b := make([]byte, maxLineSize+10)
+	b[maxLineSize] = '\n'
+	f(string(b), string(b[:maxLineSize]), string(b[maxLineSize+1:]))
+}
+
+type singleByteReader struct {
+	b []byte
+}
+
+func (sbr *singleByteReader) Read(p []byte) (int, error) {
+	if len(sbr.b) == 0 {
+		return 0, io.EOF
+	}
+	n := copy(p, sbr.b[:1])
+	sbr.b = sbr.b[n:]
+	if len(sbr.b) == 0 {
+		return n, io.EOF
+	}
+	return n, nil
+}
--- a/app/vminsert/concurrencylimiter/concurrencylimiter.go
+++ b/app/vminsert/concurrencylimiter/concurrencylimiter.go
@@ -1,34 +1,75 @@
 package concurrencylimiter

 import (
+	"flag"
 	"fmt"
+	"net/http"
 	"runtime"
 	"time"
+
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/timerpool"
+	"github.com/VictoriaMetrics/metrics"
 )

+var maxConcurrentInserts = flag.Int("maxConcurrentInserts", runtime.GOMAXPROCS(-1)*4, "The maximum number of concurrent inserts")
+
 var (
-	// ch is the channel for limiting concurrent inserts.
-	// Put an item into it before performing an insert and remove
-	// the item after the insert is complete.
-	ch = make(chan struct{}, runtime.GOMAXPROCS(-1)*2)
+	// ch is the channel for limiting concurrent calls to Do.
+	ch chan struct{}

 	// waitDuration is the amount of time to wait until at least a single
-	// concurrent insert out of cap(Ch) inserts is complete.
+	// concurrent Do call out of cap(ch) inserts is complete.
 	waitDuration = time.Second * 30
 )

+// Init initializes concurrencylimiter.
+//
+// Init must be called after flag.Parse call.
+func Init() {
+	ch = make(chan struct{}, *maxConcurrentInserts)
+}
+
 // Do calls f with the limited concurrency.
 func Do(f func() error) error {
-	// Limit the number of conurrent inserts in order to prevent from excess
+	// Limit the number of concurrent f calls in order to prevent from excess
 	// memory usage and CPU trashing.
-	t := time.NewTimer(waitDuration)
 	select {
 	case ch <- struct{}{}:
-		t.Stop()
+		err := f()
+		<-ch
+		return err
+	default:
+	}
+
+	// All the workers are busy.
+	// Sleep for up to waitDuration.
+	concurrencyLimitReached.Inc()
+	t := timerpool.Get(waitDuration)
+	select {
+	case ch <- struct{}{}:
+		timerpool.Put(t)
 		err := f()
 		<-ch
 		return err
 	case <-t.C:
-		return fmt.Errorf("the server is overloaded with %d concurrent inserts; either increase the number of CPUs or reduce the load", cap(ch))
+		timerpool.Put(t)
+		concurrencyLimitTimeout.Inc()
+		return &httpserver.ErrorWithStatusCode{
+			Err:        fmt.Errorf("the server is overloaded with %d concurrent inserts; either increase -maxConcurrentInserts or reduce the load", cap(ch)),
+			StatusCode: http.StatusServiceUnavailable,
+		}
 	}
 }
+
+var (
+	concurrencyLimitReached = metrics.NewCounter(`vm_concurrent_insert_limit_reached_total`)
+	concurrencyLimitTimeout = metrics.NewCounter(`vm_concurrent_insert_limit_timeout_total`)
+
+	_ = metrics.NewGauge(`vm_concurrent_insert_capacity`, func() float64 {
+		return float64(cap(ch))
+	})
+	_ = metrics.NewGauge(`vm_concurrent_insert_current`, func() float64 {
+		return float64(len(ch))
+	})
+)
--- a/app/victoria-metrics/deployment/Dockerfile
+++ b/app/victoria-metrics/deployment/Dockerfile
@@ -1,5 +1,5 @@
 FROM scratch
 COPY --from=local/certs:1.0.2 /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ca-certificates.crt
-COPY bin/victoria-metrics-prod .
-EXPOSE 8428
-ENTRYPOINT ["/victoria-metrics-prod"]
+COPY bin/vminsert-prod .
+EXPOSE 8480
+ENTRYPOINT ["/vminsert-prod"]
--- a/app/vminsert/graphite/parser.go
+++ b/app/vminsert/graphite/parser.go
@@ -4,6 +4,8 @@ import (
 	"fmt"
 	"strings"

+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
+	"github.com/VictoriaMetrics/metrics"
 	"github.com/valyala/fastjson/fastfloat"
 )

@@ -34,13 +36,8 @@ func (rs *Rows) Reset() {
 // See https://graphite.readthedocs.io/en/latest/feeding-carbon.html#the-plaintext-protocol
 //
 // s must be unchanged until rs is in use.
-func (rs *Rows) Unmarshal(s string) error {
-	var err error
-	rs.Rows, rs.tagsPool, err = unmarshalRows(rs.Rows[:0], s, rs.tagsPool[:0])
-	if err != nil {
-		return err
-	}
-	return err
+func (rs *Rows) Unmarshal(s string) {
+	rs.Rows, rs.tagsPool = unmarshalRows(rs.Rows[:0], s, rs.tagsPool[:0])
 }

 // Row is a single graphite row.
@@ -83,49 +80,61 @@ func (r *Row) unmarshal(s string, tagsPool []Tag) ([]Tag, error) {
 		tags := tagsPool[tagsStart:]
 		r.Tags = tags[:len(tags):len(tags)]
 	}
+	if len(r.Metric) == 0 {
+		return tagsPool, fmt.Errorf("metric cannot be empty")
+	}

 	n = strings.IndexByte(tail, ' ')
 	if n < 0 {
-		return tagsPool, fmt.Errorf("cannot find whitespace between value and timestamp in %q", s)
+		// There is no timestamp. Use default timestamp instead.
+		r.Value = fastfloat.ParseBestEffort(tail)
+		return tagsPool, nil
 	}
 	r.Value = fastfloat.ParseBestEffort(tail[:n])
 	r.Timestamp = fastfloat.ParseInt64BestEffort(tail[n+1:])
 	return tagsPool, nil
 }

-func unmarshalRows(dst []Row, s string, tagsPool []Tag) ([]Row, []Tag, error) {
+func unmarshalRows(dst []Row, s string, tagsPool []Tag) ([]Row, []Tag) {
 	for len(s) > 0 {
 		n := strings.IndexByte(s, '\n')
-		if n == 0 {
-			// Skip empty line
-			s = s[1:]
-			continue
-		}
-		if cap(dst) > len(dst) {
-			dst = dst[:len(dst)+1]
-		} else {
-			dst = append(dst, Row{})
-		}
-		r := &dst[len(dst)-1]
 		if n < 0 {
 			// The last line.
-			var err error
-			tagsPool, err = r.unmarshal(s, tagsPool)
-			if err != nil {
-				return dst, tagsPool, err
-			}
-			return dst, tagsPool, nil
-		}
-		var err error
-		tagsPool, err = r.unmarshal(s[:n], tagsPool)
-		if err != nil {
-			return dst, tagsPool, err
+			return unmarshalRow(dst, s, tagsPool)
 		}
+		dst, tagsPool = unmarshalRow(dst, s[:n], tagsPool)
 		s = s[n+1:]
 	}
-	return dst, tagsPool, nil
+	return dst, tagsPool
 }

+func unmarshalRow(dst []Row, s string, tagsPool []Tag) ([]Row, []Tag) {
+	if len(s) > 0 && s[len(s)-1] == '\r' {
+		s = s[:len(s)-1]
+	}
+	if len(s) == 0 {
+		// Skip empty line
+		return dst, tagsPool
+	}
+
+	if cap(dst) > len(dst) {
+		dst = dst[:len(dst)+1]
+	} else {
+		dst = append(dst, Row{})
+	}
+	r := &dst[len(dst)-1]
+	var err error
+	tagsPool, err = r.unmarshal(s, tagsPool)
+	if err != nil {
+		dst = dst[:len(dst)-1]
+		logger.Errorf("cannot unmarshal Graphite line %q: %s", s, err)
+		invalidLines.Inc()
+	}
+	return dst, tagsPool
+}
+
+var invalidLines = metrics.NewCounter(`vm_rows_invalid_total{type="graphite"}`)
+
 func unmarshalTags(dst []Tag, s string) ([]Tag, error) {
 	for {
 		if cap(dst) > len(dst) {
@@ -141,12 +150,20 @@ func unmarshalTags(dst []Tag, s string) ([]Tag, error) {
 			if err := tag.unmarshal(s); err != nil {
 				return dst[:len(dst)-1], err
 			}
+			if len(tag.Key) == 0 || len(tag.Value) == 0 {
+				// Skip empty tag
+				dst = dst[:len(dst)-1]
+			}
 			return dst, nil
 		}
 		if err := tag.unmarshal(s[:n]); err != nil {
 			return dst[:len(dst)-1], err
 		}
 		s = s[n+1:]
+		if len(tag.Key) == 0 || len(tag.Value) == 0 {
+			// Skip empty tag
+			dst = dst[:len(dst)-1]
+		}
 	}
 }

@@ -168,9 +185,6 @@ func (t *Tag) unmarshal(s string) error {
 		return fmt.Errorf("missing tag value for %q", s)
 	}
 	t.Key = s[:n]
-	if len(t.Key) == 0 {
-		return fmt.Errorf("tag key cannot be empty for %q", s)
-	}
 	t.Value = s[n+1:]
 	return nil
 }
--- a/app/vminsert/graphite/parser_test.go
+++ b/app/vminsert/graphite/parser_test.go
@@ -9,48 +9,42 @@ func TestRowsUnmarshalFailure(t *testing.T) {
 	f := func(s string) {
 		t.Helper()
 		var rows Rows
-		if err := rows.Unmarshal(s); err == nil {
-			t.Fatalf("expecting non-nil error when parsing %q", s)
+		rows.Unmarshal(s)
+		if len(rows.Rows) != 0 {
+			t.Fatalf("unexpected number of rows parsed; got %d; want 0", len(rows.Rows))
 		}

 		// Try again
-		if err := rows.Unmarshal(s); err == nil {
-			t.Fatalf("expecting non-nil error when parsing %q", s)
+		rows.Unmarshal(s)
+		if len(rows.Rows) != 0 {
+			t.Fatalf("unexpected number of rows parsed; got %d; want 0", len(rows.Rows))
 		}
 	}

+	// Missing metric
+	f(" 123 455")
+
 	// Missing value
 	f("aaa")

-	// Missing timestamp
-	f("aaa 1123")
-
-	// Invalid multiline
-	f("aaa\nbbb 123 34")
-
 	// missing tag
 	f("aa; 12 34")

 	// missing tag value
 	f("aa;bb 23 34")
-	f("aa;=dsd 234 45")
 }

 func TestRowsUnmarshalSuccess(t *testing.T) {
 	f := func(s string, rowsExpected *Rows) {
 		t.Helper()
 		var rows Rows
-		if err := rows.Unmarshal(s); err != nil {
-			t.Fatalf("cannot unmarshal %q: %s", s, err)
-		}
+		rows.Unmarshal(s)
 		if !reflect.DeepEqual(rows.Rows, rowsExpected.Rows) {
 			t.Fatalf("unexpected rows;\ngot\n%+v;\nwant\n%+v", rows.Rows, rowsExpected.Rows)
 		}

 		// Try unmarshaling again
-		if err := rows.Unmarshal(s); err != nil {
-			t.Fatalf("cannot unmarshal %q: %s", s, err)
-		}
+		rows.Unmarshal(s)
 		if !reflect.DeepEqual(rows.Rows, rowsExpected.Rows) {
 			t.Fatalf("unexpected rows;\ngot\n%+v;\nwant\n%+v", rows.Rows, rowsExpected.Rows)
 		}
@@ -63,7 +57,9 @@ func TestRowsUnmarshalSuccess(t *testing.T) {

 	// Empty line
 	f("", &Rows{})
+	f("\r", &Rows{})
 	f("\n\n", &Rows{})
+	f("\n\r\n", &Rows{})

 	// Single line
 	f("foobar -123.456 789", &Rows{
@@ -81,6 +77,14 @@ func TestRowsUnmarshalSuccess(t *testing.T) {
 		}},
 	})

+	// Missing timestamp
+	f("aaa 1123", &Rows{
+		Rows: []Row{{
+			Metric: "aaa",
+			Value:  1123,
+		}},
+	})
+
 	// Tags
 	f("foo;bar=baz 1 2", &Rows{
 		Rows: []Row{{
@@ -93,7 +97,8 @@ func TestRowsUnmarshalSuccess(t *testing.T) {
 			Timestamp: 2,
 		}},
 	})
-	f("foo;bar=baz;aa=;x=y 1 2", &Rows{
+	// Empty tags
+	f("foo;bar=baz;aa=;x=y;=z 1 2", &Rows{
 		Rows: []Row{{
 			Metric: "foo",
 			Tags: []Tag{
@@ -101,10 +106,6 @@ func TestRowsUnmarshalSuccess(t *testing.T) {
 					Key:   "bar",
 					Value: "baz",
 				},
-				{
-					Key:   "aa",
-					Value: "",
-				},
 				{
 					Key:   "x",
 					Value: "y",
@@ -116,7 +117,27 @@ func TestRowsUnmarshalSuccess(t *testing.T) {
 	})

 	// Multi lines
-	f("foo 0.3 2\nbar.baz 0.34 43\n", &Rows{
+	f("foo 0.3 2\naaa 3\nbar.baz 0.34 43\n", &Rows{
+		Rows: []Row{
+			{
+				Metric:    "foo",
+				Value:     0.3,
+				Timestamp: 2,
+			},
+			{
+				Metric: "aaa",
+				Value:  3,
+			},
+			{
+				Metric:    "bar.baz",
+				Value:     0.34,
+				Timestamp: 43,
+			},
+		},
+	})
+
+	// Multi lines with invalid line
+	f("foo 0.3 2\naaa\nbar.baz 0.34 43\n", &Rows{
 		Rows: []Row{
 			{
 				Metric:    "foo",
--- a/app/vminsert/graphite/parser_timing_test.go
+++ b/app/vminsert/graphite/parser_timing_test.go
@@ -16,8 +16,9 @@ cpu.usage_irq 0.34432 1234556768
 	b.RunParallel(func(pb *testing.PB) {
 		var rows Rows
 		for pb.Next() {
-			if err := rows.Unmarshal(s); err != nil {
-				panic(fmt.Errorf("cannot unmarshal %q: %s", s, err))
+			rows.Unmarshal(s)
+			if len(rows.Rows) != 4 {
+				panic(fmt.Errorf("unexpected number of rows unmarshaled: got %d; want 4", len(rows.Rows)))
 			}
 		}
 	})
--- a/app/vminsert/graphite/request_handler.go
+++ b/app/vminsert/graphite/request_handler.go
@@ -1,7 +1,6 @@
 package graphite

 import (
-	"bytes"
 	"fmt"
 	"io"
 	"net"
@@ -11,52 +10,72 @@ import (

 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/common"
 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/concurrencylimiter"
+	"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/netstorage"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
 	"github.com/VictoriaMetrics/metrics"
+	"github.com/valyala/fastjson/fastfloat"
 )

-var rowsInserted = metrics.NewCounter(`vm_rows_inserted_total{type="graphite"}`)
+var (
+	rowsInserted  = tenantmetrics.NewCounterMap(`vm_rows_inserted_total{type="graphite"}`)
+	rowsPerInsert = metrics.NewSummary(`vm_rows_per_insert{type="graphite"}`)
+)

 // insertHandler processes remote write for graphite plaintext protocol.
 //
 // See https://graphite.readthedocs.io/en/latest/feeding-carbon.html#the-plaintext-protocol
-func insertHandler(r io.Reader) error {
+func insertHandler(at *auth.Token, r io.Reader) error {
 	return concurrencylimiter.Do(func() error {
-		return insertHandlerInternal(r)
+		return insertHandlerInternal(at, r)
 	})
 }

-func insertHandlerInternal(r io.Reader) error {
+func insertHandlerInternal(at *auth.Token, r io.Reader) error {
 	ctx := getPushCtx()
 	defer putPushCtx(ctx)
 	for ctx.Read(r) {
-		if err := ctx.InsertRows(); err != nil {
+		if err := ctx.InsertRows(at); err != nil {
 			return err
 		}
 	}
 	return ctx.Error()
 }

-func (ctx *pushCtx) InsertRows() error {
+func (ctx *pushCtx) InsertRows(at *auth.Token) error {
 	rows := ctx.Rows.Rows
 	ic := &ctx.Common
-	ic.Reset(len(rows))
+	ic.Reset()
+	atCopy := *at
 	for i := range rows {
 		r := &rows[i]
 		ic.Labels = ic.Labels[:0]
 		ic.AddLabel("", r.Metric)
 		for j := range r.Tags {
 			tag := &r.Tags[j]
+			if at.AccountID == 0 {
+				// Multi-tenancy support via custom tags. Tags never override a non-zero AccountID / ProjectID of the caller's token for security reasons.
+				// The original token is checked instead of atCopy, so a ProjectID tag is still honored when it follows the AccountID tag.
+				if tag.Key == "VictoriaMetrics_AccountID" {
+					atCopy.AccountID = uint32(fastfloat.ParseUint64BestEffort(tag.Value))
+				}
+				if at.ProjectID == 0 && tag.Key == "VictoriaMetrics_ProjectID" {
+					atCopy.ProjectID = uint32(fastfloat.ParseUint64BestEffort(tag.Value))
+				}
+			}
 			ic.AddLabel(tag.Key, tag.Value)
 		}
-		ic.WriteDataPoint(nil, ic.Labels, r.Timestamp, r.Value)
+		if err := ic.WriteDataPoint(&atCopy, ic.Labels, r.Timestamp, r.Value); err != nil {
+			return err
+		}
 	}
-	rowsInserted.Add(len(rows))
+	// Assume that all the rows for a single connection belong to the same (AccountID, ProjectID).
+	rowsInserted.Get(&atCopy).Add(len(rows))
+	rowsPerInsert.Update(float64(len(rows)))
 	return ic.FlushBufs()
 }

-const maxReadPacketSize = 4 * 1024 * 1024
-
 const flushTimeout = 3 * time.Second

 func (ctx *pushCtx) Read(r io.Reader) bool {
@@ -71,50 +90,45 @@ func (ctx *pushCtx) Read(r io.Reader) bool {
 			return false
 		}
 	}
-	lr := io.LimitReader(r, maxReadPacketSize)
-	ctx.reqBuf.Reset()
-	ctx.reqBuf.B = append(ctx.reqBuf.B[:0], ctx.tailBuf...)
-	n, err := io.CopyBuffer(&ctx.reqBuf, lr, ctx.copyBuf[:])
-	if err != nil {
-		if ne, ok := err.(net.Error); ok && ne.Timeout() {
+	ctx.reqBuf, ctx.tailBuf, ctx.err = common.ReadLinesBlock(r, ctx.reqBuf, ctx.tailBuf)
+	if ctx.err != nil {
+		if ne, ok := ctx.err.(net.Error); ok && ne.Timeout() {
 			// Flush the read data on timeout and try reading again.
+			ctx.err = nil
 		} else {
-			graphiteReadErrors.Inc()
-			ctx.err = fmt.Errorf("cannot read graphite plaintext protocol data: %s", err)
+			if ctx.err != io.EOF {
+				graphiteReadErrors.Inc()
+				ctx.err = fmt.Errorf("cannot read graphite plaintext protocol data: %s", ctx.err)
+			}
 			return false
 		}
-	} else if n < maxReadPacketSize {
-		// Mark the end of stream.
-		ctx.err = io.EOF
+	}
+	ctx.Rows.Unmarshal(bytesutil.ToUnsafeString(ctx.reqBuf))
+
+	// Fill missing timestamps with the current timestamp rounded to seconds.
+	currentTimestamp := time.Now().Unix()
+	rows := ctx.Rows.Rows
+	for i := range rows {
+		r := &rows[i]
+		if r.Timestamp == 0 {
+			r.Timestamp = currentTimestamp
+		}
 	}

-	// Parse all the rows until the last newline in ctx.reqBuf.B
-	nn := bytes.LastIndexByte(ctx.reqBuf.B, '\n')
-	ctx.tailBuf = ctx.tailBuf[:0]
-	if nn >= 0 {
-		ctx.tailBuf = append(ctx.tailBuf[:0], ctx.reqBuf.B[nn+1:]...)
-		ctx.reqBuf.B = ctx.reqBuf.B[:nn]
-	}
-	if err = ctx.Rows.Unmarshal(bytesutil.ToUnsafeString(ctx.reqBuf.B)); err != nil {
-		graphiteUnmarshalErrors.Inc()
-		ctx.err = fmt.Errorf("cannot unmarshal graphite plaintext protocol data with size %d: %s", len(ctx.reqBuf.B), err)
-		return false
+	// Convert timestamps from seconds to milliseconds.
+	for i := range rows {
+		rows[i].Timestamp *= 1e3
 	}

-	// Convert timestamps from seconds to milliseconds
-	for i := range ctx.Rows.Rows {
-		ctx.Rows.Rows[i].Timestamp *= 1e3
-	}
 	return true
 }

 type pushCtx struct {
 	Rows   Rows
-	Common common.InsertCtx
+	Common netstorage.InsertCtx

-	reqBuf  bytesutil.ByteBuffer
+	reqBuf  []byte
 	tailBuf []byte
-	copyBuf [16 * 1024]byte

 	err error
 }
@@ -128,17 +142,16 @@ func (ctx *pushCtx) Error() error {

 func (ctx *pushCtx) reset() {
 	ctx.Rows.Reset()
-	ctx.Common.Reset(0)
-	ctx.reqBuf.Reset()
+	ctx.Common.Reset()
+	ctx.reqBuf = ctx.reqBuf[:0]
 	ctx.tailBuf = ctx.tailBuf[:0]

 	ctx.err = nil
 }

 var (
-	graphiteReadCalls       = metrics.NewCounter(`vm_read_calls_total{name="graphite"}`)
-	graphiteReadErrors      = metrics.NewCounter(`vm_read_errors_total{name="graphite"}`)
-	graphiteUnmarshalErrors = metrics.NewCounter(`vm_unmarshal_errors_total{name="graphite"}`)
+	graphiteReadCalls  = metrics.NewCounter(`vm_read_calls_total{name="graphite"}`)
+	graphiteReadErrors = metrics.NewCounter(`vm_read_errors_total{name="graphite"}`)
 )

 func getPushCtx() *pushCtx {
--- a/app/vminsert/graphite/server.go
+++ b/app/vminsert/graphite/server.go
@@ -7,8 +7,10 @@ import (
 	"sync"
 	"time"

+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil"
 	"github.com/VictoriaMetrics/metrics"
 )

@@ -23,7 +25,7 @@ var (
 // Serve starts graphite server on the given addr.
 func Serve(addr string) {
 	logger.Infof("starting TCP Graphite server at %q", addr)
-	lnTCP, err := net.Listen("tcp4", addr)
+	lnTCP, err := netutil.NewTCPListener("graphite", addr)
 	if err != nil {
 		logger.Fatalf("cannot start TCP Graphite server at %q: %s", addr, err)
 	}
@@ -70,7 +72,8 @@ func serveTCP(ln net.Listener) {
 		}
 		go func() {
 			writeRequestsTCP.Inc()
-			if err := insertHandler(c); err != nil {
+			var at auth.Token // TODO: properly initialize auth token
+			if err := insertHandler(&at, c); err != nil {
 				writeErrorsTCP.Inc()
 				logger.Errorf("error in TCP Graphite conn %q<->%q: %s", c.LocalAddr(), c.RemoteAddr(), err)
 			}
@@ -88,6 +91,7 @@ func serveUDP(ln net.PacketConn) {
 			defer wg.Done()
 			var bb bytesutil.ByteBuffer
 			bb.B = bytesutil.Resize(bb.B, 64*1024)
+			var at auth.Token // TODO: properly initialize auth token
 			for {
 				bb.Reset()
 				bb.B = bb.B[:cap(bb.B)]
@@ -108,7 +112,7 @@ func serveUDP(ln net.PacketConn) {
 				}
 				bb.B = bb.B[:n]
 				writeRequestsUDP.Inc()
-				if err := insertHandler(bb.NewReader()); err != nil {
+				if err := insertHandler(&at, bb.NewReader()); err != nil {
 					writeErrorsUDP.Inc()
 					logger.Errorf("error in UDP Graphite conn %q<->%q: %s", ln.LocalAddr(), addr, err)
 					continue
--- a/app/vminsert/influx/parser.go
+++ b/app/vminsert/influx/parser.go
@@ -4,6 +4,8 @@ import (
 	"fmt"
 	"strings"

+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
+	"github.com/VictoriaMetrics/metrics"
 	"github.com/valyala/fastjson/fastfloat"
 )

@@ -41,13 +43,8 @@ func (rs *Rows) Reset() {
 // See https://docs.influxdata.com/influxdb/v1.7/write_protocols/line_protocol_tutorial/
 //
 // s must be unchanged until rs is in use.
-func (rs *Rows) Unmarshal(s string) error {
-	var err error
-	rs.Rows, rs.tagsPool, rs.fieldsPool, err = unmarshalRows(rs.Rows[:0], s, rs.tagsPool[:0], rs.fieldsPool[:0])
-	if err != nil {
-		return err
-	}
-	return err
+func (rs *Rows) Unmarshal(s string) {
+	rs.Rows, rs.tagsPool, rs.fieldsPool = unmarshalRows(rs.Rows[:0], s, rs.tagsPool[:0], rs.fieldsPool[:0])
 }

 // Row is a single influx row.
@@ -65,9 +62,8 @@ func (r *Row) reset() {
 	r.Timestamp = 0
 }

-func (r *Row) unmarshal(s string, tagsPool []Tag, fieldsPool []Field) ([]Tag, []Field, error) {
+func (r *Row) unmarshal(s string, tagsPool []Tag, fieldsPool []Field, noEscapeChars bool) ([]Tag, []Field, error) {
 	r.reset()
-	noEscapeChars := strings.IndexByte(s, '\\') < 0
 	n := nextUnescapedChar(s, ' ', noEscapeChars)
 	if n < 0 {
 		return tagsPool, fieldsPool, fmt.Errorf("cannot find Whitespace I in %q", s)
@@ -141,9 +137,6 @@ func (tag *Tag) unmarshal(s string, noEscapeChars bool) error {
 		return fmt.Errorf("missing tag value for %q", s)
 	}
 	tag.Key = unescapeTagValue(s[:n], noEscapeChars)
-	if len(tag.Key) == 0 {
-		return fmt.Errorf("tag key cannot be empty")
-	}
 	tag.Value = unescapeTagValue(s[n+1:], noEscapeChars)
 	return nil
 }
@@ -177,39 +170,51 @@ func (f *Field) unmarshal(s string, noEscapeChars, hasQuotedFields bool) error {
 	return nil
 }

-func unmarshalRows(dst []Row, s string, tagsPool []Tag, fieldsPool []Field) ([]Row, []Tag, []Field, error) {
+func unmarshalRows(dst []Row, s string, tagsPool []Tag, fieldsPool []Field) ([]Row, []Tag, []Field) {
+	noEscapeChars := strings.IndexByte(s, '\\') < 0
 	for len(s) > 0 {
 		n := strings.IndexByte(s, '\n')
-		if n == 0 {
-			// Skip empty line
-			s = s[1:]
-			continue
-		}
-		if cap(dst) > len(dst) {
-			dst = dst[:len(dst)+1]
-		} else {
-			dst = append(dst, Row{})
-		}
-		r := &dst[len(dst)-1]
 		if n < 0 {
 			// The last line.
-			var err error
-			tagsPool, fieldsPool, err = r.unmarshal(s, tagsPool, fieldsPool)
-			if err != nil {
-				return dst, tagsPool, fieldsPool, err
-			}
-			return dst, tagsPool, fieldsPool, nil
-		}
-		var err error
-		tagsPool, fieldsPool, err = r.unmarshal(s[:n], tagsPool, fieldsPool)
-		if err != nil {
-			return dst, tagsPool, fieldsPool, err
+			return unmarshalRow(dst, s, tagsPool, fieldsPool, noEscapeChars)
 		}
+		dst, tagsPool, fieldsPool = unmarshalRow(dst, s[:n], tagsPool, fieldsPool, noEscapeChars)
 		s = s[n+1:]
 	}
-	return dst, tagsPool, fieldsPool, nil
+	return dst, tagsPool, fieldsPool
 }

+func unmarshalRow(dst []Row, s string, tagsPool []Tag, fieldsPool []Field, noEscapeChars bool) ([]Row, []Tag, []Field) {
+	if len(s) > 0 && s[len(s)-1] == '\r' {
+		s = s[:len(s)-1] // drop the trailing '\r' so "\r\n"-terminated lines parse like "\n"-terminated ones
+	}
+	if len(s) == 0 {
+		// Skip empty line (this also covers a line that consisted of a lone '\r' before the trim above)
+		return dst, tagsPool, fieldsPool
+	}
+	if s[0] == '#' {
+		// Skip comment: a line starting with '#' produces no row
+		return dst, tagsPool, fieldsPool
+	}
+
+	if cap(dst) > len(dst) {
+		dst = dst[:len(dst)+1]
+	} else {
+		dst = append(dst, Row{})
+	}
+	r := &dst[len(dst)-1]
+	var err error
+	tagsPool, fieldsPool, err = r.unmarshal(s, tagsPool, fieldsPool, noEscapeChars)
+	if err != nil {
+		dst = dst[:len(dst)-1] // discard the row slot reserved above; the invalid line is logged and counted instead of failing the whole batch
+		logger.Errorf("cannot unmarshal Influx line %q: %s; skipping it", s, err)
+		invalidLines.Inc()
+	}
+	return dst, tagsPool, fieldsPool
+}
+
+var invalidLines = metrics.NewCounter(`vm_rows_invalid_total{type="influx"}`)
+
 func unmarshalTags(dst []Tag, s string, noEscapeChars bool) ([]Tag, error) {
 	for {
 		if cap(dst) > len(dst) {
@@ -221,14 +226,22 @@ func unmarshalTags(dst []Tag, s string, noEscapeChars bool) ([]Tag, error) {
 		n := nextUnescapedChar(s, ',', noEscapeChars)
 		if n < 0 {
 			if err := tag.unmarshal(s, noEscapeChars); err != nil {
-				return dst, err
+				return dst[:len(dst)-1], err
+			}
+			if len(tag.Key) == 0 || len(tag.Value) == 0 {
+				// Skip empty tag
+				dst = dst[:len(dst)-1]
 			}
 			return dst, nil
 		}
 		if err := tag.unmarshal(s[:n], noEscapeChars); err != nil {
-			return dst, err
+			return dst[:len(dst)-1], err
 		}
 		s = s[n+1:]
+		if len(tag.Key) == 0 || len(tag.Value) == 0 {
+			// Skip empty tag
+			dst = dst[:len(dst)-1]
+		}
 	}
 }

@@ -293,8 +306,10 @@ func parseFieldValue(s string, hasQuotedFields bool) (float64, error) {
 		if len(s) < 2 || s[len(s)-1] != '"' {
 			return 0, fmt.Errorf("missing closing quote for quoted field value %s", s)
 		}
-		// Quoted string is translated to empty value.
-		return 0, nil
+		// Try converting quoted string to number, since sometimes Influx agents
+		// send numbers as strings.
+		s = s[1 : len(s)-1]
+		return fastfloat.ParseBestEffort(s), nil
 	}
 	ch := s[len(s)-1]
 	if ch == 'i' {
--- a/app/vminsert/influx/parser_test.go
+++ b/app/vminsert/influx/parser_test.go
@@ -74,13 +74,15 @@ func TestRowsUnmarshalFailure(t *testing.T) {
 	f := func(s string) {
 		t.Helper()
 		var rows Rows
-		if err := rows.Unmarshal(s); err == nil {
-			t.Fatalf("expecting non-nil error when parsing %q", s)
+		rows.Unmarshal(s)
+		if len(rows.Rows) != 0 {
+			t.Fatalf("expecting zero rows; got %d rows", len(rows.Rows))
 		}

 		// Try again
-		if err := rows.Unmarshal(s); err == nil {
-			t.Fatalf("expecting non-nil error when parsing %q", s)
+		rows.Unmarshal(s)
+		if len(rows.Rows) != 0 {
+			t.Fatalf("expecting zero rows; got %d rows", len(rows.Rows))
 		}
 	}

@@ -94,12 +96,8 @@ func TestRowsUnmarshalFailure(t *testing.T) {
 	// Missing tag value
 	f("foo,bar")
 	f("foo,bar baz")
-	f("foo,bar= baz")
 	f("foo,bar=123, 123")

-	// Missing tag name
-	f("foo,=bar baz=234")
-
 	// Missing field value
 	f("foo bar")
 	f("foo bar=")
@@ -122,17 +120,13 @@ func TestRowsUnmarshalSuccess(t *testing.T) {
 	f := func(s string, rowsExpected *Rows) {
 		t.Helper()
 		var rows Rows
-		if err := rows.Unmarshal(s); err != nil {
-			t.Fatalf("cannot unmarshal %q: %s", s, err)
-		}
+		rows.Unmarshal(s)
 		if !reflect.DeepEqual(rows.Rows, rowsExpected.Rows) {
 			t.Fatalf("unexpected rows;\ngot\n%+v;\nwant\n%+v", rows.Rows, rowsExpected.Rows)
 		}

 		// Try unmarshaling again
-		if err := rows.Unmarshal(s); err != nil {
-			t.Fatalf("cannot unmarshal %q: %s", s, err)
-		}
+		rows.Unmarshal(s)
 		if !reflect.DeepEqual(rows.Rows, rowsExpected.Rows) {
 			t.Fatalf("unexpected rows;\ngot\n%+v;\nwant\n%+v", rows.Rows, rowsExpected.Rows)
 		}
@@ -146,6 +140,12 @@ func TestRowsUnmarshalSuccess(t *testing.T) {
 	// Empty line
 	f("", &Rows{})
 	f("\n\n", &Rows{})
+	f("\n\r\n", &Rows{})
+
+	// Comment
+	f("\n# foobar\n", &Rows{})
+	f("#foobar baz", &Rows{})
+	f("#foobar baz\n#sss", &Rows{})

 	// Minimal line without tags and timestamp
 	f("foo bar=123", &Rows{
@@ -157,6 +157,15 @@ func TestRowsUnmarshalSuccess(t *testing.T) {
 			}},
 		}},
 	})
+	f("# comment\nfoo bar=123\r\n#comment2 sdsf dsf", &Rows{
+		Rows: []Row{{
+			Measurement: "foo",
+			Fields: []Field{{
+				Key:   "bar",
+				Value: 123,
+			}},
+		}},
+	})
 	f("foo bar=123\n", &Rows{
 		Rows: []Row{{
 			Measurement: "foo",
@@ -216,7 +225,7 @@ func TestRowsUnmarshalSuccess(t *testing.T) {
 	})

 	// Line with empty tag values
-	f("foo,tag1=xyz,tagN=,tag2=43as bar=123", &Rows{
+	f("foo,tag1=xyz,tagN=,tag2=43as,=xxx bar=123", &Rows{
 		Rows: []Row{{
 			Measurement: "foo",
 			Tags: []Tag{
@@ -224,10 +233,6 @@ func TestRowsUnmarshalSuccess(t *testing.T) {
 					Key:   "tag1",
 					Value: "xyz",
 				},
-				{
-					Key:   "tagN",
-					Value: "",
-				},
 				{
 					Key:   "tag2",
 					Value: "43as",
@@ -241,17 +246,27 @@ func TestRowsUnmarshalSuccess(t *testing.T) {
 	})

 	// Line with multiple tags, multiple fields and timestamp
-	f(`system,host=ip-172-16-10-144 uptime_format="3 days, 21:01" 1557761040000000000`, &Rows{
+	f(`system,host=ip-172-16-10-144 uptime_format="3 days, 21:01",quoted_float="-1.23",quoted_int="123" 1557761040000000000`, &Rows{
 		Rows: []Row{{
 			Measurement: "system",
 			Tags: []Tag{{
 				Key:   "host",
 				Value: "ip-172-16-10-144",
 			}},
-			Fields: []Field{{
-				Key:   "uptime_format",
-				Value: 0,
-			}},
+			Fields: []Field{
+				{
+					Key:   "uptime_format",
+					Value: 0,
+				},
+				{
+					Key:   "quoted_float",
+					Value: -1.23,
+				},
+				{
+					Key:   "quoted_int",
+					Value: 123,
+				},
+			},
 			Timestamp: 1557761040000000000,
 		}},
 	})
@@ -299,11 +314,11 @@ func TestRowsUnmarshalSuccess(t *testing.T) {
 	})

 	// Escape chars
-	f(`fo\,bar\=baz,x\==\\a\,\=\q\  \\\a\=\,=4.34`, &Rows{
+	f(`fo\,bar\=baz,x\=\b=\\a\,\=\q\  \\\a\=\,=4.34`, &Rows{
 		Rows: []Row{{
 			Measurement: `fo,bar=baz`,
 			Tags: []Tag{{
-				Key:   `x=`,
+				Key:   `x=\b`,
 				Value: `\a,=\q `,
 			}},
 			Fields: []Field{{
@@ -338,4 +353,78 @@ func TestRowsUnmarshalSuccess(t *testing.T) {
 			},
 		},
 	})
+
+	// Multiple lines with invalid line in the middle.
+	f("foo,tag=xyz field=1.23 48934\n"+
+		"invalid line\n"+
+		"bar x=-1i\n\n", &Rows{
+		Rows: []Row{
+			{
+				Measurement: "foo",
+				Tags: []Tag{{
+					Key:   "tag",
+					Value: "xyz",
+				}},
+				Fields: []Field{{
+					Key:   "field",
+					Value: 1.23,
+				}},
+				Timestamp: 48934,
+			},
+			{
+				Measurement: "bar",
+				Fields: []Field{{
+					Key:   "x",
+					Value: -1,
+				}},
+			},
+		},
+	})
+
+	// No newline after the second line.
+	// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/82
+	f("foo,tag=xyz field=1.23 48934\n"+
+		"bar x=-1i", &Rows{
+		Rows: []Row{
+			{
+				Measurement: "foo",
+				Tags: []Tag{{
+					Key:   "tag",
+					Value: "xyz",
+				}},
+				Fields: []Field{{
+					Key:   "field",
+					Value: 1.23,
+				}},
+				Timestamp: 48934,
+			},
+			{
+				Measurement: "bar",
+				Fields: []Field{{
+					Key:   "x",
+					Value: -1,
+				}},
+			},
+		},
+	})
+
+	f("x,y=z,g=p:\\ \\ 5432\\,\\ gp\\ mon\\ [lol]\\ con10\\ cmd5\\ SELECT f=1", &Rows{
+		Rows: []Row{{
+			Measurement: "x",
+			Tags: []Tag{
+				{
+					Key:   "y",
+					Value: "z",
+				},
+				{
+					Key:   "g",
+					Value: "p:  5432, gp mon [lol] con10 cmd5 SELECT",
+				},
+			},
+			Fields: []Field{{
+				Key:   "f",
+				Value: 1,
+			}},
+		}},
+	})
 }
--- a/app/vminsert/influx/parser_timing_test.go
+++ b/app/vminsert/influx/parser_timing_test.go
@@ -6,14 +6,19 @@ import (
 )

 func BenchmarkRowsUnmarshal(b *testing.B) {
-	s := `cpu usage_user=1.23,usage_system=4.34,usage_iowait=0.1112 1234556768`
+	s := `cpu usage_user=1.23,usage_system=4.34,usage_iowait=0.1112 1234556768
+cpu usage_user=1.23,usage_system=4.34,usage_iowait=0.1112 123455676344
+aaa usage_user=1.23,usage_system=4.34,usage_iowait=0.1112 123455676344
+bbb usage_user=1.23,usage_system=4.34,usage_iowait=0.1112 123455676344
+`
 	b.SetBytes(int64(len(s)))
 	b.ReportAllocs()
 	b.RunParallel(func(pb *testing.PB) {
 		var rows Rows
 		for pb.Next() {
-			if err := rows.Unmarshal(s); err != nil {
-				panic(fmt.Errorf("cannot unmarshal %q: %s", s, err))
+			rows.Unmarshal(s)
+			if len(rows.Rows) != 4 {
+				panic(fmt.Errorf("unexpected number of rows parsed; got %d; want 4", len(rows.Rows)))
 			}
 		}
 	})
--- a/app/vminsert/influx/request_handler.go
+++ b/app/vminsert/influx/request_handler.go
@@ -1,8 +1,7 @@
 package influx

 import (
-	"bytes"
-	"compress/gzip"
+	"flag"
 	"fmt"
 	"io"
 	"net/http"
@@ -12,32 +11,43 @@ import (

 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/common"
 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/concurrencylimiter"
+	"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/netstorage"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
 	"github.com/VictoriaMetrics/metrics"
 )

-var rowsInserted = metrics.NewCounter(`vm_rows_inserted_total{type="influx"}`)
+var (
+	measurementFieldSeparator = flag.String("influxMeasurementFieldSeparator", "_", "Separator for `{measurement}{separator}{field_name}` metric name when inserted via Influx line protocol")
+	skipSingleField           = flag.Bool("influxSkipSingleField", false, "Uses `{measurement}` instead of `{measurement}{separator}{field_name}` for metric name if Influx line contains only a single field")
+)
+
+var (
+	rowsInserted  = tenantmetrics.NewCounterMap(`vm_rows_inserted_total{type="influx"}`)
+	rowsPerInsert = metrics.NewSummary(`vm_rows_per_insert{type="influx"}`)
+)

 // InsertHandler processes remote write for influx line protocol.
 //
 // See https://github.com/influxdata/influxdb/blob/4cbdc197b8117fee648d62e2e5be75c6575352f0/tsdb/README.md
-func InsertHandler(req *http.Request) error {
+func InsertHandler(at *auth.Token, req *http.Request) error {
 	return concurrencylimiter.Do(func() error {
-		return insertHandlerInternal(req)
+		return insertHandlerInternal(at, req)
 	})
 }

-func insertHandlerInternal(req *http.Request) error {
+func insertHandlerInternal(at *auth.Token, req *http.Request) error {
 	influxReadCalls.Inc()

 	r := req.Body
 	if req.Header.Get("Content-Encoding") == "gzip" {
-		zr, err := getGzipReader(r)
+		zr, err := common.GetGzipReader(r)
 		if err != nil {
 			return fmt.Errorf("cannot read gzipped influx line protocol data: %s", err)
 		}
-		defer putGzipReader(zr)
+		defer common.PutGzipReader(zr)
 		r = zr
 	}

@@ -64,97 +74,76 @@ func insertHandlerInternal(req *http.Request) error {
 	ctx := getPushCtx()
 	defer putPushCtx(ctx)
 	for ctx.Read(r, tsMultiplier) {
-		if err := ctx.InsertRows(db); err != nil {
+		if err := ctx.InsertRows(at, db); err != nil {
 			return err
 		}
 	}
 	return ctx.Error()
 }

-func (ctx *pushCtx) InsertRows(db string) error {
+func (ctx *pushCtx) InsertRows(at *auth.Token, db string) error {
 	rows := ctx.Rows.Rows
-	rowsLen := 0
-	for i := range rows {
-		rowsLen += len(rows[i].Tags)
-	}
 	ic := &ctx.Common
-	ic.Reset(rowsLen)
+	ic.Reset()
+	rowsTotal := 0
 	for i := range rows {
 		r := &rows[i]
 		ic.Labels = ic.Labels[:0]
-		ic.AddLabel("db", db)
+		hasDBLabel := false
 		for j := range r.Tags {
 			tag := &r.Tags[j]
+			if tag.Key == "db" {
+				hasDBLabel = true
+			}
 			ic.AddLabel(tag.Key, tag.Value)
 		}
-		ctx.metricNameBuf = storage.MarshalMetricNameRaw(ctx.metricNameBuf[:0], ic.Labels)
+		if len(db) > 0 && !hasDBLabel {
+			ic.AddLabel("db", db)
+		}
+		ic.MetricNameBuf = storage.MarshalMetricNameRaw(ic.MetricNameBuf[:0], at.AccountID, at.ProjectID, ic.Labels)
+		metricNameBufLen := len(ic.MetricNameBuf)
 		ctx.metricGroupBuf = append(ctx.metricGroupBuf[:0], r.Measurement...)
-		ctx.metricGroupBuf = append(ctx.metricGroupBuf, '.')
+		skipFieldKey := len(r.Fields) == 1 && *skipSingleField
+		if !skipFieldKey {
+			ctx.metricGroupBuf = append(ctx.metricGroupBuf, *measurementFieldSeparator...)
+		}
 		metricGroupPrefixLen := len(ctx.metricGroupBuf)
+		ic.AddLabel("", "placeholder")
+		placeholderLabel := &ic.Labels[len(ic.Labels)-1]
 		for j := range r.Fields {
 			f := &r.Fields[j]
-			ctx.metricGroupBuf = append(ctx.metricGroupBuf[:metricGroupPrefixLen], f.Key...)
+			if !skipFieldKey {
+				ctx.metricGroupBuf = append(ctx.metricGroupBuf[:metricGroupPrefixLen], f.Key...)
+			}
 			metricGroup := bytesutil.ToUnsafeString(ctx.metricGroupBuf)
-			ic.Labels = ic.Labels[:0]
+			ic.Labels = ic.Labels[:len(ic.Labels)-1]
 			ic.AddLabel("", metricGroup)
-			ic.WriteDataPoint(ctx.metricNameBuf, ic.Labels[:1], r.Timestamp, f.Value)
+			ic.MetricNameBuf = storage.MarshalMetricLabelRaw(ic.MetricNameBuf[:metricNameBufLen], placeholderLabel)
+			storageNodeIdx := ic.GetStorageNodeIdx(at, ic.Labels)
+			if err := ic.WriteDataPointExt(at, storageNodeIdx, ic.MetricNameBuf, r.Timestamp, f.Value); err != nil {
+				return err
+			}
 		}
-		rowsInserted.Add(len(r.Fields))
+		rowsTotal += len(r.Fields)
 	}
+	rowsInserted.Get(at).Add(rowsTotal)
+	rowsPerInsert.Update(float64(rowsTotal))
 	return ic.FlushBufs()
 }

-func getGzipReader(r io.Reader) (*gzip.Reader, error) {
-	v := gzipReaderPool.Get()
-	if v == nil {
-		return gzip.NewReader(r)
-	}
-	zr := v.(*gzip.Reader)
-	if err := zr.Reset(r); err != nil {
-		return nil, err
-	}
-	return zr, nil
-}
-
-func putGzipReader(zr *gzip.Reader) {
-	_ = zr.Close()
-	gzipReaderPool.Put(zr)
-}
-
-var gzipReaderPool sync.Pool
-
-const maxReadPacketSize = 4 * 1024 * 1024
-
 func (ctx *pushCtx) Read(r io.Reader, tsMultiplier int64) bool {
 	if ctx.err != nil {
 		return false
 	}
-	lr := io.LimitReader(r, maxReadPacketSize)
-	ctx.reqBuf.Reset()
-	ctx.reqBuf.B = append(ctx.reqBuf.B[:0], ctx.tailBuf...)
-	n, err := io.CopyBuffer(&ctx.reqBuf, lr, ctx.copyBuf[:])
-	if err != nil {
-		influxReadErrors.Inc()
-		ctx.err = fmt.Errorf("cannot read influx line protocol data: %s", err)
-		return false
-	}
-	if n < maxReadPacketSize {
-		// Mark the end of stream.
-		ctx.err = io.EOF
-	}
-
-	// Parse all the rows until the last newline in ctx.reqBuf.B
-	nn := bytes.LastIndexByte(ctx.reqBuf.B, '\n')
-	ctx.tailBuf = ctx.tailBuf[:0]
-	if nn >= 0 {
-		ctx.tailBuf = append(ctx.tailBuf[:0], ctx.reqBuf.B[nn+1:]...)
-		ctx.reqBuf.B = ctx.reqBuf.B[:nn]
-	}
-	if err = ctx.Rows.Unmarshal(bytesutil.ToUnsafeString(ctx.reqBuf.B)); err != nil {
-		influxUnmarshalErrors.Inc()
-		ctx.err = fmt.Errorf("cannot unmarshal influx line protocol data with size %d: %s", len(ctx.reqBuf.B), err)
+	ctx.reqBuf, ctx.tailBuf, ctx.err = common.ReadLinesBlock(r, ctx.reqBuf, ctx.tailBuf)
+	if ctx.err != nil {
+		if ctx.err != io.EOF {
+			influxReadErrors.Inc()
+			ctx.err = fmt.Errorf("cannot read influx line protocol data: %s", ctx.err)
+		}
 		return false
 	}
+	ctx.Rows.Unmarshal(bytesutil.ToUnsafeString(ctx.reqBuf))

 	// Adjust timestamps according to tsMultiplier
 	currentTs := time.Now().UnixNano() / 1e6
@@ -169,6 +158,7 @@ func (ctx *pushCtx) Read(r io.Reader, tsMultiplier int64) bool {
 		}
 	} else if tsMultiplier < 0 {
 		tsMultiplier = -tsMultiplier
+		currentTs -= currentTs % tsMultiplier
 		for i := range ctx.Rows.Rows {
 			row := &ctx.Rows.Rows[i]
 			if row.Timestamp == 0 {
@@ -182,19 +172,16 @@ func (ctx *pushCtx) Read(r io.Reader, tsMultiplier int64) bool {
 }

 var (
-	influxReadCalls       = metrics.NewCounter(`vm_read_calls_total{name="influx"}`)
-	influxReadErrors      = metrics.NewCounter(`vm_read_errors_total{name="influx"}`)
-	influxUnmarshalErrors = metrics.NewCounter(`vm_unmarshal_errors_total{name="influx"}`)
+	influxReadCalls  = metrics.NewCounter(`vm_read_calls_total{name="influx"}`)
+	influxReadErrors = metrics.NewCounter(`vm_read_errors_total{name="influx"}`)
 )

 type pushCtx struct {
 	Rows   Rows
-	Common common.InsertCtx
+	Common netstorage.InsertCtx

-	reqBuf         bytesutil.ByteBuffer
+	reqBuf         []byte
 	tailBuf        []byte
-	copyBuf        [16 * 1024]byte
-	metricNameBuf  []byte
 	metricGroupBuf []byte

 	err error
@@ -209,11 +196,9 @@ func (ctx *pushCtx) Error() error {

 func (ctx *pushCtx) reset() {
 	ctx.Rows.Reset()
-	ctx.Common.Reset(0)
-
-	ctx.reqBuf.Reset()
+	ctx.Common.Reset()
+	ctx.reqBuf = ctx.reqBuf[:0]
 	ctx.tailBuf = ctx.tailBuf[:0]
-	ctx.metricNameBuf = ctx.metricNameBuf[:0]
 	ctx.metricGroupBuf = ctx.metricGroupBuf[:0]

 	ctx.err = nil
--- a/app/vminsert/main.go
+++ b/app/vminsert/main.go
@@ -1,69 +1,137 @@
-package vminsert
+package main

 import (
 	"flag"
 	"fmt"
 	"net/http"
-	"strings"
+	"time"

+	"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/concurrencylimiter"
 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/graphite"
 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/influx"
+	"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/netstorage"
 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/opentsdb"
+	"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/opentsdbhttp"
 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/prometheus"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
 	"github.com/VictoriaMetrics/metrics"
 )

 var (
-	graphiteListenAddr   = flag.String("graphiteListenAddr", "", "TCP and UDP address to listen for Graphite plaintext data. Usually :2003 must be set. Doesn't work if empty")
-	opentsdbListenAddr   = flag.String("opentsdbListenAddr", "", "TCP and UDP address to listen for OpentTSDB put messages. Usually :4242 must be set. Doesn't work if empty")
-	maxInsertRequestSize = flag.Int("maxInsertRequestSize", 32*1024*1024, "The maximum size of a single insert request in bytes")
+	graphiteListenAddr     = flag.String("graphiteListenAddr", "", "TCP and UDP address to listen for Graphite plaintext data. Usually :2003 must be set. Doesn't work if empty")
+	opentsdbListenAddr     = flag.String("opentsdbListenAddr", "", "TCP and UDP address to listen for OpenTSDB put messages. Usually :4242 must be set. Doesn't work if empty")
+	opentsdbHTTPListenAddr = flag.String("opentsdbHTTPListenAddr", "", "TCP address to listen for OpenTSDB HTTP put requests. Usually :4242 must be set. Doesn't work if empty")
+	httpListenAddr         = flag.String("httpListenAddr", ":8480", "Address to listen for http connections")
+	maxInsertRequestSize   = flag.Int("maxInsertRequestSize", 32*1024*1024, "The maximum size of a single insert request in bytes")
+	maxLabelsPerTimeseries = flag.Int("maxLabelsPerTimeseries", 30, "The maximum number of labels accepted per time series. Superfluous labels are dropped")
+	storageNodes           = flagutil.NewArray("storageNode", "Address of vmstorage nodes; usage: -storageNode=vmstorage-host1:8400 -storageNode=vmstorage-host2:8400")
 )

-// Init initializes vminsert.
-func Init() {
+func main() {
+	flag.Parse()
+	buildinfo.Init()
+	logger.Init()
+
+	logger.Infof("initializing netstorage for storageNodes %s...", *storageNodes)
+	startTime := time.Now()
+	if len(*storageNodes) == 0 {
+		logger.Fatalf("missing -storageNode arg")
+	}
+	netstorage.InitStorageNodes(*storageNodes)
+	logger.Infof("successfully initialized netstorage in %s", time.Since(startTime))
+
+	storage.SetMaxLabelsPerTimeseries(*maxLabelsPerTimeseries)
+
+	concurrencylimiter.Init()
 	if len(*graphiteListenAddr) > 0 {
 		go graphite.Serve(*graphiteListenAddr)
 	}
 	if len(*opentsdbListenAddr) > 0 {
 		go opentsdb.Serve(*opentsdbListenAddr)
 	}
-}
+	if len(*opentsdbHTTPListenAddr) > 0 {
+		go opentsdbhttp.Serve(*opentsdbHTTPListenAddr, int64(*maxInsertRequestSize))
+	}
+
+	go func() {
+		httpserver.Serve(*httpListenAddr, requestHandler)
+	}()
+
+	sig := procutil.WaitForSigterm()
+	logger.Infof("service received signal %s", sig)
+
+	logger.Infof("gracefully shutting down the service at %q", *httpListenAddr)
+	startTime = time.Now()
+	if err := httpserver.Stop(*httpListenAddr); err != nil {
+		logger.Fatalf("cannot stop the service: %s", err)
+	}
+	logger.Infof("successfully shut down the service in %s", time.Since(startTime))
 
-// Stop stops vminsert.
-func Stop() {
 	if len(*graphiteListenAddr) > 0 {
 		graphite.Stop()
 	}
 	if len(*opentsdbListenAddr) > 0 {
 		opentsdb.Stop()
 	}
+	if len(*opentsdbHTTPListenAddr) > 0 {
+		opentsdbhttp.Stop()
+	}
+
+	logger.Infof("shutting down netstorage...")
+	startTime = time.Now()
+	netstorage.Stop()
+	logger.Infof("successfully stopped netstorage in %s", time.Since(startTime))
+
+	fs.MustStopDirRemover()
+
+	logger.Infof("the vminsert has been stopped")
 }

-// RequestHandler is a handler for Prometheus remote storage write API
-func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
-	path := strings.Replace(r.URL.Path, "//", "/", -1)
-	switch path {
-	case "/api/v1/write":
+func requestHandler(w http.ResponseWriter, r *http.Request) bool {
+	p, err := httpserver.ParsePath(r.URL.Path)
+	if err != nil {
+		httpserver.Errorf(w, "cannot parse path %q: %s", r.URL.Path, err)
+		return true
+	}
+	if p.Prefix != "insert" {
+		// This is not our link.
+		return false
+	}
+	at, err := auth.NewToken(p.AuthToken)
+	if err != nil {
+		httpserver.Errorf(w, "auth error: %s", err)
+		return true
+	}
+
+	switch p.Suffix {
+	case "prometheus/", "prometheus", "prometheus/api/v1/write":
 		prometheusWriteRequests.Inc()
-		if err := prometheus.InsertHandler(r, int64(*maxInsertRequestSize)); err != nil {
+		if err := prometheus.InsertHandler(at, r, int64(*maxInsertRequestSize)); err != nil {
 			prometheusWriteErrors.Inc()
 			httpserver.Errorf(w, "error in %q: %s", r.URL.Path, err)
 			return true
 		}
 		w.WriteHeader(http.StatusNoContent)
 		return true
-	case "/write", "/api/v2/write":
+	case "influx/write", "influx/api/v2/write":
 		influxWriteRequests.Inc()
-		if err := influx.InsertHandler(r); err != nil {
+		if err := influx.InsertHandler(at, r); err != nil {
 			influxWriteErrors.Inc()
 			httpserver.Errorf(w, "error in %q: %s", r.URL.Path, err)
 			return true
 		}
 		w.WriteHeader(http.StatusNoContent)
 		return true
-	case "/query":
-		// Emulate fake response for influx query
+	case "influx/query":
+		// Emulate fake response for influx query.
+		// This is required for TSBS benchmark.
 		influxQueryRequests.Inc()
 		fmt.Fprintf(w, `{"results":[{"series":[{"values":[]}]}]}`)
 		return true
@@ -74,11 +142,11 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
 }

 var (
-	prometheusWriteRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/write", protocol="prometheus"}`)
-	prometheusWriteErrors   = metrics.NewCounter(`vm_http_request_errors_total{path="/api/v1/write", protocol="prometheus"}`)
+	prometheusWriteRequests = metrics.NewCounter(`vm_http_requests_total{path="/insert/{}/prometheus/", protocol="prometheus"}`)
+	prometheusWriteErrors   = metrics.NewCounter(`vm_http_request_errors_total{path="/insert/{}/prometheus/", protocol="prometheus"}`)

-	influxWriteRequests = metrics.NewCounter(`vm_http_requests_total{path="/write", protocol="influx"}`)
-	influxWriteErrors   = metrics.NewCounter(`vm_http_request_errors_total{path="/write", protocol="influx"}`)
+	influxWriteRequests = metrics.NewCounter(`vm_http_requests_total{path="/insert/{}/influx/", protocol="influx"}`)
+	influxWriteErrors   = metrics.NewCounter(`vm_http_request_errors_total{path="/insert/{}/influx/", protocol="influx"}`)

-	influxQueryRequests = metrics.NewCounter(`vm_http_requests_total{path="/query", protocol="influx"}`)
+	influxQueryRequests = metrics.NewCounter(`vm_http_requests_total{path="/insert/{}/influx/query", protocol="influx"}`)
 )
--- a/app/vminsert/netstorage/insert_ctx.go
+++ b/app/vminsert/netstorage/insert_ctx.go
@@ -0,0 +1,163 @@
+package netstorage
+
+import (
+	"fmt"
+	"net/http"
+
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/consts"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
+	xxhash "github.com/cespare/xxhash/v2"
+	jump "github.com/lithammer/go-jump-consistent-hash"
+)
+
+// InsertCtx is a generic context for inserting data
+type InsertCtx struct {
+	Labels        []prompb.Label
+	MetricNameBuf []byte
+
+	bufRowss  []bufRows
+	labelsBuf []byte
+}
+
+type bufRows struct {
+	buf  []byte
+	rows int
+}
+
+func (br *bufRows) pushTo(sn *storageNode) error {
+	bufLen := len(br.buf)
+	err := sn.push(br.buf, br.rows)
+	br.buf = br.buf[:0]
+	br.rows = 0
+	if err != nil {
+		return &httpserver.ErrorWithStatusCode{
+			Err:        fmt.Errorf("cannot send %d bytes to storageNode %q: %s", bufLen, sn.dialer.Addr(), err),
+			StatusCode: http.StatusServiceUnavailable,
+		}
+	}
+	return nil
+}
+
+// Reset clears ctx for reuse: label references are dropped and all the buffers are truncated with capacity kept.
+func (ctx *InsertCtx) Reset() {
+	for i := range ctx.Labels {
+		ctx.Labels[i].Name = nil // index the slice; a range value is a copy, so the stored label would stay untouched
+		ctx.Labels[i].Value = nil
+	}
+	ctx.Labels = ctx.Labels[:0]
+	ctx.MetricNameBuf = ctx.MetricNameBuf[:0]
+
+	if ctx.bufRowss == nil {
+		ctx.bufRowss = make([]bufRows, len(storageNodes))
+	}
+	for i := range ctx.bufRowss {
+		br := &ctx.bufRowss[i]
+		br.buf = br.buf[:0]
+		br.rows = 0
+	}
+	ctx.labelsBuf = ctx.labelsBuf[:0]
+}
+
+// AddLabel adds (name, value) label to ctx.Labels.
+//
+// name and value must exist until ctx.Labels is used.
+func (ctx *InsertCtx) AddLabel(name, value string) {
+	labels := ctx.Labels
+	if cap(labels) > len(labels) {
+		labels = labels[:len(labels)+1]
+	} else {
+		labels = append(labels, prompb.Label{})
+	}
+	label := &labels[len(labels)-1]
+
+	// Do not copy name and value contents for performance reasons.
+	// This reduces GC overhead on the number of objects and allocations.
+	label.Name = bytesutil.ToUnsafeBytes(name)
+	label.Value = bytesutil.ToUnsafeBytes(value)
+
+	ctx.Labels = labels
+}
+
+// WriteDataPoint writes (timestamp, value) data point with the given at and labels to ctx buffer.
+func (ctx *InsertCtx) WriteDataPoint(at *auth.Token, labels []prompb.Label, timestamp int64, value float64) error {
+	ctx.MetricNameBuf = storage.MarshalMetricNameRaw(ctx.MetricNameBuf[:0], at.AccountID, at.ProjectID, labels)
+	storageNodeIdx := ctx.GetStorageNodeIdx(at, labels)
+	return ctx.WriteDataPointExt(at, storageNodeIdx, ctx.MetricNameBuf, timestamp, value)
+}
+
+// WriteDataPointExt writes the given metricNameRaw with (timestamp, value) to ctx buffer with the given storageNodeIdx.
+func (ctx *InsertCtx) WriteDataPointExt(at *auth.Token, storageNodeIdx int, metricNameRaw []byte, timestamp int64, value float64) error {
+	br := &ctx.bufRowss[storageNodeIdx]
+	sn := storageNodes[storageNodeIdx]
+	bufNew := storage.MarshalMetricRow(br.buf, metricNameRaw, timestamp, value)
+	if len(bufNew) >= maxStorageNodeBufSize {
+		// Send buf to storageNode, since it is too big.
+		if err := br.pushTo(sn); err != nil {
+			return err
+		}
+		br.buf = storage.MarshalMetricRow(bufNew[:0], metricNameRaw, timestamp, value)
+	} else {
+		br.buf = bufNew
+	}
+	br.rows++
+	return nil
+}
+
+var maxStorageNodeBufSize = func() int {
+	n := 1024 * 1024
+	if n > consts.MaxInsertPacketSize {
+		n = consts.MaxInsertPacketSize
+	}
+	return n
+}()
+
+// FlushBufs flushes ctx bufs to remote storage nodes.
+func (ctx *InsertCtx) FlushBufs() error {
+	// Send per-storageNode bufs.
+	for i := range ctx.bufRowss {
+		br := &ctx.bufRowss[i]
+		if len(br.buf) == 0 {
+			continue
+		}
+		sn := storageNodes[i]
+		if err := br.pushTo(sn); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// GetStorageNodeIdx returns storage node index for the given at and labels.
+//
+// The returned index must be passed to WriteDataPointExt.
+func (ctx *InsertCtx) GetStorageNodeIdx(at *auth.Token, labels []prompb.Label) int {
+	if len(storageNodes) == 1 {
+		// Fast path - only a single storage node.
+		return 0
+	}
+
+	buf := ctx.labelsBuf[:0]
+	buf = encoding.MarshalUint32(buf, at.AccountID)
+	buf = encoding.MarshalUint32(buf, at.ProjectID)
+	for i := range labels {
+		label := &labels[i]
+		buf = marshalBytesFast(buf, label.Name)
+		buf = marshalBytesFast(buf, label.Value)
+	}
+	h := xxhash.Sum64(buf)
+	ctx.labelsBuf = buf
+
+	idx := int(jump.Hash(h, int32(len(storageNodes))))
+	return idx
+}
+
+func marshalBytesFast(dst []byte, s []byte) []byte {
+	dst = encoding.MarshalUint16(dst, uint16(len(s)))
+	dst = append(dst, s...)
+	return dst
+}
--- a/app/vminsert/netstorage/netstorage.go
+++ b/app/vminsert/netstorage/netstorage.go
@@ -0,0 +1,461 @@
+package netstorage
+
+import (
+	"flag"
+	"fmt"
+	"sync"
+	"time"
+
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/consts"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/handshake"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/memory"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
+	"github.com/VictoriaMetrics/metrics"
+	xxhash "github.com/cespare/xxhash/v2"
+)
+
+var disableRPCCompression = flag.Bool(`rpc.disableCompression`, false, "Disable compression of RPC traffic. This reduces CPU usage at the cost of higher network bandwidth usage")
+
+// push pushes buf to sn.
+//
+// It falls back to sending data to another vmstorage node if sn is currently
+// unavailable.
+//
+// rows is the number of rows in the buf.
+func (sn *storageNode) push(buf []byte, rows int) error {
+	if len(buf) > consts.MaxInsertPacketSize {
+		logger.Panicf("BUG: len(buf)=%d cannot exceed %d", len(buf), consts.MaxInsertPacketSize)
+	}
+	sn.rowsPushed.Add(rows)
+
+	sn.mu.Lock()
+	defer sn.mu.Unlock()
+
+	if sn.broken {
+		// The vmstorage node is broken. Re-route buf to healthy vmstorage nodes.
+		if err := addToReroutedBuf(buf, rows); err != nil {
+			rowsLostTotal.Add(rows)
+			return err
+		}
+		sn.rowsReroutedFromHere.Add(rows)
+		return nil
+	}
+
+	if len(sn.buf)+len(buf) <= consts.MaxInsertPacketSize {
+		// Fast path: the buf contents fits sn.buf.
+		sn.buf = append(sn.buf, buf...)
+		sn.rows += rows
+		return nil
+	}
+
+	// Slow path: the buf contents doesn't fit sn.buf.
+	// Flush sn.buf to vmstorage and then add buf to sn.buf.
+	if err := sn.flushBufLocked(); err != nil {
+		// Failed to flush or re-route sn.buf to vmstorage nodes.
+		// The sn.buf is already dropped by flushBufLocked.
+		// Drop buf too, since there is little sense in trying to rescue it.
+		rowsLostTotal.Add(rows)
+		return err
+	}
+
+	// Successful flush.
+	sn.buf = append(sn.buf, buf...)
+	sn.rows += rows
+	return nil
+}
+
+func (sn *storageNode) sendReroutedRow(buf []byte) error {
+	sn.mu.Lock()
+	defer sn.mu.Unlock()
+
+	if sn.broken {
+		return errBrokenStorageNode
+	}
+	if len(sn.buf)+len(buf) > consts.MaxInsertPacketSize {
+		return fmt.Errorf("cannot put %d bytes into vmstorage buffer, since its size cannot exceed %d bytes", len(sn.buf)+len(buf), consts.MaxInsertPacketSize)
+	}
+	sn.buf = append(sn.buf, buf...)
+	sn.rows++
+	return nil
+}
+
+var errBrokenStorageNode = fmt.Errorf("the vmstorage node is temporarily broken")
+
+func (sn *storageNode) flushBufLocked() error {
+	if err := sn.sendBufLocked(sn.buf); err == nil {
+		// Successful flush. Remove broken flag.
+		sn.broken = false
+		sn.rowsSent.Add(sn.rows)
+		sn.buf = sn.buf[:0]
+		sn.rows = 0
+		return nil
+	}
+
+	// Couldn't flush sn.buf to vmstorage. Mark sn as broken
+	// and try re-routing sn.buf to healthy vmstorage nodes.
+	sn.broken = true
+	err := addToReroutedBuf(sn.buf, sn.rows)
+	if err != nil {
+		rowsLostTotal.Add(sn.rows)
+	}
+	sn.buf = sn.buf[:0]
+	sn.rows = 0
+	return err
+}
+
+func (sn *storageNode) sendBufLocked(buf []byte) error {
+	// sizeBuf guarantees that the rows batch will be either fully
+	// read or fully discarded on the vmstorage side.
+	// sizeBuf is used for read optimization in vmstorage.
+	if sn.bc == nil {
+		if err := sn.dial(); err != nil {
+			return fmt.Errorf("cannot dial %q: %s", sn.dialer.Addr(), err)
+		}
+	}
+	if len(buf) == 0 {
+		return nil
+	}
+	deadline := time.Now().Add(30 * time.Second)
+	if err := sn.bc.SetWriteDeadline(deadline); err != nil {
+		sn.closeBrokenConn()
+		return fmt.Errorf("cannot set write deadline to %s: %s", deadline, err)
+	}
+	sn.sizeBuf = encoding.MarshalUint64(sn.sizeBuf[:0], uint64(len(buf)))
+	if _, err := sn.bc.Write(sn.sizeBuf); err != nil {
+		sn.closeBrokenConn()
+		return fmt.Errorf("cannot write data size %d: %s", len(buf), err)
+	}
+	if _, err := sn.bc.Write(buf); err != nil {
+		sn.closeBrokenConn()
+		return fmt.Errorf("cannot write data: %s", err)
+	}
+	if err := sn.bc.Flush(); err != nil {
+		sn.closeBrokenConn()
+		return fmt.Errorf("cannot flush data: %s", err)
+	}
+	return nil
+}
+
+func (sn *storageNode) dial() error {
+	c, err := sn.dialer.Dial()
+	if err != nil {
+		sn.dialErrors.Inc()
+		return err
+	}
+	compressionLevel := 1
+	if *disableRPCCompression {
+		compressionLevel = 0
+	}
+	bc, err := handshake.VMInsertClient(c, compressionLevel)
+	if err != nil {
+		_ = c.Close()
+		sn.handshakeErrors.Inc()
+		return fmt.Errorf("handshake error: %s", err)
+	}
+	sn.bc = bc
+	return nil
+}
+
+func (sn *storageNode) closeBrokenConn() {
+	_ = sn.bc.Close()
+	sn.bc = nil
+	sn.connectionErrors.Inc()
+}
+
+// run flushes sn.buf to vmstorage roughly once per second until stopCh is closed; a final flush is made before returning.
+func (sn *storageNode) run(stopCh <-chan struct{}) {
+	t := time.NewTimer(time.Second)
+	mustStop := false
+	for !mustStop {
+		select {
+		case <-stopCh:
+			// Fall through to the last flushBufLocked call in order to send the remaining bits of data.
+			mustStop = true
+		case <-t.C:
+		}
+
+		sn.mu.Lock()
+		if err := sn.flushBufLocked(); err != nil {
+			// Do not call closeBrokenConn here: every failing path in sendBufLocked already leaves sn.bc nil.
+			logger.Errorf("cannot flush data to storageNode %q: %s", sn.dialer.Addr(), err)
+		}
+		sn.mu.Unlock()
+
+		t.Reset(time.Second)
+	}
+	t.Stop()
+}
+
+func rerouteWorker(stopCh <-chan struct{}) {
+	t := time.NewTimer(time.Second)
+	var buf []byte
+	mustStop := false
+	for !mustStop {
+		select {
+		case <-stopCh:
+			mustStop = true
+			// Make sure spreadReroutedBufToStorageNodes is called last time before returning
+			// in order to reroute the remaining data to healthy vmstorage nodes.
+		case <-t.C:
+		}
+
+		var err error
+		buf, err = spreadReroutedBufToStorageNodes(buf[:0])
+		if err != nil {
+			rerouteErrors.Inc()
+			logger.Errorf("cannot reroute data among healthy vmstorage nodes: %s", err)
+		}
+		t.Reset(time.Second)
+	}
+	t.Stop()
+}
+
+// storageNode is a client sending data to vmstorage node.
+type storageNode struct {
+	mu sync.Mutex
+
+	// Buffer with data that needs to be written to vmstorage node.
+	buf []byte
+
+	// The number of rows buf contains at the moment.
+	rows int
+
+	// Temporary buffer for encoding marshaled buf size.
+	sizeBuf []byte
+
+	// broken is set to true if the given vmstorage node is temporarily unhealthy.
+	// In this case the data is re-routed to the remaining healthy vmstorage nodes.
+	broken bool
+
+	dialer *netutil.TCPDialer
+
+	bc *handshake.BufferedConn
+
+	// The number of dial errors to vmstorage node.
+	dialErrors *metrics.Counter
+
+	// The number of handshake errors to vmstorage node.
+	handshakeErrors *metrics.Counter
+
+	// The number of connection errors to vmstorage node.
+	connectionErrors *metrics.Counter
+
+	// The number of rows pushed to storageNode with push method.
+	rowsPushed *metrics.Counter
+
+	// The number of rows sent to vmstorage node.
+	rowsSent *metrics.Counter
+
+	// The number of rows rerouted from the given vmstorage node
+	// to healthy nodes when the given node was unhealthy.
+	rowsReroutedFromHere *metrics.Counter
+
+	// The number of rows rerouted to the given vmstorage node
+	// from other nodes when they were unhealthy.
+	rowsReroutedToHere *metrics.Counter
+}
+
+// storageNodes contains a list of vmstorage node clients.
+var storageNodes []*storageNode
+
+var (
+	storageNodesWG  sync.WaitGroup
+	rerouteWorkerWG sync.WaitGroup
+)
+
+var (
+	storageNodesStopCh  = make(chan struct{})
+	rerouteWorkerStopCh = make(chan struct{})
+)
+
+// InitStorageNodes initializes vmstorage nodes' connections to the given addrs.
+func InitStorageNodes(addrs []string) {
+	if len(addrs) == 0 {
+		logger.Panicf("BUG: addrs must be non-empty")
+	}
+	if len(addrs) > 255 {
+		logger.Panicf("BUG: too much addresses: %d; max supported %d addresses", len(addrs), 255)
+	}
+
+	for _, addr := range addrs {
+		sn := &storageNode{
+			dialer: netutil.NewTCPDialer("vminsert", addr),
+
+			dialErrors:           metrics.NewCounter(fmt.Sprintf(`vm_rpc_dial_errors_total{name="vminsert", addr=%q}`, addr)),
+			handshakeErrors:      metrics.NewCounter(fmt.Sprintf(`vm_rpc_handshake_errors_total{name="vminsert", addr=%q}`, addr)),
+			connectionErrors:     metrics.NewCounter(fmt.Sprintf(`vm_rpc_connection_errors_total{name="vminsert", addr=%q}`, addr)),
+			rowsPushed:           metrics.NewCounter(fmt.Sprintf(`vm_rpc_rows_pushed_total{name="vminsert", addr=%q}`, addr)),
+			rowsSent:             metrics.NewCounter(fmt.Sprintf(`vm_rpc_rows_sent_total{name="vminsert", addr=%q}`, addr)),
+			rowsReroutedFromHere: metrics.NewCounter(fmt.Sprintf(`vm_rpc_rows_rerouted_from_here_total{name="vminsert", addr=%q}`, addr)),
+			rowsReroutedToHere:   metrics.NewCounter(fmt.Sprintf(`vm_rpc_rows_rerouted_to_here_total{name="vminsert", addr=%q}`, addr)),
+		}
+		_ = metrics.NewGauge(fmt.Sprintf(`vm_rpc_rows_pending{name="vminsert", addr=%q}`, addr), func() float64 {
+			sn.mu.Lock()
+			n := sn.rows
+			sn.mu.Unlock()
+			return float64(n)
+		})
+		_ = metrics.NewGauge(fmt.Sprintf(`vm_rpc_buf_pending_bytes{name="vminsert", addr=%q}`, addr), func() float64 {
+			sn.mu.Lock()
+			n := len(sn.buf)
+			sn.mu.Unlock()
+			return float64(n)
+		})
+		storageNodes = append(storageNodes, sn)
+		storageNodesWG.Add(1)
+		go func(addr string) {
+			sn.run(storageNodesStopCh)
+			storageNodesWG.Done()
+		}(addr)
+	}
+
+	reroutedBufMaxSize = memory.Allowed() / 8
+	rerouteWorkerWG.Add(1)
+	go func() {
+		rerouteWorker(rerouteWorkerStopCh)
+		rerouteWorkerWG.Done()
+	}()
+}
+
+// Stop gracefully stops netstorage.
+func Stop() {
+	close(rerouteWorkerStopCh)
+	rerouteWorkerWG.Wait()
+
+	close(storageNodesStopCh)
+	storageNodesWG.Wait()
+}
+
+func addToReroutedBuf(buf []byte, rows int) error {
+	reroutedLock.Lock()
+	defer reroutedLock.Unlock()
+	if len(reroutedBuf)+len(buf) > reroutedBufMaxSize {
+		reroutedBufOverflows.Inc()
+		return fmt.Errorf("%d rows dropped because of reroutedBuf overflows %d bytes", rows, reroutedBufMaxSize)
+	}
+	reroutedBuf = append(reroutedBuf, buf...)
+	reroutedRows += rows
+	reroutesTotal.Inc()
+	return nil
+}
+
+func spreadReroutedBufToStorageNodes(swapBuf []byte) ([]byte, error) {
+	healthyStorageNodes := getHealthyStorageNodes()
+	if len(healthyStorageNodes) == 0 {
+		// No more vmstorage nodes to write data to.
+		return swapBuf, fmt.Errorf("all the storage nodes are unhealthy")
+	}
+
+	reroutedLock.Lock()
+	reroutedBuf, swapBuf = swapBuf[:0], reroutedBuf
+	rows := reroutedRows
+	reroutedRows = 0
+	reroutedLock.Unlock()
+
+	if len(swapBuf) == 0 {
+		// Nothing to re-route.
+		return swapBuf, nil
+	}
+
+	var mr storage.MetricRow
+	src := swapBuf
+	rowsProcessed := 0
+	for len(src) > 0 {
+		tail, err := mr.Unmarshal(src)
+		if err != nil {
+			logger.Panicf("BUG: cannot unmarshal recently marshaled MetricRow: %s", err)
+		}
+		rowBuf := src[:len(src)-len(tail)]
+		src = tail
+
+		// Use non-consistent hashing instead of jump hash in order to re-route rows
+		// equally among healthy vmstorage nodes.
+		// This should spread the increased load among healthy vmstorage nodes.
+		h := xxhash.Sum64(mr.MetricNameRaw)
+		idx := h % uint64(len(healthyStorageNodes))
+		attempts := 0
+		for {
+			sn := healthyStorageNodes[idx]
+			err := sn.sendReroutedRow(rowBuf)
+			if err == nil {
+				sn.rowsReroutedToHere.Inc()
+				break
+			}
+
+			// Cannot send data to sn. Try sending to the next vmstorage node.
+			idx++
+			if idx >= uint64(len(healthyStorageNodes)) {
+				idx = 0
+			}
+			attempts++
+			if attempts == len(healthyStorageNodes) {
+				// There are no healthy nodes.
+				// Try returning the remaining data to reroutedBuf if it has enough free space.
+				rowsRemaining := rows - rowsProcessed
+				recovered := false
+				reroutedLock.Lock()
+				if len(rowBuf)+len(tail)+len(reroutedBuf) <= reroutedBufMaxSize {
+					swapBuf = append(swapBuf[:0], rowBuf...)
+					swapBuf = append(swapBuf, tail...)
+					swapBuf = append(swapBuf, reroutedBuf...)
+					reroutedBuf, swapBuf = swapBuf, reroutedBuf[:0]
+					reroutedRows += rowsRemaining
+					recovered = true
+				}
+				reroutedLock.Unlock()
+				if recovered {
+					return swapBuf, nil
+				}
+				rowsLostTotal.Add(rowsRemaining)
+				return swapBuf, fmt.Errorf("all the %d vmstorage nodes are unavailable; lost %d rows; last error: %s", len(storageNodes), rowsRemaining, err)
+			}
+		}
+		rowsProcessed++
+	}
+	if rowsProcessed != rows {
+		logger.Panicf("BUG: unexpected number of rows processed; got %d; want %d", rowsProcessed, rows)
+	}
+	reroutedRowsProcessed.Add(rowsProcessed)
+	return swapBuf, nil
+}
+
+var (
+	reroutedLock       sync.Mutex
+	reroutedBuf        []byte
+	reroutedRows       int
+	reroutedBufMaxSize int
+
+	reroutedRowsProcessed = metrics.NewCounter(`vm_rpc_rerouted_rows_processed_total{name="vminsert"}`)
+	reroutedBufOverflows  = metrics.NewCounter(`vm_rpc_rerouted_buf_overflows_total{name="vminsert"}`)
+	reroutesTotal         = metrics.NewCounter(`vm_rpc_reroutes_total{name="vminsert"}`)
+	_                     = metrics.NewGauge(`vm_rpc_rerouted_rows_pending{name="vminsert"}`, func() float64 {
+		reroutedLock.Lock()
+		n := reroutedRows
+		reroutedLock.Unlock()
+		return float64(n)
+	})
+	_ = metrics.NewGauge(`vm_rpc_rerouted_buf_pending_bytes{name="vminsert"}`, func() float64 {
+		reroutedLock.Lock()
+		n := len(reroutedBuf)
+		reroutedLock.Unlock()
+		return float64(n)
+	})
+
+	rerouteErrors = metrics.NewCounter(`vm_rpc_reroute_errors_total{name="vminsert"}`)
+	rowsLostTotal = metrics.NewCounter(`vm_rpc_rows_lost_total{name="vminsert"}`)
+)
+
+func getHealthyStorageNodes() []*storageNode {
+	sns := make([]*storageNode, 0, len(storageNodes)-1)
+	for _, sn := range storageNodes {
+		sn.mu.Lock()
+		if !sn.broken {
+			sns = append(sns, sn)
+		}
+		sn.mu.Unlock()
+	}
+	return sns
+}
--- a/app/vminsert/opentsdb/parser.go
+++ b/app/vminsert/opentsdb/parser.go
@@ -4,6 +4,8 @@ import (
 	"fmt"
 	"strings"

+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
+	"github.com/VictoriaMetrics/metrics"
 	"github.com/valyala/fastjson/fastfloat"
 )

@@ -34,13 +36,8 @@ func (rs *Rows) Reset() {
 // See http://opentsdb.net/docs/build/html/api_telnet/put.html
 //
 // s must be unchanged until rs is in use.
-func (rs *Rows) Unmarshal(s string) error {
-	var err error
-	rs.Rows, rs.tagsPool, err = unmarshalRows(rs.Rows[:0], s, rs.tagsPool[:0])
-	if err != nil {
-		return err
-	}
-	return err
+func (rs *Rows) Unmarshal(s string) {
+	rs.Rows, rs.tagsPool = unmarshalRows(rs.Rows[:0], s, rs.tagsPool[:0])
 }

 // Row is a single OpenTSDB row.
@@ -69,6 +66,9 @@ func (r *Row) unmarshal(s string, tagsPool []Tag) ([]Tag, error) {
 		return tagsPool, fmt.Errorf("cannot find whitespace between metric and timestamp in %q", s)
 	}
 	r.Metric = s[:n]
+	if len(r.Metric) == 0 {
+		return tagsPool, fmt.Errorf("metric cannot be empty")
+	}
 	tail := s[n+1:]
 	n = strings.IndexByte(tail, ' ')
 	if n < 0 {
@@ -92,39 +92,46 @@ func (r *Row) unmarshal(s string, tagsPool []Tag) ([]Tag, error) {
 	return tagsPool, nil
 }

-func unmarshalRows(dst []Row, s string, tagsPool []Tag) ([]Row, []Tag, error) {
+func unmarshalRows(dst []Row, s string, tagsPool []Tag) ([]Row, []Tag) {
 	for len(s) > 0 {
 		n := strings.IndexByte(s, '\n')
-		if n == 0 {
-			// Skip empty line
-			s = s[1:]
-			continue
-		}
-		if cap(dst) > len(dst) {
-			dst = dst[:len(dst)+1]
-		} else {
-			dst = append(dst, Row{})
-		}
-		r := &dst[len(dst)-1]
 		if n < 0 {
 			// The last line.
-			var err error
-			tagsPool, err = r.unmarshal(s, tagsPool)
-			if err != nil {
-				return dst, tagsPool, err
-			}
-			return dst, tagsPool, nil
-		}
-		var err error
-		tagsPool, err = r.unmarshal(s[:n], tagsPool)
-		if err != nil {
-			return dst, tagsPool, err
+			return unmarshalRow(dst, s, tagsPool)
 		}
+		dst, tagsPool = unmarshalRow(dst, s[:n], tagsPool)
 		s = s[n+1:]
 	}
-	return dst, tagsPool, nil
+	return dst, tagsPool
 }

+func unmarshalRow(dst []Row, s string, tagsPool []Tag) ([]Row, []Tag) {
+	if len(s) > 0 && s[len(s)-1] == '\r' {
+		s = s[:len(s)-1]
+	}
+	if len(s) == 0 {
+		// Skip empty line
+		return dst, tagsPool
+	}
+
+	if cap(dst) > len(dst) {
+		dst = dst[:len(dst)+1]
+	} else {
+		dst = append(dst, Row{})
+	}
+	r := &dst[len(dst)-1]
+	var err error
+	tagsPool, err = r.unmarshal(s, tagsPool)
+	if err != nil {
+		dst = dst[:len(dst)-1]
+		logger.Errorf("cannot unmarshal OpenTSDB line %q: %s", s, err)
+		invalidLines.Inc()
+	}
+	return dst, tagsPool
+}
+
+var invalidLines = metrics.NewCounter(`vm_rows_invalid_total{type="opentsdb"}`)
+
 func unmarshalTags(dst []Tag, s string) ([]Tag, error) {
 	for {
 		if cap(dst) > len(dst) {
@@ -140,12 +147,20 @@ func unmarshalTags(dst []Tag, s string) ([]Tag, error) {
 			if err := tag.unmarshal(s); err != nil {
 				return dst[:len(dst)-1], err
 			}
+			if len(tag.Key) == 0 || len(tag.Value) == 0 {
+				// Skip empty tag
+				dst = dst[:len(dst)-1]
+			}
 			return dst, nil
 		}
 		if err := tag.unmarshal(s[:n]); err != nil {
 			return dst[:len(dst)-1], err
 		}
 		s = s[n+1:]
+		if len(tag.Key) == 0 || len(tag.Value) == 0 {
+			// Skip empty tag
+			dst = dst[:len(dst)-1]
+		}
 	}
 }

@@ -167,9 +182,6 @@ func (t *Tag) unmarshal(s string) error {
 		return fmt.Errorf("missing tag value for %q", s)
 	}
 	t.Key = s[:n]
-	if len(t.Key) == 0 {
-		return fmt.Errorf("tag key cannot be empty for %q", s)
-	}
 	t.Value = s[n+1:]
 	return nil
 }
--- a/app/vminsert/opentsdb/parser_test.go
+++ b/app/vminsert/opentsdb/parser_test.go
@@ -9,19 +9,24 @@ func TestRowsUnmarshalFailure(t *testing.T) {
 	f := func(s string) {
 		t.Helper()
 		var rows Rows
-		if err := rows.Unmarshal(s); err == nil {
-			t.Fatalf("expecting non-nil error when parsing %q", s)
+		rows.Unmarshal(s)
+		if len(rows.Rows) != 0 {
+			t.Fatalf("unexpected number of rows parsed; got %d; want 0", len(rows.Rows))
 		}

 		// Try again
-		if err := rows.Unmarshal(s); err == nil {
-			t.Fatalf("expecting non-nil error when parsing %q", s)
+		rows.Unmarshal(s)
+		if len(rows.Rows) != 0 {
+			t.Fatalf("unexpected number of rows parsed; got %d; want 0", len(rows.Rows))
 		}
 	}

 	// Missing put prefix
 	f("xx")

+	// Missing metric
+	f("put  111 34")
+
 	// Missing timestamp
 	f("put aaa")

@@ -42,26 +47,19 @@ func TestRowsUnmarshalFailure(t *testing.T) {

 	// Invalid tag
 	f("put aaa 123 4.5 foo")
-	f("put aaa 123 4.5 =")
-	f("put aaa 123 4.5 =foo")
-	f("put aaa 123 4.5 =foo a=b")
 }

 func TestRowsUnmarshalSuccess(t *testing.T) {
 	f := func(s string, rowsExpected *Rows) {
 		t.Helper()
 		var rows Rows
-		if err := rows.Unmarshal(s); err != nil {
-			t.Fatalf("cannot unmarshal %q: %s", s, err)
-		}
+		rows.Unmarshal(s)
 		if !reflect.DeepEqual(rows.Rows, rowsExpected.Rows) {
 			t.Fatalf("unexpected rows;\ngot\n%+v;\nwant\n%+v", rows.Rows, rowsExpected.Rows)
 		}

 		// Try unmarshaling again
-		if err := rows.Unmarshal(s); err != nil {
-			t.Fatalf("cannot unmarshal %q: %s", s, err)
-		}
+		rows.Unmarshal(s)
 		if !reflect.DeepEqual(rows.Rows, rowsExpected.Rows) {
 			t.Fatalf("unexpected rows;\ngot\n%+v;\nwant\n%+v", rows.Rows, rowsExpected.Rows)
 		}
@@ -74,7 +72,9 @@ func TestRowsUnmarshalSuccess(t *testing.T) {

 	// Empty line
 	f("", &Rows{})
+	f("\r", &Rows{})
 	f("\n\n", &Rows{})
+	f("\n\r\n", &Rows{})

 	// Single line
 	f("put foobar 789 -123.456 a=b", &Rows{
@@ -88,17 +88,13 @@ func TestRowsUnmarshalSuccess(t *testing.T) {
 			}},
 		}},
 	})
-	// Empty tag value
-	f("put foobar 789 -123.456 a= b=c", &Rows{
+	// Empty tag
+	f("put foobar 789 -123.456 a= b=c =d", &Rows{
 		Rows: []Row{{
 			Metric:    "foobar",
 			Value:     -123.456,
 			Timestamp: 789,
 			Tags: []Tag{
-				{
-					Key:   "a",
-					Value: "",
-				},
 				{
 					Key:   "b",
 					Value: "c",
@@ -200,4 +196,27 @@ func TestRowsUnmarshalSuccess(t *testing.T) {
 			},
 		},
 	})
+	// Multi lines with invalid line
+	f("put foo 2 0.3 a=b\naaa bbb\nput bar.baz 43 0.34 a=b\n", &Rows{
+		Rows: []Row{
+			{
+				Metric:    "foo",
+				Value:     0.3,
+				Timestamp: 2,
+				Tags: []Tag{{
+					Key:   "a",
+					Value: "b",
+				}},
+			},
+			{
+				Metric:    "bar.baz",
+				Value:     0.34,
+				Timestamp: 43,
+				Tags: []Tag{{
+					Key:   "a",
+					Value: "b",
+				}},
+			},
+		},
+	})
 }
--- a/app/vminsert/opentsdb/parser_timing_test.go
+++ b/app/vminsert/opentsdb/parser_timing_test.go
@@ -6,18 +6,19 @@ import (
 )

 func BenchmarkRowsUnmarshal(b *testing.B) {
-	s := `cpu.usage_user 1234556768 1.23 a=b
-cpu.usage_system 1234556768 23.344 a=b
-cpu.usage_iowait 1234556769 3.3443 a=b
-cpu.usage_irq 1234556768 0.34432 a=b
+	s := `put cpu.usage_user 1234556768 1.23 a=b
+put cpu.usage_system 1234556768 23.344 a=b
+put cpu.usage_iowait 1234556769 3.3443 a=b
+put cpu.usage_irq 1234556768 0.34432 a=b
 `
 	b.SetBytes(int64(len(s)))
 	b.ReportAllocs()
 	b.RunParallel(func(pb *testing.PB) {
 		var rows Rows
 		for pb.Next() {
-			if err := rows.Unmarshal(s); err != nil {
-				panic(fmt.Errorf("cannot unmarshal %q: %s", s, err))
+			rows.Unmarshal(s)
+			if len(rows.Rows) != 4 {
+				panic(fmt.Errorf("unexpected number of parsed rows; got %d; want 4", len(rows.Rows)))
 			}
 		}
 	})
--- a/app/vminsert/opentsdb/request_handler.go
+++ b/app/vminsert/opentsdb/request_handler.go
@@ -1,7 +1,6 @@
 package opentsdb

 import (
-	"bytes"
 	"fmt"
 	"io"
 	"net"
@@ -11,52 +10,72 @@ import (

 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/common"
 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/concurrencylimiter"
+	"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/netstorage"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
 	"github.com/VictoriaMetrics/metrics"
+	"github.com/valyala/fastjson/fastfloat"
 )

-var rowsInserted = metrics.NewCounter(`vm_rows_inserted_total{type="opentsdb"}`)
+var (
+	rowsInserted  = tenantmetrics.NewCounterMap(`vm_rows_inserted_total{type="opentsdb"}`)
+	rowsPerInsert = metrics.NewSummary(`vm_rows_per_insert{type="opentsdb"}`)
+)

 // insertHandler processes remote write for OpenTSDB put protocol.
 //
 // See http://opentsdb.net/docs/build/html/api_telnet/put.html
-func insertHandler(r io.Reader) error {
+func insertHandler(at *auth.Token, r io.Reader) error {
 	return concurrencylimiter.Do(func() error {
-		return insertHandlerInternal(r)
+		return insertHandlerInternal(at, r)
 	})
 }

-func insertHandlerInternal(r io.Reader) error {
+func insertHandlerInternal(at *auth.Token, r io.Reader) error {
 	ctx := getPushCtx()
 	defer putPushCtx(ctx)
 	for ctx.Read(r) {
-		if err := ctx.InsertRows(); err != nil {
+		if err := ctx.InsertRows(at); err != nil {
 			return err
 		}
 	}
 	return ctx.Error()
 }

-func (ctx *pushCtx) InsertRows() error {
+func (ctx *pushCtx) InsertRows(at *auth.Token) error {
 	rows := ctx.Rows.Rows
 	ic := &ctx.Common
-	ic.Reset(len(rows))
+	ic.Reset()
+	atCopy := *at
 	for i := range rows {
 		r := &rows[i]
 		ic.Labels = ic.Labels[:0]
 		ic.AddLabel("", r.Metric)
 		for j := range r.Tags {
 			tag := &r.Tags[j]
+			if atCopy.AccountID == 0 {
+				// Multi-tenancy support via custom tags.
+				// Do not allow overriding AccountID and ProjectID from atCopy for security reasons.
+				if tag.Key == "VictoriaMetrics_AccountID" {
+					atCopy.AccountID = uint32(fastfloat.ParseUint64BestEffort(tag.Value))
+				}
+				if atCopy.ProjectID == 0 && tag.Key == "VictoriaMetrics_ProjectID" {
+					atCopy.ProjectID = uint32(fastfloat.ParseUint64BestEffort(tag.Value))
+				}
+			}
 			ic.AddLabel(tag.Key, tag.Value)
 		}
-		ic.WriteDataPoint(nil, ic.Labels, r.Timestamp, r.Value)
+		if err := ic.WriteDataPoint(&atCopy, ic.Labels, r.Timestamp, r.Value); err != nil {
+			return err
+		}
 	}
-	rowsInserted.Add(len(rows))
+	// Assume that all the rows for a single connection belong to the same (AccountID, ProjectID).
+	rowsInserted.Get(&atCopy).Add(len(rows))
+	rowsPerInsert.Update(float64(len(rows)))
 	return ic.FlushBufs()
 }

-const maxReadPacketSize = 4 * 1024 * 1024
-
 const flushTimeout = 3 * time.Second

 func (ctx *pushCtx) Read(r io.Reader) bool {
@@ -71,50 +90,44 @@ func (ctx *pushCtx) Read(r io.Reader) bool {
 			return false
 		}
 	}
-	lr := io.LimitReader(r, maxReadPacketSize)
-	ctx.reqBuf.Reset()
-	ctx.reqBuf.B = append(ctx.reqBuf.B[:0], ctx.tailBuf...)
-	n, err := io.CopyBuffer(&ctx.reqBuf, lr, ctx.copyBuf[:])
-	if err != nil {
-		if ne, ok := err.(net.Error); ok && ne.Timeout() {
+	ctx.reqBuf, ctx.tailBuf, ctx.err = common.ReadLinesBlock(r, ctx.reqBuf, ctx.tailBuf)
+	if ctx.err != nil {
+		if ne, ok := ctx.err.(net.Error); ok && ne.Timeout() {
 			// Flush the read data on timeout and try reading again.
+			ctx.err = nil
 		} else {
-			opentsdbReadErrors.Inc()
-			ctx.err = fmt.Errorf("cannot read OpenTSDB put protocol data: %s", err)
+			if ctx.err != io.EOF {
+				opentsdbReadErrors.Inc()
+				ctx.err = fmt.Errorf("cannot read OpenTSDB put protocol data: %s", ctx.err)
+			}
 			return false
 		}
-	} else if n < maxReadPacketSize {
-		// Mark the end of stream.
-		ctx.err = io.EOF
 	}
+	ctx.Rows.Unmarshal(bytesutil.ToUnsafeString(ctx.reqBuf))

-	// Parse all the rows until the last newline in ctx.reqBuf.B
-	nn := bytes.LastIndexByte(ctx.reqBuf.B, '\n')
-	ctx.tailBuf = ctx.tailBuf[:0]
-	if nn >= 0 {
-		ctx.tailBuf = append(ctx.tailBuf[:0], ctx.reqBuf.B[nn+1:]...)
-		ctx.reqBuf.B = ctx.reqBuf.B[:nn]
-	}
-	if err = ctx.Rows.Unmarshal(bytesutil.ToUnsafeString(ctx.reqBuf.B)); err != nil {
-		opentsdbUnmarshalErrors.Inc()
-		ctx.err = fmt.Errorf("cannot unmarshal OpenTSDB put protocol data with size %d: %s", len(ctx.reqBuf.B), err)
-		return false
+	// Fill in missing timestamps
+	currentTimestamp := time.Now().Unix()
+	rows := ctx.Rows.Rows
+	for i := range rows {
+		r := &rows[i]
+		if r.Timestamp == 0 {
+			r.Timestamp = currentTimestamp
+		}
 	}

 	// Convert timestamps from seconds to milliseconds
-	for i := range ctx.Rows.Rows {
-		ctx.Rows.Rows[i].Timestamp *= 1e3
+	for i := range rows {
+		rows[i].Timestamp *= 1e3
 	}
 	return true
 }

 type pushCtx struct {
 	Rows   Rows
-	Common common.InsertCtx
+	Common netstorage.InsertCtx

-	reqBuf  bytesutil.ByteBuffer
+	reqBuf  []byte
 	tailBuf []byte
-	copyBuf [16 * 1024]byte

 	err error
 }
@@ -128,17 +141,16 @@ func (ctx *pushCtx) Error() error {

 func (ctx *pushCtx) reset() {
 	ctx.Rows.Reset()
-	ctx.Common.Reset(0)
-	ctx.reqBuf.Reset()
+	ctx.Common.Reset()
+	ctx.reqBuf = ctx.reqBuf[:0]
 	ctx.tailBuf = ctx.tailBuf[:0]

 	ctx.err = nil
 }

 var (
-	opentsdbReadCalls       = metrics.NewCounter(`vm_read_calls_total{name="opentsdb"}`)
-	opentsdbReadErrors      = metrics.NewCounter(`vm_read_errors_total{name="opentsdb"}`)
-	opentsdbUnmarshalErrors = metrics.NewCounter(`vm_unmarshal_errors_total{name="opentsdb"}`)
+	opentsdbReadCalls  = metrics.NewCounter(`vm_read_calls_total{name="opentsdb"}`)
+	opentsdbReadErrors = metrics.NewCounter(`vm_read_errors_total{name="opentsdb"}`)
 )

 func getPushCtx() *pushCtx {
--- a/app/vminsert/opentsdb/server.go
+++ b/app/vminsert/opentsdb/server.go
@@ -7,8 +7,10 @@ import (
 	"sync"
 	"time"

+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil"
 	"github.com/VictoriaMetrics/metrics"
 )

@@ -23,7 +25,7 @@ var (
 // Serve starts OpenTSDB collector on the given addr.
 func Serve(addr string) {
 	logger.Infof("starting TCP OpenTSDB collector at %q", addr)
-	lnTCP, err := net.Listen("tcp4", addr)
+	lnTCP, err := netutil.NewTCPListener("opentsdb", addr)
 	if err != nil {
 		logger.Fatalf("cannot start TCP OpenTSDB collector at %q: %s", addr, err)
 	}
@@ -70,7 +72,8 @@ func serveTCP(ln net.Listener) {
 		}
 		go func() {
 			writeRequestsTCP.Inc()
-			if err := insertHandler(c); err != nil {
+			var at auth.Token // TODO: properly initialize the auth token
+			if err := insertHandler(&at, c); err != nil {
 				writeErrorsTCP.Inc()
 				logger.Errorf("error in TCP OpenTSDB conn %q<->%q: %s", c.LocalAddr(), c.RemoteAddr(), err)
 			}
@@ -88,6 +91,7 @@ func serveUDP(ln net.PacketConn) {
 			defer wg.Done()
 			var bb bytesutil.ByteBuffer
 			bb.B = bytesutil.Resize(bb.B, 64*1024)
+			var at auth.Token // TODO: properly initialize the auth token
 			for {
 				bb.Reset()
 				bb.B = bb.B[:cap(bb.B)]
@@ -108,7 +112,7 @@ func serveUDP(ln net.PacketConn) {
 				}
 				bb.B = bb.B[:n]
 				writeRequestsUDP.Inc()
-				if err := insertHandler(bb.NewReader()); err != nil {
+				if err := insertHandler(&at, bb.NewReader()); err != nil {
 					writeErrorsUDP.Inc()
 					logger.Errorf("error in UDP OpenTSDB conn %q<->%q: %s", ln.LocalAddr(), addr, err)
 					continue
--- a/app/vminsert/opentsdbhttp/parser.go
+++ b/app/vminsert/opentsdbhttp/parser.go
@@ -0,0 +1,198 @@
+package opentsdbhttp
+
+import (
+	"fmt"
+
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
+	"github.com/VictoriaMetrics/metrics"
+	"github.com/valyala/fastjson"
+	"github.com/valyala/fastjson/fastfloat"
+)
+
+// Rows contains parsed OpenTSDB rows.
+type Rows struct {
+	Rows []Row
+
+	tagsPool []Tag
+}
+
+// Reset empties rs; rs.Rows and rs.tagsPool are truncated to zero length.
+func (rs *Rows) Reset() {
+	// Zero each row before truncating, so referenced objects can be GC'ed.
+	for idx := 0; idx < len(rs.Rows); idx++ {
+		rs.Rows[idx].reset()
+	}
+	rs.Rows = rs.Rows[:0]
+	// Likewise zero the pooled tags before truncating the pool.
+	for idx := 0; idx < len(rs.tagsPool); idx++ {
+		rs.tagsPool[idx].reset()
+	}
+	rs.tagsPool = rs.tagsPool[:0]
+}
+
+// Unmarshal unmarshals OpenTSDB rows from av.
+//
+// See http://opentsdb.net/docs/build/html/api_http/put.html
+//
+// av must remain unchanged while rs is in use.
+func (rs *Rows) Unmarshal(av *fastjson.Value) {
+	rs.Rows, rs.tagsPool = unmarshalRows(rs.Rows[:0], av, rs.tagsPool[:0])
+}
+
+// Row is a single OpenTSDB row.
+type Row struct {
+	Metric    string
+	Tags      []Tag
+	Value     float64
+	Timestamp int64
+}
+
+func (r *Row) reset() {
+	// Assigning the zero Row clears Metric, Tags,
+	// Value and Timestamp in a single step, which
+	// also drops the string and slice references.
+	*r = Row{}
+}
+
+func (r *Row) unmarshal(o *fastjson.Value, tagsPool []Tag) ([]Tag, error) {
+	r.reset()
+	m := o.GetStringBytes("metric")
+	if len(m) == 0 {
+		return tagsPool, fmt.Errorf("missing `metric` in %s", o)
+	}
+	r.Metric = bytesutil.ToUnsafeString(m)
+
+	rawTs := o.Get("timestamp")
+	if rawTs != nil {
+		ts, err := getFloat64(rawTs)
+		if err != nil {
+			return tagsPool, fmt.Errorf("invalid `timestamp` in %s: %s", o, err)
+		}
+		r.Timestamp = int64(ts)
+	} else {
+		// Allow missing timestamp. It is automatically populated
+		// with the current time in this case.
+		r.Timestamp = 0
+	}
+
+	rawV := o.Get("value")
+	if rawV == nil {
+		return tagsPool, fmt.Errorf("missing `value` in %s", o)
+	}
+	v, err := getFloat64(rawV)
+	if err != nil {
+		return tagsPool, fmt.Errorf("invalid `value` in %s: %s", o, err)
+	}
+	r.Value = v
+
+	vt := o.Get("tags")
+	if vt == nil {
+		// Allow empty tags.
+		return tagsPool, nil
+	}
+	rawTags, err := vt.Object()
+	if err != nil {
+		return tagsPool, fmt.Errorf("invalid `tags` in %s: %s", o, err)
+	}
+
+	tagsStart := len(tagsPool)
+	tagsPool, err = unmarshalTags(tagsPool, rawTags)
+	if err != nil {
+		return tagsPool, fmt.Errorf("cannot parse tags %s: %s", rawTags, err)
+	}
+	tags := tagsPool[tagsStart:]
+	r.Tags = tags[:len(tags):len(tags)]
+	return tagsPool, nil
+}
+
+func getFloat64(v *fastjson.Value) (float64, error) {
+	switch v.Type() {
+	case fastjson.TypeNumber:
+		return v.Float64()
+	case fastjson.TypeString:
+		vStr, _ := v.StringBytes()
+		vFloat := fastfloat.ParseBestEffort(bytesutil.ToUnsafeString(vStr))
+		if vFloat == 0 && string(vStr) != "0" && string(vStr) != "0.0" {
+			return 0, fmt.Errorf("invalid float64 value: %q", vStr)
+		}
+		return vFloat, nil
+	default:
+		return 0, fmt.Errorf("value doesn't contain float64; it contains %s", v.Type())
+	}
+}
+
+func unmarshalRows(dst []Row, av *fastjson.Value, tagsPool []Tag) ([]Row, []Tag) {
+	t := av.Type()
+	if t == fastjson.TypeObject { // a single data point
+		return unmarshalRow(dst, av, tagsPool)
+	}
+	if t != fastjson.TypeArray { // neither object nor array: log, count and return the inputs as is
+		logger.Errorf("OpenTSDB JSON must be either object or array; got %s; body=%s", t, av)
+		invalidLines.Inc()
+		return dst, tagsPool
+	}
+	items, _ := av.Array() // the error is dropped: av is known to be an array here
+	for i := range items {
+		dst, tagsPool = unmarshalRow(dst, items[i], tagsPool)
+	}
+	return dst, tagsPool
+}
+
+func unmarshalRow(dst []Row, o *fastjson.Value, tagsPool []Tag) ([]Row, []Tag) {
+	n := len(dst) // index of the slot reserved for the new row
+	if n < cap(dst) {
+		dst = dst[:n+1]
+	} else {
+		dst = append(dst, Row{})
+	}
+	tagsPool, err := dst[n].unmarshal(o, tagsPool)
+	if err == nil {
+		return dst, tagsPool
+	}
+	// Invalid object: log it, count it and give the reserved slot back.
+	logger.Errorf("cannot unmarshal OpenTSDB object %s: %s", o, err)
+	invalidLines.Inc()
+	return dst[:n], tagsPool
+}
+
+var invalidLines = metrics.NewCounter(`vm_rows_invalid_total{type="opentsdb-http"}`)
+
+func unmarshalTags(dst []Tag, o *fastjson.Object) ([]Tag, error) {
+	var err error
+	o.Visit(func(k []byte, v *fastjson.Value) {
+		if v.Type() != fastjson.TypeString {
+			err = fmt.Errorf("tag value must be string; got %s; value=%s", v.Type(), v)
+			return
+		}
+		if len(k) == 0 {
+			// Skip empty tags
+			return
+		}
+		vStr, _ := v.StringBytes()
+		if len(vStr) == 0 {
+			// Skip empty tags
+			return
+		}
+		if cap(dst) > len(dst) {
+			dst = dst[:len(dst)+1]
+		} else {
+			dst = append(dst, Tag{})
+		}
+		tag := &dst[len(dst)-1]
+		tag.Key = bytesutil.ToUnsafeString(k)
+		tag.Value = bytesutil.ToUnsafeString(vStr)
+	})
+	return dst, err
+}
+
+// Tag is an OpenTSDB tag.
+type Tag struct {
+	Key   string
+	Value string
+}
+
+func (t *Tag) reset() {
+	// The zero Tag has an empty Key and an empty Value.
+	*t = Tag{}
+}
--- a/app/vminsert/opentsdbhttp/parser_test.go
+++ b/app/vminsert/opentsdbhttp/parser_test.go
@@ -0,0 +1,246 @@
+package opentsdbhttp
+
+import (
+	"reflect"
+	"testing"
+)
+
+func TestRowsUnmarshalFailure(t *testing.T) {
+	f := func(s string) {
+		t.Helper()
+		var rows Rows
+		p := parserPool.Get()
+		defer parserPool.Put(p)
+		v, err := p.Parse(s)
+		if err != nil {
+			// Expected JSON parser error
+			return
+		}
+		// Verify OpenTSDB body parsing error
+		rows.Unmarshal(v)
+		if len(rows.Rows) != 0 {
+			t.Fatalf("unexpected number of rows parsed; got %d; want 0", len(rows.Rows))
+		}
+		// Try again
+		rows.Unmarshal(v)
+		if len(rows.Rows) != 0 {
+			t.Fatalf("unexpected number of rows parsed; got %d; want 0", len(rows.Rows))
+		}
+	}
+
+	// invalid json
+	f("{g")
+
+	// Invalid json type
+	f(`1`)
+	f(`"foo"`)
+	f(`[1,2]`)
+	f(`null`)
+
+	// Incomplete object
+	f(`{}`)
+	f(`{"metric": "aaa"}`)
+	f(`{"metric": "aaa", "timestamp": 1122}`)
+	f(`{"metric": "aaa", "timestamp": "tststs"}`)
+	f(`{"timestamp": 1122, "value": 33}`)
+	f(`{"value": 33}`)
+	f(`{"value": 33, "tags": {"fooo":"bar"}}`)
+
+	// Invalid value
+	f(`{"metric": "aaa", "timestamp": 1122, "value": "0.0.0"}`)
+
+	// Invalid metric type
+	f(`{"metric": "", "timestamp": 1122, "value": 0.45, "tags": {"foo": "bar"}}`)
+	f(`{"metric": ["aaa"], "timestamp": 1122, "value": 0.45, "tags": {"foo": "bar"}}`)
+	f(`{"metric": {"aaa":1}, "timestamp": 1122, "value": 0.45, "tags": {"foo": "bar"}}`)
+	f(`{"metric": 1, "timestamp": 1122, "value": 0.45, "tags": {"foo": "bar"}}`)
+
+	// Invalid timestamp type
+	f(`{"metric": "aaa", "timestamp": "foobar", "value": 0.45, "tags": {"foo": "bar"}}`)
+	f(`{"metric": "aaa", "timestamp": [1,2], "value": 0.45, "tags": {"foo": "bar"}}`)
+	f(`{"metric": "aaa", "timestamp": {"a":1}, "value": 0.45, "tags": {"foo": "bar"}}`)
+
+	// Invalid value type
+	f(`{"metric": "aaa", "timestamp": 1122, "value": [0,1], "tags": {"foo":"bar"}}`)
+	f(`{"metric": "aaa", "timestamp": 1122, "value": {"a":1}, "tags": {"foo":"bar"}}`)
+	f(`{"metric": "aaa", "timestamp": 1122, "value": "foobar", "tags": {"foo":"bar"}}`)
+
+	// Invalid tags type
+	f(`{"metric": "aaa", "timestamp": 1122, "value": 0.45, "tags": 1}`)
+	f(`{"metric": "aaa", "timestamp": 1122, "value": 0.45, "tags": [1,2]}`)
+	f(`{"metric": "aaa", "timestamp": 1122, "value": 0.45, "tags": "foo"}`)
+
+	// Invalid tag value type
+	f(`{"metric": "aaa", "timestamp": 1122, "value": 0.45, "tags": {"foo": ["bar"]}}`)
+	f(`{"metric": "aaa", "timestamp": 1122, "value": 0.45, "tags": {"foo": {"bar":"baz"}}}`)
+	f(`{"metric": "aaa", "timestamp": 1122, "value": 0.45, "tags": {"foo": 1}}`)
+
+	// Invalid multiline
+	f(`[{"metric": "aaa", "timestamp": 1122, "value": "trt", "tags":{"foo":"bar"}}, {"metric": "aaa", "timestamp": [1122], "value": 111}]`)
+}
+
+func TestRowsUnmarshalSuccess(t *testing.T) {
+	f := func(s string, rowsExpected *Rows) {
+		t.Helper()
+		var rows Rows
+
+		p := parserPool.Get()
+		defer parserPool.Put(p)
+		v, err := p.Parse(s)
+		if err != nil {
+			t.Fatalf("cannot parse json %s: %s", s, err)
+		}
+		rows.Unmarshal(v)
+		if !reflect.DeepEqual(rows.Rows, rowsExpected.Rows) {
+			t.Fatalf("unexpected rows;\ngot\n%+v;\nwant\n%+v", rows.Rows, rowsExpected.Rows)
+		}
+
+		// Try unmarshaling again
+		rows.Unmarshal(v)
+		if !reflect.DeepEqual(rows.Rows, rowsExpected.Rows) {
+			t.Fatalf("unexpected rows;\ngot\n%+v;\nwant\n%+v", rows.Rows, rowsExpected.Rows)
+		}
+
+		rows.Reset()
+		if len(rows.Rows) != 0 {
+			t.Fatalf("non-empty rows after reset: %+v", rows.Rows)
+		}
+	}
+
+	// Normal line
+	f(`{"metric": "foobar", "timestamp": 789, "value": -123.456, "tags": {"a":"b"}}`, &Rows{
+		Rows: []Row{{
+			Metric:    "foobar",
+			Value:     -123.456,
+			Timestamp: 789,
+			Tags: []Tag{{
+				Key:   "a",
+				Value: "b",
+			}},
+		}},
+	})
+	// Timestamp as string
+	f(`{"metric": "foobar", "timestamp": "1789", "value": -123.456, "tags": {"a":"b"}}`, &Rows{
+		Rows: []Row{{
+			Metric:    "foobar",
+			Value:     -123.456,
+			Timestamp: 1789,
+			Tags: []Tag{{
+				Key:   "a",
+				Value: "b",
+			}},
+		}},
+	})
+	// Timestamp as float64 (it is truncated to integer)
+	f(`{"metric": "foobar", "timestamp": 17.89, "value": -123.456, "tags": {"a":"b"}}`, &Rows{
+		Rows: []Row{{
+			Metric:    "foobar",
+			Value:     -123.456,
+			Timestamp: 17,
+			Tags: []Tag{{
+				Key:   "a",
+				Value: "b",
+			}},
+		}},
+	})
+	// Empty tags
+	f(`{"metric": "foobar", "timestamp": 789, "value": -123.456, "tags": {}}`, &Rows{
+		Rows: []Row{{
+			Metric:    "foobar",
+			Value:     -123.456,
+			Timestamp: 789,
+			Tags:      nil,
+		}},
+	})
+	// Missing tags
+	f(`{"metric": "foobar", "timestamp": 789, "value": -123.456}`, &Rows{
+		Rows: []Row{{
+			Metric:    "foobar",
+			Value:     -123.456,
+			Timestamp: 789,
+			Tags:      nil,
+		}},
+	})
+	// Tags with an empty key or an empty value are skipped
+	f(`{"metric": "foobar", "timestamp": 123, "value": -123.456, "tags": {"a":"", "b":"c", "": "d"}}`, &Rows{
+		Rows: []Row{{
+			Metric:    "foobar",
+			Value:     -123.456,
+			Timestamp: 123,
+			Tags: []Tag{
+				{
+					Key:   "b",
+					Value: "c",
+				},
+			},
+		}},
+	})
+	// Value as string
+	f(`{"metric": "foobar", "timestamp": 789, "value": "-12.456", "tags": {"a":"b"}}`, &Rows{
+		Rows: []Row{{
+			Metric:    "foobar",
+			Value:     -12.456,
+			Timestamp: 789,
+			Tags: []Tag{{
+				Key:   "a",
+				Value: "b",
+			}},
+		}},
+	})
+	// Missing timestamp
+	f(`{"metric": "foobar", "value": "-12.456", "tags": {"a":"b"}}`, &Rows{
+		Rows: []Row{{
+			Metric:    "foobar",
+			Value:     -12.456,
+			Timestamp: 0,
+			Tags: []Tag{{
+				Key:   "a",
+				Value: "b",
+			}},
+		}},
+	})
+
+	// Multiple tags
+	f(`{"metric": "foo", "value": 1, "timestamp": 2, "tags": {"bar":"baz", "x": "y"}}`, &Rows{
+		Rows: []Row{{
+			Metric: "foo",
+			Tags: []Tag{
+				{
+					Key:   "bar",
+					Value: "baz",
+				},
+				{
+					Key:   "x",
+					Value: "y",
+				},
+			},
+			Value:     1,
+			Timestamp: 2,
+		}},
+	})
+
+	// Multi lines
+	f(`[{"metric": "foo", "value": "0.3", "timestamp": 2, "tags": {"a":"b"}},
+{"metric": "bar.baz", "value": 0.34, "timestamp": 43, "tags": {"a":"b"}}]`, &Rows{
+		Rows: []Row{
+			{
+				Metric:    "foo",
+				Value:     0.3,
+				Timestamp: 2,
+				Tags: []Tag{{
+					Key:   "a",
+					Value: "b",
+				}},
+			},
+			{
+				Metric:    "bar.baz",
+				Value:     0.34,
+				Timestamp: 43,
+				Tags: []Tag{{
+					Key:   "a",
+					Value: "b",
+				}},
+			},
+		},
+	})
+}
--- a/app/vminsert/opentsdbhttp/parser_timing_test.go
+++ b/app/vminsert/opentsdbhttp/parser_timing_test.go
@@ -0,0 +1,33 @@
+package opentsdbhttp
+
+import (
+	"fmt"
+	"testing"
+
+	"github.com/valyala/fastjson"
+)
+
+func BenchmarkRowsUnmarshal(b *testing.B) {
+	s := `[{"metric": "cpu.usage_user", "timestamp": 1234556768, "value": 1.23, "tags": {"a":"b", "x": "y"}},
+{"metric": "cpu.usage_system", "timestamp": 1234556768, "value": 23.344, "tags": {"a":"b"}},
+{"metric": "cpu.usage_iowait", "timestamp": 1234556769, "value":3.3443, "tags": {"a":"b"}},
+{"metric": "cpu.usage_irq", "timestamp": 1234556768, "value": 0.34432, "tags": {"a":"b"}}
+]
+`
+	b.SetBytes(int64(len(s)))
+	b.ReportAllocs()
+	b.RunParallel(func(pb *testing.PB) {
+		var rows Rows
+		var p fastjson.Parser
+		for pb.Next() {
+			v, err := p.Parse(s)
+			if err != nil {
+				panic(fmt.Errorf("cannot parse %q: %s", s, err))
+			}
+			rows.Unmarshal(v)
+			if len(rows.Rows) != 4 {
+				panic(fmt.Errorf("unexpected number of rows unmarshaled; got %d; want 4", len(rows.Rows)))
+			}
+		}
+	})
+}
--- a/app/vminsert/opentsdbhttp/request_handler.go
+++ b/app/vminsert/opentsdbhttp/request_handler.go
@@ -0,0 +1,155 @@
+package opentsdbhttp
+
+import (
+	"fmt"
+	"io"
+	"net/http"
+	"runtime"
+	"sync"
+	"time"
+
+	"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/common"
+	"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/concurrencylimiter"
+	"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/netstorage"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
+	"github.com/VictoriaMetrics/metrics"
+	"github.com/valyala/fastjson"
+)
+
+var (
+	rowsInserted  = tenantmetrics.NewCounterMap(`vm_rows_inserted_total{type="opentsdb-http"}`)
+	rowsPerInsert = metrics.NewSummary(`vm_rows_per_insert{type="opentsdb-http"}`)
+
+	opentsdbReadCalls       = metrics.NewCounter(`vm_read_calls_total{name="opentsdb-http"}`)
+	opentsdbReadErrors      = metrics.NewCounter(`vm_read_errors_total{name="opentsdb-http"}`)
+	opentsdbUnmarshalErrors = metrics.NewCounter(`vm_unmarshal_errors_total{name="opentsdb-http"}`)
+)
+
+// insertHandler processes an HTTP OpenTSDB put request via concurrencylimiter.Do.
+// See http://opentsdb.net/docs/build/html/api_http/put.html
+func insertHandler(at *auth.Token, req *http.Request, maxSize int64) error {
+	// The closure defers the actual work to insertHandlerInternal.
+	process := func() error { return insertHandlerInternal(at, req, maxSize) }
+	return concurrencylimiter.Do(process)
+}
+
+func insertHandlerInternal(at *auth.Token, req *http.Request, maxSize int64) error {
+	opentsdbReadCalls.Inc()
+
+	r := req.Body
+	if req.Header.Get("Content-Encoding") == "gzip" {
+		zr, err := common.GetGzipReader(r)
+		if err != nil {
+			opentsdbReadErrors.Inc()
+			return fmt.Errorf("cannot read gzipped http protocol data: %s", err)
+		}
+		defer common.PutGzipReader(zr)
+		r = zr
+	}
+
+	ctx := getPushCtx()
+	defer putPushCtx(ctx)
+
+	// Read the request in ctx.reqBuf
+	lr := io.LimitReader(r, maxSize+1)
+	reqLen, err := ctx.reqBuf.ReadFrom(lr)
+	if err != nil {
+		opentsdbReadErrors.Inc()
+		return fmt.Errorf("cannot read HTTP OpenTSDB request: %s", err)
+	}
+	if reqLen > maxSize {
+		opentsdbReadErrors.Inc()
+		return fmt.Errorf("too big HTTP OpenTSDB request; mustn't exceed %d bytes", maxSize)
+	}
+
+	// Unmarshal the request to ctx.Rows
+	p := parserPool.Get()
+	defer parserPool.Put(p)
+	v, err := p.ParseBytes(ctx.reqBuf.B)
+	if err != nil {
+		opentsdbUnmarshalErrors.Inc()
+		return fmt.Errorf("cannot parse HTTP OpenTSDB json: %s", err)
+	}
+	ctx.Rows.Unmarshal(v)
+
+	// Fill in missing timestamps
+	currentTimestamp := time.Now().Unix()
+	rows := ctx.Rows.Rows
+	for i := range rows {
+		r := &rows[i]
+		if r.Timestamp == 0 {
+			r.Timestamp = currentTimestamp
+		}
+	}
+
+	// Convert timestamps in seconds to milliseconds if needed.
+	// See http://opentsdb.net/docs/javadoc/net/opentsdb/core/Const.html#SECOND_MASK
+	for i := range rows {
+		r := &rows[i]
+		if r.Timestamp&secondMask == 0 {
+			r.Timestamp *= 1e3
+		}
+	}
+
+	// Insert ctx.Rows to db.
+	ic := &ctx.Common
+	ic.Reset()
+	for i := range rows {
+		r := &rows[i]
+		ic.Labels = ic.Labels[:0]
+		ic.AddLabel("", r.Metric)
+		for j := range r.Tags {
+			tag := &r.Tags[j]
+			ic.AddLabel(tag.Key, tag.Value)
+		}
+		if err := ic.WriteDataPoint(at, ic.Labels, r.Timestamp, r.Value); err != nil {
+			return err
+		}
+	}
+	rowsInserted.Get(at).Add(len(rows))
+	rowsPerInsert.Update(float64(len(rows)))
+	return ic.FlushBufs()
+}
+
+const secondMask int64 = 0x7FFFFFFF00000000
+
+var parserPool fastjson.ParserPool
+
+type pushCtx struct {
+	Rows   Rows
+	Common netstorage.InsertCtx
+
+	reqBuf bytesutil.ByteBuffer
+}
+
+func (ctx *pushCtx) reset() {
+	ctx.Rows.Reset()
+	ctx.Common.Reset()
+	ctx.reqBuf.Reset()
+}
+
+func getPushCtx() *pushCtx {
+	select {
+	case ctx := <-pushCtxPoolCh: // a context is parked in the channel
+		return ctx
+	default: // nothing parked: go on to pushCtxPool without blocking
+	}
+	if v := pushCtxPool.Get(); v != nil {
+		return v.(*pushCtx)
+	}
+	return &pushCtx{} // pushCtxPool is empty as well: allocate
+}
+
+func putPushCtx(ctx *pushCtx) {
+	ctx.reset()
+	select {
+	case pushCtxPoolCh <- ctx:
+	default:
+		pushCtxPool.Put(ctx)
+	}
+}
+
+var pushCtxPool sync.Pool
+var pushCtxPoolCh = make(chan *pushCtx, runtime.GOMAXPROCS(-1))
--- a/app/vminsert/opentsdbhttp/server.go
+++ b/app/vminsert/opentsdbhttp/server.go
@@ -0,0 +1,87 @@
+package opentsdbhttp
+
+import (
+	"context"
+	"net/http"
+	"time"
+
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
+	"github.com/VictoriaMetrics/metrics"
+)
+
+var (
+	writeRequests = metrics.NewCounter(`vm_http_requests_total{path="/insert/{}/api/put", protocol="opentsdb-http"}`)
+	writeErrors   = metrics.NewCounter(`vm_http_request_errors_total{path="/insert/{}/api/put", protocol="opentsdb-http"}`)
+)
+
+var (
+	httpServer     *http.Server
+	httpAddr       string
+	maxRequestSize int64
+)
+
+// Serve starts HTTP OpenTSDB server on the given addr.
+// maxReqSize is stored in maxRequestSize and passed to insertHandler as the request size limit.
+func Serve(addr string, maxReqSize int64) {
+	logger.Infof("starting HTTP OpenTSDB server at %q", addr)
+	// Remember the settings in package-level vars: requestHandler and Stop read them.
+	httpAddr, maxRequestSize = addr, maxReqSize
+	httpServer = &http.Server{
+		Addr:         addr,
+		Handler:      http.HandlerFunc(requestHandler),
+		ReadTimeout:  30 * time.Second,
+		WriteTimeout: 10 * time.Second,
+	}
+	go func() {
+		// ErrServerClosed is what ListenAndServe returns after Shutdown, so only other errors are fatal.
+		switch err := httpServer.ListenAndServe(); err {
+		case nil, http.ErrServerClosed:
+		default:
+			logger.Fatalf("FATAL: error serving HTTP OpenTSDB: %s", err)
+		}
+	}()
+}
+
+// requestHandler handles HTTP OpenTSDB insert request.
+func requestHandler(w http.ResponseWriter, r *http.Request) {
+	p, err := httpserver.ParsePath(r.URL.Path)
+	if err != nil {
+		httpserver.Errorf(w, "cannot parse path %q: %s", r.URL.Path, err)
+		return
+	}
+	if p.Prefix != "insert" {
+		// This is not our link.
+		httpserver.Errorf(w, "unexpected path requested on HTTP OpenTSDB server: %q", r.URL.Path)
+		return
+	}
+	at, err := auth.NewToken(p.AuthToken)
+	if err != nil {
+		httpserver.Errorf(w, "auth error: %s", err)
+		return
+	}
+
+	switch p.Suffix {
+	case "api/put":
+		writeRequests.Inc()
+		if err := insertHandler(at, r, maxRequestSize); err != nil {
+			writeErrors.Inc()
+			httpserver.Errorf(w, "error in %q: %s", r.URL.Path, err)
+			return
+		}
+		w.WriteHeader(http.StatusNoContent)
+	default:
+		httpserver.Errorf(w, "unexpected path requested on HTTP OpenTSDB server: %q", r.URL.Path)
+	}
+}
+
+// Stop stops HTTP OpenTSDB server.
+func Stop() {
+	logger.Infof("stopping HTTP OpenTSDB server at %q...", httpAddr)
+	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
+	defer cancel()
+	if err := httpServer.Shutdown(ctx); err != nil {
+		logger.Fatalf("FATAL: cannot close HTTP OpenTSDB server: %s", err)
+	}
+}
--- a/app/vminsert/prometheus/request_handler.go
+++ b/app/vminsert/prometheus/request_handler.go
@@ -6,55 +6,67 @@ import (
 	"runtime"
 	"sync"

-	"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/common"
 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/concurrencylimiter"
+	"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/netstorage"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
 	"github.com/VictoriaMetrics/metrics"
 )

-var rowsInserted = metrics.NewCounter(`vm_rows_inserted_total{type="prometheus"}`)
+var (
+	rowsInserted  = tenantmetrics.NewCounterMap(`vm_rows_inserted_total{type="prometheus"}`)
+	rowsPerInsert = metrics.NewSummary(`vm_rows_per_insert{type="prometheus"}`)
+)

 // InsertHandler processes remote write for prometheus.
-func InsertHandler(r *http.Request, maxSize int64) error {
+func InsertHandler(at *auth.Token, r *http.Request, maxSize int64) error {
 	return concurrencylimiter.Do(func() error {
-		return insertHandlerInternal(r, maxSize)
+		return insertHandlerInternal(at, r, maxSize)
 	})
 }

-func insertHandlerInternal(r *http.Request, maxSize int64) error {
+func insertHandlerInternal(at *auth.Token, r *http.Request, maxSize int64) error {
 	ctx := getPushCtx()
 	defer putPushCtx(ctx)
 	if err := ctx.Read(r, maxSize); err != nil {
 		return err
 	}
-	timeseries := ctx.req.Timeseries
-	rowsLen := 0
-	for i := range timeseries {
-		rowsLen += len(timeseries[i].Samples)
-	}
+
 	ic := &ctx.Common
-	ic.Reset(rowsLen)
+	ic.Reset()
+	timeseries := ctx.req.Timeseries
+	rowsTotal := 0
 	for i := range timeseries {
 		ts := &timeseries[i]
-		var metricNameRaw []byte
+		storageNodeIdx := ic.GetStorageNodeIdx(at, ts.Labels)
+		ic.MetricNameBuf = ic.MetricNameBuf[:0]
 		for i := range ts.Samples {
 			r := &ts.Samples[i]
-			metricNameRaw = ic.WriteDataPointExt(metricNameRaw, ts.Labels, r.Timestamp, r.Value)
+			if len(ic.MetricNameBuf) == 0 {
+				ic.MetricNameBuf = storage.MarshalMetricNameRaw(ic.MetricNameBuf[:0], at.AccountID, at.ProjectID, ts.Labels)
+			}
+			if err := ic.WriteDataPointExt(at, storageNodeIdx, ic.MetricNameBuf, r.Timestamp, r.Value); err != nil {
+				return err
+			}
 		}
-		rowsInserted.Add(len(ts.Samples))
+		rowsTotal += len(ts.Samples)
 	}
+	rowsInserted.Get(at).Add(rowsTotal)
+	rowsPerInsert.Update(float64(rowsTotal))
 	return ic.FlushBufs()
 }

 type pushCtx struct {
-	Common common.InsertCtx
+	Common netstorage.InsertCtx

 	req    prompb.WriteRequest
 	reqBuf []byte
 }

 func (ctx *pushCtx) reset() {
-	ctx.Common.Reset(0)
+	ctx.Common.Reset()
 	ctx.req.Reset()
 	ctx.reqBuf = ctx.reqBuf[:0]
 }
--- a/app/vmselect/Makefile
+++ b/app/vmselect/Makefile
@@ -0,0 +1,38 @@
+# All these commands must run from repository root.
+
+run-vmselect:
+	mkdir -p vmselect-cache
+	DOCKER_OPTS='-v $(shell pwd)/vmselect-cache:/cache' \
+	APP_NAME=vmselect \
+	ARGS='-storageNode=localhost:8401 -selectNode=localhost:8481 -cacheDataPath=/cache' \
+	$(MAKE) run-via-docker
+
+vmselect:
+	APP_NAME=vmselect $(MAKE) app-local
+
+vmselect-race:
+	APP_NAME=vmselect RACE=-race $(MAKE) app-local
+
+vmselect-prod:
+	APP_NAME=vmselect $(MAKE) app-via-docker
+
+vmselect-prod-race:
+	APP_NAME=vmselect RACE=-race $(MAKE) app-via-docker
+
+vmselect-pure:
+	APP_NAME=vmselect $(MAKE) app-local-pure
+
+vmselect-pure-prod:
+	APP_NAME=vmselect APP_SUFFIX='-pure' DOCKER_OPTS='--env CGO_ENABLED=0' $(MAKE) app-via-docker
+
+package-vmselect:
+	APP_NAME=vmselect $(MAKE) package-via-docker
+
+package-vmselect-race:
+	APP_NAME=vmselect RACE=-race $(MAKE) package-via-docker
+
+publish-vmselect:
+	APP_NAME=vmselect $(MAKE) publish-via-docker
+
+publish-vmselect-race:
+	APP_NAME=vmselect RACE=-race $(MAKE) publish-via-docker
--- a/app/vmselect/README.md
+++ b/app/vmselect/README.md
@@ -1,2 +1,6 @@
-`vmselect` performs the incoming queries and fetches the required data
-from `vmstorage`.
+`vmselect` performs the following tasks:
+
+- Splits incoming selects into tasks for `vmstorage` nodes and issues these tasks
+  to all the `vmstorage` nodes in the cluster.
+
+- Merges responses from all the `vmstorage` nodes and returns a single response.
--- a/app/vmselect/deployment/Dockerfile
+++ b/app/vmselect/deployment/Dockerfile
@@ -0,0 +1,5 @@
+FROM scratch
+COPY --from=local/certs:1.0.2 /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ca-certificates.crt
+COPY bin/vmselect-prod .
+EXPOSE 8481
+ENTRYPOINT ["/vmselect-prod"]
--- a/app/vmselect/main.go
+++ b/app/vmselect/main.go
@@ -1,7 +1,8 @@
-package vmselect
+package main

 import (
 	"flag"
+	"fmt"
 	"net/http"
 	"runtime"
 	"strings"
@@ -10,57 +11,150 @@ import (
 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/netstorage"
 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/prometheus"
 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/promql"
-	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmstorage"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/timerpool"
 	"github.com/VictoriaMetrics/metrics"
 )

 var (
-	deleteAuthKey         = flag.String("deleteAuthKey", "", "authKey for metrics' deletion via /api/v1/admin/tsdb/delete_series")
+	httpListenAddr        = flag.String("httpListenAddr", ":8481", "Address to listen for http connections")
+	cacheDataPath         = flag.String("cacheDataPath", "", "Path to directory for cache files. Cache isn't saved if empty")
 	maxConcurrentRequests = flag.Int("search.maxConcurrentRequests", runtime.GOMAXPROCS(-1)*2, "The maximum number of concurrent search requests. It shouldn't exceed 2*vCPUs for better performance. See also -search.maxQueueDuration")
 	maxQueueDuration      = flag.Duration("search.maxQueueDuration", 10*time.Second, "The maximum time the request waits for execution when -search.maxConcurrentRequests limit is reached")
+	storageNodes          = flagutil.NewArray("storageNode", "Addresses of vmstorage nodes; usage: -storageNode=vmstorage-host1:8401 -storageNode=vmstorage-host2:8401")
 )

-// Init initializes vmselect
-func Init() {
-	tmpDirPath := *vmstorage.DataPath + "/tmp"
-	fs.RemoveDirContents(tmpDirPath)
-	netstorage.InitTmpBlocksDir(tmpDirPath)
-	promql.InitRollupResultCache(*vmstorage.DataPath + "/cache/rollupResult")
+func main() {
+	flag.Parse()
+	buildinfo.Init()
+	logger.Init()
+	// Set up netstorage for the vmstorage nodes given via -storageNode.
+	logger.Infof("starting netstorage at storageNodes %s", *storageNodes)
+	startTime := time.Now()
+	if len(*storageNodes) == 0 {
+		logger.Fatalf("missing -storageNode arg")
+	}
+	netstorage.InitStorageNodes(*storageNodes)
+	logger.Infof("started netstorage in %s", time.Since(startTime))
+	// Without -cacheDataPath the tmp blocks dir and the rollup result cache get an empty path.
+	if len(*cacheDataPath) > 0 {
+		tmpDataPath := *cacheDataPath + "/tmp"
+		fs.RemoveDirContents(tmpDataPath)
+		netstorage.InitTmpBlocksDir(tmpDataPath)
+		promql.InitRollupResultCache(*cacheDataPath + "/rollupResult")
+	} else {
+		netstorage.InitTmpBlocksDir("")
+		promql.InitRollupResultCache("")
+	}
 	concurrencyCh = make(chan struct{}, *maxConcurrentRequests)
+	// Serve HTTP in the background; the main goroutine waits for the stop signal.
+	go func() {
+		httpserver.Serve(*httpListenAddr, requestHandler)
+	}()
+
+	sig := procutil.WaitForSigterm()
+	logger.Infof("service received signal %s", sig)
+
+	logger.Infof("gracefully shutting down the service at %q", *httpListenAddr)
+	startTime = time.Now()
+	if err := httpserver.Stop(*httpListenAddr); err != nil {
+		logger.Fatalf("cannot stop the service: %s", err)
+	}
+	logger.Infof("successfully shut down the service in %s", time.Since(startTime))
+	// netstorage is stopped after the HTTP server; the rollup result cache only if -cacheDataPath is set.
+	logger.Infof("shutting down netstorage...")
+	startTime = time.Now()
+	netstorage.Stop()
+	if len(*cacheDataPath) > 0 {
+		promql.StopRollupResultCache()
+	}
+	logger.Infof("successfully stopped netstorage in %s", time.Since(startTime))
+	// Wait until pending directory removals are finished before exiting.
+	fs.MustStopDirRemover()
+
+	logger.Infof("the vmselect has been stopped")
 }

 var concurrencyCh chan struct{}

-// Stop stops vmselect
-func Stop() {
-	promql.StopRollupResultCache()
-}
+var (
+	concurrencyLimitReached = metrics.NewCounter(`vm_concurrent_select_limit_reached_total`)
+	concurrencyLimitTimeout = metrics.NewCounter(`vm_concurrent_select_limit_timeout_total`)

-// RequestHandler handles remote read API requests for Prometheus
-func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
+	_ = metrics.NewGauge(`vm_concurrent_select_capacity`, func() float64 {
+		return float64(cap(concurrencyCh))
+	})
+	_ = metrics.NewGauge(`vm_concurrent_select_current`, func() float64 {
+		return float64(len(concurrencyCh))
+	})
+)
+
+func requestHandler(w http.ResponseWriter, r *http.Request) bool {
 	// Limit the number of concurrent queries.
-	// Sleep for a second until giving up. This should resolve short bursts in requests.
-	t := time.NewTimer(*maxQueueDuration)
 	select {
 	case concurrencyCh <- struct{}{}:
-		t.Stop()
 		defer func() { <-concurrencyCh }()
-	case <-t.C:
-		httpserver.Errorf(w, "cannot handle more than %d concurrent requests", cap(concurrencyCh))
+	default:
+		// Sleep for a while until giving up. This should resolve short bursts in requests.
+		concurrencyLimitReached.Inc()
+		t := timerpool.Get(*maxQueueDuration)
+		select {
+		case concurrencyCh <- struct{}{}:
+			timerpool.Put(t)
+			defer func() { <-concurrencyCh }()
+		case <-t.C:
+			timerpool.Put(t)
+			concurrencyLimitTimeout.Inc()
+			err := &httpserver.ErrorWithStatusCode{
+				Err:        fmt.Errorf("cannot handle more than %d concurrent requests", cap(concurrencyCh)),
+				StatusCode: http.StatusServiceUnavailable,
+			}
+			httpserver.Errorf(w, "%s", err)
+			return true
+		}
+	}
+
+	path := r.URL.Path
+	if path == "/internal/resetRollupResultCache" {
+		promql.ResetRollupResultCache()
 		return true
 	}

-	path := strings.Replace(r.URL.Path, "//", "/", -1)
-	if strings.HasPrefix(path, "/api/v1/label/") {
-		s := r.URL.Path[len("/api/v1/label/"):]
+	p, err := httpserver.ParsePath(path)
+	if err != nil {
+		httpserver.Errorf(w, "cannot parse path %q: %s", path, err)
+		return true
+	}
+	at, err := auth.NewToken(p.AuthToken)
+	if err != nil {
+		httpserver.Errorf(w, "auth error: %s", err)
+		return true
+	}
+	switch p.Prefix {
+	case "select":
+		return selectHandler(w, r, p, at)
+	case "delete":
+		return deleteHandler(w, r, p, at)
+	default:
+		// This is not our link
+		return false
+	}
+}
+
+func selectHandler(w http.ResponseWriter, r *http.Request, p *httpserver.Path, at *auth.Token) bool {
+	if strings.HasPrefix(p.Suffix, "prometheus/api/v1/label/") {
+		s := p.Suffix[len("prometheus/api/v1/label/"):]
 		if strings.HasSuffix(s, "/values") {
 			labelValuesRequests.Inc()
 			labelName := s[:len(s)-len("/values")]
 			httpserver.EnableCORS(w, r)
-			if err := prometheus.LabelValuesHandler(labelName, w, r); err != nil {
+			if err := prometheus.LabelValuesHandler(at, labelName, w, r); err != nil {
 				labelValuesErrors.Inc()
 				sendPrometheusError(w, r, err)
 				return true
@@ -69,76 +163,87 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
 		}
 	}

-	switch path {
-	case "/api/v1/query":
+	switch p.Suffix {
+	case "prometheus/api/v1/query":
 		queryRequests.Inc()
 		httpserver.EnableCORS(w, r)
-		if err := prometheus.QueryHandler(w, r); err != nil {
+		if err := prometheus.QueryHandler(at, w, r); err != nil {
 			queryErrors.Inc()
 			sendPrometheusError(w, r, err)
 			return true
 		}
 		return true
-	case "/api/v1/query_range":
+	case "prometheus/api/v1/query_range":
 		queryRangeRequests.Inc()
 		httpserver.EnableCORS(w, r)
-		if err := prometheus.QueryRangeHandler(w, r); err != nil {
+		if err := prometheus.QueryRangeHandler(at, w, r); err != nil {
 			queryRangeErrors.Inc()
 			sendPrometheusError(w, r, err)
 			return true
 		}
 		return true
-	case "/api/v1/series":
+	case "prometheus/api/v1/series":
 		seriesRequests.Inc()
 		httpserver.EnableCORS(w, r)
-		if err := prometheus.SeriesHandler(w, r); err != nil {
+		if err := prometheus.SeriesHandler(at, w, r); err != nil {
 			seriesErrors.Inc()
 			sendPrometheusError(w, r, err)
 			return true
 		}
 		return true
-	case "/api/v1/series/count":
+	case "prometheus/api/v1/series/count":
 		seriesCountRequests.Inc()
 		httpserver.EnableCORS(w, r)
-		if err := prometheus.SeriesCountHandler(w, r); err != nil {
+		if err := prometheus.SeriesCountHandler(at, w, r); err != nil {
 			seriesCountErrors.Inc()
 			sendPrometheusError(w, r, err)
 			return true
 		}
 		return true
-	case "/api/v1/labels":
+	case "prometheus/api/v1/labels":
 		labelsRequests.Inc()
 		httpserver.EnableCORS(w, r)
-		if err := prometheus.LabelsHandler(w, r); err != nil {
+		if err := prometheus.LabelsHandler(at, w, r); err != nil {
 			labelsErrors.Inc()
 			sendPrometheusError(w, r, err)
 			return true
 		}
 		return true
-	case "/api/v1/export":
+	case "prometheus/api/v1/labels/count":
+		labelsCountRequests.Inc()
+		httpserver.EnableCORS(w, r)
+		if err := prometheus.LabelsCountHandler(at, w, r); err != nil {
+			labelsCountErrors.Inc()
+			sendPrometheusError(w, r, err)
+			return true
+		}
+		return true
+	case "prometheus/api/v1/export":
 		exportRequests.Inc()
-		if err := prometheus.ExportHandler(w, r); err != nil {
+		if err := prometheus.ExportHandler(at, w, r); err != nil {
 			exportErrors.Inc()
 			httpserver.Errorf(w, "error in %q: %s", r.URL.Path, err)
 			return true
 		}
 		return true
-	case "/federate":
+	case "prometheus/federate":
 		federateRequests.Inc()
-		if err := prometheus.FederateHandler(w, r); err != nil {
+		if err := prometheus.FederateHandler(at, w, r); err != nil {
 			federateErrors.Inc()
-			httpserver.Errorf(w, "error int %q: %s", r.URL.Path, err)
+			httpserver.Errorf(w, "error in %q: %s", r.URL.Path, err)
 			return true
 		}
 		return true
-	case "/api/v1/admin/tsdb/delete_series":
+	default:
+		return false
+	}
+}
+
+func deleteHandler(w http.ResponseWriter, r *http.Request, p *httpserver.Path, at *auth.Token) bool {
+	switch p.Suffix {
+	case "prometheus/api/v1/admin/tsdb/delete_series":
 		deleteRequests.Inc()
-		authKey := r.FormValue("authKey")
-		if authKey != *deleteAuthKey {
-			httpserver.Errorf(w, "invalid authKey %q. It must match the value from -deleteAuthKey command line flag", authKey)
-			return true
-		}
-		if err := prometheus.DeleteHandler(r); err != nil {
+		if err := prometheus.DeleteHandler(at, r); err != nil {
 			deleteErrors.Inc()
 			httpserver.Errorf(w, "error in %q: %s", r.URL.Path, err)
 			return true
@@ -154,36 +259,42 @@ func sendPrometheusError(w http.ResponseWriter, r *http.Request, err error) {
 	logger.Errorf("error in %q: %s", r.URL.Path, err)

 	w.Header().Set("Content-Type", "application/json")
-	statusCode := 422
+	statusCode := http.StatusUnprocessableEntity
+	if esc, ok := err.(*httpserver.ErrorWithStatusCode); ok {
+		statusCode = esc.StatusCode
+	}
 	w.WriteHeader(statusCode)
 	prometheus.WriteErrorResponse(w, statusCode, err)
 }

 var (
-	labelValuesRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/label/{}/values"}`)
-	labelValuesErrors   = metrics.NewCounter(`vm_http_request_errors_total{path="/api/v1/label/{}/values"}`)
+	labelValuesRequests = metrics.NewCounter(`vm_http_requests_total{path="/select/{}/prometheus/api/v1/label/{}/values"}`)
+	labelValuesErrors   = metrics.NewCounter(`vm_http_request_errors_total{path="/select/{}/prometheus/api/v1/label/{}/values"}`)

-	queryRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/query"}`)
-	queryErrors   = metrics.NewCounter(`vm_http_request_errors_total{path="/api/v1/query"}`)
+	queryRequests = metrics.NewCounter(`vm_http_requests_total{path="/select/{}/prometheus/api/v1/query"}`)
+	queryErrors   = metrics.NewCounter(`vm_http_request_errors_total{path="/select/{}/prometheus/api/v1/query"}`)

-	queryRangeRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/query_range"}`)
-	queryRangeErrors   = metrics.NewCounter(`vm_http_request_errors_total{path="/api/v1/query_range"}`)
+	queryRangeRequests = metrics.NewCounter(`vm_http_requests_total{path="/select/{}/prometheus/api/v1/query_range"}`)
+	queryRangeErrors   = metrics.NewCounter(`vm_http_request_errors_total{path="/select/{}/prometheus/api/v1/query_range"}`)

-	seriesRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/series"}`)
-	seriesErrors   = metrics.NewCounter(`vm_http_request_errors_total{path="/api/v1/series"}`)
+	seriesRequests = metrics.NewCounter(`vm_http_requests_total{path="/select/{}/prometheus/api/v1/series"}`)
+	seriesErrors   = metrics.NewCounter(`vm_http_request_errors_total{path="/select/{}/prometheus/api/v1/series"}`)

-	seriesCountRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/series/count"}`)
-	seriesCountErrors   = metrics.NewCounter(`vm_http_request_errors_total{path="/api/v1/series/count"}`)
+	seriesCountRequests = metrics.NewCounter(`vm_http_requests_total{path="/select/{}/prometheus/api/v1/series/count"}`)
+	seriesCountErrors   = metrics.NewCounter(`vm_http_request_errors_total{path="/select/{}/prometheus/api/v1/series/count"}`)

-	labelsRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/labels"}`)
-	labelsErrors   = metrics.NewCounter(`vm_http_request_errors_total{path="/api/v1/labels"}`)
+	labelsRequests = metrics.NewCounter(`vm_http_requests_total{path="/select/{}/prometheus/api/v1/labels"}`)
+	labelsErrors   = metrics.NewCounter(`vm_http_request_errors_total{path="/select/{}/prometheus/api/v1/labels"}`)

-	deleteRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/admin/tsdb/delete_series"}`)
-	deleteErrors   = metrics.NewCounter(`vm_http_request_errors_total{path="/api/v1/admin/tsdb/delete_series"}`)
+	labelsCountRequests = metrics.NewCounter(`vm_http_requests_total{path="/select/{}/prometheus/api/v1/labels/count"}`)
+	labelsCountErrors   = metrics.NewCounter(`vm_http_request_errors_total{path="/select/{}/prometheus/api/v1/labels/count"}`)

-	exportRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/export"}`)
-	exportErrors   = metrics.NewCounter(`vm_http_request_errors_total{path="/api/v1/export"}`)
+	deleteRequests = metrics.NewCounter(`vm_http_requests_total{path="/delete/{}/prometheus/api/v1/admin/tsdb/delete_series"}`)
+	deleteErrors   = metrics.NewCounter(`vm_http_request_errors_total{path="/delete/{}/prometheus/api/v1/admin/tsdb/delete_series"}`)

-	federateRequests = metrics.NewCounter(`vm_http_requests_total{path="/federate"}`)
-	federateErrors   = metrics.NewCounter(`vm_http_request_errors_total{path="/federate"}`)
+	exportRequests = metrics.NewCounter(`vm_http_requests_total{path="/select/{}/prometheus/api/v1/export"}`)
+	exportErrors   = metrics.NewCounter(`vm_http_request_errors_total{path="/select/{}/prometheus/api/v1/export"}`)
+
+	federateRequests = metrics.NewCounter(`vm_http_requests_total{path="/select/{}/prometheus/federate"}`)
+	federateErrors   = metrics.NewCounter(`vm_http_request_errors_total{path="/select/{}/prometheus/federate"}`)
 )
--- a/app/vmselect/netstorage/fadvise_freebsd.go
+++ b/app/vmselect/netstorage/fadvise_freebsd.go
@@ -0,0 +1,15 @@
+package netstorage
+
+import (
+	"os"
+
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
+	"golang.org/x/sys/unix"
+)
+
+// mustFadviseRandomRead passes the RANDOM|WILLNEED advice for f to unix.Fadvise (offset 0, length 0); errors go to logger.Panicf.
+func mustFadviseRandomRead(f *os.File) {
+	if err := unix.Fadvise(int(f.Fd()), 0, 0, unix.FADV_RANDOM|unix.FADV_WILLNEED); err != nil {
+		logger.Panicf("FATAL: error returned from unix.Fadvise(RANDOM|WILLNEED): %s", err)
+	}
+}
--- a/app/vmselect/netstorage/netstorage.go
+++ b/app/vmselect/netstorage/netstorage.go
--- a/app/vmselect/netstorage/tmp_blocks_file.go
+++ b/app/vmselect/netstorage/tmp_blocks_file.go
@@ -1,7 +1,6 @@
 package netstorage

 import (
-	"bufio"
 	"fmt"
 	"io/ioutil"
 	"os"
@@ -10,6 +9,7 @@ import (
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/memory"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
 	"github.com/VictoriaMetrics/metrics"
 )
@@ -22,9 +22,7 @@ func InitTmpBlocksDir(tmpDirPath string) {
 		tmpDirPath = os.TempDir()
 	}
 	tmpBlocksDir = tmpDirPath + "/searchResults"
-	if err := os.RemoveAll(tmpBlocksDir); err != nil {
-		logger.Panicf("FATAL: cannot remove %q: %s", tmpBlocksDir, err)
-	}
+	fs.MustRemoveAll(tmpBlocksDir)
 	if err := fs.MkdirAllIfNotExist(tmpBlocksDir); err != nil {
 		logger.Panicf("FATAL: cannot create %q: %s", tmpBlocksDir, err)
 	}
@@ -32,13 +30,23 @@ func InitTmpBlocksDir(tmpDirPath string) {

 var tmpBlocksDir string

-const maxInmemoryTmpBlocksFile = 512 * 1024
+// maxInmemoryTmpBlocksFile returns memory.Allowed()/1024, but never less than 64KiB.
+func maxInmemoryTmpBlocksFile() int {
+	const minLen = 64 * 1024
+	if n := memory.Allowed() / 1024; n >= minLen {
+		return n
+	}
+	return minLen
+}
+
+var _ = metrics.NewGauge(`vm_tmp_blocks_max_inmemory_file_size_bytes`, func() float64 {
+	return float64(maxInmemoryTmpBlocksFile())
+})

 type tmpBlocksFile struct {
 	buf []byte

-	f  *os.File
-	bw *bufio.Writer
+	f *os.File

 	offset uint64
 }
@@ -46,7 +54,9 @@ type tmpBlocksFile struct {
 func getTmpBlocksFile() *tmpBlocksFile {
 	v := tmpBlocksFilePool.Get()
 	if v == nil {
-		return &tmpBlocksFile{}
+		return &tmpBlocksFile{
+			buf: make([]byte, 0, maxInmemoryTmpBlocksFile()),
+		}
 	}
 	return v.(*tmpBlocksFile)
 }
@@ -55,7 +65,6 @@ func putTmpBlocksFile(tbf *tmpBlocksFile) {
 	tbf.MustClose()
 	tbf.buf = tbf.buf[:0]
 	tbf.f = nil
-	tbf.bw = nil
 	tbf.offset = 0
 	tmpBlocksFilePool.Put(tbf)
 }
@@ -71,22 +80,6 @@ func (addr tmpBlockAddr) String() string {
 	return fmt.Sprintf("offset %d, size %d", addr.offset, addr.size)
 }

-func getBufioWriter(f *os.File) *bufio.Writer {
-	v := bufioWriterPool.Get()
-	if v == nil {
-		return bufio.NewWriterSize(f, maxInmemoryTmpBlocksFile*2)
-	}
-	bw := v.(*bufio.Writer)
-	bw.Reset(f)
-	return bw
-}
-
-func putBufioWriter(bw *bufio.Writer) {
-	bufioWriterPool.Put(bw)
-}
-
-var bufioWriterPool sync.Pool
-
 var tmpBlocksFilesCreated = metrics.NewCounter(`vm_tmp_blocks_files_created_total`)

 // WriteBlock writes b to tbf.
@@ -94,28 +87,31 @@ var tmpBlocksFilesCreated = metrics.NewCounter(`vm_tmp_blocks_files_created_tota
 // It returns errors since the operation may fail on space shortage
 // and this must be handled.
 func (tbf *tmpBlocksFile) WriteBlock(b *storage.Block) (tmpBlockAddr, error) {
+	bb := tmpBufPool.Get()
+	defer tmpBufPool.Put(bb)
+	bb.B = storage.MarshalBlock(bb.B[:0], b)
+
 	var addr tmpBlockAddr
 	addr.offset = tbf.offset
-
-	tbfBufLen := len(tbf.buf)
-	tbf.buf = storage.MarshalBlock(tbf.buf, b)
-	addr.size = len(tbf.buf) - tbfBufLen
+	addr.size = len(bb.B)
 	tbf.offset += uint64(addr.size)
-	if tbf.offset <= maxInmemoryTmpBlocksFile {
+	if len(tbf.buf)+len(bb.B) <= cap(tbf.buf) {
+		// Fast path - the data fits tbf.buf
+		tbf.buf = append(tbf.buf, bb.B...)
 		return addr, nil
 	}

+	// Slow path: flush the data from tbf.buf to file.
 	if tbf.f == nil {
 		f, err := ioutil.TempFile(tmpBlocksDir, "")
 		if err != nil {
 			return addr, err
 		}
 		tbf.f = f
-		tbf.bw = getBufioWriter(f)
 		tmpBlocksFilesCreated.Inc()
 	}
-	_, err := tbf.bw.Write(tbf.buf)
-	tbf.buf = tbf.buf[:0]
+	_, err := tbf.f.Write(tbf.buf)
+	tbf.buf = append(tbf.buf[:0], bb.B...)
 	if err != nil {
 		return addr, fmt.Errorf("cannot write block to %q: %s", tbf.f.Name(), err)
 	}
@@ -126,15 +122,15 @@ func (tbf *tmpBlocksFile) Finalize() error {
 	if tbf.f == nil {
 		return nil
 	}
-
-	err := tbf.bw.Flush()
-	putBufioWriter(tbf.bw)
-	tbf.bw = nil
+	if _, err := tbf.f.Write(tbf.buf); err != nil {
+		return fmt.Errorf("cannot flush the remaining %d bytes to tmpBlocksFile: %s", len(tbf.buf), err)
+	}
+	tbf.buf = tbf.buf[:0]
 	if _, err := tbf.f.Seek(0, 0); err != nil {
 		logger.Panicf("FATAL: cannot seek to the start of file: %s", err)
 	}
 	mustFadviseRandomRead(tbf.f)
-	return err
+	return nil
 }

 func (tbf *tmpBlocksFile) MustReadBlockAt(dst *storage.Block, addr tmpBlockAddr) {
@@ -169,10 +165,6 @@ func (tbf *tmpBlocksFile) MustClose() {
 	if tbf.f == nil {
 		return
 	}
-	if tbf.bw != nil {
-		putBufioWriter(tbf.bw)
-		tbf.bw = nil
-	}
 	fname := tbf.f.Name()

 	// Remove the file at first, then close it.
--- a/app/vmselect/netstorage/tmp_blocks_file_test.go
+++ b/app/vmselect/netstorage/tmp_blocks_file_test.go
@@ -30,7 +30,7 @@ func TestTmpBlocksFileSerial(t *testing.T) {
 }

 func TestTmpBlocksFileConcurrent(t *testing.T) {
-	concurrency := 4
+	concurrency := 3
 	ch := make(chan error, concurrency)
 	for i := 0; i < concurrency; i++ {
 		go func() {
@@ -69,7 +69,7 @@ func testTmpBlocksFile() error {
 		_, _, _ = b.MarshalData(0, 0)
 		return &b
 	}
-	for _, size := range []int{1024, 16 * 1024, maxInmemoryTmpBlocksFile / 2, 2 * maxInmemoryTmpBlocksFile} {
+	for _, size := range []int{1024, 16 * 1024, maxInmemoryTmpBlocksFile() / 2, 2 * maxInmemoryTmpBlocksFile()} {
 		err := func() error {
 			tbf := getTmpBlocksFile()
 			defer putTmpBlocksFile(tbf)
@@ -94,7 +94,7 @@ func testTmpBlocksFile() error {
 			}

 			// Read blocks in parallel and verify them
-			concurrency := 3
+			concurrency := 2
 			workCh := make(chan int)
 			doneCh := make(chan error)
 			for i := 0; i < concurrency; i++ {
--- a/app/vmselect/prometheus/labels_count_response.qtpl
+++ b/app/vmselect/prometheus/labels_count_response.qtpl
@@ -0,0 +1,17 @@
+{% import "github.com/VictoriaMetrics/VictoriaMetrics/lib/storage" %}
+
+{% stripspace %}
+LabelsCountResponse generates response for /api/v1/labels/count .
+{% func LabelsCountResponse(labelEntries []storage.TagEntry) %}
+{
+	"status":"success",
+	"data":{
+		{% for i, e := range labelEntries %}
+			{%q= e.Key %}:{%d= len(e.Values) %}
+			{% if i+1 < len(labelEntries) %},{% endif %}
+		{% endfor %}
+	}
+}
+{% endfunc %}
+
+{% endstripspace %}
--- a/app/vmselect/prometheus/labels_count_response.qtpl.go
+++ b/app/vmselect/prometheus/labels_count_response.qtpl.go
@@ -0,0 +1,74 @@
+// Code generated by qtc from "labels_count_response.qtpl". DO NOT EDIT.
+// See https://github.com/valyala/quicktemplate for details.
+
+//line app/vmselect/prometheus/labels_count_response.qtpl:1
+package prometheus
+
+//line app/vmselect/prometheus/labels_count_response.qtpl:1
+import "github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
+
+// LabelsCountResponse generates response for /api/v1/labels/count .
+
+//line app/vmselect/prometheus/labels_count_response.qtpl:5
+import (
+	qtio422016 "io"
+
+	qt422016 "github.com/valyala/quicktemplate"
+)
+
+//line app/vmselect/prometheus/labels_count_response.qtpl:5
+var (
+	_ = qtio422016.Copy
+	_ = qt422016.AcquireByteBuffer
+)
+
+//line app/vmselect/prometheus/labels_count_response.qtpl:5
+func StreamLabelsCountResponse(qw422016 *qt422016.Writer, labelEntries []storage.TagEntry) {
+//line app/vmselect/prometheus/labels_count_response.qtpl:5
+	qw422016.N().S(`{"status":"success","data":{`)
+//line app/vmselect/prometheus/labels_count_response.qtpl:9
+	for i, e := range labelEntries {
+//line app/vmselect/prometheus/labels_count_response.qtpl:10
+		qw422016.N().Q(e.Key)
+//line app/vmselect/prometheus/labels_count_response.qtpl:10
+		qw422016.N().S(`:`)
+//line app/vmselect/prometheus/labels_count_response.qtpl:10
+		qw422016.N().D(len(e.Values))
+//line app/vmselect/prometheus/labels_count_response.qtpl:11
+		if i+1 < len(labelEntries) {
+//line app/vmselect/prometheus/labels_count_response.qtpl:11
+			qw422016.N().S(`,`)
+//line app/vmselect/prometheus/labels_count_response.qtpl:11
+		}
+//line app/vmselect/prometheus/labels_count_response.qtpl:12
+	}
+//line app/vmselect/prometheus/labels_count_response.qtpl:12
+	qw422016.N().S(`}}`)
+//line app/vmselect/prometheus/labels_count_response.qtpl:15
+}
+
+//line app/vmselect/prometheus/labels_count_response.qtpl:15
+func WriteLabelsCountResponse(qq422016 qtio422016.Writer, labelEntries []storage.TagEntry) {
+//line app/vmselect/prometheus/labels_count_response.qtpl:15
+	qw422016 := qt422016.AcquireWriter(qq422016)
+//line app/vmselect/prometheus/labels_count_response.qtpl:15
+	StreamLabelsCountResponse(qw422016, labelEntries)
+//line app/vmselect/prometheus/labels_count_response.qtpl:15
+	qt422016.ReleaseWriter(qw422016)
+//line app/vmselect/prometheus/labels_count_response.qtpl:15
+}
+
+//line app/vmselect/prometheus/labels_count_response.qtpl:15
+func LabelsCountResponse(labelEntries []storage.TagEntry) string {
+//line app/vmselect/prometheus/labels_count_response.qtpl:15
+	qb422016 := qt422016.AcquireByteBuffer()
+//line app/vmselect/prometheus/labels_count_response.qtpl:15
+	WriteLabelsCountResponse(qb422016, labelEntries)
+//line app/vmselect/prometheus/labels_count_response.qtpl:15
+	qs422016 := string(qb422016.B)
+//line app/vmselect/prometheus/labels_count_response.qtpl:15
+	qt422016.ReleaseByteBuffer(qb422016)
+//line app/vmselect/prometheus/labels_count_response.qtpl:15
+	return qs422016
+//line app/vmselect/prometheus/labels_count_response.qtpl:15
+}
--- a/app/vmselect/prometheus/prometheus.go
+++ b/app/vmselect/prometheus/prometheus.go
@@ -6,20 +6,27 @@ import (
 	"math"
 	"net/http"
 	"runtime"
+	"sort"
 	"strconv"
 	"strings"
+	"sync"
 	"time"

 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/netstorage"
 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/promql"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
 	"github.com/VictoriaMetrics/metrics"
 	"github.com/valyala/quicktemplate"
 )

 var (
-	maxQueryDuration = flag.Duration("search.maxQueryDuration", time.Second*30, "The maximum time for search query execution")
-	maxQueryLen      = flag.Int("search.maxQueryLen", 16*1024, "The maximum search query length in bytes")
+	maxQueryDuration    = flag.Duration("search.maxQueryDuration", time.Second*30, "The maximum time for search query execution")
+	maxQueryLen         = flag.Int("search.maxQueryLen", 16*1024, "The maximum search query length in bytes")
+	denyPartialResponse = flag.Bool("search.denyPartialResponse", false, "Whether to deny partial responses when some of vmstorage nodes are unavailable. This trades consistency over availability")
+	selectNodes         = flagutil.NewArray("selectNode", "Addresses of vmselect nodes; usage: -selectNode=vmselect-host1:8481 -selectNode=vmselect-host2:8481")
 )

 // Default step used if not set.
@@ -30,16 +37,28 @@ const defaultStep = 5 * 60 * 1000
 const latencyOffset = 60 * 1000

 // FederateHandler implements /federate . See https://prometheus.io/docs/prometheus/latest/federation/
-func FederateHandler(w http.ResponseWriter, r *http.Request) error {
+func FederateHandler(at *auth.Token, w http.ResponseWriter, r *http.Request) error {
 	startTime := time.Now()
 	ct := currentTime()
 	if err := r.ParseForm(); err != nil {
 		return fmt.Errorf("cannot parse request form values: %s", err)
 	}
 	matches := r.Form["match[]"]
-	maxLookback := getDuration(r, "max_lookback", defaultStep)
-	start := getTime(r, "start", ct-maxLookback)
-	end := getTime(r, "end", ct)
+	if len(matches) == 0 {
+		return fmt.Errorf("missing `match[]` arg")
+	}
+	maxLookback, err := getDuration(r, "max_lookback", defaultStep)
+	if err != nil {
+		return err
+	}
+	start, err := getTime(r, "start", ct-maxLookback)
+	if err != nil {
+		return err
+	}
+	end, err := getTime(r, "end", ct)
+	if err != nil {
+		return err
+	}
 	deadline := getDeadline(r)
 	if start >= end {
 		start = end - defaultStep
@@ -49,19 +68,24 @@ func FederateHandler(w http.ResponseWriter, r *http.Request) error {
 		return err
 	}
 	sq := &storage.SearchQuery{
+		AccountID:    at.AccountID,
+		ProjectID:    at.ProjectID,
 		MinTimestamp: start,
 		MaxTimestamp: end,
 		TagFilterss:  tagFilterss,
 	}
-	rss, err := netstorage.ProcessSearchQuery(sq, deadline)
+	rss, isPartial, err := netstorage.ProcessSearchQuery(at, sq, true, deadline)
 	if err != nil {
 		return fmt.Errorf("cannot fetch data for %q: %s", sq, err)
 	}
+	if isPartial && getDenyPartialResponse(r) {
+		return fmt.Errorf("cannot return full response, since some of vmstorage nodes are unavailable")
+	}

 	resultsCh := make(chan *quicktemplate.ByteBuffer)
 	doneCh := make(chan error)
 	go func() {
-		err := rss.RunParallel(func(rs *netstorage.Result) {
+		err := rss.RunParallel(func(rs *netstorage.Result, workerID uint) {
 			bb := quicktemplate.AcquireByteBuffer()
 			WriteFederate(bb, rs)
 			resultsCh <- bb
@@ -87,7 +111,7 @@ func FederateHandler(w http.ResponseWriter, r *http.Request) error {
 var federateDuration = metrics.NewSummary(`vm_request_duration_seconds{path="/federate"}`)

 // ExportHandler exports data in raw format from /api/v1/export.
-func ExportHandler(w http.ResponseWriter, r *http.Request) error {
+func ExportHandler(at *auth.Token, w http.ResponseWriter, r *http.Request) error {
 	startTime := time.Now()
 	ct := currentTime()
 	if err := r.ParseForm(); err != nil {
@@ -97,16 +121,25 @@ func ExportHandler(w http.ResponseWriter, r *http.Request) error {
 	if len(matches) == 0 {
 		// Maintain backwards compatibility
 		match := r.FormValue("match")
+		if len(match) == 0 {
+			return fmt.Errorf("missing `match[]` arg")
+		}
 		matches = []string{match}
 	}
-	start := getTime(r, "start", 0)
-	end := getTime(r, "end", ct)
+	start, err := getTime(r, "start", 0)
+	if err != nil {
+		return err
+	}
+	end, err := getTime(r, "end", ct)
+	if err != nil {
+		return err
+	}
 	format := r.FormValue("format")
 	deadline := getDeadline(r)
 	if start >= end {
 		start = end - defaultStep
 	}
-	if err := exportHandler(w, matches, start, end, format, deadline); err != nil {
+	if err := exportHandler(at, w, matches, start, end, format, deadline); err != nil {
 		return err
 	}
 	exportDuration.UpdateDuration(startTime)
@@ -115,7 +148,7 @@ func ExportHandler(w http.ResponseWriter, r *http.Request) error {

 var exportDuration = metrics.NewSummary(`vm_request_duration_seconds{path="/api/v1/export"}`)

-func exportHandler(w http.ResponseWriter, matches []string, start, end int64, format string, deadline netstorage.Deadline) error {
+func exportHandler(at *auth.Token, w http.ResponseWriter, matches []string, start, end int64, format string, deadline netstorage.Deadline) error {
 	writeResponseFunc := WriteExportStdResponse
 	writeLineFunc := WriteExportJSONLine
 	contentType := "application/json"
@@ -132,19 +165,25 @@ func exportHandler(w http.ResponseWriter, matches []string, start, end int64, fo
 		return err
 	}
 	sq := &storage.SearchQuery{
+		AccountID:    at.AccountID,
+		ProjectID:    at.ProjectID,
 		MinTimestamp: start,
 		MaxTimestamp: end,
 		TagFilterss:  tagFilterss,
 	}
-	rss, err := netstorage.ProcessSearchQuery(sq, deadline)
+	rss, isPartial, err := netstorage.ProcessSearchQuery(at, sq, true, deadline)
 	if err != nil {
 		return fmt.Errorf("cannot fetch data for %q: %s", sq, err)
 	}
+	if isPartial {
+		rss.Cancel()
+		return fmt.Errorf("cannot return full response, since some of vmstorage nodes are unavailable")
+	}

 	resultsCh := make(chan *quicktemplate.ByteBuffer, runtime.GOMAXPROCS(-1))
 	doneCh := make(chan error)
 	go func() {
-		err := rss.RunParallel(func(rs *netstorage.Result) {
+		err := rss.RunParallel(func(rs *netstorage.Result, workerID uint) {
 			bb := quicktemplate.AcquireByteBuffer()
 			writeLineFunc(bb, rs)
 			resultsCh <- bb
@@ -156,6 +195,11 @@ func exportHandler(w http.ResponseWriter, matches []string, start, end int64, fo
 	w.Header().Set("Content-Type", contentType)
 	writeResponseFunc(w, resultsCh)

+	// Consume all the data from resultsCh in the event writeResponseFunc
+	// fails to consume all the data.
+	for bb := range resultsCh {
+		quicktemplate.ReleaseByteBuffer(bb)
+	}
 	err = <-doneCh
 	if err != nil {
 		return fmt.Errorf("error during data fetching: %s", err)
@@ -166,7 +210,7 @@ func exportHandler(w http.ResponseWriter, matches []string, start, end int64, fo
 // DeleteHandler processes /api/v1/admin/tsdb/delete_series prometheus API request.
 //
 // See https://prometheus.io/docs/prometheus/latest/querying/api/#delete-series
-func DeleteHandler(r *http.Request) error {
+func DeleteHandler(at *auth.Token, r *http.Request) error {
 	startTime := time.Now()
 	if err := r.ParseForm(); err != nil {
 		return fmt.Errorf("cannot parse request form values: %s", err)
@@ -175,19 +219,28 @@ func DeleteHandler(r *http.Request) error {
 		return fmt.Errorf("start and end aren't supported. Remove these args from the query in order to delete all the matching metrics")
 	}
 	matches := r.Form["match[]"]
+	if len(matches) == 0 {
+		return fmt.Errorf("missing `match[]` arg")
+	}
+	deadline := getDeadline(r)
 	tagFilterss, err := getTagFilterssFromMatches(matches)
 	if err != nil {
 		return err
 	}
 	sq := &storage.SearchQuery{
+		AccountID:   at.AccountID,
+		ProjectID:   at.ProjectID,
 		TagFilterss: tagFilterss,
 	}
-	deletedCount, err := netstorage.DeleteSeries(sq)
+	deletedCount, err := netstorage.DeleteSeries(at, sq, deadline)
 	if err != nil {
 		return fmt.Errorf("cannot delete time series matching %q: %s", matches, err)
 	}
 	if deletedCount > 0 {
-		promql.ResetRollupResultCache()
+		// Reset rollup result cache on all the vmselect nodes,
+		// since the cache may contain deleted data.
+		// TODO: reset only cache for (account, project)
+		resetRollupResultCaches()
 	}
 	deleteDuration.UpdateDuration(startTime)
 	return nil
@@ -195,15 +248,81 @@ func DeleteHandler(r *http.Request) error {

 var deleteDuration = metrics.NewSummary(`vm_request_duration_seconds{path="/api/v1/admin/tsdb/delete_series"}`)

+// resetRollupResultCaches requests /internal/resetRollupResultCache on every -selectNode address; failures are logged and counted.
+func resetRollupResultCaches() {
+	if len(*selectNodes) == 0 {
+		logger.Panicf("BUG: missing -selectNode flag")
+	}
+	for _, addr := range *selectNodes {
+		callURL := fmt.Sprintf("http://%s/internal/resetRollupResultCache", addr)
+		resp, err := httpClient.Get(callURL)
+		if err != nil {
+			logger.Errorf("error when accessing %q: %s", callURL, err)
+			resetRollupResultCacheErrors.Inc()
+			continue
+		}
+		statusCode := resp.StatusCode
+		_ = resp.Body.Close()
+		if statusCode != http.StatusOK {
+			logger.Errorf("unexpected status code at %q; got %d; want %d", callURL, statusCode, http.StatusOK)
+			resetRollupResultCacheErrors.Inc()
+		}
+	}
+	resetRollupResultCacheCalls.Inc()
+}
+
+var (
+	resetRollupResultCacheErrors = metrics.NewCounter("vm_reset_rollup_result_cache_errors_total")
+	resetRollupResultCacheCalls  = metrics.NewCounter("vm_reset_rollup_result_cache_calls_total")
+)
+
+var httpClient = &http.Client{
+	Timeout: time.Second * 5,
+}
+
 // LabelValuesHandler processes /api/v1/label/<labelName>/values request.
 //
 // See https://prometheus.io/docs/prometheus/latest/querying/api/#querying-label-values
-func LabelValuesHandler(labelName string, w http.ResponseWriter, r *http.Request) error {
+func LabelValuesHandler(at *auth.Token, labelName string, w http.ResponseWriter, r *http.Request) error {
 	startTime := time.Now()
 	deadline := getDeadline(r)
-	labelValues, err := netstorage.GetLabelValues(labelName, deadline)
-	if err != nil {
-		return fmt.Errorf(`cannot obtain label values for %q: %s`, labelName, err)
+
+	if err := r.ParseForm(); err != nil {
+		return fmt.Errorf("cannot parse form values: %s", err)
+	}
+	var labelValues []string
+	var isPartial bool
+	if len(r.Form["match[]"]) == 0 && len(r.Form["start"]) == 0 && len(r.Form["end"]) == 0 {
+		var err error
+		labelValues, isPartial, err = netstorage.GetLabelValues(at, labelName, deadline)
+		if err != nil {
+			return fmt.Errorf(`cannot obtain label values for %q: %s`, labelName, err)
+		}
+	} else {
+		// Extended functionality that allows filtering by label filters and time range
+		// i.e. /api/v1/label/foo/values?match[]=foobar{baz="abc"}&start=...&end=...
+		// is equivalent to `label_values(foobar{baz="abc"}, foo)` call on the selected
+		// time range in Grafana templating.
+		matches := r.Form["match[]"]
+		if len(matches) == 0 {
+			matches = []string{fmt.Sprintf("{%s!=''}", labelName)}
+		}
+		ct := currentTime()
+		end, err := getTime(r, "end", ct)
+		if err != nil {
+			return err
+		}
+		start, err := getTime(r, "start", end-defaultStep)
+		if err != nil {
+			return err
+		}
+		labelValues, isPartial, err = labelValuesWithMatches(at, labelName, matches, start, end, deadline)
+		if err != nil {
+			return fmt.Errorf("cannot obtain label values for %q, match[]=%q, start=%d, end=%d: %s", labelName, matches, start, end, err)
+		}
+	}
+	if isPartial && getDenyPartialResponse(r) {
+		return fmt.Errorf("cannot return full response, since some of vmstorage nodes are unavailable")
 	}

 	w.Header().Set("Content-Type", "application/json")
@@ -212,18 +331,87 @@ func LabelValuesHandler(labelName string, w http.ResponseWriter, r *http.Request
 	return nil
 }

+// labelValuesWithMatches returns the sorted unique non-empty labelName values of the series selected by matches, start and end.
+func labelValuesWithMatches(at *auth.Token, labelName string, matches []string, start, end int64, deadline netstorage.Deadline) ([]string, bool, error) {
+	if len(matches) == 0 {
+		logger.Panicf("BUG: matches must be non-empty")
+	}
+	tagFilterss, err := getTagFilterssFromMatches(matches)
+	if err != nil {
+		return nil, false, err
+	}
+	if start >= end {
+		start = end - defaultStep
+	}
+	sq := &storage.SearchQuery{
+		AccountID:    at.AccountID,
+		ProjectID:    at.ProjectID,
+		MinTimestamp: start,
+		MaxTimestamp: end,
+		TagFilterss:  tagFilterss,
+	}
+	rss, isPartial, err := netstorage.ProcessSearchQuery(at, sq, false, deadline)
+	if err != nil {
+		return nil, false, fmt.Errorf("cannot fetch data for %q: %s", sq, err)
+	}
+
+	var mu sync.Mutex
+	seen := make(map[string]struct{})
+	collect := func(rs *netstorage.Result, workerID uint) {
+		v := rs.MetricName.GetTagValue(labelName)
+		if len(v) > 0 {
+			mu.Lock()
+			seen[string(v)] = struct{}{}
+			mu.Unlock()
+		}
+	}
+	if err := rss.RunParallel(collect); err != nil {
+		return nil, false, fmt.Errorf("error when data fetching: %s", err)
+	}
+
+	labelValues := make([]string, 0, len(seen))
+	for v := range seen {
+		labelValues = append(labelValues, v)
+	}
+	sort.Strings(labelValues)
+	return labelValues, isPartial, nil
+}
+
 var labelValuesDuration = metrics.NewSummary(`vm_request_duration_seconds{path="/api/v1/label/{}/values"}`)

+// LabelsCountHandler processes /api/v1/labels/count request.
+//
+// A partial result is turned into an error when getDenyPartialResponse(r) reports true.
+func LabelsCountHandler(at *auth.Token, w http.ResponseWriter, r *http.Request) error {
+	startTime := time.Now()
+	labelEntries, isPartial, err := netstorage.GetLabelEntries(at, getDeadline(r))
+	switch {
+	case err != nil:
+		return fmt.Errorf(`cannot obtain label entries: %s`, err)
+	case isPartial && getDenyPartialResponse(r):
+		return fmt.Errorf("cannot return full response, since some of vmstorage nodes are unavailable")
+	}
+
+	w.Header().Set("Content-Type", "application/json")
+	WriteLabelsCountResponse(w, labelEntries)
+	labelsCountDuration.UpdateDuration(startTime)
+	return nil
+}
+
+var labelsCountDuration = metrics.NewSummary(`vm_request_duration_seconds{path="/api/v1/labels/count"}`)
+
 // LabelsHandler processes /api/v1/labels request.
 //
 // See https://prometheus.io/docs/prometheus/latest/querying/api/#getting-label-names
-func LabelsHandler(w http.ResponseWriter, r *http.Request) error {
+func LabelsHandler(at *auth.Token, w http.ResponseWriter, r *http.Request) error {
 	startTime := time.Now()
 	deadline := getDeadline(r)
-	labels, err := netstorage.GetLabels(deadline)
+	labels, isPartial, err := netstorage.GetLabels(at, deadline)
 	if err != nil {
 		return fmt.Errorf("cannot obtain labels: %s", err)
 	}
+	if isPartial && getDenyPartialResponse(r) {
+		return fmt.Errorf("cannot return full response, since some of vmstorage nodes are unavailable")
+	}

 	w.Header().Set("Content-Type", "application/json")
 	WriteLabelsResponse(w, labels)
@@ -234,13 +422,17 @@ func LabelsHandler(w http.ResponseWriter, r *http.Request) error {
 var labelsDuration = metrics.NewSummary(`vm_request_duration_seconds{path="/api/v1/labels"}`)

 // SeriesCountHandler processes /api/v1/series/count request.
-func SeriesCountHandler(w http.ResponseWriter, r *http.Request) error {
+func SeriesCountHandler(at *auth.Token, w http.ResponseWriter, r *http.Request) error {
 	startTime := time.Now()
 	deadline := getDeadline(r)
-	n, err := netstorage.GetSeriesCount(deadline)
+	n, isPartial, err := netstorage.GetSeriesCount(at, deadline)
 	if err != nil {
 		return fmt.Errorf("cannot obtain series count: %s", err)
 	}
+	if isPartial && getDenyPartialResponse(r) {
+		return fmt.Errorf("cannot return full response, since some of vmstorage nodes are unavailable")
+	}
+
 	w.Header().Set("Content-Type", "application/json")
 	WriteSeriesCountResponse(w, n)
 	seriesCountDuration.UpdateDuration(startTime)
@@ -252,7 +444,7 @@ var seriesCountDuration = metrics.NewSummary(`vm_request_duration_seconds{path="
 // SeriesHandler processes /api/v1/series request.
 //
 // See https://prometheus.io/docs/prometheus/latest/querying/api/#finding-series-by-label-matchers
-func SeriesHandler(w http.ResponseWriter, r *http.Request) error {
+func SeriesHandler(at *auth.Token, w http.ResponseWriter, r *http.Request) error {
 	startTime := time.Now()
 	ct := currentTime()

@@ -260,8 +452,22 @@ func SeriesHandler(w http.ResponseWriter, r *http.Request) error {
 		return fmt.Errorf("cannot parse form values: %s", err)
 	}
 	matches := r.Form["match[]"]
-	start := getTime(r, "start", ct-defaultStep)
-	end := getTime(r, "end", ct)
+	if len(matches) == 0 {
+		return fmt.Errorf("missing `match[]` arg")
+	}
+	end, err := getTime(r, "end", ct)
+	if err != nil {
+		return err
+	}
+	// Do not set start to minTimeMsecs by default as Prometheus does,
+	// since this leads to fetching and scanning all the data from the storage,
+	// which can take a lot of time for big storages.
+	// It is better setting start as end-defaultStep by default.
+	// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/91
+	start, err := getTime(r, "start", end-defaultStep)
+	if err != nil {
+		return err
+	}
 	deadline := getDeadline(r)

 	tagFilterss, err := getTagFilterssFromMatches(matches)
@@ -272,19 +478,24 @@ func SeriesHandler(w http.ResponseWriter, r *http.Request) error {
 		start = end - defaultStep
 	}
 	sq := &storage.SearchQuery{
+		AccountID:    at.AccountID,
+		ProjectID:    at.ProjectID,
 		MinTimestamp: start,
 		MaxTimestamp: end,
 		TagFilterss:  tagFilterss,
 	}
-	rss, err := netstorage.ProcessSearchQuery(sq, deadline)
+	rss, isPartial, err := netstorage.ProcessSearchQuery(at, sq, false, deadline)
 	if err != nil {
 		return fmt.Errorf("cannot fetch data for %q: %s", sq, err)
 	}
+	if isPartial && getDenyPartialResponse(r) {
+		return fmt.Errorf("cannot return full response, since some of vmstorage nodes are unavailable")
+	}

 	resultsCh := make(chan *quicktemplate.ByteBuffer)
 	doneCh := make(chan error)
 	go func() {
-		err := rss.RunParallel(func(rs *netstorage.Result) {
+		err := rss.RunParallel(func(rs *netstorage.Result, workerID uint) {
 			bb := quicktemplate.AcquireByteBuffer()
 			writemetricNameObject(bb, &rs.MetricName)
 			resultsCh <- bb
@@ -297,11 +508,10 @@ func SeriesHandler(w http.ResponseWriter, r *http.Request) error {
 	WriteSeriesResponse(w, resultsCh)

 	// Consume all the data from resultsCh in the event WriteSeriesResponse
-	// fail to consume all the data.
+	// fails to consume all the data.
 	for bb := range resultsCh {
 		quicktemplate.ReleaseByteBuffer(bb)
 	}
-
 	err = <-doneCh
 	if err != nil {
 		return fmt.Errorf("error during data fetching: %s", err)
@@ -315,13 +525,22 @@ var seriesDuration = metrics.NewSummary(`vm_request_duration_seconds{path="/api/
 // QueryHandler processes /api/v1/query request.
 //
 // See https://prometheus.io/docs/prometheus/latest/querying/api/#instant-queries
-func QueryHandler(w http.ResponseWriter, r *http.Request) error {
+func QueryHandler(at *auth.Token, w http.ResponseWriter, r *http.Request) error {
 	startTime := time.Now()
 	ct := currentTime()

 	query := r.FormValue("query")
-	start := getTime(r, "time", ct)
-	step := getDuration(r, "step", latencyOffset)
+	if len(query) == 0 {
+		return fmt.Errorf("missing `query` arg")
+	}
+	start, err := getTime(r, "time", ct)
+	if err != nil {
+		return err
+	}
+	step, err := getDuration(r, "step", latencyOffset)
+	if err != nil {
+		return err
+	}
 	deadline := getDeadline(r)

 	if len(query) > *maxQueryLen {
@@ -350,7 +569,7 @@ func QueryHandler(w http.ResponseWriter, r *http.Request) error {
 		start -= offset
 		end := start
 		start = end - window
-		if err := exportHandler(w, []string{childQuery}, start, end, "promapi", deadline); err != nil {
+		if err := exportHandler(at, w, []string{childQuery}, start, end, "promapi", deadline); err != nil {
 			return err
 		}
 		queryDuration.UpdateDuration(startTime)
@@ -358,12 +577,15 @@ func QueryHandler(w http.ResponseWriter, r *http.Request) error {
 	}

 	ec := promql.EvalConfig{
-		Start:    start,
-		End:      start,
-		Step:     step,
-		Deadline: deadline,
+		AuthToken: at,
+		Start:     start,
+		End:       start,
+		Step:      step,
+		Deadline:  deadline,
+
+		DenyPartialResponse: getDenyPartialResponse(r),
 	}
-	result, err := promql.Exec(&ec, query)
+	result, err := promql.Exec(&ec, query, true)
 	if err != nil {
 		return fmt.Errorf("cannot execute %q: %s", query, err)
 	}
@@ -379,14 +601,26 @@ var queryDuration = metrics.NewSummary(`vm_request_duration_seconds{path="/api/v
 // QueryRangeHandler processes /api/v1/query_range request.
 //
 // See https://prometheus.io/docs/prometheus/latest/querying/api/#range-queries
-func QueryRangeHandler(w http.ResponseWriter, r *http.Request) error {
+func QueryRangeHandler(at *auth.Token, w http.ResponseWriter, r *http.Request) error {
 	startTime := time.Now()
 	ct := currentTime()

 	query := r.FormValue("query")
-	start := getTime(r, "start", ct-defaultStep)
-	end := getTime(r, "end", ct)
-	step := getDuration(r, "step", defaultStep)
+	if len(query) == 0 {
+		return fmt.Errorf("missing `query` arg")
+	}
+	start, err := getTime(r, "start", ct-defaultStep)
+	if err != nil {
+		return err
+	}
+	end, err := getTime(r, "end", ct)
+	if err != nil {
+		return err
+	}
+	step, err := getDuration(r, "step", defaultStep)
+	if err != nil {
+		return err
+	}
 	deadline := getDeadline(r)
 	mayCache := !getBool(r, "nocache")

@@ -400,44 +634,84 @@ func QueryRangeHandler(w http.ResponseWriter, r *http.Request) error {
 	if err := promql.ValidateMaxPointsPerTimeseries(start, end, step); err != nil {
 		return err
 	}
-	start, end = promql.AdjustStartEnd(start, end, step)
+	if mayCache {
+		start, end = promql.AdjustStartEnd(start, end, step)
+	}

 	ec := promql.EvalConfig{
-		Start:    start,
-		End:      end,
-		Step:     step,
-		Deadline: deadline,
-		MayCache: mayCache,
+		AuthToken: at,
+		Start:     start,
+		End:       end,
+		Step:      step,
+		Deadline:  deadline,
+		MayCache:  mayCache,
+
+		DenyPartialResponse: getDenyPartialResponse(r),
 	}
-	result, err := promql.Exec(&ec, query)
+	result, err := promql.Exec(&ec, query, false)
 	if err != nil {
 		return fmt.Errorf("cannot execute %q: %s", query, err)
 	}
 	if ct-end < latencyOffset {
-		adjustLastPoints(result)
+		result = adjustLastPoints(result)
 	}

+	// Remove NaN values as Prometheus does.
+	// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/153
+	removeNaNValuesInplace(result)
+
 	w.Header().Set("Content-Type", "application/json")
 	WriteQueryRangeResponse(w, result)
 	queryRangeDuration.UpdateDuration(startTime)
 	return nil
 }

+func removeNaNValuesInplace(tss []netstorage.Result) {
+	for i := range tss {
+		ts := &tss[i]
+		hasNaNs := false
+		for _, v := range ts.Values {
+			if math.IsNaN(v) {
+				hasNaNs = true
+				break
+			}
+		}
+		if !hasNaNs {
+			// Fast path: nothing to remove.
+			continue
+		}
+
+		// Slow path: remove NaNs.
+		srcTimestamps := ts.Timestamps
+		dstValues := ts.Values[:0]
+		dstTimestamps := ts.Timestamps[:0]
+		for j, v := range ts.Values {
+			if math.IsNaN(v) {
+				continue
+			}
+			dstValues = append(dstValues, v)
+			dstTimestamps = append(dstTimestamps, srcTimestamps[j])
+		}
+		ts.Values = dstValues
+		ts.Timestamps = dstTimestamps
+	}
+}
+
 var queryRangeDuration = metrics.NewSummary(`vm_request_duration_seconds{path="/api/v1/query_range"}`)

 // adjustLastPoints substitutes the last point values with the previous
 // point values, since the last points may contain garbage.
-func adjustLastPoints(tss []netstorage.Result) {
+func adjustLastPoints(tss []netstorage.Result) []netstorage.Result {
 	if len(tss) == 0 {
-		return
+		return nil
 	}

 	// Search for the last non-NaN value across all the timeseries.
 	lastNonNaNIdx := -1
 	for i := range tss {
-		r := &tss[i]
-		j := len(r.Values) - 1
-		for j >= 0 && math.IsNaN(r.Values[j]) {
+		values := tss[i].Values
+		j := len(values) - 1
+		for j >= 0 && math.IsNaN(values[j]) {
 			j--
 		}
 		if j > lastNonNaNIdx {
@@ -446,75 +720,97 @@ func adjustLastPoints(tss []netstorage.Result) {
 	}
 	if lastNonNaNIdx == -1 {
 		// All timeseries contain only NaNs.
-		return
+		return nil
 	}

-	// Substitute last three values starting from lastNonNaNIdx
+	// Substitute the last two values starting from lastNonNaNIdx
 	// with the previous values for each timeseries.
 	for i := range tss {
-		r := &tss[i]
-		for j := 0; j < 3; j++ {
+		values := tss[i].Values
+		for j := 0; j < 2; j++ {
 			idx := lastNonNaNIdx + j
-			if idx <= 0 || idx >= len(r.Values) {
+			if idx <= 0 || idx >= len(values) || math.IsNaN(values[idx-1]) {
 				continue
 			}
-			r.Values[idx] = r.Values[idx-1]
+			values[idx] = values[idx-1]
 		}
 	}
+	return tss
 }

-func getTime(r *http.Request, argKey string, defaultValue int64) int64 {
+func getTime(r *http.Request, argKey string, defaultValue int64) (int64, error) {
 	argValue := r.FormValue(argKey)
 	if len(argValue) == 0 {
-		return defaultValue
+		return defaultValue, nil
 	}
 	secs, err := strconv.ParseFloat(argValue, 64)
 	if err != nil {
 		// Try parsing string format
 		t, err := time.Parse(time.RFC3339, argValue)
 		if err != nil {
-			return defaultValue
+			// Handle Prometheus'-provided minTime and maxTime.
+			// See https://github.com/prometheus/client_golang/issues/614
+			switch argValue {
+			case prometheusMinTimeFormatted:
+				return minTimeMsecs, nil
+			case prometheusMaxTimeFormatted:
+				return maxTimeMsecs, nil
+			}
+			return 0, fmt.Errorf("cannot parse %q=%q: %s", argKey, argValue, err)
 		}
 		secs = float64(t.UnixNano()) / 1e9
 	}
 	msecs := int64(secs * 1e3)
-	if msecs < minTimeMsecs || msecs > maxTimeMsecs {
-		return defaultValue
+	if msecs < minTimeMsecs {
+		msecs = minTimeMsecs // clamp to the named lower bound rather than a literal 0
 	}
-	return msecs
+	if msecs > maxTimeMsecs || secs*1e3 > float64(maxTimeMsecs) {
+		msecs = maxTimeMsecs // the float check covers secs too large for int64, where the conversion above is implementation-defined
+	}
+	return msecs, nil
 }

+var (
+	// These constants were obtained from https://github.com/prometheus/prometheus/blob/91d7175eaac18b00e370965f3a8186cc40bf9f55/web/api/v1/api.go#L442
+	// See https://github.com/prometheus/client_golang/issues/614 for details.
+	prometheusMinTimeFormatted = time.Unix(math.MinInt64/1000+62135596801, 0).UTC().Format(time.RFC3339Nano)
+	prometheusMaxTimeFormatted = time.Unix(math.MaxInt64/1000-62135596801, 999999999).UTC().Format(time.RFC3339Nano)
+)
+
 const (
 	// These values prevent from overflow when storing msec-precision time in int64.
-	minTimeMsecs = int64(-1<<63) / 1e6
+	minTimeMsecs = 0 // use 0 instead of `int64(-1<<63) / 1e6` because the storage engine doesn't actually support negative time
 	maxTimeMsecs = int64(1<<63-1) / 1e6
 )

-func getDuration(r *http.Request, argKey string, defaultValue int64) int64 {
+func getDuration(r *http.Request, argKey string, defaultValue int64) (int64, error) {
 	argValue := r.FormValue(argKey)
 	if len(argValue) == 0 {
-		return defaultValue
+		return defaultValue, nil
 	}
 	secs, err := strconv.ParseFloat(argValue, 64)
 	if err != nil {
 		// Try parsing string format
 		d, err := time.ParseDuration(argValue)
 		if err != nil {
-			return defaultValue
+			return 0, fmt.Errorf("cannot parse %q=%q: %s", argKey, argValue, err)
 		}
 		secs = d.Seconds()
 	}
 	msecs := int64(secs * 1e3)
 	if msecs <= 0 || msecs > maxDurationMsecs {
-		return defaultValue
+		return 0, fmt.Errorf("%q=%dms is out of allowed range [%d ... %d]", argKey, msecs, 1, int64(maxDurationMsecs)) // 0ms is rejected above, so the smallest allowed value is 1ms
 	}
-	return msecs
+	return msecs, nil
 }

 const maxDurationMsecs = 100 * 365 * 24 * 3600 * 1000

 func getDeadline(r *http.Request) netstorage.Deadline {
-	d := getDuration(r, "timeout", 0)
+	d, err := getDuration(r, "timeout", 0)
+	if err != nil {
+		d = 0
+	}
 	dMax := int64(maxQueryDuration.Seconds() * 1e3)
 	if d <= 0 || d > dMax {
 		d = dMax
@@ -548,3 +844,10 @@ func getTagFilterssFromMatches(matches []string) ([][]storage.TagFilter, error)
 	}
 	return tagFilterss, nil
 }
+
+// getDenyPartialResponse reports whether partial responses must be rejected for r:
+// either the denyPartialResponse flag is set, or getBool returns true for
+// the request's `deny_partial_response` arg (checked only when the flag is unset).
+func getDenyPartialResponse(r *http.Request) bool {
+	return *denyPartialResponse || getBool(r, "deny_partial_response")
+}
--- a/app/vmselect/prometheus/prometheus_test.go
+++ b/app/vmselect/prometheus/prometheus_test.go
@@ -0,0 +1,115 @@
+package prometheus
+
+import (
+	"fmt"
+	"math"
+	"net/http"
+	"net/url"
+	"reflect"
+	"testing"
+
+	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/netstorage"
+)
+
+func TestRemoveNaNValuesInplace(t *testing.T) {
+	f := func(tss []netstorage.Result, tssExpected []netstorage.Result) {
+		t.Helper()
+		removeNaNValuesInplace(tss)
+		if !reflect.DeepEqual(tss, tssExpected) {
+			t.Fatalf("unexpected result; got %v; want %v", tss, tssExpected)
+		}
+	}
+
+	nan := math.NaN()
+
+	f(nil, nil)
+	f([]netstorage.Result{
+		{
+			Timestamps: []int64{100, 200, 300},
+			Values:     []float64{1, 2, 3},
+		},
+		{
+			Timestamps: []int64{100, 200, 300, 400},
+			Values:     []float64{nan, nan, 3, nan},
+		},
+	}, []netstorage.Result{
+		{
+			Timestamps: []int64{100, 200, 300},
+			Values:     []float64{1, 2, 3},
+		},
+		{
+			Timestamps: []int64{300},
+			Values:     []float64{3},
+		},
+	})
+}
+
+func TestGetTimeSuccess(t *testing.T) {
+	f := func(s string, timestampExpected int64) {
+		t.Helper()
+		urlStr := fmt.Sprintf("http://foo.bar/baz?s=%s", url.QueryEscape(s))
+		r, err := http.NewRequest("GET", urlStr, nil)
+		if err != nil {
+			t.Fatalf("unexpected error in NewRequest: %s", err)
+		}
+
+		// Verify defaultValue
+		ts, err := getTime(r, "foo", 123)
+		if err != nil {
+			t.Fatalf("unexpected error when obtaining default time from getTime(%q): %s", s, err)
+		}
+		if ts != 123 {
+			t.Fatalf("unexpected default value for getTime(%q); got %d; want %d", s, ts, 123)
+		}
+
+		// Verify timestampExpected
+		ts, err = getTime(r, "s", 123)
+		if err != nil {
+			t.Fatalf("unexpected error in getTime(%q): %s", s, err)
+		}
+		if ts != timestampExpected {
+			t.Fatalf("unexpected timestamp for getTime(%q); got %d; want %d", s, ts, timestampExpected)
+		}
+	}
+
+	f("2019-07-07T20:01:02Z", 1562529662000)
+	f("2019-07-07T20:47:40+03:00", 1562521660000)
+	f("-292273086-05-16T16:47:06Z", minTimeMsecs)
+	f("292277025-08-18T07:12:54.999999999Z", maxTimeMsecs)
+	f("1562529662.324", 1562529662324)
+	f("-9223372036.854", minTimeMsecs)
+	f("-9223372036.855", minTimeMsecs)
+	f("9223372036.855", maxTimeMsecs)
+}
+
+func TestGetTimeError(t *testing.T) {
+	f := func(s string) {
+		t.Helper()
+		urlStr := fmt.Sprintf("http://foo.bar/baz?s=%s", url.QueryEscape(s))
+		r, err := http.NewRequest("GET", urlStr, nil)
+		if err != nil {
+			t.Fatalf("unexpected error in NewRequest: %s", err)
+		}
+
+		// Verify that the default value is returned for the missing `foo` arg
+		ts, err := getTime(r, "foo", 123)
+		if err != nil {
+			t.Fatalf("unexpected error when obtaining default time from getTime(%q): %s", s, err)
+		}
+		if ts != 123 {
+			t.Fatalf("unexpected default value for getTime(%q); got %d; want %d", s, ts, 123)
+		}
+
+		// Verify that the malformed `s` arg results in an error instead of a timestamp
+		_, err = getTime(r, "s", 123)
+		if err == nil {
+			t.Fatalf("expecting non-nil error in getTime(%q)", s)
+		}
+	}
+
+	f("foo")
+	f("2019-07-07T20:01:02Zisdf")
+	f("2019-07-07T20:47:40+03:00123")
+	f("-292273086-05-16T16:47:07Z")          // differs only in the seconds field from the min time string accepted in TestGetTimeSuccess
+	f("292277025-08-18T07:12:54.999999998Z") // differs only in the last fractional digit from the max time string accepted in TestGetTimeSuccess
+}
--- a/app/vmselect/promql/aggr.go
+++ b/app/vmselect/promql/aggr.go
@@ -6,6 +6,9 @@ import (
 	"sort"
 	"strconv"
 	"strings"
+
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
 )

 var aggrFuncs = map[string]aggrFunc{
@@ -26,6 +29,8 @@ var aggrFuncs = map[string]aggrFunc{
 	"median":   aggrFuncMedian,
 	"limitk":   aggrFuncLimitK,
 	"distinct": newAggrFunc(aggrFuncDistinct),
+	"sum2":     newAggrFunc(aggrFuncSum2),
+	"geomean":  newAggrFunc(aggrFuncGeomean),
 }

 type aggrFunc func(afa *aggrFuncArg) ([]*timeseries, error)
@@ -65,33 +70,26 @@ func newAggrFunc(afe func(tss []*timeseries) []*timeseries) aggrFunc {
 	}
 }

+// removeGroupTags applies modifier to metricName: empty Op or `by` -> RemoveTagsOn(Args), `without` -> RemoveTagsIgnoring(Args); Op is matched case-insensitively and any other value panics.
+func removeGroupTags(metricName *storage.MetricName, modifier *modifierExpr) {
+	switch groupOp := strings.ToLower(modifier.Op); groupOp {
+	case "", "by":
+		metricName.RemoveTagsOn(modifier.Args)
+	case "without":
+		metricName.RemoveTagsIgnoring(modifier.Args)
+	default:
+		logger.Panicf("BUG: unknown group modifier: %q", groupOp)
+	}
+}
+
 func aggrFuncExt(afe func(tss []*timeseries) []*timeseries, argOrig []*timeseries, modifier *modifierExpr, keepOriginal bool) ([]*timeseries, error) {
 	arg := copyTimeseriesMetricNames(argOrig)

-	// Filter out superflouos tags.
-	var groupTags []string
-	groupOp := "by"
-	if modifier.Op != "" {
-		groupTags = modifier.Args
-		groupOp = strings.ToLower(modifier.Op)
-	}
-	switch groupOp {
-	case "by":
-		for _, ts := range arg {
-			ts.MetricName.RemoveTagsOn(groupTags)
-		}
-	case "without":
-		for _, ts := range arg {
-			ts.MetricName.RemoveTagsIgnoring(groupTags)
-		}
-	default:
-		return nil, fmt.Errorf(`unknown modifier: %q`, groupOp)
-	}
-
 	// Perform grouping.
 	m := make(map[string][]*timeseries)
 	bb := bbPool.Get()
 	for i, ts := range arg {
+		removeGroupTags(&ts.MetricName, modifier)
 		bb.B = marshalMetricNameSorted(bb.B[:0], &ts.MetricName)
 		if keepOriginal {
 			ts = argOrig[i]
@@ -100,10 +98,18 @@ func aggrFuncExt(afe func(tss []*timeseries) []*timeseries, argOrig []*timeserie
 	}
 	bbPool.Put(bb)

+	srcTssCount := 0
+	dstTssCount := 0
 	rvs := make([]*timeseries, 0, len(m))
 	for _, tss := range m {
 		rv := afe(tss)
 		rvs = append(rvs, rv...)
+		srcTssCount += len(tss)
+		dstTssCount += len(rv)
+		if dstTssCount > 2000 && dstTssCount > 16*srcTssCount {
+			// This looks like count_values explosion.
+			return nil, fmt.Errorf(`too many timeseries after aggregation; got %d; want less than %d`, dstTssCount, 16*srcTssCount)
+		}
 	}
 	return rvs, nil
 }
@@ -132,6 +138,52 @@ func aggrFuncSum(tss []*timeseries) []*timeseries {
 	return tss[:1]
 }

+func aggrFuncSum2(tss []*timeseries) []*timeseries {
+	dst := tss[0]
+	for i := range dst.Values {
+		sum2 := float64(0)
+		count := 0
+		for _, ts := range tss {
+			v := ts.Values[i]
+			if math.IsNaN(v) {
+				continue
+			}
+			sum2 += v * v
+			count++
+		}
+		if count == 0 {
+			sum2 = nan
+		}
+		dst.Values[i] = sum2
+	}
+	return tss[:1]
+}
+
+func aggrFuncGeomean(tss []*timeseries) []*timeseries {
+	if len(tss) == 1 {
+		// Fast path - nothing to geomean.
+		return tss
+	}
+	dst := tss[0]
+	for i := range dst.Values {
+		p := 1.0
+		count := 0
+		for _, ts := range tss {
+			v := ts.Values[i]
+			if math.IsNaN(v) {
+				continue
+			}
+			p *= v
+			count++
+		}
+		if count == 0 {
+			p = nan
+		}
+		dst.Values[i] = math.Pow(p, 1/float64(count))
+	}
+	return tss[:1]
+}
+
 func aggrFuncMin(tss []*timeseries) []*timeseries {
 	if len(tss) == 1 {
 		// Fast path - nothing to min.
@@ -260,7 +312,11 @@ func aggrFuncCount(tss []*timeseries) []*timeseries {
 			}
 			count++
 		}
-		dst.Values[i] = float64(count)
+		v := float64(count)
+		if count == 0 {
+			v = nan
+		}
+		dst.Values[i] = v
 	}
 	return tss[:1]
 }
@@ -297,10 +353,32 @@ func aggrFuncCountValues(afa *aggrFuncArg) ([]*timeseries, error) {
 	if err != nil {
 		return nil, err
 	}
+
+	// Remove dstLabel from grouping like Prometheus does.
+	modifier := &afa.ae.Modifier
+	switch strings.ToLower(modifier.Op) {
+	case "without":
+		modifier.Args = append(modifier.Args, dstLabel)
+	case "by":
+		dstArgs := modifier.Args[:0]
+		for _, arg := range modifier.Args {
+			if arg == dstLabel {
+				continue
+			}
+			dstArgs = append(dstArgs, arg)
+		}
+		modifier.Args = dstArgs
+	default:
+		// Do nothing
+	}
+
 	afe := func(tss []*timeseries) []*timeseries {
 		m := make(map[float64]bool)
 		for _, ts := range tss {
 			for _, v := range ts.Values {
+				if math.IsNaN(v) {
+					continue
+				}
 				m[v] = true
 			}
 		}
@@ -313,7 +391,7 @@ func aggrFuncCountValues(afa *aggrFuncArg) ([]*timeseries, error) {
 		var rvs []*timeseries
 		for _, v := range values {
 			var dst timeseries
-			dst.CopyFrom(tss[0])
+			dst.CopyFromShallowTimestamps(tss[0])
 			dst.MetricName.RemoveTag(dstLabel)
 			dst.MetricName.AddTag(dstLabel, strconv.FormatFloat(v, 'g', -1, 64))
 			for i := range dst.Values {
@@ -372,7 +450,7 @@ func newAggrFuncTopK(isReverse bool) aggrFunc {
 					ts.Values[n] = nan
 				}
 			}
-			return rvs
+			return removeNaNs(rvs)
 		}
 		return aggrFuncExt(afe, args[1], &afa.ae.Modifier, true)
 	}
@@ -457,6 +535,7 @@ func newAggrQuantileFunc(phis []float64) func(tss []*timeseries) []*timeseries {
 			idx := int(math.Round(float64(len(tss)-1) * phi))
 			dst.Values[n] = tss[idx].Values[n]
 		}
+		tss[0] = dst
 		return tss[:1]
 	}
 }
--- a/app/vmselect/promql/aggr_incremental.go
+++ b/app/vmselect/promql/aggr_incremental.go
@@ -0,0 +1,450 @@
+package promql
+
+import (
+	"math"
+	"strings"
+	"sync"
+)
+
+// callbacks for optimized incremental calculations for aggregate functions
+// over rollups over metricExpr.
+//
+// These calculations save RAM for aggregates over big number of time series.
+var incrementalAggrFuncCallbacksMap = map[string]*incrementalAggrFuncCallbacks{
+	"sum": {
+		updateAggrFunc:   updateAggrSum,
+		mergeAggrFunc:    mergeAggrSum,
+		finalizeAggrFunc: finalizeAggrCommon,
+	},
+	"min": {
+		updateAggrFunc:   updateAggrMin,
+		mergeAggrFunc:    mergeAggrMin,
+		finalizeAggrFunc: finalizeAggrCommon,
+	},
+	"max": {
+		updateAggrFunc:   updateAggrMax,
+		mergeAggrFunc:    mergeAggrMax,
+		finalizeAggrFunc: finalizeAggrCommon,
+	},
+	"avg": {
+		updateAggrFunc:   updateAggrAvg,
+		mergeAggrFunc:    mergeAggrAvg,
+		finalizeAggrFunc: finalizeAggrAvg,
+	},
+	"count": {
+		updateAggrFunc:   updateAggrCount,
+		mergeAggrFunc:    mergeAggrCount,
+		finalizeAggrFunc: finalizeAggrCount,
+	},
+	"sum2": {
+		updateAggrFunc:   updateAggrSum2,
+		mergeAggrFunc:    mergeAggrSum2,
+		finalizeAggrFunc: finalizeAggrCommon,
+	},
+	"geomean": {
+		updateAggrFunc:   updateAggrGeomean,
+		mergeAggrFunc:    mergeAggrGeomean,
+		finalizeAggrFunc: finalizeAggrGeomean,
+	},
+}
+
+type incrementalAggrFuncContext struct {
+	ae *aggrFuncExpr
+
+	mLock sync.Mutex
+	m     map[uint]map[string]*incrementalAggrContext
+
+	callbacks *incrementalAggrFuncCallbacks
+}
+
+func newIncrementalAggrFuncContext(ae *aggrFuncExpr, callbacks *incrementalAggrFuncCallbacks) *incrementalAggrFuncContext {
+	return &incrementalAggrFuncContext{
+		ae:        ae,
+		m:         make(map[uint]map[string]*incrementalAggrContext),
+		callbacks: callbacks,
+	}
+}
+
+func (iafc *incrementalAggrFuncContext) updateTimeseries(ts *timeseries, workerID uint) {
+	iafc.mLock.Lock()
+	m := iafc.m[workerID]
+	if m == nil {
+		m = make(map[string]*incrementalAggrContext, 1)
+		iafc.m[workerID] = m
+	}
+	iafc.mLock.Unlock()
+
+	removeGroupTags(&ts.MetricName, &iafc.ae.Modifier)
+	bb := bbPool.Get()
+	bb.B = marshalMetricNameSorted(bb.B[:0], &ts.MetricName)
+	iac := m[string(bb.B)]
+	if iac == nil {
+		tsAggr := &timeseries{
+			Values:     make([]float64, len(ts.Values)),
+			Timestamps: ts.Timestamps,
+			denyReuse:  true,
+		}
+		tsAggr.MetricName.CopyFrom(&ts.MetricName)
+		iac = &incrementalAggrContext{
+			ts:     tsAggr,
+			values: make([]float64, len(ts.Values)),
+		}
+		m[string(bb.B)] = iac
+	}
+	bbPool.Put(bb)
+	iafc.callbacks.updateAggrFunc(iac, ts.Values)
+}
+
+func (iafc *incrementalAggrFuncContext) finalizeTimeseries() []*timeseries {
+	// There is no need in iafc.mLock.Lock here, since finalizeTimeseries must be called
+	// without concurrent goroutines touching iafc.
+	mGlobal := make(map[string]*incrementalAggrContext)
+	mergeAggrFunc := iafc.callbacks.mergeAggrFunc
+	for _, m := range iafc.m {
+		for k, iac := range m {
+			iacGlobal := mGlobal[k]
+			if iacGlobal == nil {
+				mGlobal[k] = iac
+				continue
+			}
+			mergeAggrFunc(iacGlobal, iac)
+		}
+	}
+	tss := make([]*timeseries, 0, len(mGlobal))
+	finalizeAggrFunc := iafc.callbacks.finalizeAggrFunc
+	for _, iac := range mGlobal {
+		finalizeAggrFunc(iac)
+		tss = append(tss, iac.ts)
+	}
+	return tss
+}
+
+type incrementalAggrFuncCallbacks struct {
+	updateAggrFunc   func(iac *incrementalAggrContext, values []float64)
+	mergeAggrFunc    func(dst, src *incrementalAggrContext)
+	finalizeAggrFunc func(iac *incrementalAggrContext)
+}
+
+// getIncrementalAggrFuncCallbacks returns the callbacks registered in incrementalAggrFuncCallbacksMap for the lower-cased name, or nil if there are none.
+func getIncrementalAggrFuncCallbacks(name string) *incrementalAggrFuncCallbacks {
+	return incrementalAggrFuncCallbacksMap[strings.ToLower(name)]
+}
+
+type incrementalAggrContext struct {
+	ts     *timeseries
+	values []float64
+}
+
+func finalizeAggrCommon(iac *incrementalAggrContext) {
+	counts := iac.values
+	dstValues := iac.ts.Values
+	_ = dstValues[len(counts)-1]
+	for i, v := range counts {
+		if v == 0 {
+			dstValues[i] = nan
+		}
+	}
+}
+
+func updateAggrSum(iac *incrementalAggrContext, values []float64) {
+	dstValues := iac.ts.Values
+	dstCounts := iac.values
+	_ = dstValues[len(values)-1]
+	_ = dstCounts[len(values)-1]
+	for i, v := range values {
+		if math.IsNaN(v) {
+			continue
+		}
+		if dstCounts[i] == 0 {
+			dstValues[i] = v
+			dstCounts[i] = 1
+			continue
+		}
+		dstValues[i] += v
+	}
+}
+
+func mergeAggrSum(dst, src *incrementalAggrContext) {
+	srcValues := src.ts.Values
+	dstValues := dst.ts.Values
+	srcCounts := src.values
+	dstCounts := dst.values
+	_ = srcCounts[len(srcValues)-1]
+	_ = dstCounts[len(srcValues)-1]
+	_ = dstValues[len(srcValues)-1]
+	for i, v := range srcValues {
+		if srcCounts[i] == 0 {
+			continue
+		}
+		if dstCounts[i] == 0 {
+			dstValues[i] = v
+			dstCounts[i] = 1
+			continue
+		}
+		dstValues[i] += v
+	}
+}
+
+func updateAggrMin(iac *incrementalAggrContext, values []float64) {
+	dstValues := iac.ts.Values
+	dstCounts := iac.values
+	_ = dstValues[len(values)-1]
+	_ = dstCounts[len(values)-1]
+	for i, v := range values {
+		if math.IsNaN(v) {
+			continue
+		}
+		if dstCounts[i] == 0 {
+			dstValues[i] = v
+			dstCounts[i] = 1
+			continue
+		}
+		if v < dstValues[i] {
+			dstValues[i] = v
+		}
+	}
+}
+
+func mergeAggrMin(dst, src *incrementalAggrContext) {
+	srcValues := src.ts.Values
+	dstValues := dst.ts.Values
+	srcCounts := src.values
+	dstCounts := dst.values
+	_ = srcCounts[len(srcValues)-1]
+	_ = dstCounts[len(srcValues)-1]
+	_ = dstValues[len(srcValues)-1]
+	for i, v := range srcValues {
+		if srcCounts[i] == 0 {
+			continue
+		}
+		if dstCounts[i] == 0 {
+			dstValues[i] = v
+			dstCounts[i] = 1
+			continue
+		}
+		if v < dstValues[i] {
+			dstValues[i] = v
+		}
+	}
+}
+
+func updateAggrMax(iac *incrementalAggrContext, values []float64) {
+	dstValues := iac.ts.Values
+	dstCounts := iac.values
+	_ = dstValues[len(values)-1]
+	_ = dstCounts[len(values)-1]
+	for i, v := range values {
+		if math.IsNaN(v) {
+			continue
+		}
+		if dstCounts[i] == 0 {
+			dstValues[i] = v
+			dstCounts[i] = 1
+			continue
+		}
+		if v > dstValues[i] {
+			dstValues[i] = v
+		}
+	}
+}
+
+func mergeAggrMax(dst, src *incrementalAggrContext) {
+	srcValues := src.ts.Values
+	dstValues := dst.ts.Values
+	srcCounts := src.values
+	dstCounts := dst.values
+	_ = srcCounts[len(srcValues)-1]
+	_ = dstCounts[len(srcValues)-1]
+	_ = dstValues[len(srcValues)-1]
+	for i, v := range srcValues {
+		if srcCounts[i] == 0 {
+			continue
+		}
+		if dstCounts[i] == 0 {
+			dstValues[i] = v
+			dstCounts[i] = 1
+			continue
+		}
+		if v > dstValues[i] {
+			dstValues[i] = v
+		}
+	}
+}
+
+func updateAggrAvg(iac *incrementalAggrContext, values []float64) {
+	// Do not use `Rapid calculation methods` at https://en.wikipedia.org/wiki/Standard_deviation,
+	// since it is slower and has no obvious benefits in increased precision.
+	dstValues := iac.ts.Values
+	dstCounts := iac.values
+	_ = dstValues[len(values)-1]
+	_ = dstCounts[len(values)-1]
+	for i, v := range values {
+		if math.IsNaN(v) {
+			continue
+		}
+		if dstCounts[i] == 0 {
+			dstValues[i] = v
+			dstCounts[i] = 1
+			continue
+		}
+		dstValues[i] += v
+		dstCounts[i]++
+	}
+}
+
+func mergeAggrAvg(dst, src *incrementalAggrContext) {
+	srcValues := src.ts.Values
+	dstValues := dst.ts.Values
+	srcCounts := src.values
+	dstCounts := dst.values
+	_ = srcCounts[len(srcValues)-1]
+	_ = dstCounts[len(srcValues)-1]
+	_ = dstValues[len(srcValues)-1]
+	for i, v := range srcValues {
+		if srcCounts[i] == 0 {
+			continue
+		}
+		if dstCounts[i] == 0 {
+			dstValues[i] = v
+			dstCounts[i] = srcCounts[i]
+			continue
+		}
+		dstValues[i] += v
+		dstCounts[i] += srcCounts[i]
+	}
+}
+
+func finalizeAggrAvg(iac *incrementalAggrContext) {
+	dstValues := iac.ts.Values
+	counts := iac.values
+	_ = dstValues[len(counts)-1]
+	for i, v := range counts {
+		if v == 0 {
+			dstValues[i] = nan
+			continue
+		}
+		dstValues[i] /= v
+	}
+}
+
+func updateAggrCount(iac *incrementalAggrContext, values []float64) {
+	dstValues := iac.ts.Values
+	_ = dstValues[len(values)-1]
+	for i, v := range values {
+		if math.IsNaN(v) {
+			continue
+		}
+		dstValues[i]++
+	}
+}
+
+func mergeAggrCount(dst, src *incrementalAggrContext) {
+	srcValues := src.ts.Values
+	dstValues := dst.ts.Values
+	_ = dstValues[len(srcValues)-1]
+	for i, v := range srcValues {
+		dstValues[i] += v
+	}
+}
+
+func finalizeAggrCount(iac *incrementalAggrContext) {
+	dstValues := iac.ts.Values
+	for i, v := range dstValues {
+		if v == 0 {
+			dstValues[i] = nan
+		}
+	}
+}
+
+func updateAggrSum2(iac *incrementalAggrContext, values []float64) {
+	dstValues := iac.ts.Values
+	dstCounts := iac.values
+	_ = dstValues[len(values)-1]
+	_ = dstCounts[len(values)-1]
+	for i, v := range values {
+		if math.IsNaN(v) {
+			continue
+		}
+		if dstCounts[i] == 0 {
+			dstValues[i] = v * v
+			dstCounts[i] = 1
+			continue
+		}
+		dstValues[i] += v * v
+	}
+}
+
+func mergeAggrSum2(dst, src *incrementalAggrContext) {
+	srcValues := src.ts.Values
+	dstValues := dst.ts.Values
+	srcCounts := src.values
+	dstCounts := dst.values
+	_ = srcCounts[len(srcValues)-1]
+	_ = dstCounts[len(srcValues)-1]
+	_ = dstValues[len(srcValues)-1]
+	for i, v := range srcValues {
+		if srcCounts[i] == 0 {
+			continue
+		}
+		if dstCounts[i] == 0 {
+			dstValues[i] = v
+			dstCounts[i] = 1
+			continue
+		}
+		dstValues[i] += v
+	}
+}
+
+func updateAggrGeomean(iac *incrementalAggrContext, values []float64) {
+	dstValues := iac.ts.Values
+	dstCounts := iac.values
+	_ = dstValues[len(values)-1]
+	_ = dstCounts[len(values)-1]
+	for i, v := range values {
+		if math.IsNaN(v) {
+			continue
+		}
+		if dstCounts[i] == 0 {
+			dstValues[i] = v
+			dstCounts[i] = 1
+			continue
+		}
+		dstValues[i] *= v
+		dstCounts[i]++
+	}
+}
+
+func mergeAggrGeomean(dst, src *incrementalAggrContext) {
+	srcValues := src.ts.Values
+	dstValues := dst.ts.Values
+	srcCounts := src.values
+	dstCounts := dst.values
+	_ = srcCounts[len(srcValues)-1]
+	_ = dstCounts[len(srcValues)-1]
+	_ = dstValues[len(srcValues)-1]
+	for i, v := range srcValues {
+		if srcCounts[i] == 0 {
+			continue
+		}
+		if dstCounts[i] == 0 {
+			dstValues[i] = v
+			dstCounts[i] = srcCounts[i]
+			continue
+		}
+		dstValues[i] *= v
+		dstCounts[i] += srcCounts[i]
+	}
+}
+
+func finalizeAggrGeomean(iac *incrementalAggrContext) {
+	dstValues := iac.ts.Values
+	counts := iac.values
+	_ = dstValues[len(counts)-1]
+	for i, v := range counts {
+		if v == 0 {
+			dstValues[i] = nan
+			continue
+		}
+		dstValues[i] = math.Pow(dstValues[i], 1/v)
+	}
+}
--- a/app/vmselect/promql/aggr_incremental_test.go
+++ b/app/vmselect/promql/aggr_incremental_test.go
@@ -0,0 +1,188 @@
+package promql
+
+import (
+	"fmt"
+	"math"
+	"reflect"
+	"runtime"
+	"sync"
+	"testing"
+)
+
+// TestIncrementalAggr verifies incremental aggregate callbacks by feeding a fixed
+// set of time series (with NaN gaps) into testIncrementalParallelAggr and comparing
+// the finalized result with the expected per-point values.
+func TestIncrementalAggr(t *testing.T) {
+	defaultTimestamps := []int64{100e3, 200e3, 300e3, 400e3}
+	values := [][]float64{
+		{1, nan, 2, nan},
+		{3, nan, nan, 4},
+		{nan, nan, 5, 6},
+		{7, nan, 8, 9},
+		{4, nan, nan, nan},
+		{2, nan, 3, 2},
+		{0, nan, 1, 1},
+	}
+	tssSrc := make([]*timeseries, len(values))
+	for i, vs := range values {
+		ts := &timeseries{
+			Timestamps: defaultTimestamps,
+			Values:     vs,
+		}
+		tssSrc[i] = ts
+	}
+
+	// copyTimeseries gives every run its own timeseries structs,
+	// so the shared tssSrc fixture isn't handed to the aggregation directly.
+	copyTimeseries := func(tssSrc []*timeseries) []*timeseries {
+		tssDst := make([]*timeseries, len(tssSrc))
+		for i, tsSrc := range tssSrc {
+			var tsDst timeseries
+			tsDst.CopyFromShallowTimestamps(tsSrc)
+			tssDst[i] = &tsDst
+		}
+		return tssDst
+	}
+
+	// f checks a single aggregate function.
+	//
+	// It receives the *testing.T of the calling subtest instead of capturing
+	// the parent one: the subtests are parallel, and FailNow/Fatalf must be
+	// called on the test owned by the currently running goroutine.
+	f := func(t *testing.T, name string, valuesExpected []float64) {
+		t.Helper()
+		callbacks := getIncrementalAggrFuncCallbacks(name)
+		ae := &aggrFuncExpr{
+			Name: name,
+		}
+		tssExpected := []*timeseries{{
+			Timestamps: defaultTimestamps,
+			Values:     valuesExpected,
+		}}
+		// run the test multiple times to make sure there are no side effects on concurrency
+		for i := 0; i < 10; i++ {
+			iafc := newIncrementalAggrFuncContext(ae, callbacks)
+			tssSrcCopy := copyTimeseries(tssSrc)
+			if err := testIncrementalParallelAggr(iafc, tssSrcCopy, tssExpected); err != nil {
+				t.Fatalf("unexpected error on iteration %d: %s", i, err)
+			}
+		}
+	}
+
+	t.Run("sum", func(t *testing.T) {
+		t.Parallel()
+		valuesExpected := []float64{17, nan, 19, 22}
+		f(t, "sum", valuesExpected)
+	})
+	t.Run("min", func(t *testing.T) {
+		t.Parallel()
+		valuesExpected := []float64{0, nan, 1, 1}
+		f(t, "min", valuesExpected)
+	})
+	t.Run("max", func(t *testing.T) {
+		t.Parallel()
+		valuesExpected := []float64{7, nan, 8, 9}
+		f(t, "max", valuesExpected)
+	})
+	t.Run("avg", func(t *testing.T) {
+		t.Parallel()
+		valuesExpected := []float64{2.8333333333333335, nan, 3.8, 4.4}
+		f(t, "avg", valuesExpected)
+	})
+	t.Run("count", func(t *testing.T) {
+		t.Parallel()
+		valuesExpected := []float64{6, nan, 5, 5}
+		f(t, "count", valuesExpected)
+	})
+	t.Run("sum2", func(t *testing.T) {
+		t.Parallel()
+		valuesExpected := []float64{79, nan, 103, 138}
+		f(t, "sum2", valuesExpected)
+	})
+	t.Run("geomean", func(t *testing.T) {
+		t.Parallel()
+		valuesExpected := []float64{0, nan, 2.9925557394776896, 3.365865436338599}
+		f(t, "geomean", valuesExpected)
+	})
+}
+
+// testIncrementalParallelAggr pushes tssSrc through iafc.updateTimeseries from
+// several concurrent workers, finalizes the aggregation and compares the result
+// with tssExpected. It returns a descriptive error on mismatch.
+func testIncrementalParallelAggr(iafc *incrementalAggrFuncContext, tssSrc, tssExpected []*timeseries) error {
+	const workersCount = 3
+	input := make(chan *timeseries)
+	var workers sync.WaitGroup
+
+	// Every worker drains the shared channel under its own worker id.
+	worker := func(id uint) {
+		defer workers.Done()
+		for ts := range input {
+			runtime.Gosched() // allow other goroutines performing the work
+			iafc.updateTimeseries(ts, id)
+		}
+	}
+	for id := uint(0); id < workersCount; id++ {
+		workers.Add(1)
+		go worker(id)
+	}
+
+	for i := range tssSrc {
+		input <- tssSrc[i]
+	}
+	close(input)
+	workers.Wait()
+
+	tssActual := iafc.finalizeTimeseries()
+	err := expectTimeseriesEqual(tssActual, tssExpected)
+	if err == nil {
+		return nil
+	}
+	return fmt.Errorf("%s; tssActual=%v, tssExpected=%v", err, tssActual, tssExpected)
+}
+
+// expectTimeseriesEqual returns nil when actual and expected hold the same set
+// of time series irrespective of their order. Series are matched by the marshaled
+// metric name and compared with expectTsEqual.
+func expectTimeseriesEqual(actual, expected []*timeseries) error {
+	if got, want := len(actual), len(expected); got != want {
+		return fmt.Errorf("unexpected number of time series; got %d; want %d", got, want)
+	}
+	byNameActual := timeseriesToMap(actual)
+	byNameExpected := timeseriesToMap(expected)
+	// The sizes differ from the slice lengths when metric names collide.
+	if got, want := len(byNameActual), len(byNameExpected); got != want {
+		return fmt.Errorf("unexpected number of time series after converting to map; got %d; want %d", got, want)
+	}
+	for key, tsWant := range byNameExpected {
+		tsGot := byNameActual[key]
+		if tsGot == nil {
+			return fmt.Errorf("missing time series for key=%q", key)
+		}
+		if err := expectTsEqual(tsGot, tsWant); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// timeseriesToMap indexes tss by the marshaled metric name.
+// A later series overwrites an earlier one with the same name.
+func timeseriesToMap(tss []*timeseries) map[string]*timeseries {
+	byName := make(map[string]*timeseries, len(tss))
+	for i := range tss {
+		key := string(tss[i].MetricName.Marshal(nil))
+		byName[key] = tss[i]
+	}
+	return byName
+}
+
+// expectTsEqual compares two time series field by field - marshaled metric name,
+// timestamps and values (via compareValues) - and returns an error describing
+// the first difference found.
+func expectTsEqual(actual, expected *timeseries) error {
+	nameGot := actual.MetricName.Marshal(nil)
+	nameWant := expected.MetricName.Marshal(nil)
+	if string(nameGot) != string(nameWant) {
+		return fmt.Errorf("unexpected metric name; got %q; want %q", nameGot, nameWant)
+	}
+	if tsGot, tsWant := actual.Timestamps, expected.Timestamps; !reflect.DeepEqual(tsGot, tsWant) {
+		return fmt.Errorf("unexpected timestamps; got %v; want %v", tsGot, tsWant)
+	}
+	err := compareValues(actual.Values, expected.Values)
+	if err == nil {
+		return nil
+	}
+	return fmt.Errorf("%s; actual %v; expected %v", err, actual.Values, expected.Values)
+}
+
+// compareValues returns nil if vs1 (actual values) matches vs2 (expected values).
+//
+// The slices must have equal length. At every position either both values
+// must be NaN, or both must be numbers which are exactly equal (this covers
+// equal infinities) or differ by at most 1e-14.
+func compareValues(vs1, vs2 []float64) error {
+	if len(vs1) != len(vs2) {
+		return fmt.Errorf("unexpected number of values; got %d; want %d", len(vs1), len(vs2))
+	}
+	for i, v1 := range vs1 {
+		v2 := vs2[i]
+		// A NaN on exactly one side is a mismatch. This must be checked for both
+		// sides explicitly: math.Abs(v1-v2) is NaN in this case and every
+		// comparison against NaN is false, so the eps check below cannot catch it.
+		if math.IsNaN(v1) != math.IsNaN(v2) {
+			return fmt.Errorf("unexpected value; got %v; want %v", v1, v2)
+		}
+		if math.IsNaN(v1) || v1 == v2 {
+			continue
+		}
+		eps := math.Abs(v1 - v2)
+		if eps > 1e-14 {
+			return fmt.Errorf("unexpected value; got %v; want %v", v1, v2)
+		}
+	}
+	return nil
+}
--- a/app/vmselect/promql/binary_op.go
+++ b/app/vmselect/promql/binary_op.go
@@ -260,6 +260,9 @@ func newBinaryOpFunc(bf func(left, right float64, isBool bool) float64) binaryOp
 				dstValues[j] = bf(a, b, isBool)
 			}
 		}
+		// Optimization: remove time series containing only NaNs.
+		// This is quite common after applying filters like `q > 0`.
+		dst = removeNaNs(dst)
 		return dst, nil
 	}
 }
@@ -271,7 +274,7 @@ func adjustBinaryOpTags(be *binaryOpExpr, left, right []*timeseries) ([]*timeser
 			rvsLeft := make([]*timeseries, len(right))
 			tsLeft := left[0]
 			for i, tsRight := range right {
-				tsRight.MetricName.ResetMetricGroup()
+				resetMetricGroupIfRequired(be, tsRight)
 				rvsLeft[i] = tsLeft
 			}
 			return rvsLeft, right, right, nil
@@ -281,7 +284,7 @@ func adjustBinaryOpTags(be *binaryOpExpr, left, right []*timeseries) ([]*timeser
 			rvsRight := make([]*timeseries, len(left))
 			tsRight := right[0]
 			for i, tsLeft := range left {
-				tsLeft.MetricName.ResetMetricGroup()
+				resetMetricGroupIfRequired(be, tsLeft)
 				rvsRight[i] = tsRight
 			}
 			return left, rvsRight, left, nil
@@ -296,8 +299,13 @@ func adjustBinaryOpTags(be *binaryOpExpr, left, right []*timeseries) ([]*timeser
 		if len(tss) == 1 {
 			return nil
 		}
-		return fmt.Errorf(`duplicate timeseries on the %s side of %q: %s %s`, side, be.Op, stringMetricTags(&tss[0].MetricName), be.GroupModifier.AppendString(nil))
+		if mergeNonOverlappingTimeseries(tss) {
+			return nil
+		}
+		return fmt.Errorf(`duplicate timeseries on the %s side of %s %s: %s and %s`, side, be.Op, be.GroupModifier.AppendString(nil),
+			stringMetricTags(&tss[0].MetricName), stringMetricTags(&tss[1].MetricName))
 	}
+
 	var rvsLeft, rvsRight []*timeseries
 	mLeft, mRight := createTimeseriesMapByTagSet(be, left, right)
 	joinOp := strings.ToLower(be.JoinModifier.Op)
@@ -314,6 +322,7 @@ func adjustBinaryOpTags(be *binaryOpExpr, left, right []*timeseries) ([]*timeser
 			}
 			src := tssRight[0]
 			for _, ts := range tssLeft {
+				resetMetricGroupIfRequired(be, ts)
 				ts.MetricName.AddMissingTags(joinTags, &src.MetricName)
 				rvsLeft = append(rvsLeft, ts)
 				rvsRight = append(rvsRight, src)
@@ -324,6 +333,7 @@ func adjustBinaryOpTags(be *binaryOpExpr, left, right []*timeseries) ([]*timeser
 			}
 			src := tssLeft[0]
 			for _, ts := range tssRight {
+				resetMetricGroupIfRequired(be, ts)
 				ts.MetricName.AddMissingTags(joinTags, &src.MetricName)
 				rvsLeft = append(rvsLeft, src)
 				rvsRight = append(rvsRight, ts)
@@ -335,7 +345,7 @@ func adjustBinaryOpTags(be *binaryOpExpr, left, right []*timeseries) ([]*timeser
 			if err := ensureOneX("right", tssRight); err != nil {
 				return nil, nil, nil, err
 			}
-			tssLeft[0].MetricName.ResetMetricGroup()
+			resetMetricGroupIfRequired(be, tssLeft[0])
 			rvsLeft = append(rvsLeft, tssLeft[0])
 			rvsRight = append(rvsRight, tssRight[0])
 		default:
@@ -349,6 +359,19 @@ func adjustBinaryOpTags(be *binaryOpExpr, left, right []*timeseries) ([]*timeser
 	return rvsLeft, rvsRight, dst, nil
 }

+// resetMetricGroupIfRequired resets the metric group of ts unless the binary
+// operation described by be must keep it.
+//
+// The metric group is kept for comparison ops without the `bool` modifier
+// (like Prometheus does) and for the `default`, `if` and `ifnot` ops.
+func resetMetricGroupIfRequired(be *binaryOpExpr, ts *timeseries) {
+	keepMetricGroup := isBinaryOpCmp(be.Op) && !be.Bool
+	if !keepMetricGroup {
+		keepMetricGroup = be.Op == "default" || be.Op == "if" || be.Op == "ifnot"
+	}
+	if keepMetricGroup {
+		return
+	}
+	ts.MetricName.ResetMetricGroup()
+}
+
 func binaryOpPlus(left, right float64) float64 {
 	return left + right
 }
@@ -395,10 +418,25 @@ func binaryOpIfnot(left, right float64) float64 {
 }

+// binaryOpEq reports whether left equals right.
+// Unlike the plain `==` on floats, two NaN operands are treated as equal.
 func binaryOpEq(left, right float64) bool {
+	// Special handling for nan == nan.
+	// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/150 .
+	if math.IsNaN(left) {
+		return math.IsNaN(right)
+	}
+
 	return left == right
 }

+// binaryOpNeq reports whether left differs from right.
+// Two NaN operands are treated as equal (the result is false),
+// while NaN compared with a number is always reported as different.
 func binaryOpNeq(left, right float64) bool {
+	// Special handling for comparison with nan.
+	// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/150 .
+	if math.IsNaN(left) {
+		return !math.IsNaN(right)
+	}
+	if math.IsNaN(right) {
+		return true
+	}
+
 	return left != right
 }

@@ -497,3 +535,26 @@ func isScalar(arg []*timeseries) bool {
 	}
 	return len(mn.Tags) == 0
 }
+
+// mergeNonOverlappingTimeseries tries to merge all the series from tss into tss[0].
+//
+// The merge succeeds only when the series do not overlap, i.e. at most one series
+// holds a non-NaN value at every index. On success tss[0] receives the merged
+// values and true is returned. False is returned as soon as an overlap is found;
+// in that case the merged data is not copied back to tss[0].
+//
+// NOTE(review): this relies on CopyFromShallowTimestamps giving the scratch series
+// its own Values slice and on all series having equally sized Values - confirm.
+func mergeNonOverlappingTimeseries(tss []*timeseries) bool {
+	if n := len(tss); n < 2 {
+		logger.Panicf("BUG: expecting at least two timeseries. Got %d", n)
+	}
+
+	// Accumulate the merged values in a scratch series.
+	var merged timeseries
+	merged.CopyFromShallowTimestamps(tss[0])
+	mergedValues := merged.Values
+	for _, other := range tss[1:] {
+		for idx, v := range other.Values {
+			switch {
+			case math.IsNaN(mergedValues[idx]):
+				// The slot is still free - take the value (possibly NaN too).
+				mergedValues[idx] = v
+			case !math.IsNaN(v):
+				// Time series overlap.
+				return false
+			}
+		}
+	}
+	tss[0].CopyFromShallowTimestamps(&merged)
+	return true
+}
--- a/app/vmselect/promql/eval.go
+++ b/app/vmselect/promql/eval.go
@@ -8,6 +8,7 @@ import (
 	"sync"

 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/netstorage"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/memory"
@@ -16,10 +17,10 @@ import (
 )

 var (
-	maxPointsPerTimeseries = flag.Int("search.maxPointsPerTimeseries", 10e3, "The maximum points per a single timeseries returned from the search")
+	maxPointsPerTimeseries = flag.Int("search.maxPointsPerTimeseries", 30e3, "The maximum points per a single timeseries returned from the search")
 )

-// The minumum number of points per timeseries for enabling time rounding.
+// The minimum number of points per timeseries for enabling time rounding.
 // This improves cache hit ratio for frequently requested queries over
 // big time ranges.
 const minTimeseriesPointsForTimeRounding = 50
@@ -31,7 +32,7 @@ const minTimeseriesPointsForTimeRounding = 50
 func ValidateMaxPointsPerTimeseries(start, end, step int64) error {
 	points := (end-start)/step + 1
 	if uint64(points) > uint64(*maxPointsPerTimeseries) {
-		return fmt.Errorf(`too many points for the given step=%d, start=%d and end=%d: %d; cannot exceed %d points`,
+		return fmt.Errorf(`too many points for the given step=%d, start=%d and end=%d: %d; cannot exceed -search.maxPointsPerTimeseries=%d`,
 			step, start, end, uint64(points), *maxPointsPerTimeseries)
 	}
 	return nil
@@ -62,14 +63,17 @@ func AdjustStartEnd(start, end, step int64) (int64, int64) {

 // EvalConfig is the configuration required for query evaluation via Exec
 type EvalConfig struct {
-	Start int64
-	End   int64
-	Step  int64
+	AuthToken *auth.Token
+	Start     int64
+	End       int64
+	Step      int64

 	Deadline netstorage.Deadline

 	MayCache bool

+	DenyPartialResponse bool
+
 	timestamps     []int64
 	timestampsOnce sync.Once
 }
@@ -77,11 +81,13 @@ type EvalConfig struct {
 // newEvalConfig returns new EvalConfig copy from src.
 func newEvalConfig(src *EvalConfig) *EvalConfig {
 	var ec EvalConfig
+	ec.AuthToken = src.AuthToken
 	ec.Start = src.Start
 	ec.End = src.End
 	ec.Step = src.Step
 	ec.Deadline = src.Deadline
 	ec.MayCache = src.MayCache
+	ec.DenyPartialResponse = src.DenyPartialResponse

 	// do not copy src.timestamps - they must be generated again.
 	return &ec
@@ -145,14 +151,14 @@ func evalExpr(ec *EvalConfig, e expr) ([]*timeseries, error) {
 		re := &rollupExpr{
 			Expr: me,
 		}
-		rv, err := evalRollupFunc(ec, "default_rollup", rollupDefault, re)
+		rv, err := evalRollupFunc(ec, "default_rollup", rollupDefault, re, nil)
 		if err != nil {
 			return nil, fmt.Errorf(`cannot evaluate %q: %s`, me.AppendString(nil), err)
 		}
 		return rv, nil
 	}
 	if re, ok := e.(*rollupExpr); ok {
-		rv, err := evalRollupFunc(ec, "default_rollup", rollupDefault, re)
+		rv, err := evalRollupFunc(ec, "default_rollup", rollupDefault, re, nil)
 		if err != nil {
 			return nil, fmt.Errorf(`cannot evaluate %q: %s`, re.AppendString(nil), err)
 		}
@@ -188,13 +194,30 @@ func evalExpr(ec *EvalConfig, e expr) ([]*timeseries, error) {
 		if err != nil {
 			return nil, err
 		}
-		rv, err := evalRollupFunc(ec, fe.Name, rf, re)
+		rv, err := evalRollupFunc(ec, fe.Name, rf, re, nil)
 		if err != nil {
 			return nil, fmt.Errorf(`cannot evaluate %q: %s`, fe.AppendString(nil), err)
 		}
 		return rv, nil
 	}
 	if ae, ok := e.(*aggrFuncExpr); ok {
+		if callbacks := getIncrementalAggrFuncCallbacks(ae.Name); callbacks != nil {
+			fe, nrf := tryGetArgRollupFuncWithMetricExpr(ae)
+			if fe != nil {
+				// There is an optimized path for calculating aggrFuncExpr over rollupFunc over metricExpr.
+				// The optimized path saves RAM for aggregates over big number of time series.
+				args, re, err := evalRollupFuncArgs(ec, fe)
+				if err != nil {
+					return nil, err
+				}
+				rf, err := nrf(args)
+				if err != nil {
+					return nil, err
+				}
+				iafc := newIncrementalAggrFuncContext(ae, callbacks)
+				return evalRollupFunc(ec, fe.Name, rf, re, iafc)
+			}
+		}
 		args, err := evalExprs(ec, ae.Args)
 		if err != nil {
 			return nil, err
@@ -249,6 +272,69 @@ func evalExpr(ec *EvalConfig, e expr) ([]*timeseries, error) {
 	return nil, fmt.Errorf("unexpected expression %q", e.AppendString(nil))
 }

+// tryGetArgRollupFuncWithMetricExpr checks whether the single arg of ae has one of
+// the following forms:
+//
+//   - metricExpr
+//   - metricExpr[d]
+//   - rollupFunc(metricExpr)
+//   - rollupFunc(metricExpr[d])
+//
+// On match it returns the funcExpr to evaluate together with the constructor of
+// the corresponding rollup function. A bare metricExpr or metricExpr[d] is wrapped
+// into `default_rollup`. It returns (nil, nil) if ae has an unexpected number of
+// args, the arg doesn't match any of the forms, the metricExpr is empty
+// or the rollup function call lacks the rollup arg.
+func tryGetArgRollupFuncWithMetricExpr(ae *aggrFuncExpr) (*funcExpr, newRollupFunc) {
+	if len(ae.Args) != 1 {
+		return nil, nil
+	}
+	e := ae.Args[0]
+
+	if me, ok := e.(*metricExpr); ok {
+		// e = metricExpr
+		if me.IsEmpty() {
+			return nil, nil
+		}
+		fe := &funcExpr{
+			Name: "default_rollup",
+			Args: []expr{me},
+		}
+		nrf := getRollupFunc(fe.Name)
+		return fe, nrf
+	}
+	if re, ok := e.(*rollupExpr); ok {
+		if me, ok := re.Expr.(*metricExpr); !ok || me.IsEmpty() {
+			return nil, nil
+		}
+		// e = rollupExpr(metricExpr)
+		fe := &funcExpr{
+			Name: "default_rollup",
+			Args: []expr{re},
+		}
+		nrf := getRollupFunc(fe.Name)
+		return fe, nrf
+	}
+	fe, ok := e.(*funcExpr)
+	if !ok {
+		return nil, nil
+	}
+	nrf := getRollupFunc(fe.Name)
+	if nrf == nil {
+		// fe isn't a rollup function.
+		return nil, nil
+	}
+	rollupArgIdx := getRollupArgIdx(fe.Name)
+	if rollupArgIdx < 0 || rollupArgIdx >= len(fe.Args) {
+		// Incorrect number of args for the rollup func.
+		// Give up on the optimized path instead of panicking on the index below.
+		return nil, nil
+	}
+	arg := fe.Args[rollupArgIdx]
+	if me, ok := arg.(*metricExpr); ok {
+		// e = rollupFunc(metricExpr)
+		if me.IsEmpty() {
+			return nil, nil
+		}
+		// Return fe as is, so the remaining args of multi-arg rollup functions
+		// are preserved and the rollup arg stays at rollupArgIdx.
+		return fe, nrf
+	}
+	if re, ok := arg.(*rollupExpr); ok {
+		// e = rollupFunc(metricExpr[d])
+		if me, ok := re.Expr.(*metricExpr); !ok || me.IsEmpty() {
+			return nil, nil
+		}
+		return fe, nrf
+	}
+	return nil, nil
+}
+
 func evalExprs(ec *EvalConfig, es []expr) ([][]*timeseries, error) {
 	var rvs [][]*timeseries
 	for _, e := range es {
@@ -308,7 +394,7 @@ func getRollupExprArg(arg expr) *rollupExpr {
 	return &reNew
 }

-func evalRollupFunc(ec *EvalConfig, name string, rf rollupFunc, re *rollupExpr) ([]*timeseries, error) {
+func evalRollupFunc(ec *EvalConfig, name string, rf rollupFunc, re *rollupExpr, iafc *incrementalAggrFuncContext) ([]*timeseries, error) {
 	ecNew := ec
 	var offset int64
 	if len(re.Offset) > 0 {
@@ -325,19 +411,11 @@ func evalRollupFunc(ec *EvalConfig, name string, rf rollupFunc, re *rollupExpr)
 	var rvs []*timeseries
 	var err error
 	if me, ok := re.Expr.(*metricExpr); ok {
-		if me.IsEmpty() {
-			rvs = evalNumber(ecNew, nan)
-		} else {
-			var window int64
-			if len(re.Window) > 0 {
-				window, err = DurationValue(re.Window, ec.Step)
-				if err != nil {
-					return nil, err
-				}
-			}
-			rvs, err = evalRollupFuncWithMetricExpr(ecNew, name, rf, me, window)
-		}
+		rvs, err = evalRollupFuncWithMetricExpr(ecNew, name, rf, me, iafc, re.Window)
 	} else {
+		if iafc != nil {
+			logger.Panicf("BUG: iafc must be nil for rollup %q over subquery %q", name, re.AppendString(nil))
+		}
 		rvs, err = evalRollupFuncWithSubquery(ecNew, name, rf, re)
 	}
 	if err != nil {
@@ -379,8 +457,7 @@ func evalRollupFuncWithSubquery(ec *EvalConfig, name string, rf rollupFunc, re *
 	}

 	ecSQ := newEvalConfig(ec)
-	ecSQ.Start -= window + maxSilenceInterval
-	ecSQ.End += step
+	ecSQ.Start -= window + maxSilenceInterval + step
 	ecSQ.Step = step
 	if err := ValidateMaxPointsPerTimeseries(ecSQ.Start, ecSQ.End, ecSQ.Step); err != nil {
 		return nil, err
@@ -395,30 +472,19 @@ func evalRollupFuncWithSubquery(ec *EvalConfig, name string, rf rollupFunc, re *
 	preFunc, rcs := getRollupConfigs(name, rf, ec.Start, ec.End, ec.Step, window, sharedTimestamps)
 	tss := make([]*timeseries, 0, len(tssSQ)*len(rcs))
 	var tssLock sync.Mutex
+	removeMetricGroup := !rollupFuncsKeepMetricGroup[name]
 	doParallel(tssSQ, func(tsSQ *timeseries, values []float64, timestamps []int64) ([]float64, []int64) {
 		values, timestamps = removeNanValues(values[:0], timestamps[:0], tsSQ.Values, tsSQ.Timestamps)
 		preFunc(values, timestamps)
 		for _, rc := range rcs {
 			var ts timeseries
-			ts.MetricName.CopyFrom(&tsSQ.MetricName)
-			if len(rc.TagValue) > 0 {
-				ts.MetricName.AddTag("rollup", rc.TagValue)
-			}
-			ts.Values = rc.Do(ts.Values[:0], values, timestamps)
-			ts.Timestamps = sharedTimestamps
-			ts.denyReuse = true
+			doRollupForTimeseries(rc, &ts, &tsSQ.MetricName, values, timestamps, sharedTimestamps, removeMetricGroup)
 			tssLock.Lock()
 			tss = append(tss, &ts)
 			tssLock.Unlock()
 		}
 		return values, timestamps
 	})
-	if !rollupFuncsKeepMetricGroup[name] {
-		tss = copyTimeseriesMetricNames(tss)
-		for _, ts := range tss {
-			ts.MetricName.ResetMetricGroup()
-		}
-	}
 	return tss, nil
 }

@@ -472,31 +538,27 @@ func removeNanValues(dstValues []float64, dstTimestamps []int64, values []float6
 	return dstValues, dstTimestamps
 }

-func getMaxPointsPerRollup() int {
-	maxPointsPerRollupOnce.Do(func() {
-		n := memory.Allowed() / 16 / 8
-		if n <= 16 {
-			n = 16
-		}
-		maxPointsPerRollup = n
-	})
-	return maxPointsPerRollup
-}
-
-var (
-	maxPointsPerRollup     int
-	maxPointsPerRollupOnce sync.Once
-)
-
 var (
 	rollupResultCacheFullHits    = metrics.NewCounter(`vm_rollup_result_cache_full_hits_total`)
 	rollupResultCachePartialHits = metrics.NewCounter(`vm_rollup_result_cache_partial_hits_total`)
 	rollupResultCacheMiss        = metrics.NewCounter(`vm_rollup_result_cache_miss_total`)
 )

-func evalRollupFuncWithMetricExpr(ec *EvalConfig, name string, rf rollupFunc, me *metricExpr, window int64) ([]*timeseries, error) {
+func evalRollupFuncWithMetricExpr(ec *EvalConfig, name string, rf rollupFunc, me *metricExpr, iafc *incrementalAggrFuncContext, windowStr string) ([]*timeseries, error) {
+	if me.IsEmpty() {
+		return evalNumber(ec, nan), nil
+	}
+	var window int64
+	if len(windowStr) > 0 {
+		var err error
+		window, err = DurationValue(windowStr, ec.Step)
+		if err != nil {
+			return nil, err
+		}
+	}
+
 	// Search for partial results in cache.
-	tssCached, start := rollupResultCacheV.Get(name, ec, me, window)
+	tssCached, start := rollupResultCacheV.Get(name, ec, me, iafc, window)
 	if start > ec.End {
 		// The result is fully cached.
 		rollupResultCacheFullHits.Inc()
@@ -510,14 +572,19 @@ func evalRollupFuncWithMetricExpr(ec *EvalConfig, name string, rf rollupFunc, me

 	// Fetch the remaining part of the result.
 	sq := &storage.SearchQuery{
+		AccountID:    ec.AuthToken.AccountID,
+		ProjectID:    ec.AuthToken.ProjectID,
 		MinTimestamp: start - window - maxSilenceInterval,
 		MaxTimestamp: ec.End + ec.Step,
 		TagFilterss:  [][]storage.TagFilter{me.TagFilters},
 	}
-	rss, err := netstorage.ProcessSearchQuery(sq, ec.Deadline)
+	rss, isPartial, err := netstorage.ProcessSearchQuery(ec.AuthToken, sq, true, ec.Deadline)
 	if err != nil {
 		return nil, err
 	}
+	if isPartial && ec.DenyPartialResponse {
+		return nil, fmt.Errorf("cannot return full response, since some of vmstorage nodes are unavailable")
+	}
 	rssLen := rss.Len()
 	if rssLen == 0 {
 		rss.Cancel()
@@ -533,28 +600,80 @@ func evalRollupFuncWithMetricExpr(ec *EvalConfig, name string, rf rollupFunc, me
 	// Verify timeseries fit available memory after the rollup.
 	// Take into account points from tssCached.
 	pointsPerTimeseries := 1 + (ec.End-ec.Start)/ec.Step
-	if uint64(pointsPerTimeseries) > uint64(getMaxPointsPerRollup()/rssLen/len(rcs)) {
+	rollupPoints := mulNoOverflow(pointsPerTimeseries, int64(rssLen*len(rcs)))
+	rollupMemorySize := mulNoOverflow(rollupPoints, 16)
+	rml := getRollupMemoryLimiter()
+	if !rml.Get(uint64(rollupMemorySize)) {
 		rss.Cancel()
-		return nil, fmt.Errorf("cannot process more than %d data points for %d time series with %d points in each time series; "+
-			"possible solutions are: reducing the number of matching time series; switching to node with more RAM; increasing `step` query arg (%gs)",
-			getMaxPointsPerRollup(), rssLen*len(rcs), pointsPerTimeseries, float64(ec.Step)/1e3)
+		return nil, fmt.Errorf("not enough memory for processing %d data points across %d time series with %d points in each time series; "+
+			"possible solutions are: reducing the number of matching time series; switching to node with more RAM; "+
+			"increasing -memory.allowedPercent; increasing `step` query arg (%gs)",
+			rollupPoints, rssLen*len(rcs), pointsPerTimeseries, float64(ec.Step)/1e3)
 	}
+	defer rml.Put(uint64(rollupMemorySize))

 	// Evaluate rollup
-	tss := make([]*timeseries, 0, rssLen*len(rcs))
+	removeMetricGroup := !rollupFuncsKeepMetricGroup[name]
+	var tss []*timeseries
+	if iafc != nil {
+		tss, err = evalRollupWithIncrementalAggregate(iafc, rss, rcs, preFunc, sharedTimestamps, removeMetricGroup)
+	} else {
+		tss, err = evalRollupNoIncrementalAggregate(rss, rcs, preFunc, sharedTimestamps, removeMetricGroup)
+	}
+	if err != nil {
+		return nil, err
+	}
+	tss = mergeTimeseries(tssCached, tss, start, ec)
+	if !isPartial {
+		rollupResultCacheV.Put(name, ec, me, iafc, window, tss)
+	}
+	return tss, nil
+}
+
+var (
+	rollupMemoryLimiter     memoryLimiter
+	rollupMemoryLimiterOnce sync.Once
+)
+
+// getRollupMemoryLimiter returns the shared rollup memory limiter.
+// On the first call its MaxSize is set to a quarter of memory.Allowed().
+func getRollupMemoryLimiter() *memoryLimiter {
+	initLimiter := func() {
+		allowed := uint64(memory.Allowed())
+		rollupMemoryLimiter.MaxSize = allowed / 4
+	}
+	rollupMemoryLimiterOnce.Do(initLimiter)
+	return &rollupMemoryLimiter
+}
+
+// evalRollupWithIncrementalAggregate calculates rollups for every result in rss
+// and feeds them directly into iafc instead of collecting them into a slice.
+//
+// For every result preFunc is applied to the raw values and timestamps first.
+// Then, for every config in rcs, the rollup is written into a scratch timeseries
+// obtained via getTimeseries and passed to iafc.updateTimeseries together with
+// the workerID supplied by rss.RunParallel.
+//
+// It returns iafc.finalizeTimeseries() on success or the error from rss.RunParallel.
+func evalRollupWithIncrementalAggregate(iafc *incrementalAggrFuncContext, rss *netstorage.Results, rcs []*rollupConfig,
+	preFunc func(values []float64, timestamps []int64), sharedTimestamps []int64, removeMetricGroup bool) ([]*timeseries, error) {
+	err := rss.RunParallel(func(rs *netstorage.Result, workerID uint) {
+		preFunc(rs.Values, rs.Timestamps)
+		// A single scratch timeseries is re-used across all the rollup configs
+		// and is handed back via putTimeseries when the callback returns.
+		ts := getTimeseries()
+		defer putTimeseries(ts)
+		for _, rc := range rcs {
+			ts.Reset()
+			doRollupForTimeseries(rc, ts, &rs.MetricName, rs.Values, rs.Timestamps, sharedTimestamps, removeMetricGroup)
+			iafc.updateTimeseries(ts, workerID)
+
+			// ts.Timestamps points to sharedTimestamps. Zero it, so it can be re-used.
+			ts.Timestamps = nil
+			// doRollupForTimeseries sets denyReuse; clear it again
+			// (presumably required by ts.Reset and putTimeseries - TODO confirm).
+			ts.denyReuse = false
+		}
+	})
+	if err != nil {
+		return nil, err
+	}
+	tss := iafc.finalizeTimeseries()
+	return tss, nil
+}
+
+func evalRollupNoIncrementalAggregate(rss *netstorage.Results, rcs []*rollupConfig,
+	preFunc func(values []float64, timestamps []int64), sharedTimestamps []int64, removeMetricGroup bool) ([]*timeseries, error) {
+	tss := make([]*timeseries, 0, rss.Len()*len(rcs))
 	var tssLock sync.Mutex
-	err = rss.RunParallel(func(rs *netstorage.Result) {
+	err := rss.RunParallel(func(rs *netstorage.Result, workerID uint) {
 		preFunc(rs.Values, rs.Timestamps)
 		for _, rc := range rcs {
 			var ts timeseries
-			ts.MetricName.CopyFrom(&rs.MetricName)
-			if len(rc.TagValue) > 0 {
-				ts.MetricName.AddTag("rollup", rc.TagValue)
-			}
-			ts.Values = rc.Do(ts.Values[:0], rs.Values, rs.Timestamps)
-			ts.Timestamps = sharedTimestamps
-			ts.denyReuse = true
-
+			doRollupForTimeseries(rc, &ts, &rs.MetricName, rs.Values, rs.Timestamps, sharedTimestamps, removeMetricGroup)
 			tssLock.Lock()
 			tss = append(tss, &ts)
 			tssLock.Unlock()
@@ -563,18 +682,23 @@ func evalRollupFuncWithMetricExpr(ec *EvalConfig, name string, rf rollupFunc, me
 	if err != nil {
 		return nil, err
 	}
-	if !rollupFuncsKeepMetricGroup[name] {
-		tss = copyTimeseriesMetricNames(tss)
-		for _, ts := range tss {
-			ts.MetricName.ResetMetricGroup()
-		}
-	}
-	tss = mergeTimeseries(tssCached, tss, start, ec)
-	rollupResultCacheV.Put(name, ec, me, window, tss)
-
 	return tss, nil
 }

+// doRollupForTimeseries fills tsDst with the rollup rc calculated over
+// valuesSrc and timestampsSrc.
+//
+// The metric name is copied from mnSrc, extended with the `rollup` tag when rc
+// has a non-empty TagValue, and stripped from the metric group when
+// removeMetricGroup is set. The values are written by rc.Do into the Values
+// slice of tsDst truncated to zero length. The resulting series references
+// sharedTimestamps and is marked with denyReuse.
+func doRollupForTimeseries(rc *rollupConfig, tsDst *timeseries, mnSrc *storage.MetricName, valuesSrc []float64, timestampsSrc []int64,
+	sharedTimestamps []int64, removeMetricGroup bool) {
+	mn := &tsDst.MetricName
+	mn.CopyFrom(mnSrc)
+	if rc.TagValue != "" {
+		mn.AddTag("rollup", rc.TagValue)
+	}
+	if removeMetricGroup {
+		mn.ResetMetricGroup()
+	}
+
+	dstValues := tsDst.Values[:0]
+	tsDst.Values = rc.Do(dstValues, valuesSrc, timestampsSrc)
+	tsDst.Timestamps = sharedTimestamps
+	tsDst.denyReuse = true
+}
+
 func getRollupConfigs(name string, rf rollupFunc, start, end, step, window int64, sharedTimestamps []int64) (func(values []float64, timestamps []int64), []*rollupConfig) {
 	preFunc := func(values []float64, timestamps []int64) {}
 	if rollupFuncsRemoveCounterResets[name] {
@@ -584,13 +708,14 @@ func getRollupConfigs(name string, rf rollupFunc, start, end, step, window int64
 	}
 	newRollupConfig := func(rf rollupFunc, tagValue string) *rollupConfig {
 		return &rollupConfig{
-			TagValue:   tagValue,
-			Func:       rf,
-			Start:      start,
-			End:        end,
-			Step:       step,
-			Window:     window,
-			Timestamps: sharedTimestamps,
+			TagValue:        tagValue,
+			Func:            rf,
+			Start:           start,
+			End:             end,
+			Step:            step,
+			Window:          window,
+			MayAdjustWindow: rollupFuncsMayAdjustWindow[name],
+			Timestamps:      sharedTimestamps,
 		}
 	}
 	appendRollupConfigs := func(dst []*rollupConfig) []*rollupConfig {
@@ -617,6 +742,11 @@ func getRollupConfigs(name string, rf rollupFunc, start, end, step, window int64
 			deltaValues(values)
 		}
 		rcs = appendRollupConfigs(rcs)
+	case "rollup_candlestick":
+		rcs = append(rcs, newRollupConfig(rollupFirst, "open"))
+		rcs = append(rcs, newRollupConfig(rollupLast, "close"))
+		rcs = append(rcs, newRollupConfig(rollupMin, "low"))
+		rcs = append(rcs, newRollupConfig(rollupMax, "high"))
 	default:
 		rcs = append(rcs, newRollupConfig(rf, ""))
 	}
@@ -628,6 +758,8 @@ var bbPool bytesutil.ByteBufferPool
 func evalNumber(ec *EvalConfig, n float64) []*timeseries {
 	var ts timeseries
 	ts.denyReuse = true
+	ts.MetricName.AccountID = ec.AuthToken.AccountID
+	ts.MetricName.ProjectID = ec.AuthToken.ProjectID
 	timestamps := ec.getSharedTimestamps()
 	values := make([]float64, len(timestamps))
 	for i := range timestamps {
@@ -653,3 +785,11 @@ func evalTime(ec *EvalConfig) []*timeseries {
 	}
 	return rv
 }
+
+// mulNoOverflow returns a*b for non-negative a and b.
+// math.MaxInt64 is returned if the product overflows int64.
+func mulNoOverflow(a, b int64) int64 {
+	if b == 0 {
+		// The division below would panic with `integer divide by zero`.
+		return 0
+	}
+	if math.MaxInt64/b < a {
+		// Overflow
+		return math.MaxInt64
+	}
+	return a * b
+}
--- a/app/vmselect/promql/exec.go
+++ b/app/vmselect/promql/exec.go
@@ -1,16 +1,23 @@
 package promql

 import (
+	"flag"
 	"fmt"
 	"math"
 	"sort"
 	"sync"
 	"sync/atomic"
+	"time"

 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/netstorage"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
 	"github.com/VictoriaMetrics/metrics"
 )

+var logSlowQueryDuration = flag.Duration("search.logSlowQueryDuration", 5*time.Second, "Log queries with execution time exceeding this value. Zero disables slow query logging")
+
+var slowQueries = metrics.NewCounter(`vm_slow_queries_total`)
+
 // ExpandWithExprs expands WITH expressions inside q and returns the resulting
 // PromQL without WITH expressions.
 func ExpandWithExprs(q string) (string, error) {
@@ -22,8 +29,20 @@ func ExpandWithExprs(q string) (string, error) {
 	return string(buf), nil
 }

-// Exec executes q for the given ec until the deadline.
-func Exec(ec *EvalConfig, q string) ([]netstorage.Result, error) {
+// Exec executes q for the given ec.
+func Exec(ec *EvalConfig, q string, isFirstPointOnly bool) ([]netstorage.Result, error) {
+	if *logSlowQueryDuration > 0 {
+		startTime := time.Now()
+		defer func() {
+			d := time.Since(startTime)
+			if d >= *logSlowQueryDuration {
+				logger.Infof("slow query according to -search.logSlowQueryDuration=%s: duration=%s, start=%d, end=%d, step=%d, accountID=%d, projectID=%d, query=%q",
+					*logSlowQueryDuration, d, ec.Start/1000, ec.End/1000, ec.Step/1000, ec.AuthToken.AccountID, ec.AuthToken.ProjectID, q)
+				slowQueries.Inc()
+			}
+		}()
+	}
+
 	ec.validate()

 	e, err := parsePromQLWithCache(q)
@@ -50,6 +69,14 @@ func Exec(ec *EvalConfig, q string) ([]netstorage.Result, error) {
 	}
 	ec.End -= ec.Step

+	if isFirstPointOnly {
+		// Remove all the points except the first one from every time series.
+		for _, ts := range rv {
+			ts.Values = ts.Values[:1]
+			ts.Timestamps = ts.Timestamps[:1]
+		}
+	}
+
 	maySort := maySortResults(e, rv)
 	result, err := timeseriesToResult(rv, maySort)
 	if err != nil {
@@ -78,14 +105,14 @@ func maySortResults(e expr, tss []*timeseries) bool {
 func timeseriesToResult(tss []*timeseries, maySort bool) ([]netstorage.Result, error) {
 	tss = removeNaNs(tss)
 	result := make([]netstorage.Result, len(tss))
-	m := make(map[string]bool)
+	m := make(map[string]struct{}, len(tss))
 	bb := bbPool.Get()
 	for i, ts := range tss {
 		bb.B = marshalMetricNameSorted(bb.B[:0], &ts.MetricName)
-		if m[string(bb.B)] {
+		if _, ok := m[string(bb.B)]; ok {
 			return nil, fmt.Errorf(`duplicate output timeseries: %s%s`, ts.MetricName.MetricGroup, stringMetricName(&ts.MetricName))
 		}
-		m[string(bb.B)] = true
+		m[string(bb.B)] = struct{}{}

 		rs := &result[i]
 		rs.MetricNameMarshaled = append(rs.MetricNameMarshaled[:0], bb.B...)
@@ -107,18 +134,23 @@ func timeseriesToResult(tss []*timeseries, maySort bool) ([]netstorage.Result, e
 func removeNaNs(tss []*timeseries) []*timeseries {
 	rvs := tss[:0]
 	for _, ts := range tss {
-		nans := 0
+		allNans := true
 		for _, v := range ts.Values {
-			if math.IsNaN(v) {
-				nans++
+			if !math.IsNaN(v) {
+				allNans = false
+				break
 			}
 		}
-		if nans == len(ts.Values) {
+		if allNans {
 			// Skip timeseries with all NaNs.
 			continue
 		}
 		rvs = append(rvs, ts)
 	}
+	for i := len(rvs); i < len(tss); i++ {
+		// Zero unused time series, so GC could reclaim them.
+		tss[i] = nil
+	}
 	return rvs
 }

--- a/app/vmselect/promql/exec_test.go
+++ b/app/vmselect/promql/exec_test.go
@@ -5,6 +5,7 @@ import (
 	"time"

 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/netstorage"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
 )

@@ -47,22 +48,31 @@ func TestExpandWithExprsError(t *testing.T) {
 }

 func TestExecSuccess(t *testing.T) {
+	accountID := uint32(123)
+	projectID := uint32(567)
 	start := int64(1000e3)
 	end := int64(2000e3)
 	step := int64(200e3)
 	timestampsExpected := []int64{1000e3, 1200e3, 1400e3, 1600e3, 1800e3, 2000e3}
-	metricNameExpected := storage.MetricName{}
+	metricNameExpected := storage.MetricName{
+		AccountID: accountID,
+		ProjectID: projectID,
+	}

 	f := func(q string, resultExpected []netstorage.Result) {
 		t.Helper()
 		ec := &EvalConfig{
+			AuthToken: &auth.Token{
+				AccountID: accountID,
+				ProjectID: projectID,
+			},
 			Start:    start,
 			End:      end,
 			Step:     step,
 			Deadline: netstorage.NewDeadline(time.Minute),
 		}
 		for i := 0; i < 5; i++ {
-			result, err := Exec(ec, q)
+			result, err := Exec(ec, q, false)
 			if err != nil {
 				t.Fatalf(`unexpected error when executing %q: %s`, q, err)
 			}
@@ -286,7 +296,7 @@ func TestExecSuccess(t *testing.T) {
 		q := `time()[300s:100s] offset 100s`
 		r := netstorage.Result{
 			MetricName: metricNameExpected,
-			Values:     []float64{800, 1000, 1200, 1400, 1600, 1800},
+			Values:     []float64{900, 1100, 1300, 1500, 1700, 1900},
 			Timestamps: timestampsExpected,
 		}
 		resultExpected := []netstorage.Result{r}
@@ -297,7 +307,7 @@ func TestExecSuccess(t *testing.T) {
 		q := `time()[1.5i:0.5i] offset 0.5i`
 		r := netstorage.Result{
 			MetricName: metricNameExpected,
-			Values:     []float64{800, 1000, 1200, 1400, 1600, 1800},
+			Values:     []float64{900, 1100, 1300, 1500, 1700, 1900},
 			Timestamps: timestampsExpected,
 		}
 		resultExpected := []netstorage.Result{r}
@@ -308,7 +318,7 @@ func TestExecSuccess(t *testing.T) {
 		q := `time()[300s] offset 100s`
 		r := netstorage.Result{
 			MetricName: metricNameExpected,
-			Values:     []float64{700, 900, 1100, 1300, 1500, 1700},
+			Values:     []float64{900, 1100, 1300, 1500, 1700, 1900},
 			Timestamps: timestampsExpected,
 		}
 		resultExpected := []netstorage.Result{r}
@@ -319,7 +329,7 @@ func TestExecSuccess(t *testing.T) {
 		q := `time()[300s]`
 		r := netstorage.Result{
 			MetricName: metricNameExpected,
-			Values:     []float64{800, 1000, 1200, 1400, 1600, 1800},
+			Values:     []float64{1000, 1200, 1400, 1600, 1800, 2000},
 			Timestamps: timestampsExpected,
 		}
 		resultExpected := []netstorage.Result{r}
@@ -574,6 +584,30 @@ func TestExecSuccess(t *testing.T) {
 		resultExpected := []netstorage.Result{r}
 		f(q, resultExpected)
 	})
+	t.Run(`clamp_max(alias(time(),"foobar"), 1400)`, func(t *testing.T) {
+		t.Parallel()
+		q := `clamp_max(alias(time(), "foobar"), 1400)`
+		r := netstorage.Result{
+			MetricName: metricNameExpected,
+			Values:     []float64{1000, 1200, 1400, 1400, 1400, 1400},
+			Timestamps: timestampsExpected,
+		}
+		r.MetricName.MetricGroup = []byte("foobar")
+		resultExpected := []netstorage.Result{r}
+		f(q, resultExpected)
+	})
+	t.Run(`CLAmp_MAx(alias(time(),"foobar"), 1400)`, func(t *testing.T) {
+		t.Parallel()
+		q := `CLAmp_MAx(alias(time(), "foobar"), 1400)`
+		r := netstorage.Result{
+			MetricName: metricNameExpected,
+			Values:     []float64{1000, 1200, 1400, 1400, 1400, 1400},
+			Timestamps: timestampsExpected,
+		}
+		r.MetricName.MetricGroup = []byte("foobar")
+		resultExpected := []netstorage.Result{r}
+		f(q, resultExpected)
+	})
 	t.Run("clamp_min(time(), -time()+3000)", func(t *testing.T) {
 		t.Parallel()
 		q := `clamp_min(time(), -time()+2500)`
@@ -789,6 +823,18 @@ func TestExecSuccess(t *testing.T) {
 		resultExpected := []netstorage.Result{}
 		f(q, resultExpected)
 	})
+	t.Run(`alias()`, func(t *testing.T) {
+		t.Parallel()
+		q := `alias(time(), "foobar")`
+		r := netstorage.Result{
+			MetricName: metricNameExpected,
+			Values:     []float64{1000, 1200, 1400, 1600, 1800, 2000},
+			Timestamps: timestampsExpected,
+		}
+		r.MetricName.MetricGroup = []byte("foobar")
+		resultExpected := []netstorage.Result{r}
+		f(q, resultExpected)
+	})
 	t.Run(`label_set(tag)`, func(t *testing.T) {
 		t.Parallel()
 		q := `label_set(time(), "tagname", "tagvalue")`
@@ -1266,6 +1312,72 @@ func TestExecSuccess(t *testing.T) {
 		resultExpected := []netstorage.Result{r}
 		f(q, resultExpected)
 	})
+	t.Run(`label_value()`, func(t *testing.T) {
+		t.Parallel()
+		q := `with (
+			x = (
+				label_set(time(), "foo", "123.456", "__name__", "aaa"),
+				label_set(-time(), "foo", "bar", "__name__", "bbb"),
+				label_set(-time(), "__name__", "bxs"),
+				label_set(-time(), "foo", "45", "bar", "xs"),
+			)
+		)
+		sort(x + label_value(x, "foo"))`
+		r1 := netstorage.Result{
+			MetricName: metricNameExpected,
+			Values:     []float64{-955, -1155, -1355, -1555, -1755, -1955},
+			Timestamps: timestampsExpected,
+		}
+		r1.MetricName.Tags = []storage.Tag{
+			{
+				Key:   []byte("bar"),
+				Value: []byte("xs"),
+			},
+			{
+				Key:   []byte("foo"),
+				Value: []byte("45"),
+			},
+		}
+		r2 := netstorage.Result{
+			MetricName: metricNameExpected,
+			Values:     []float64{1123.456, 1323.456, 1523.456, 1723.456, 1923.456, 2123.456},
+			Timestamps: timestampsExpected,
+		}
+		r2.MetricName.Tags = []storage.Tag{{
+			Key:   []byte("foo"),
+			Value: []byte("123.456"),
+		}}
+		resultExpected := []netstorage.Result{r1, r2}
+		f(q, resultExpected)
+	})
+	t.Run(`label_transform(mismatch)`, func(t *testing.T) {
+		t.Parallel()
+		q := `label_transform(time(), "__name__", "foobar", "xx")`
+		r := netstorage.Result{
+			MetricName: metricNameExpected,
+			Values:     []float64{1000, 1200, 1400, 1600, 1800, 2000},
+			Timestamps: timestampsExpected,
+		}
+		resultExpected := []netstorage.Result{r}
+		f(q, resultExpected)
+	})
+	t.Run(`label_transform(match)`, func(t *testing.T) {
+		t.Parallel()
+		q := `label_transform(
+			label_set(time(), "foo", "a.bar.baz"),
+			"foo", "\\.", "-")`
+		r := netstorage.Result{
+			MetricName: metricNameExpected,
+			Values:     []float64{1000, 1200, 1400, 1600, 1800, 2000},
+			Timestamps: timestampsExpected,
+		}
+		r.MetricName.Tags = []storage.Tag{{
+			Key:   []byte("foo"),
+			Value: []byte("a-bar-baz"),
+		}}
+		resultExpected := []netstorage.Result{r}
+		f(q, resultExpected)
+	})
 	t.Run(`label_replace(mismatch)`, func(t *testing.T) {
 		t.Parallel()
 		q := `label_replace(time(), "__name__", "x${1}y", "foo", ".+")`
@@ -1410,6 +1522,62 @@ func TestExecSuccess(t *testing.T) {
 		resultExpected := []netstorage.Result{r1, r2}
 		f(q, resultExpected)
 	})
+	t.Run(`a cmp scalar (leave MetricGroup)`, func(t *testing.T) {
+		t.Parallel()
+		q := `sort_desc((
+			label_set(time(), "__name__", "foo", "a", "x"),
+			label_set(time()+200, "__name__", "bar", "a", "x"),
+		) > 1300)`
+		r1 := netstorage.Result{
+			MetricName: metricNameExpected,
+			Values:     []float64{nan, 1400, 1600, 1800, 2000, 2200},
+			Timestamps: timestampsExpected,
+		}
+		r1.MetricName.MetricGroup = []byte("bar")
+		r1.MetricName.Tags = []storage.Tag{{
+			Key:   []byte("a"),
+			Value: []byte("x"),
+		}}
+		r2 := netstorage.Result{
+			MetricName: metricNameExpected,
+			Values:     []float64{nan, nan, 1400, 1600, 1800, 2000},
+			Timestamps: timestampsExpected,
+		}
+		r2.MetricName.MetricGroup = []byte("foo")
+		r2.MetricName.Tags = []storage.Tag{{
+			Key:   []byte("a"),
+			Value: []byte("x"),
+		}}
+		resultExpected := []netstorage.Result{r1, r2}
+		f(q, resultExpected)
+	})
+	t.Run(`a cmp bool scalar (drop MetricGroup)`, func(t *testing.T) {
+		t.Parallel()
+		q := `sort_desc((
+			label_set(time(), "__name__", "foo", "a", "x"),
+			label_set(time()+200, "__name__", "bar", "a", "y"),
+		) >= bool 1200)`
+		r1 := netstorage.Result{
+			MetricName: metricNameExpected,
+			Values:     []float64{1, 1, 1, 1, 1, 1},
+			Timestamps: timestampsExpected,
+		}
+		r1.MetricName.Tags = []storage.Tag{{
+			Key:   []byte("a"),
+			Value: []byte("y"),
+		}}
+		r2 := netstorage.Result{
+			MetricName: metricNameExpected,
+			Values:     []float64{0, 1, 1, 1, 1, 1},
+			Timestamps: timestampsExpected,
+		}
+		r2.MetricName.Tags = []storage.Tag{{
+			Key:   []byte("a"),
+			Value: []byte("x"),
+		}}
+		resultExpected := []netstorage.Result{r1, r2}
+		f(q, resultExpected)
+	})
 	t.Run(`1 > 2`, func(t *testing.T) {
 		t.Parallel()
 		q := `1 > 2`
@@ -1512,13 +1680,14 @@ func TestExecSuccess(t *testing.T) {
 	t.Run(`vector default scalar`, func(t *testing.T) {
 		t.Parallel()
 		q := `sort_desc(union(
-			label_set(time() > 1400, "foo", "bar"),
-			label_set(time() < 1700, "foo", "baz")) default 123)`
+			label_set(time() > 1400, "__name__", "x", "foo", "bar"),
+			label_set(time() < 1700, "__name__", "y", "foo", "baz")) default 123)`
 		r1 := netstorage.Result{
 			MetricName: metricNameExpected,
 			Values:     []float64{123, 123, 123, 1600, 1800, 2000},
 			Timestamps: timestampsExpected,
 		}
+		r1.MetricName.MetricGroup = []byte("x")
 		r1.MetricName.Tags = []storage.Tag{{
 			Key:   []byte("foo"),
 			Value: []byte("bar"),
@@ -1528,6 +1697,7 @@ func TestExecSuccess(t *testing.T) {
 			Values:     []float64{1000, 1200, 1400, 1600, 123, 123},
 			Timestamps: timestampsExpected,
 		}
+		r2.MetricName.MetricGroup = []byte("y")
 		r2.MetricName.Tags = []storage.Tag{{
 			Key:   []byte("foo"),
 			Value: []byte("baz"),
@@ -1699,6 +1869,96 @@ func TestExecSuccess(t *testing.T) {
 		resultExpected := []netstorage.Result{r}
 		f(q, resultExpected)
 	})
+	t.Run(`vector * on(foo) group_left() duplicate_nonoverlapping_timeseries`, func(t *testing.T) {
+		t.Parallel()
+		q := `label_set(time()/10, "foo", "bar", "xx", "yy", "__name__", "qwert") + on(foo) group_left() (
+			label_set(time() < 1400, "foo", "bar", "op", "le"),
+			label_set(time() >= 1400, "foo", "bar", "op", "ge"),
+		)`
+		r1 := netstorage.Result{
+			MetricName: metricNameExpected,
+			Values:     []float64{1100, 1320, 1540, 1760, 1980, 2200},
+			Timestamps: timestampsExpected,
+		}
+		r1.MetricName.Tags = []storage.Tag{
+			{
+				Key:   []byte("foo"),
+				Value: []byte("bar"),
+			},
+			{
+				Key:   []byte("xx"),
+				Value: []byte("yy"),
+			},
+		}
+		resultExpected := []netstorage.Result{r1}
+		f(q, resultExpected)
+	})
+	t.Run(`vector * on(foo) group_left(__name__)`, func(t *testing.T) {
+		t.Parallel()
+		q := `label_set(time()/10, "foo", "bar", "xx", "yy", "__name__", "qwert") + on(foo) group_left(__name__)
+			label_set(time(), "foo", "bar", "__name__", "aaa")`
+		r1 := netstorage.Result{
+			MetricName: metricNameExpected,
+			Values:     []float64{1100, 1320, 1540, 1760, 1980, 2200},
+			Timestamps: timestampsExpected,
+		}
+		r1.MetricName.MetricGroup = []byte("aaa")
+		r1.MetricName.Tags = []storage.Tag{
+			{
+				Key:   []byte("foo"),
+				Value: []byte("bar"),
+			},
+			{
+				Key:   []byte("xx"),
+				Value: []byte("yy"),
+			},
+		}
+		resultExpected := []netstorage.Result{r1}
+		f(q, resultExpected)
+	})
+	t.Run(`vector * on(foo) group_right()`, func(t *testing.T) {
+		t.Parallel()
+		q := `sort(label_set(time()/10, "foo", "bar", "xx", "yy", "__name__", "qwert") + on(foo) group_right(xx) (
+			label_set(time(), "foo", "bar", "__name__", "aaa"),
+			label_set(time()+3, "foo", "bar", "__name__", "yyy","ppp", "123"),
+		))`
+		r1 := netstorage.Result{
+			MetricName: metricNameExpected,
+			Values:     []float64{1100, 1320, 1540, 1760, 1980, 2200},
+			Timestamps: timestampsExpected,
+		}
+		r1.MetricName.Tags = []storage.Tag{
+			{
+				Key:   []byte("foo"),
+				Value: []byte("bar"),
+			},
+			{
+				Key:   []byte("xx"),
+				Value: []byte("yy"),
+			},
+		}
+		r2 := netstorage.Result{
+			MetricName: metricNameExpected,
+			Values:     []float64{1103, 1323, 1543, 1763, 1983, 2203},
+			Timestamps: timestampsExpected,
+		}
+		r2.MetricName.Tags = []storage.Tag{
+			{
+				Key:   []byte("foo"),
+				Value: []byte("bar"),
+			},
+			{
+				Key:   []byte("ppp"),
+				Value: []byte("123"),
+			},
+			{
+				Key:   []byte("xx"),
+				Value: []byte("yy"),
+			},
+		}
+		resultExpected := []netstorage.Result{r1, r2}
+		f(q, resultExpected)
+	})
 	t.Run(`vector * on() group_left scalar`, func(t *testing.T) {
 		t.Parallel()
 		q := `sort_desc((label_set(time(), "foo", "bar") or label_set(10, "foo", "qwert")) * on() group_left 2)`
@@ -2018,6 +2278,83 @@ func TestExecSuccess(t *testing.T) {
 		resultExpected := []netstorage.Result{r1, r2}
 		f(q, resultExpected)
 	})
+	t.Run(`histogram_quantile(negative-bucket-count)`, func(t *testing.T) {
+		t.Parallel()
+		q := `histogram_quantile(0.6,
+			label_set(90, "foo", "bar", "le", "10")
+			or label_set(-100, "foo", "bar", "le", "30")
+			or label_set(300, "foo", "bar", "le", "+Inf")
+		)`
+		r := netstorage.Result{
+			MetricName: metricNameExpected,
+			Values:     []float64{30, 30, 30, 30, 30, 30},
+			Timestamps: timestampsExpected,
+		}
+		r.MetricName.Tags = []storage.Tag{{
+			Key:   []byte("foo"),
+			Value: []byte("bar"),
+		}}
+		resultExpected := []netstorage.Result{r}
+		f(q, resultExpected)
+	})
+	t.Run(`histogram_quantile(nan-bucket-count)`, func(t *testing.T) {
+		t.Parallel()
+		q := `histogram_quantile(0.6,
+			label_set(90, "foo", "bar", "le", "10")
+			or label_set(NaN, "foo", "bar", "le", "30")
+			or label_set(300, "foo", "bar", "le", "+Inf")
+		)`
+		r := netstorage.Result{
+			MetricName: metricNameExpected,
+			Values:     []float64{30, 30, 30, 30, 30, 30},
+			Timestamps: timestampsExpected,
+		}
+		r.MetricName.Tags = []storage.Tag{{
+			Key:   []byte("foo"),
+			Value: []byte("bar"),
+		}}
+		resultExpected := []netstorage.Result{r}
+		f(q, resultExpected)
+	})
+	t.Run(`histogram_quantile(normal-bucket-count)`, func(t *testing.T) { // all bucket counts are finite and non-decreasing
+		t.Parallel()
+		q := `histogram_quantile(0.2,
+			label_set(0, "foo", "bar", "le", "10")
+			or label_set(100, "foo", "bar", "le", "30")
+			or label_set(300, "foo", "bar", "le", "+Inf")
+		)`
+		r := netstorage.Result{
+			MetricName: metricNameExpected,
+			Values:     []float64{22, 22, 22, 22, 22, 22}, // consistent with linear interpolation in the (10, 30] bucket: 10 + 20*(0.2*300)/100
+			Timestamps: timestampsExpected,
+		}
+		r.MetricName.Tags = []storage.Tag{{
+			Key:   []byte("foo"),
+			Value: []byte("bar"),
+		}}
+		resultExpected := []netstorage.Result{r}
+		f(q, resultExpected)
+	})
+	t.Run(`histogram_quantile(zero-bucket-count)`, func(t *testing.T) {
+		t.Parallel()
+		q := `histogram_quantile(0.6,
+			label_set(0, "foo", "bar", "le", "10")
+			or label_set(0, "foo", "bar", "le", "30")
+			or label_set(0, "foo", "bar", "le", "+Inf")
+		)`
+		resultExpected := []netstorage.Result{}
+		f(q, resultExpected)
+	})
+	t.Run(`histogram_quantile(all-nan-bucket-count)`, func(t *testing.T) { // every bucket is NaN, so no series is expected in the result
+		t.Parallel()
+		q := `histogram_quantile(0.6,
+			label_set(nan, "foo", "bar", "le", "10")
+			or label_set(nan, "foo", "bar", "le", "30")
+			or label_set(nan, "foo", "bar", "le", "+Inf")
+		)`
+		resultExpected := []netstorage.Result{}
+		f(q, resultExpected)
+	})
 	t.Run(`median_over_time()`, func(t *testing.T) {
 		t.Parallel()
 		q := `median_over_time({})`
@@ -2068,6 +2405,17 @@ func TestExecSuccess(t *testing.T) {
 		resultExpected := []netstorage.Result{r}
 		f(q, resultExpected)
 	})
+	t.Run(`avg(scalar) wiTHout (xx, yy)`, func(t *testing.T) {
+		t.Parallel()
+		q := `avg wiTHout (xx, yy) (123)`
+		r := netstorage.Result{
+			MetricName: metricNameExpected,
+			Values:     []float64{123, 123, 123, 123, 123, 123},
+			Timestamps: timestampsExpected,
+		}
+		resultExpected := []netstorage.Result{r}
+		f(q, resultExpected)
+	})
 	t.Run(`sum(time)`, func(t *testing.T) {
 		t.Parallel()
 		q := `sum(time()/100)`
@@ -2079,6 +2427,51 @@ func TestExecSuccess(t *testing.T) {
 		resultExpected := []netstorage.Result{r}
 		f(q, resultExpected)
 	})
+	t.Run(`geomean(time)`, func(t *testing.T) {
+		t.Parallel()
+		q := `geomean(time()/100)`
+		r := netstorage.Result{
+			MetricName: metricNameExpected,
+			Values:     []float64{10, 12, 14, 16, 18, 20},
+			Timestamps: timestampsExpected,
+		}
+		resultExpected := []netstorage.Result{r}
+		f(q, resultExpected)
+	})
+	t.Run(`geomean_over_time(time)`, func(t *testing.T) {
+		t.Parallel()
+		q := `round(geomean_over_time(alias(time()/100, "foobar")[3i]), 0.1)`
+		r := netstorage.Result{
+			MetricName: metricNameExpected,
+			Values:     []float64{6.8, 8.8, 10.9, 12.9, 14.9, 16.9},
+			Timestamps: timestampsExpected,
+		}
+		r.MetricName.MetricGroup = []byte("foobar")
+		resultExpected := []netstorage.Result{r}
+		f(q, resultExpected)
+	})
+	t.Run(`sum2(time)`, func(t *testing.T) {
+		t.Parallel()
+		q := `sum2(time()/100)`
+		r := netstorage.Result{
+			MetricName: metricNameExpected,
+			Values:     []float64{100, 144, 196, 256, 324, 400},
+			Timestamps: timestampsExpected,
+		}
+		resultExpected := []netstorage.Result{r}
+		f(q, resultExpected)
+	})
+	t.Run(`sum2_over_time(time)`, func(t *testing.T) {
+		t.Parallel()
+		q := `sum2_over_time(alias(time()/100, "foobar")[3i])`
+		r := netstorage.Result{
+			MetricName: metricNameExpected,
+			Values:     []float64{155, 251, 371, 515, 683, 875},
+			Timestamps: timestampsExpected,
+		}
+		resultExpected := []netstorage.Result{r}
+		f(q, resultExpected)
+	})
 	t.Run(`sum(multi-vector)`, func(t *testing.T) {
 		t.Parallel()
 		q := `sum(label_set(10, "foo", "bar") or label_set(time()/100, "baz", "sss"))`
@@ -2090,6 +2483,39 @@ func TestExecSuccess(t *testing.T) {
 		resultExpected := []netstorage.Result{r}
 		f(q, resultExpected)
 	})
+	t.Run(`geomean(multi-vector)`, func(t *testing.T) {
+		t.Parallel()
+		q := `round(geomean(label_set(10, "foo", "bar") or label_set(time()/100, "baz", "sss")), 0.1)`
+		r := netstorage.Result{
+			MetricName: metricNameExpected,
+			Values:     []float64{10, 11, 11.8, 12.6, 13.4, 14.1},
+			Timestamps: timestampsExpected,
+		}
+		resultExpected := []netstorage.Result{r}
+		f(q, resultExpected)
+	})
+	t.Run(`sum2(multi-vector)`, func(t *testing.T) {
+		t.Parallel()
+		q := `sum2(label_set(10, "foo", "bar") or label_set(time()/100, "baz", "sss"))`
+		r := netstorage.Result{
+			MetricName: metricNameExpected,
+			Values:     []float64{200, 244, 296, 356, 424, 500},
+			Timestamps: timestampsExpected,
+		}
+		resultExpected := []netstorage.Result{r}
+		f(q, resultExpected)
+	})
+	t.Run(`sqrt(sum2(multi-vector))`, func(t *testing.T) {
+		t.Parallel()
+		q := `round(sqrt(sum2(label_set(10, "foo", "bar") or label_set(time()/100, "baz", "sss"))))`
+		r := netstorage.Result{
+			MetricName: metricNameExpected,
+			Values:     []float64{14, 16, 17, 19, 21, 22},
+			Timestamps: timestampsExpected,
+		}
+		resultExpected := []netstorage.Result{r}
+		f(q, resultExpected)
+	})
 	t.Run(`avg(multi-vector)`, func(t *testing.T) {
 		t.Parallel()
 		q := `avg(label_set(10, "foo", "bar") or label_set(time()/100, "baz", "sss"))`
@@ -2114,10 +2540,10 @@ func TestExecSuccess(t *testing.T) {
 	})
 	t.Run(`count(multi-vector)`, func(t *testing.T) {
 		t.Parallel()
-		q := `count(label_set(10, "foo", "bar") or label_set((15-time()/100)^0.5, "baz", "sss"))`
+		q := `count(label_set(time()<1500, "foo", "bar") or label_set(time()<1800, "baz", "sss"))`
 		r := netstorage.Result{
 			MetricName: metricNameExpected,
-			Values:     []float64{2, 2, 2, 1, 1, 1},
+			Values:     []float64{2, 2, 2, 1, nan, nan},
 			Timestamps: timestampsExpected,
 		}
 		resultExpected := []netstorage.Result{r}
@@ -2279,6 +2705,21 @@ func TestExecSuccess(t *testing.T) {
 		resultExpected := []netstorage.Result{r1, r2}
 		f(q, resultExpected)
 	})
+	t.Run(`topk(1, nan_timeseries)`, func(t *testing.T) {
+		t.Parallel()
+		q := `topk(1, label_set(NaN, "foo", "bar") or label_set(time()/150, "baz", "sss")) default 0`
+		r1 := netstorage.Result{
+			MetricName: metricNameExpected,
+			Values:     []float64{6.666666666666667, 8, 9.333333333333334, 10.666666666666666, 12, 13.333333333333334},
+			Timestamps: timestampsExpected,
+		}
+		r1.MetricName.Tags = []storage.Tag{{
+			Key:   []byte("baz"),
+			Value: []byte("sss"),
+		}}
+		resultExpected := []netstorage.Result{r1}
+		f(q, resultExpected)
+	})
 	t.Run(`topk(2)`, func(t *testing.T) {
 		t.Parallel()
 		q := `sort(topk(2, label_set(10, "foo", "bar") or label_set(time()/150, "baz", "sss")))`
@@ -2378,7 +2819,7 @@ func TestExecSuccess(t *testing.T) {
 		q := `distinct_over_time((time() < 1700)[500s])`
 		r1 := netstorage.Result{
 			MetricName: metricNameExpected,
-			Values:     []float64{3, 3, 3, 2, 1, nan},
+			Values:     []float64{3, 3, 3, 3, 2, 1},
 			Timestamps: timestampsExpected,
 		}
 		resultExpected := []netstorage.Result{r1}
@@ -2389,7 +2830,7 @@ func TestExecSuccess(t *testing.T) {
 		q := `distinct_over_time((time() < 1700)[2.5i])`
 		r1 := netstorage.Result{
 			MetricName: metricNameExpected,
-			Values:     []float64{3, 3, 3, 2, 1, nan},
+			Values:     []float64{3, 3, 3, 3, 2, 1},
 			Timestamps: timestampsExpected,
 		}
 		resultExpected := []netstorage.Result{r1}
@@ -2645,7 +3086,7 @@ func TestExecSuccess(t *testing.T) {
 		q := `integrate(time()*1e-3)`
 		r := netstorage.Result{
 			MetricName: metricNameExpected,
-			Values:     []float64{200, 240.00000000000003, 280, 320, 360, 400},
+			Values:     []float64{160, 200, 240.00000000000003, 280, 320, 360},
 			Timestamps: timestampsExpected,
 		}
 		resultExpected := []netstorage.Result{r}
@@ -2667,7 +3108,7 @@ func TestExecSuccess(t *testing.T) {
 		q := `rate(2000-time())`
 		r := netstorage.Result{
 			MetricName: metricNameExpected,
-			Values:     []float64{4.5, 3.5, 2.5, 1.5, 0.5, -0.5},
+			Values:     []float64{5.5, 4.5, 3.5, 2.5, 1.5, 0.5},
 			Timestamps: timestampsExpected,
 		}
 		resultExpected := []netstorage.Result{r}
@@ -2678,7 +3119,7 @@ func TestExecSuccess(t *testing.T) {
 		q := `rate((2000-time())[100s])`
 		r := netstorage.Result{
 			MetricName: metricNameExpected,
-			Values:     []float64{4.5, 3.5, 2.5, 1.5, 0.5, -0.5},
+			Values:     []float64{5.5, 4.5, 3.5, 2.5, 1.5, 0.5},
 			Timestamps: timestampsExpected,
 		}
 		resultExpected := []netstorage.Result{r}
@@ -2689,7 +3130,7 @@ func TestExecSuccess(t *testing.T) {
 		q := `rate((2000-time())[100s:])`
 		r := netstorage.Result{
 			MetricName: metricNameExpected,
-			Values:     []float64{4.5, 3.5, 2.5, 1.5, 0.5, -0.5},
+			Values:     []float64{5.5, 4.5, 3.5, 2.5, 1.5, 0.5},
 			Timestamps: timestampsExpected,
 		}
 		resultExpected := []netstorage.Result{r}
@@ -2700,7 +3141,7 @@ func TestExecSuccess(t *testing.T) {
 		q := `rate((2000-time())[100s:100s])`
 		r := netstorage.Result{
 			MetricName: metricNameExpected,
-			Values:     []float64{4, 6.5, 4.5, 2.5, 0.5, -1.5},
+			Values:     []float64{5.5, 4.5, 6.5, 4.5, 2.5, 0.5},
 			Timestamps: timestampsExpected,
 		}
 		resultExpected := []netstorage.Result{r}
@@ -2711,7 +3152,7 @@ func TestExecSuccess(t *testing.T) {
 		q := `rate((2000-time())[100s:100s] offset 100s)`
 		r := netstorage.Result{
 			MetricName: metricNameExpected,
-			Values:     []float64{4.5, 3.5, 5.5, 3.5, 1.5, -0.5},
+			Values:     []float64{6, 5, 7.5, 5.5, 3.5, 1.5},
 			Timestamps: timestampsExpected,
 		}
 		resultExpected := []netstorage.Result{r}
@@ -2722,7 +3163,7 @@ func TestExecSuccess(t *testing.T) {
 		q := `rate((2000-time())[100s:100s] offset 100s)[:] offset 100s`
 		r := netstorage.Result{
 			MetricName: metricNameExpected,
-			Values:     []float64{6, 5, 7.5, 5.5, 3.5, 1.5},
+			Values:     []float64{7, 6, 5, 7.5, 5.5, 3.5},
 			Timestamps: timestampsExpected,
 		}
 		resultExpected := []netstorage.Result{r}
@@ -2744,7 +3185,7 @@ func TestExecSuccess(t *testing.T) {
 		q := `increase(2000-time())`
 		r := netstorage.Result{
 			MetricName: metricNameExpected,
-			Values:     []float64{900, 700, 500, 300, 100, -100},
+			Values:     []float64{1100, 900, 700, 500, 300, 100},
 			Timestamps: timestampsExpected,
 		}
 		resultExpected := []netstorage.Result{r}
@@ -3009,6 +3450,48 @@ func TestExecSuccess(t *testing.T) {
 		resultExpected := []netstorage.Result{r}
 		f(q, resultExpected)
 	})
+	t.Run(`rollup_candlestick()`, func(t *testing.T) {
+		t.Parallel()
+		q := `sort(rollup_candlestick(round(rand(0),0.01)[:10s]))`
+		r1 := netstorage.Result{
+			MetricName: metricNameExpected,
+			Values:     []float64{0.02, 0.02, 0.03, 0, 0.03, 0.02},
+			Timestamps: timestampsExpected,
+		}
+		r1.MetricName.Tags = []storage.Tag{{
+			Key:   []byte("rollup"),
+			Value: []byte("low"),
+		}}
+		r2 := netstorage.Result{
+			MetricName: metricNameExpected,
+			Values:     []float64{0.32, 0.82, 0.13, 0.28, 0.86, 0.57},
+			Timestamps: timestampsExpected,
+		}
+		r2.MetricName.Tags = []storage.Tag{{
+			Key:   []byte("rollup"),
+			Value: []byte("close"),
+		}}
+		r3 := netstorage.Result{
+			MetricName: metricNameExpected,
+			Values:     []float64{0.9, 0.32, 0.82, 0.13, 0.28, 0.86},
+			Timestamps: timestampsExpected,
+		}
+		r3.MetricName.Tags = []storage.Tag{{
+			Key:   []byte("rollup"),
+			Value: []byte("open"),
+		}}
+		r4 := netstorage.Result{
+			MetricName: metricNameExpected,
+			Values:     []float64{0.85, 0.94, 0.97, 0.93, 0.98, 0.92},
+			Timestamps: timestampsExpected,
+		}
+		r4.MetricName.Tags = []storage.Tag{{
+			Key:   []byte("rollup"),
+			Value: []byte("high"),
+		}}
+		resultExpected := []netstorage.Result{r1, r2, r3, r4}
+		f(q, resultExpected)
+	})
 	t.Run(`rollup_increase()`, func(t *testing.T) {
 		t.Parallel()
 		q := `sort(rollup_increase(time()))`
@@ -3047,7 +3530,7 @@ func TestExecSuccess(t *testing.T) {
 		q := `sort(rollup(time()[:50s]))`
 		r1 := netstorage.Result{
 			MetricName: metricNameExpected,
-			Values:     []float64{1050, 1250, 1450, 1650, 1850, 2050},
+			Values:     []float64{850, 1050, 1250, 1450, 1650, 1850},
 			Timestamps: timestampsExpected,
 		}
 		r1.MetricName.Tags = []storage.Tag{{
@@ -3056,21 +3539,21 @@ func TestExecSuccess(t *testing.T) {
 		}}
 		r2 := netstorage.Result{
 			MetricName: metricNameExpected,
-			Values:     []float64{1200, 1400, 1600, 1800, 2000, 2200},
+			Values:     []float64{925, 1125, 1325, 1525, 1725, 1925},
 			Timestamps: timestampsExpected,
 		}
 		r2.MetricName.Tags = []storage.Tag{{
 			Key:   []byte("rollup"),
-			Value: []byte("max"),
+			Value: []byte("avg"),
 		}}
 		r3 := netstorage.Result{
 			MetricName: metricNameExpected,
-			Values:     []float64{1125, 1325, 1525, 1725, 1925, 2125},
+			Values:     []float64{1000, 1200, 1400, 1600, 1800, 2000},
 			Timestamps: timestampsExpected,
 		}
 		r3.MetricName.Tags = []storage.Tag{{
 			Key:   []byte("rollup"),
-			Value: []byte("avg"),
+			Value: []byte("max"),
 		}}
 		resultExpected := []netstorage.Result{r1, r2, r3}
 		f(q, resultExpected)
@@ -3417,19 +3900,131 @@ func TestExecSuccess(t *testing.T) {
 		resultExpected := []netstorage.Result{r1, r2, r3, r4, r5, r6}
 		f(q, resultExpected)
 	})
+	t.Run(`count_values by (xxx)`, func(t *testing.T) {
+		t.Parallel()
+		q := `count_values("xxx", label_set(10, "foo", "bar", "xxx", "aaa") or label_set(floor(time()/600), "foo", "bar", "baz", "xx")) by (xxx)`
+		r1 := netstorage.Result{
+			MetricName: metricNameExpected,
+			Values:     []float64{1, nan, nan, nan, nan, nan},
+			Timestamps: timestampsExpected,
+		}
+		r1.MetricName.Tags = []storage.Tag{
+			{
+				Key:   []byte("xxx"),
+				Value: []byte("1"),
+			},
+		}
+		r2 := netstorage.Result{
+			MetricName: metricNameExpected,
+			Values:     []float64{nan, 1, 1, 1, nan, nan},
+			Timestamps: timestampsExpected,
+		}
+		r2.MetricName.Tags = []storage.Tag{
+			{
+				Key:   []byte("xxx"),
+				Value: []byte("2"),
+			},
+		}
+		r3 := netstorage.Result{
+			MetricName: metricNameExpected,
+			Values:     []float64{nan, nan, nan, nan, 1, 1},
+			Timestamps: timestampsExpected,
+		}
+		r3.MetricName.Tags = []storage.Tag{
+			{
+				Key:   []byte("xxx"),
+				Value: []byte("3"),
+			},
+		}
+		r4 := netstorage.Result{
+			MetricName: metricNameExpected,
+			Values:     []float64{1, 1, 1, 1, 1, 1},
+			Timestamps: timestampsExpected,
+		}
+		r4.MetricName.Tags = []storage.Tag{
+			{
+				Key:   []byte("xxx"),
+				Value: []byte("10"),
+			},
+		}
+		resultExpected := []netstorage.Result{r1, r2, r3, r4}
+		f(q, resultExpected)
+	})
+	t.Run(`count_values without (baz)`, func(t *testing.T) {
+		t.Parallel()
+		q := `count_values("xxx", label_set(floor(time()/600), "foo", "bar")) without (baz)`
+		r1 := netstorage.Result{
+			MetricName: metricNameExpected,
+			Values:     []float64{1, nan, nan, nan, nan, nan},
+			Timestamps: timestampsExpected,
+		}
+		r1.MetricName.Tags = []storage.Tag{
+			{
+				Key:   []byte("foo"),
+				Value: []byte("bar"),
+			},
+			{
+				Key:   []byte("xxx"),
+				Value: []byte("1"),
+			},
+		}
+		r2 := netstorage.Result{
+			MetricName: metricNameExpected,
+			Values:     []float64{nan, 1, 1, 1, nan, nan},
+			Timestamps: timestampsExpected,
+		}
+		r2.MetricName.Tags = []storage.Tag{
+			{
+				Key:   []byte("foo"),
+				Value: []byte("bar"),
+			},
+			{
+				Key:   []byte("xxx"),
+				Value: []byte("2"),
+			},
+		}
+		r3 := netstorage.Result{
+			MetricName: metricNameExpected,
+			Values:     []float64{nan, nan, nan, nan, 1, 1},
+			Timestamps: timestampsExpected,
+		}
+		r3.MetricName.Tags = []storage.Tag{
+			{
+				Key:   []byte("foo"),
+				Value: []byte("bar"),
+			},
+			{
+				Key:   []byte("xxx"),
+				Value: []byte("3"),
+			},
+		}
+		resultExpected := []netstorage.Result{r1, r2, r3}
+		f(q, resultExpected)
+	})
 }

 func TestExecError(t *testing.T) {
 	f := func(q string) {
 		t.Helper()
 		ec := &EvalConfig{
+			AuthToken: &auth.Token{
+				AccountID: 123,
+				ProjectID: 567,
+			},
 			Start:    1000,
 			End:      2000,
 			Step:     100,
 			Deadline: netstorage.NewDeadline(time.Minute),
 		}
 		for i := 0; i < 4; i++ {
-			rv, err := Exec(ec, q)
+			rv, err := Exec(ec, q, false)
+			if err == nil {
+				t.Fatalf(`expecting non-nil error on %q`, q)
+			}
+			if rv != nil {
+				t.Fatalf(`expecting nil rv`)
+			}
+			rv, err = Exec(ec, q, true)
 			if err == nil {
 				t.Fatalf(`expecting non-nil error on %q`, q)
 			}
@@ -3461,6 +4056,7 @@ func TestExecError(t *testing.T) {
 	f(`hour(1,2)`)
 	f(`label_join()`)
 	f(`label_replace(1)`)
+	f(`label_transform(1)`)
 	f(`label_set()`)
 	f(`label_set(1, "foo")`)
 	f(`label_del()`)
@@ -3506,6 +4102,9 @@ func TestExecError(t *testing.T) {
 	f(`keep_last_value()`)
 	f(`distinct_over_time()`)
 	f(`distinct()`)
+	f(`alias()`)
+	f(`alias(1)`)
+	f(`alias(1, "foo", "bar")`)

 	// Invalid argument type
 	f(`median_over_time({}, 2)`)
@@ -3535,6 +4134,11 @@ func TestExecError(t *testing.T) {
 	f(`label_replace(1, "foo", "bar", 4, 5)`)
 	f(`label_replace(1, "foo", "bar", "baz", 5)`)
 	f(`label_replace(1, "foo", "bar", "baz", "invalid(regexp")`)
+	f(`label_transform(1, 2, 3, 4)`)
+	f(`label_transform(1, "foo", 3, 4)`)
+	f(`label_transform(1, "foo", "bar", 4)`)
+	f(`label_transform(1, "foo", "invalid(regexp", "baz")`) // well-formed query, so the error must come from the invalid regexp rather than from the lexer
+	f(`alias(1, 2)`)

 	// Duplicate timeseries
 	f(`(label_set(1, "foo", "bar") or label_set(2, "foo", "baz"))
@@ -3545,12 +4149,31 @@ func TestExecError(t *testing.T) {
 	f(`1 + group_left() (label_set(1, "foo", bar"), label_set(2, "foo", "baz"))`)
 	f(`1 + on() group_left() (label_set(1, "foo", bar"), label_set(2, "foo", "baz"))`)
 	f(`1 + on(a) group_left(b) (label_set(1, "foo", bar"), label_set(2, "foo", "baz"))`)
+	f(`label_set(1, "foo", "bar") + on(foo) group_left() (label_set(1, "foo", "bar", "a", "b"), label_set(1, "foo", "bar", "a", "c"))`)
 	f(`(label_set(1, "foo", bar"), label_set(2, "foo", "baz")) + group_right 1`)
 	f(`(label_set(1, "foo", bar"), label_set(2, "foo", "baz")) + on() group_right 1`)
 	f(`(label_set(1, "foo", bar"), label_set(2, "foo", "baz")) + on(a) group_right(b,c) 1`)
 	f(`(label_set(1, "foo", bar"), label_set(2, "foo", "baz")) + on() 1`)
+	f(`(label_set(1, "foo", "bar", "a", "b"), label_set(1, "foo", "bar", "a", "c")) + on(foo) group_right() label_set(1, "foo", "bar")`)
 	f(`1 + on() (label_set(1, "foo", bar"), label_set(2, "foo", "baz"))`)

+	// duplicate metrics after binary op
+	f(`(
+		label_set(time(), "__name__", "foo", "a", "x"),
+		label_set(time()+200, "__name__", "bar", "a", "x"),
+	) > bool 1300`)
+	f(`(
+		label_set(time(), "__name__", "foo", "a", "x"),
+		label_set(time()+200, "__name__", "bar", "a", "x"),
+	) + 10`)
+
+	// Invalid aggregates
+	f(`sum(1, 2)`)
+	f(`sum(1) foo (bar)`)
+	f(`sum foo () (bar)`)
+	f(`sum(foo) by (1)`)
+	f(`count(foo) without ("bar")`)
+
 	// With expressions
 	f(`ttf()`)
 	f(`ttf(1, 2)`)
@@ -3574,6 +4197,12 @@ func testResultsEqual(t *testing.T, result, resultExpected []netstorage.Result)

 func testMetricNamesEqual(t *testing.T, mn, mnExpected *storage.MetricName) {
 	t.Helper()
+	if mn.AccountID != mnExpected.AccountID {
+		t.Fatalf(`unexpected accountID; got %d; want %d`, mn.AccountID, mnExpected.AccountID)
+	}
+	if mn.ProjectID != mnExpected.ProjectID {
+		t.Fatalf(`unexpected projectID; got %d; want %d`, mn.ProjectID, mnExpected.ProjectID)
+	}
 	if string(mn.MetricGroup) != string(mnExpected.MetricGroup) {
 		t.Fatalf(`unexpected MetricGroup; got %q; want %q`, mn.MetricGroup, mnExpected.MetricGroup)
 	}
--- a/app/vmselect/promql/lexer.go
+++ b/app/vmselect/promql/lexer.go
@@ -4,6 +4,8 @@ import (
 	"fmt"
 	"strconv"
 	"strings"
+
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
 )

 type lexer struct {
@@ -85,10 +87,7 @@ again:
 		goto tokenFoundLabel
 	}
 	if isIdentPrefix(s) {
-		token, err = scanIdent(s)
-		if err != nil {
-			return "", err
-		}
+		token = scanIdent(s)
 		goto tokenFoundLabel
 	}
 	if isStringPrefix(s) {
@@ -150,12 +149,6 @@ func scanString(s string) (string, error) {
 }

 func scanPositiveNumber(s string) (string, error) {
-	if strings.HasPrefix(s, "Inf") {
-		return "Inf", nil
-	}
-	if strings.HasPrefix(s, "NaN") {
-		return "NaN", nil
-	}
 	// Scan integer part. It may be empty if fractional part exists.
 	i := 0
 	for i < len(s) && isDecimalChar(s[i]) {
@@ -210,15 +203,119 @@ func scanPositiveNumber(s string) (string, error) {
 	return s[:j], nil
 }
 
-func scanIdent(s string) (string, error) {
-	if len(s) == 0 {
-		return "", fmt.Errorf("ident cannot be empty")
-	}
+// scanIdent returns the longest prefix of s that forms an identifier.
+//
+// A backslash escapes the char following it, so that char is accepted
+// without verification. The returned ident still contains the backslashes.
+//
+// isIdentPrefix(s) must hold before the call; otherwise logger.Panicf
+// is called, since no ident char can be found.
+func scanIdent(s string) string {
 	i := 0
-	for i < len(s) && isIdentChar(s[i]) {
-		i++
+	for i < len(s) {
+		if isIdentChar(s[i]) {
+			i++
+			continue
+		}
+		if s[i] != '\\' {
+			break
+		}
+
+		// Do not verify the next char, since it is escaped.
+		i += 2
+		if i > len(s) {
+			// Trailing backslash with nothing to escape: keep it and stop.
+			i--
+			break
+		}
 	}
-	return s[:i], nil
+	if i == 0 {
+		logger.Panicf("BUG: scanIdent couldn't find a single ident char; make sure isIdentPrefix called before scanIdent")
+	}
+	return s[:i]
+}
+
+// unescapeIdent removes backslash escapes from s.
+//
+// `\xHH` with two hex digits is decoded into the byte 0xHH. Any other
+// `\c` sequence is replaced with c. A trailing lone backslash is dropped.
+func unescapeIdent(s string) string {
+	n := strings.IndexByte(s, '\\')
+	if n < 0 {
+		// Fast path: nothing to unescape.
+		return s
+	}
+	dst := make([]byte, 0, len(s))
+	for {
+		// Copy the chars preceding the backslash and skip the backslash itself.
+		dst = append(dst, s[:n]...)
+		s = s[n+1:]
+		if len(s) == 0 {
+			return string(dst)
+		}
+		if s[0] == 'x' && len(s) >= 3 {
+			h1 := fromHex(s[1])
+			h2 := fromHex(s[2])
+			if h1 >= 0 && h2 >= 0 {
+				dst = append(dst, byte((h1<<4)|h2))
+				s = s[3:]
+			} else {
+				// Not a valid hex escape: treat `\x` as an escaped 'x'.
+				dst = append(dst, s[0])
+				s = s[1:]
+			}
+		} else {
+			dst = append(dst, s[0])
+			s = s[1:]
+		}
+		n = strings.IndexByte(s, '\\')
+		if n < 0 {
+			dst = append(dst, s...)
+			return string(dst)
+		}
+	}
+}
+
+// fromHex returns the value of the hex digit ch or -1 if ch isn't a hex digit.
+func fromHex(ch byte) int {
+	switch {
+	case ch >= '0' && ch <= '9':
+		return int(ch - '0')
+	case ch >= 'a' && ch <= 'f':
+		return int((ch - 'a') + 10)
+	case ch >= 'A' && ch <= 'F':
+		return int((ch - 'A') + 10)
+	default:
+		return -1
+	}
+}
+
+// toHex returns the lowercase hex digit for n; callers in this file pass values below 16.
+func toHex(n byte) byte {
+	if n >= 10 {
+		return 'a' + (n - 10)
+	}
+	return '0' + n
+}
+
+// appendEscapedIdent appends s to dst with non-ident chars escaped and returns the result.
+//
+// Escapes are either `\c` for printable ASCII chars or `\xHH` for the remaining chars.
+func appendEscapedIdent(dst, s []byte) []byte {
+	for i, ch := range s {
+		switch {
+		case isIdentChar(ch) && (i > 0 || isFirstIdentChar(ch)):
+			// The char is valid at this position, so leave it as is.
+			dst = append(dst, ch)
+		case !isIdentChar(ch) && ch >= 0x20 && ch < 0x7f:
+			// Leave ASCII printable chars as is, just prepend them with a backslash.
+			dst = append(dst, '\\', ch)
+		default:
+			// hex-encode non-printable chars and ident chars, which cannot go first.
+			dst = append(dst, '\\', 'x', toHex(ch>>4), toHex(ch&0xf))
+		}
+	}
+	return dst
 }
 
 func (lex *lexer) Prev() {
@@ -246,6 +327,14 @@ func scanTagFilterOpPrefix(s string) int {
 	return -1
 }

+// isInfOrNaN reports whether s is `inf` or `nan` written in any letter case.
+func isInfOrNaN(s string) bool {
+	if len(s) == 3 {
+		lower := strings.ToLower(s)
+		return lower == "inf" || lower == "nan"
+	}
+	return false
+}
+
 func isOffset(s string) bool {
 	s = strings.ToLower(s)
 	return s == "offset"
@@ -274,7 +363,7 @@ func isPositiveNumberPrefix(s string) bool {

 	// Check for .234 numbers
 	if s[0] != '.' || len(s) < 2 {
-		return strings.HasPrefix(s, "Inf") || strings.HasPrefix(s, "NaN")
+		return false
 	}
 	return isDecimalChar(s[1])
 }
@@ -353,6 +442,10 @@ func isIdentPrefix(s string) bool {
 	if len(s) == 0 {
 		return false
 	}
+	if s[0] == '\\' {
+		// Assume this is an escape char for the next char.
+		return true
+	}
 	return isFirstIdentChar(s[0])
 }

@@ -367,7 +460,7 @@ func isIdentChar(ch byte) bool {
 	if isFirstIdentChar(ch) {
 		return true
 	}
-	return isDecimalChar(ch) || ch == ':' || ch == '.'
+	return isDecimalChar(ch) || ch == '.'
 }

 func isSpaceChar(ch byte) bool {
--- a/app/vmselect/promql/lexer_test.go
+++ b/app/vmselect/promql/lexer_test.go
@@ -5,6 +5,57 @@ import (
 	"testing"
 )

+func TestUnescapeIdent(t *testing.T) {
+	f := func(s, resultExpected string) {
+		t.Helper()
+		result := unescapeIdent(s)
+		if result != resultExpected {
+			t.Fatalf("unexpected result for unescapeIdent(%q); got %q; want %q", s, result, resultExpected)
+		}
+	}
+	f("", "")
+	f("a", "a")
+	f("\\", "")
+	f(`\\`, `\`)
+	f(`\foo\-bar`, `foo-bar`)
+	f(`a\\\\b\"c\d`, `a\\b"cd`)
+	f(`foo.bar:baz_123`, `foo.bar:baz_123`)
+	f(`foo\ bar`, `foo bar`)
+	f(`\x21`, `!`)
+	f(`\xeDfoo\x2Fbar\-\xqw\x`, "\xedfoo\x2fbar-xqwx")
+}
+
+func TestAppendEscapedIdent(t *testing.T) {
+	f := func(s, resultExpected string) {
+		t.Helper()
+		result := appendEscapedIdent(nil, []byte(s))
+		if string(result) != resultExpected {
+			t.Fatalf("unexpected result for appendEscapedIdent(%q); got %q; want %q", s, result, resultExpected)
+		}
+	}
+	f(`a`, `a`)
+	f(`a.b:c_23`, `a.b:c_23`)
+	f(`a b-cd+dd\`, `a\ b\-cd\+dd\\`)
+	f("a\x1E\x20\xee", `a\x1e\ \xee`)
+	f("\x2e\x2e", `\x2e.`)
+}
+
+func TestScanIdent(t *testing.T) {
+	f := func(s, resultExpected string) {
+		t.Helper()
+		result := scanIdent(s)
+		if result != resultExpected {
+			t.Fatalf("unexpected result for scanIdent(%q): got %q; want %q", s, result, resultExpected)
+		}
+	}
+	f("a", "a")
+	f("foo.bar:baz_123", "foo.bar:baz_123")
+	f("a+b", "a")
+	f("foo()", "foo")
+	f(`a\-b+c`, `a\-b`)
+	f(`a\ b\\\ c\`, `a\ b\\\ c\`)
+}
+
 func TestLexerNextPrev(t *testing.T) {
 	var lex lexer
 	lex.Init("foo bar baz")
--- a/app/vmselect/promql/memory_limiter.go
+++ b/app/vmselect/promql/memory_limiter.go
@@ -0,0 +1,48 @@
+package promql
+
+import (
+	"sync"
+
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
+)
+
+// memoryLimiter tracks the amount of memory reserved out of the MaxSize budget.
+//
+// The usage counter is protected by mu.
+type memoryLimiter struct {
+	// MaxSize is the upper bound for the total amount reserved via Get.
+	MaxSize uint64
+
+	mu    sync.Mutex
+	usage uint64
+}
+
+// Get tries to reserve n out of MaxSize and reports whether it succeeded.
+//
+// The usage isn't changed on failure. The reserved amount is returned back via Put.
+func (ml *memoryLimiter) Get(n uint64) bool {
+	ml.mu.Lock()
+	defer ml.mu.Unlock()
+
+	// Compare via subtraction, since ml.usage+n may overflow uint64.
+	if n > ml.MaxSize || ml.MaxSize-n < ml.usage {
+		return false
+	}
+	ml.usage += n
+	return true
+}
+
+// Put returns n back to the limiter.
+//
+// logger.Panicf is called if n exceeds the currently reserved amount.
+func (ml *memoryLimiter) Put(n uint64) {
+	ml.mu.Lock()
+	// Release mu via defer, so it isn't left locked if logger.Panicf panics
+	// and the panic is recovered further up the stack.
+	defer ml.mu.Unlock()
+
+	if n > ml.usage {
+		logger.Panicf("BUG: n=%d cannot exceed %d", n, ml.usage)
+	}
+	ml.usage -= n
+}
--- a/app/vmselect/promql/memory_limiter_test.go
+++ b/app/vmselect/promql/memory_limiter_test.go
@@ -0,0 +1,56 @@
+package promql
+
+import (
+	"testing"
+)
+
+func TestMemoryLimiter(t *testing.T) {
+	var ml memoryLimiter
+	ml.MaxSize = 100
+
+	// Reserve memory up to MaxSize; requests exceeding the remaining budget must fail and leave usage intact.
+	if !ml.Get(10) {
+		t.Fatalf("cannot get 10 out of %d bytes", ml.MaxSize)
+	}
+	if ml.usage != 10 {
+		t.Fatalf("unexpected usage; got %d; want %d", ml.usage, 10)
+	}
+	if !ml.Get(20) {
+		t.Fatalf("cannot get 20 out of 90 bytes")
+	}
+	if ml.usage != 30 {
+		t.Fatalf("unexpected usage; got %d; want %d", ml.usage, 30)
+	}
+	if ml.Get(1000) {
+		t.Fatalf("unexpected get for 1000 bytes")
+	}
+	if ml.usage != 30 {
+		t.Fatalf("unexpected usage; got %d; want %d", ml.usage, 30)
+	}
+	if ml.Get(71) {
+		t.Fatalf("unexpected get for 71 bytes")
+	}
+	if ml.usage != 30 {
+		t.Fatalf("unexpected usage; got %d; want %d", ml.usage, 30)
+	}
+	if !ml.Get(70) {
+		t.Fatalf("cannot get 70 bytes")
+	}
+	if ml.usage != 100 {
+		t.Fatalf("unexpected usage; got %d; want %d", ml.usage, 100)
+	}
+
+	// Release memory in chunks that differ from the reserved ones, re-reserve a part and drain usage to zero.
+	ml.Put(10)
+	ml.Put(70)
+	if ml.usage != 20 {
+		t.Fatalf("unexpected usage; got %d; want %d", ml.usage, 20)
+	}
+	if !ml.Get(30) {
+		t.Fatalf("cannot get 30 bytes")
+	}
+	ml.Put(50)
+	if ml.usage != 0 {
+		t.Fatalf("unexpected usage; got %d; want %d", ml.usage, 0)
+	}
+}
--- a/app/vmselect/promql/parser.go
+++ b/app/vmselect/promql/parser.go
@@ -6,7 +6,6 @@ import (
 	"strings"
 	"sync"

-	"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
 )
@@ -19,12 +18,13 @@ func getDefaultWithArgExprs() []*withArgExpr {

 			// ttf - time to fuckup
 			`ttf(freev) = smooth_exponential(
-				clamp_max(clamp_min(freev, 0) / clamp_min(deriv(-freev), 0), 365*24*3600),
+				clamp_max(clamp_max(-freev, 0) / clamp_max(deriv_fast(freev), 0), 365*24*3600),
 				clamp_max(step()/300, 1)
 			)`,

 			`median_over_time(m) = quantile_over_time(0.5, m)`,
 			`range_median(q) = range_quantile(0.5, q)`,
+			`alias(q, name) = label_set(q, "__name__", name)`,
 		})
 	})
 	return defaultWithArgExprs
@@ -373,7 +373,7 @@ func (p *parser) parseSingleExpr() (expr, error) {
 }

 func (p *parser) parseSingleExprWithoutRollupSuffix() (expr, error) {
-	if isPositiveNumberPrefix(p.lex.Token) {
+	if isPositiveNumberPrefix(p.lex.Token) || isInfOrNaN(p.lex.Token) {
 		return p.parsePositiveNumberExpr()
 	}
 	if isStringPrefix(p.lex.Token) {
@@ -417,7 +417,7 @@ func (p *parser) parseSingleExprWithoutRollupSuffix() (expr, error) {
 }

 func (p *parser) parsePositiveNumberExpr() (*numberExpr, error) {
-	if !isPositiveNumberPrefix(p.lex.Token) {
+	if !isPositiveNumberPrefix(p.lex.Token) && !isInfOrNaN(p.lex.Token) {
 		return nil, fmt.Errorf(`positiveNumberExpr: unexpected token %q; want "number"`, p.lex.Token)
 	}

@@ -744,7 +744,7 @@ func expandWithExpr(was []*withArgExpr, e expr) (expr, error) {
 		if !t.HasNonEmptyMetricGroup() {
 			return t, nil
 		}
-		k := bytesutil.ToUnsafeString(t.TagFilters[0].Value)
+		k := string(appendEscapedIdent(nil, t.TagFilters[0].Value))
 		wa := getWithArgExpr(was, k)
 		if wa == nil {
 			return t, nil
@@ -811,7 +811,9 @@ func expandModifierArgs(was []*withArgExpr, args []string) ([]string, error) {
 			continue
 		}
 		if len(wa.Args) > 0 {
-			return nil, fmt.Errorf("cannot use func %q instead of %q in %s", wa.Name, arg, args)
+			// Template funcs cannot be used inside modifier list. Leave the arg as is.
+			dstArgs = append(dstArgs, arg)
+			continue
 		}
 		me, ok := wa.Expr.(*metricExpr)
 		if ok {
@@ -851,6 +853,10 @@ func expandModifierArgs(was []*withArgExpr, args []string) ([]string, error) {

 func expandWithExprExt(was []*withArgExpr, wa *withArgExpr, args []expr) (expr, error) {
 	if len(wa.Args) != len(args) {
+		if args == nil {
+			// Just return metricExpr with the wa.Name name.
+			return newMetricExpr(wa.Name), nil
+		}
 		return nil, fmt.Errorf("invalid number of args for %q; got %d; want %d", wa.Name, len(args), len(wa.Args))
 	}
 	wasNew := make([]*withArgExpr, 0, len(was)+len(args))
@@ -869,6 +875,14 @@ func expandWithExprExt(was []*withArgExpr, wa *withArgExpr, args []expr) (expr,
 	return expandWithExpr(wasNew, wa.Expr)
 }

+func newMetricExpr(name string) *metricExpr {
+	return &metricExpr{
+		TagFilters: []storage.TagFilter{{
+			Value: []byte(name),
+		}},
+	}
+}
+
 func extractStringValue(token string) (string, error) {
 	if !isStringPrefix(token) {
 		return "", fmt.Errorf(`stringExpr must contain only string literals; got %q`, token)
@@ -1074,9 +1088,6 @@ func (p *parser) parseTagFilterExpr() (*tagFilterExpr, error) {
 	}
 	var tfe tagFilterExpr
 	tfe.Key = p.lex.Token
-	if tfe.Key == "__name__" {
-		tfe.Key = ""
-	}
 	if err := p.lex.Next(); err != nil {
 		return nil, err
 	}
@@ -1125,8 +1136,16 @@ func (tfe *tagFilterExpr) toTagFilter() (*storage.TagFilter, error) {
 	}

 	var tf storage.TagFilter
-	tf.Key = []byte(tfe.Key)
-	tf.Value = []byte(tfe.Value.S)
+	tf.Key = []byte(unescapeIdent(tfe.Key))
+	if len(tfe.Key) == 0 {
+		tf.Value = []byte(unescapeIdent(tfe.Value.S))
+	} else {
+		tf.Value = []byte(tfe.Value.S)
+	}
+	if string(tf.Key) == "__name__" {
+		// This is required for storage.Search
+		tf.Key = nil
+	}
 	tf.IsRegexp = tfe.IsRegexp
 	tf.IsNegative = tfe.IsNegative
 	if !tf.IsRegexp {
@@ -1507,7 +1526,7 @@ func (wa *withArgExpr) AppendString(dst []byte) []byte {
 }

 type rollupExpr struct {
-	// The expression for the rollup. Usually it is metricExpr, but may be arbitary expr
+	// The expression for the rollup. Usually it is metricExpr, but may be arbitrary expr
 	// if subquery is used. https://prometheus.io/blog/2019/01/28/subquery-support/
 	Expr expr

@@ -1585,7 +1604,7 @@ func (me *metricExpr) AppendString(dst []byte) []byte {
 	if len(tfs) > 0 {
 		tf := &tfs[0]
 		if len(tf.Key) == 0 && !tf.IsNegative && !tf.IsRegexp {
-			dst = append(dst, tf.Value...)
+			dst = appendEscapedIdent(dst, tf.Value)
 			tfs = tfs[1:]
 		}
 	}
@@ -1627,7 +1646,7 @@ func appendStringTagFilter(dst []byte, tf *storage.TagFilter) []byte {
 	if len(tf.Key) == 0 {
 		dst = append(dst, "__name__"...)
 	} else {
-		dst = append(dst, tf.Key...)
+		dst = appendEscapedIdent(dst, tf.Key)
 	}
 	var op string
 	if tf.IsNegative {
--- a/app/vmselect/promql/parser_test.go
+++ b/app/vmselect/promql/parser_test.go
@@ -118,6 +118,17 @@ func TestParsePromQLSuccess(t *testing.T) {
 	same("with")
 	same("WITH")
 	same("With")
+	same("alias")
+	same(`alias{foo="bar"}`)
+	same(`aLIas{alias="aa"}`)
+	another(`al\ias`, `alias`)
+	// identifiers with escape chars
+	same(`foo\ bar`)
+	same(`foo\-bar\{{baz\+bar="aa"}`)
+	another(`\x2E\x2ef\oo{b\xEF\ar="aa"}`, `\x2e.foo{b\xefar="aa"}`)
+	// Duplicate filters
+	same(`foo{__name__="bar"}`)
+	same(`foo{a="b", a="c", __name__="aaa", b="d"}`)
 	// Metric filters ending with comma
 	another(`m{foo="bar",}`, `m{foo="bar"}`)
 	// String concat in tag value
@@ -159,14 +170,34 @@ func TestParsePromQLSuccess(t *testing.T) {
 	another(`-.2`, `-0.2`)
 	another(`-.2E-2`, `-0.002`)
 	same(`NaN`)
+	another(`nan`, `NaN`)
+	another(`NAN`, `NaN`)
+	another(`nAN`, `NaN`)
 	another(`Inf`, `+Inf`)
+	another(`INF`, `+Inf`)
+	another(`inf`, `+Inf`)
 	another(`+Inf`, `+Inf`)
 	another(`-Inf`, `-Inf`)
+	another(`-inF`, `-Inf`)

 	// binaryOpExpr
-	another(`NaN + 2 *3 * Inf`, `NaN`)
-	another(`Inf - Inf`, `NaN`)
-	another(`Inf + Inf`, `+Inf`)
+	another(`nan == nan`, `NaN`)
+	another(`nan ==bool nan`, `1`)
+	another(`nan !=bool nan`, `0`)
+	another(`nan !=bool 2`, `1`)
+	another(`2 !=bool nan`, `1`)
+	another(`nan >bool nan`, `0`)
+	another(`nan <bool nan`, `0`)
+	another(`1 ==bool nan`, `0`)
+	another(`NaN !=bool 1`, `1`)
+	another(`inf >=bool 2`, `1`)
+	another(`-1 >bool -inf`, `1`)
+	another(`-1 <bool -inf`, `0`)
+	another(`nan + 2 *3 * inf`, `NaN`)
+	another(`INF - Inf`, `NaN`)
+	another(`Inf + inf`, `+Inf`)
+	another(`1/0`, `+Inf`)
+	another(`0/0`, `NaN`)
 	another(`-m`, `0 - m`)
 	same(`m + ignoring () n[5m]`)
 	another(`M + IGNORING () N[5m]`, `M + ignoring () N[5m]`)
@@ -251,6 +282,8 @@ func TestParsePromQLSuccess(t *testing.T) {
 	same(`rate(rate(m[5m]))`)
 	same(`rate(rate(m[5m])[1h:])`)
 	same(`rate(rate(m[5m])[1h:3s])`)
+	// funcName with escape chars
+	same(`foo\(ba\-r()`)

 	// aggrFuncExpr
 	same(`sum(http_server_request) by ()`)
@@ -295,10 +328,14 @@ func TestParsePromQLSuccess(t *testing.T) {
 	another(`with (ct={job="test", i="bar"}) ct + {ct, x="d"} + foo{ct, ct} + ctx(1)`,
 		`(({job="test", i="bar"} + {job="test", i="bar", x="d"}) + foo{job="test", i="bar"}) + ctx(1)`)
 	another(`with (foo = bar) {__name__=~"foo"}`, `{__name__=~"foo"}`)
-	another(`with (foo = bar) {__name__="foo"}`, `bar`)
+	another(`with (foo = bar) foo{__name__="foo"}`, `bar`)
 	another(`with (foo = bar) {__name__="foo", x="y"}`, `bar{x="y"}`)
 	another(`with (foo(bar) = {__name__!="bar"}) foo(x)`, `{__name__!="bar"}`)
-	another(`with (foo(bar) = {__name__="bar"}) foo(x)`, `x`)
+	another(`with (foo(bar) = bar{__name__="bar"}) foo(x)`, `x`)
+	another(`with (foo\-bar(baz) = baz + baz) foo\-bar((x,y))`, `(x, y) + (x, y)`)
+	another(`with (foo\-bar(baz) = baz + baz) foo\-bar(x*y)`, `(x * y) + (x * y)`)
+	another(`with (foo\-bar(baz) = baz + baz) foo\-bar(x\*y)`, `x\*y + x\*y`)
+	another(`with (foo\-bar(b\ az) = b\ az + b\ az) foo\-bar(x\*y)`, `x\*y + x\*y`)
 	// override ttf to something new.
 	another(`with (ttf = a) ttf + b`, `a + b`)
 	// override ttf to ru
@@ -332,8 +369,11 @@ func TestParsePromQLSuccess(t *testing.T) {
 	another(`with (x="a", y=x) y+"bc"`, `"abc"`)
 	another(`with (x="a", y="b"+x) "we"+y+"z"+f()`, `"webaz" + f()`)
 	another(`with (f(x) = m{foo=x+"y", bar="y"+x, baz=x} + x) f("qwe")`, `m{foo="qwey", bar="yqwe", baz="qwe"} + "qwe"`)
+	another(`with (f(a)=a) f`, `f`)
+	another(`with (f\q(a)=a) f\q`, `fq`)

 	// Verify withExpr for aggr func modifiers
+	another(`with (f(x) = x, y = sum(m) by (f)) y`, `sum(m) by (f)`)
 	another(`with (f(x) = sum(m) by (x)) f(foo)`, `sum(m) by (foo)`)
 	another(`with (f(x) = sum(m) by (x)) f((foo, bar, foo))`, `sum(m) by (foo, bar)`)
 	another(`with (f(x) = sum(m) without (x,y)) f((a, b))`, `sum(m) without (a, b, y)`)
@@ -658,7 +698,7 @@ func TestParsePromQLError(t *testing.T) {
 	f(`with (x=m) f(b, a{x})`)
 	f(`with (x=m) sum(a{x})`)
 	f(`with (x=m) (a{x})`)
-	f(`with (f(a)=a) f`)
+	f(`with (f(a)=a) f(1, 2)`)
 	f(`with (f(x)=x{foo="bar"}) f(1)`)
 	f(`with (f(x)=x{foo="bar"}) f(m + n)`)
 	f(`with (f = with`)
@@ -668,8 +708,7 @@ func TestParsePromQLError(t *testing.T) {
 	f(`with (f(,)=x) x`)
 	f(`with (x(a) = {b="c"}) foo{x}`)
 	f(`with (f(x) = m{foo=xx}) f("qwe")`)
-	f(`a + with(f(x)=x) f`)
-	f(`with (f(x) = x, y = sum(m) by (f)) y`)
+	f(`a + with(f(x)=x) f(1,2)`)
 	f(`with (f(x) = sum(m) by (x)) f({foo="bar"})`)
 	f(`with (f(x) = sum(m) by (x)) f((xx(), {foo="bar"}))`)
 	f(`with (f(x) = m + on (x) n) f(xx())`)
--- a/app/vmselect/promql/regexp_cache.go
+++ b/app/vmselect/promql/regexp_cache.go
@@ -1,7 +1,6 @@
 package promql

 import (
-	"fmt"
 	"regexp"
 	"sync"
 	"sync/atomic"
@@ -10,12 +9,16 @@ import (
 )

 func compileRegexpAnchored(re string) (*regexp.Regexp, error) {
+	reAnchored := "^(?:" + re + ")$"
+	return compileRegexp(reAnchored)
+}
+
+func compileRegexp(re string) (*regexp.Regexp, error) {
 	rcv := regexpCacheV.Get(re)
 	if rcv != nil {
 		return rcv.r, rcv.err
 	}
-	regexAnchored := fmt.Sprintf("^(?:%s)$", re)
-	r, err := regexp.Compile(regexAnchored)
+	r, err := regexp.Compile(re)
 	rcv = &regexpCacheValue{
 		r:   r,
 		err: err,
@@ -77,7 +80,7 @@ func (rc *regexpCache) Get(regexp string) *regexpCacheValue {
 	rcv := rc.m[regexp]
 	rc.mu.RUnlock()

-	if rc == nil {
+	if rcv == nil {
 		atomic.AddUint64(&rc.misses, 1)
 	}
 	return rcv
--- a/app/vmselect/promql/rollup.go
+++ b/app/vmselect/promql/rollup.go
@@ -19,13 +19,14 @@ var rollupFuncs = map[string]newRollupFunc{
 	// See funcs accepting range-vector on https://prometheus.io/docs/prometheus/latest/querying/functions/ .
 	"changes":            newRollupFuncOneArg(rollupChanges),
 	"delta":              newRollupFuncOneArg(rollupDelta),
-	"deriv":              newRollupFuncOneArg(rollupDeriv),
+	"deriv":              newRollupFuncOneArg(rollupDerivSlow),
+	"deriv_fast":         newRollupFuncOneArg(rollupDerivFast),
 	"holt_winters":       newRollupHoltWinters,
 	"idelta":             newRollupFuncOneArg(rollupIdelta),
 	"increase":           newRollupFuncOneArg(rollupDelta),  // + rollupFuncsRemoveCounterResets
 	"irate":              newRollupFuncOneArg(rollupIderiv), // + rollupFuncsRemoveCounterResets
 	"predict_linear":     newRollupPredictLinear,
-	"rate":               newRollupFuncOneArg(rollupDeriv), // + rollupFuncsRemoveCounterResets
+	"rate":               newRollupFuncOneArg(rollupDerivFast), // + rollupFuncsRemoveCounterResets
 	"resets":             newRollupFuncOneArg(rollupResets),
 	"avg_over_time":      newRollupFuncOneArg(rollupAvg),
 	"min_over_time":      newRollupFuncOneArg(rollupMin),
@@ -37,16 +38,33 @@ var rollupFuncs = map[string]newRollupFunc{
 	"stdvar_over_time":   newRollupFuncOneArg(rollupStdvar),

 	// Additional rollup funcs.
+	"sum2_over_time":     newRollupFuncOneArg(rollupSum2),
+	"geomean_over_time":  newRollupFuncOneArg(rollupGeomean),
 	"first_over_time":    newRollupFuncOneArg(rollupFirst),
 	"last_over_time":     newRollupFuncOneArg(rollupLast),
 	"distinct_over_time": newRollupFuncOneArg(rollupDistinct),
 	"integrate":          newRollupFuncOneArg(rollupIntegrate),
 	"ideriv":             newRollupFuncOneArg(rollupIderiv),
+	"lifetime":           newRollupFuncOneArg(rollupLifetime),
+	"scrape_interval":    newRollupFuncOneArg(rollupScrapeInterval),
 	"rollup":             newRollupFuncOneArg(rollupFake),
 	"rollup_rate":        newRollupFuncOneArg(rollupFake), // + rollupFuncsRemoveCounterResets
 	"rollup_deriv":       newRollupFuncOneArg(rollupFake),
 	"rollup_delta":       newRollupFuncOneArg(rollupFake),
 	"rollup_increase":    newRollupFuncOneArg(rollupFake), // + rollupFuncsRemoveCounterResets
+	"rollup_candlestick": newRollupFuncOneArg(rollupFake),
+}
+
+var rollupFuncsMayAdjustWindow = map[string]bool{
+	"default_rollup":  true,
+	"first_over_time": true,
+	"last_over_time":  true,
+	"deriv":           true,
+	"deriv_fast":      true,
+	"irate":           true,
+	"rate":            true,
+	"lifetime":        true,
+	"scrape_interval": true,
 }

 var rollupFuncsRemoveCounterResets = map[string]bool{
@@ -64,6 +82,7 @@ var rollupFuncsKeepMetricGroup = map[string]bool{
 	"max_over_time":      true,
 	"quantile_over_time": true,
 	"rollup":             true,
+	"geomean_over_time":  true,
 }

 func getRollupArgIdx(funcName string) int {
@@ -120,6 +139,13 @@ type rollupConfig struct {
 	Step   int64
 	Window int64

+	// Whether window may be adjusted to 2 x interval between data points.
+	// This is needed for functions which have dt in the denominator
+	// such as rate, deriv, etc.
+	// Without the adjustment their value would jump in unexpected directions
+	// when using window smaller than 2 x scrape_interval.
+	MayAdjustWindow bool
+
 	Timestamps []int64
 }

@@ -162,7 +188,7 @@ func (rc *rollupConfig) Do(dstValues []float64, values []float64, timestamps []i
 	if window <= 0 {
 		window = rc.Step
 	}
-	if window < maxPrevInterval {
+	if rc.MayAdjustWindow && window < maxPrevInterval {
 		window = maxPrevInterval
 	}
 	rfa := getRollupFuncArg()
@@ -171,24 +197,21 @@ func (rc *rollupConfig) Do(dstValues []float64, values []float64, timestamps []i

 	i := 0
 	j := 0
-	for _, ts := range rc.Timestamps {
-		tEnd := ts + rc.Step
+	ni := 0
+	nj := 0
+	for _, tEnd := range rc.Timestamps {
 		tStart := tEnd - window
-		n := sort.Search(len(timestamps)-i, func(n int) bool {
-			return timestamps[i+n] > tStart
-		})
-		i += n
+		ni = seekFirstTimestampIdxAfter(timestamps[i:], tStart, ni)
+		i += ni
 		if j < i {
 			j = i
 		}
-		n = sort.Search(len(timestamps)-j, func(n int) bool {
-			return timestamps[j+n] > tEnd
-		})
-		j += n
+		nj = seekFirstTimestampIdxAfter(timestamps[j:], tEnd, nj)
+		j += nj

 		rfa.prevValue = nan
 		rfa.prevTimestamp = tStart - maxPrevInterval
-		if i > 0 && timestamps[i-1] > rfa.prevTimestamp {
+		if i < len(timestamps) && i > 0 && timestamps[i-1] > rfa.prevTimestamp {
 			rfa.prevValue = values[i-1]
 			rfa.prevTimestamp = timestamps[i-1]
 		}
@@ -204,16 +227,81 @@ func (rc *rollupConfig) Do(dstValues []float64, values []float64, timestamps []i
 	return dstValues
 }
 
+// seekFirstTimestampIdxAfter returns the index of the first item in timestamps,
+// which exceeds seekTimestamp. It returns len(timestamps) if there is no such item.
+//
+// timestamps must be sorted in ascending order, since binary search may be used.
+//
+// nHint is the expected result. The search is narrowed down to a few items
+// around nHint if the items at the borders of this window confirm
+// that the result is located inside it.
+func seekFirstTimestampIdxAfter(timestamps []int64, seekTimestamp int64, nHint int) int {
+	if len(timestamps) == 0 || timestamps[0] > seekTimestamp {
+		return 0
+	}
+	startIdx := nHint - 2
+	if startIdx < 0 {
+		startIdx = 0
+	}
+	if startIdx >= len(timestamps) {
+		startIdx = len(timestamps) - 1
+	}
+	endIdx := nHint + 2
+	if endIdx > len(timestamps) {
+		endIdx = len(timestamps)
+	}
+	if startIdx > 0 && timestamps[startIdx] <= seekTimestamp {
+		// The result is located after startIdx, so drop the head.
+		timestamps = timestamps[startIdx:]
+		endIdx -= startIdx
+	} else {
+		startIdx = 0
+	}
+	if endIdx < len(timestamps) && timestamps[endIdx] > seekTimestamp {
+		// The result doesn't exceed endIdx, so drop the tail.
+		timestamps = timestamps[:endIdx]
+	}
+	if len(timestamps) < 16 {
+		// Fast path: the number of timestamps to search is small, so scan them all.
+		for i, timestamp := range timestamps {
+			if timestamp > seekTimestamp {
+				return startIdx + i
+			}
+		}
+		return startIdx + len(timestamps)
+	}
+	// Slow path: too big len(timestamps), so use binary search.
+	i := sort.Search(len(timestamps), func(n int) bool {
+		return n >= 0 && n < len(timestamps) && timestamps[n] > seekTimestamp
+	})
+	return startIdx + i
+}
+
 func getMaxPrevInterval(timestamps []int64) int64 {
 	if len(timestamps) < 2 {
 		return int64(maxSilenceInterval)
 	}
 	d := (timestamps[len(timestamps)-1] - timestamps[0]) / int64(len(timestamps)-1)
 	if d <= 0 {
-		return 1
+		return int64(maxSilenceInterval)
 	}
-	// Slightly increase d in order to handle possible jitter in scrape interval.
-	return d + (d / 16)
+	// Increase d more for smaller scrape intervals in order to hide possible gaps
+	// when high jitter is present.
+	// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/139 .
+	switch {
+	case d <= 2*1000:
+		return d + 4*d
+	case d <= 4*1000:
+		return d + 2*d
+	case d <= 8*1000:
+		return d + d
+	case d <= 16*1000:
+		return d + d/2
+	case d <= 32*1000:
+		return d + d/4
+	default:
+		return d + d/8
+	}
 }
 
 func removeCounterResets(values []float64) {
@@ -246,12 +326,14 @@ func deltaValues(values []float64) {
 	if len(values) == 0 {
 		return
 	}
+	prevDelta := float64(0)
 	prevValue := values[0]
 	for i, v := range values[1:] {
-		values[i] = v - prevValue
+		prevDelta = v - prevValue
+		values[i] = prevDelta
 		prevValue = v
 	}
-	values[len(values)-1] = nan
+	values[len(values)-1] = prevDelta
 }

 func derivValues(values []float64, timestamps []int64) {
@@ -260,16 +342,23 @@ func derivValues(values []float64, timestamps []int64) {
 	if len(values) == 0 {
 		return
 	}
+	prevDeriv := float64(0)
 	prevValue := values[0]
 	prevTs := timestamps[0]
 	for i, v := range values[1:] {
 		ts := timestamps[i+1]
+		if ts == prevTs {
+			// Use the previous value for duplicate timestamps.
+			values[i] = prevDeriv
+			continue
+		}
 		dt := float64(ts-prevTs) * 1e-3
-		values[i] = (v - prevValue) / dt
+		prevDeriv = (v - prevValue) / dt
+		values[i] = prevDeriv
 		prevValue = v
 		prevTs = ts
 	}
-	values[len(values)-1] = nan
+	values[len(values)-1] = prevDeriv
 }

 type newRollupFunc func(args []interface{}) (rollupFunc, error)
@@ -296,11 +385,11 @@ func newRollupHoltWinters(args []interface{}) (rollupFunc, error) {
 		return nil, err
 	}
 	rf := func(rfa *rollupFuncArg) float64 {
-		// There is no need in handling NaNs here, since they must be cleanup up
+		// There is no need in handling NaNs here, since they must be cleaned up
 		// before calling rollup funcs.
 		values := rfa.values
 		if len(values) == 0 {
-			return nan
+			return rfa.prevValue
 		}
 		sf := sfs[rfa.idx]
 		if sf <= 0 || sf >= 1 {
@@ -342,41 +431,55 @@ func newRollupPredictLinear(args []interface{}) (rollupFunc, error) {
 		return nil, err
 	}
 	rf := func(rfa *rollupFuncArg) float64 {
-		// There is no need in handling NaNs here, since they must be cleanup up
-		// before calling rollup funcs.
-		values := rfa.values
-		timestamps := rfa.timestamps
-		if len(values) == 0 {
+		v, k := linearRegression(rfa)
+		if math.IsNaN(v) {
 			return nan
 		}
-
-		// See https://en.wikipedia.org/wiki/Simple_linear_regression#Numerical_example
-		// TODO: determine whether this shit really works.
-		tFirst := rfa.prevTimestamp
-		vSum := rfa.prevValue
-		if math.IsNaN(rfa.prevValue) {
-			tFirst = timestamps[0]
-			vSum = 0
-		}
-		tSum := float64(0)
-		tvSum := float64(0)
-		ttSum := float64(0)
-		for i, v := range values {
-			dt := float64(timestamps[i]-tFirst) * 1e-3
-			vSum += v
-			tSum += dt
-			tvSum += dt * v
-			ttSum += dt * dt
-		}
-		n := float64(len(values))
-		k := (n*tvSum - tSum*vSum) / (n*ttSum - tSum*tSum)
-		v := (vSum - k*tSum) / n
 		sec := secs[rfa.idx]
 		return v + k*sec
 	}
 	return rf, nil
 }

+func linearRegression(rfa *rollupFuncArg) (float64, float64) {
+	// Fits a least-squares line to the samples in rfa; returns the line value at the last sample and the slope k.
+	// There is no need in handling NaNs here, since they must be cleaned up before calling rollup funcs.
+	values := rfa.values
+	timestamps := rfa.timestamps
+	if len(values) == 0 {
+		return rfa.prevValue, 0
+	}
+
+	// See https://en.wikipedia.org/wiki/Simple_linear_regression#Numerical_example
+	tFirst := rfa.prevTimestamp
+	vSum := rfa.prevValue
+	tSum := float64(0)
+	tvSum := float64(0)
+	ttSum := float64(0)
+	n := 1.0 // a non-NaN prevValue counts as an extra sample located at dt=0
+	if math.IsNaN(rfa.prevValue) {
+		tFirst = timestamps[0]
+		vSum = 0
+		n = 0
+	}
+	for i, v := range values {
+		dt := float64(timestamps[i]-tFirst) * 1e-3 // presumably milliseconds -> seconds; TODO confirm timestamp units
+		vSum += v
+		tSum += dt
+		tvSum += dt * v
+		ttSum += dt * dt
+	}
+	n += float64(len(values))
+	if n == 1 {
+		return vSum, 0
+	}
+	k := (n*tvSum - tSum*vSum) / (n*ttSum - tSum*tSum)
+	v := (vSum - k*tSum) / n
+	// v is the line value at tFirst so far; adjust it to the last timestamp on the given time range.
+	v += k * (float64(timestamps[len(timestamps)-1]-tFirst) * 1e-3)
+	return v, k
+}
+
 func newRollupQuantile(args []interface{}) (rollupFunc, error) {
 	if err := expectRollupArgsNum(args, 2); err != nil {
 		return nil, err
@@ -386,11 +489,15 @@ func newRollupQuantile(args []interface{}) (rollupFunc, error) {
 		return nil, err
 	}
 	rf := func(rfa *rollupFuncArg) float64 {
-		// There is no need in handling NaNs here, since they must be cleanup up
+		// There is no need in handling NaNs here, since they must be cleaned up
 		// before calling rollup funcs.
 		values := rfa.values
 		if len(values) == 0 {
-			return nan
+			return rfa.prevValue
+		}
+		if len(values) == 1 {
+			// Fast path - only a single value.
+			return values[0]
 		}
 		hf := histogram.GetFast()
 		for _, v := range values {
@@ -408,11 +515,11 @@ func rollupAvg(rfa *rollupFuncArg) float64 {
 	// Do not use `Rapid calculation methods` at https://en.wikipedia.org/wiki/Standard_deviation,
 	// since it is slower and has no significant benefits in precision.

-	// There is no need in handling NaNs here, since they must be cleanup up
+	// There is no need in handling NaNs here, since they must be cleaned up
 	// before calling rollup funcs.
 	values := rfa.values
 	if len(values) == 0 {
-		return nan
+		return rfa.prevValue
 	}
 	var sum float64
 	for _, v := range values {
@@ -422,11 +529,11 @@ func rollupAvg(rfa *rollupFuncArg) float64 {
 }

 func rollupMin(rfa *rollupFuncArg) float64 {
-	// There is no need in handling NaNs here, since they must be cleanup up
+	// There is no need in handling NaNs here, since they must be cleaned up
 	// before calling rollup funcs.
 	values := rfa.values
 	if len(values) == 0 {
-		return nan
+		return rfa.prevValue
 	}
 	minValue := values[0]
 	for _, v := range values {
@@ -438,11 +545,11 @@ func rollupMin(rfa *rollupFuncArg) float64 {
 }

 func rollupMax(rfa *rollupFuncArg) float64 {
-	// There is no need in handling NaNs here, since they must be cleanup up
+	// There is no need in handling NaNs here, since they must be cleaned up
 	// before calling rollup funcs.
 	values := rfa.values
 	if len(values) == 0 {
-		return nan
+		return rfa.prevValue
 	}
 	maxValue := values[0]
 	for _, v := range values {
@@ -454,11 +561,11 @@ func rollupMax(rfa *rollupFuncArg) float64 {
 }

 func rollupSum(rfa *rollupFuncArg) float64 {
-	// There is no need in handling NaNs here, since they must be cleanup up
+	// There is no need in handling NaNs here, since they must be cleaned up
 	// before calling rollup funcs.
 	values := rfa.values
 	if len(values) == 0 {
-		return nan
+		return rfa.prevValue
 	}
 	var sum float64
 	for _, v := range values {
@@ -467,12 +574,43 @@ func rollupSum(rfa *rollupFuncArg) float64 {
 	return sum
 }

-func rollupCount(rfa *rollupFuncArg) float64 {
-	// There is no need in handling NaNs here, since they must be cleanup up
+func rollupSum2(rfa *rollupFuncArg) float64 {
+	// NaNs need no special handling here, since they must be cleaned up
 	// before calling rollup funcs.
 	values := rfa.values
 	if len(values) == 0 {
-		return nan
+		return rfa.prevValue * rfa.prevValue
+	}
+	sumOfSquares := 0.0
+	for i := range values {
+		sumOfSquares += values[i] * values[i]
+	}
+	return sumOfSquares
+}
+
+func rollupGeomean(rfa *rollupFuncArg) float64 {
+	// Geometric mean: the n-th root of the product of all n values.
+	// NaNs need no handling here; they must be cleaned up before rollup funcs.
+	n := len(rfa.values)
+	if n == 0 {
+		return rfa.prevValue
+	}
+	product := 1.0
+	for i := 0; i < n; i++ {
+		product *= rfa.values[i]
+	}
+	return math.Pow(product, 1/float64(n))
+}
+
+func rollupCount(rfa *rollupFuncArg) float64 {
+	// There is no need in handling NaNs here, since they must be cleaned up
+	// before calling rollup funcs.
+	values := rfa.values
+	if len(values) == 0 {
+		if math.IsNaN(rfa.prevValue) {
+			return nan
+		}
+		return 0
 	}
 	return float64(len(values))
 }
@@ -485,11 +623,18 @@ func rollupStddev(rfa *rollupFuncArg) float64 {
 func rollupStdvar(rfa *rollupFuncArg) float64 {
 	// See `Rapid calculation methods` at https://en.wikipedia.org/wiki/Standard_deviation

-	// There is no need in handling NaNs here, since they must be cleanup up
+	// There is no need in handling NaNs here, since they must be cleaned up
 	// before calling rollup funcs.
 	values := rfa.values
 	if len(values) == 0 {
-		return nan
+		if math.IsNaN(rfa.prevValue) {
+			return nan
+		}
+		return 0
+	}
+	if len(values) == 1 {
+		// Fast path - the variance of a single value is always zero.
+		return 0
 	}
 	var avg float64
 	var count float64
@@ -504,7 +649,7 @@ func rollupStdvar(rfa *rollupFuncArg) float64 {
 }

 func rollupDelta(rfa *rollupFuncArg) float64 {
-	// There is no need in handling NaNs here, since they must be cleanup up
+	// There is no need in handling NaNs here, since they must be cleaned up
 	// before calling rollup funcs.
 	values := rfa.values
 	prevValue := rfa.prevValue
@@ -512,43 +657,61 @@ func rollupDelta(rfa *rollupFuncArg) float64 {
 		if len(values) == 0 {
 			return nan
 		}
+		if len(values) == 1 {
+			// Assume that the previous non-existing value was 0.
+			return values[0]
+		}
 		prevValue = values[0]
 		values = values[1:]
 	}
 	if len(values) == 0 {
-		return nan
+		// Assume that the value didn't change on the given interval.
+		return 0
 	}
 	return values[len(values)-1] - prevValue
 }

 func rollupIdelta(rfa *rollupFuncArg) float64 {
-	// There is no need in handling NaNs here, since they must be cleanup up
+	// There is no need in handling NaNs here, since they must be cleaned up
 	// before calling rollup funcs.
 	values := rfa.values
 	if len(values) == 0 {
-		return nan
+		if math.IsNaN(rfa.prevValue) {
+			return nan
+		}
+		// Assume that the value didn't change on the given interval.
+		return 0
 	}
 	lastValue := values[len(values)-1]
 	values = values[:len(values)-1]
 	if len(values) == 0 {
 		prevValue := rfa.prevValue
 		if math.IsNaN(prevValue) {
-			return nan
+			// Assume that the previous non-existing value was 0.
+			return lastValue
 		}
 		return lastValue - prevValue
 	}
 	return lastValue - values[len(values)-1]
 }

-func rollupDeriv(rfa *rollupFuncArg) float64 {
-	// There is no need in handling NaNs here, since they must be cleanup up
+func rollupDerivSlow(rfa *rollupFuncArg) float64 {
+	// Compute the derivative via linear regression, like Prometheus does.
+	// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/73
+	_, deriv := linearRegression(rfa)
+	return deriv
+}
+
+func rollupDerivFast(rfa *rollupFuncArg) float64 {
+	// There is no need in handling NaNs here, since they must be cleaned up
 	// before calling rollup funcs.
 	values := rfa.values
 	timestamps := rfa.timestamps
 	prevValue := rfa.prevValue
 	prevTimestamp := rfa.prevTimestamp
 	if math.IsNaN(prevValue) {
-		if len(values) == 0 {
+		if len(values) < 2 {
+			// It is impossible to calculate derivative on 0 or 1 values.
 			return nan
 		}
 		prevValue = values[0]
@@ -557,7 +720,8 @@ func rollupDeriv(rfa *rollupFuncArg) float64 {
 		timestamps = timestamps[1:]
 	}
 	if len(values) == 0 {
-		return nan
+		// Assume that the value didn't change on the given interval.
+		return 0
 	}
 	vEnd := values[len(values)-1]
 	tEnd := timestamps[len(timestamps)-1]
@@ -567,43 +731,85 @@ func rollupDeriv(rfa *rollupFuncArg) float64 {
 }

 func rollupIderiv(rfa *rollupFuncArg) float64 {
-	// There is no need in handling NaNs here, since they must be cleanup up
+	// There is no need in handling NaNs here, since they must be cleaned up
 	// before calling rollup funcs.
 	values := rfa.values
 	timestamps := rfa.timestamps
-	if len(values) == 0 {
-		return nan
+	if len(values) < 2 {
+		if len(values) == 0 || math.IsNaN(rfa.prevValue) {
+			// It is impossible to calculate derivative on 0 or 1 values.
+			return nan
+		}
+		return (values[0] - rfa.prevValue) / (float64(timestamps[0]-rfa.prevTimestamp) * 1e-3)
 	}
 	vEnd := values[len(values)-1]
 	tEnd := timestamps[len(timestamps)-1]
 	values = values[:len(values)-1]
 	timestamps = timestamps[:len(timestamps)-1]
-	prevValue := rfa.prevValue
-	prevTimestamp := rfa.prevTimestamp
-	if len(values) == 0 {
-		if math.IsNaN(prevValue) {
+	// Skip data points with duplicate timestamps.
+	for len(timestamps) > 0 && timestamps[len(timestamps)-1] >= tEnd {
+		timestamps = timestamps[:len(timestamps)-1]
+	}
+	var tStart int64
+	var vStart float64
+	if len(timestamps) == 0 {
+		if math.IsNaN(rfa.prevValue) {
+			return 0
+		}
+		tStart = rfa.prevTimestamp
+		vStart = rfa.prevValue
+	} else {
+		tStart = timestamps[len(timestamps)-1]
+		vStart = values[len(timestamps)-1]
+	}
+	dv := vEnd - vStart
+	dt := tEnd - tStart
+	return dv / (float64(dt) * 1e-3)
+}
+
+func rollupLifetime(rfa *rollupFuncArg) float64 {
+	// Time span from the starting point (rfa.prevTimestamp, or the first timestamp when prevValue is NaN) to the last timestamp, scaled by 1e-3.
+	timestamps := rfa.timestamps
+	if math.IsNaN(rfa.prevValue) {
+		if len(timestamps) < 2 {
 			return nan
 		}
-	} else {
-		prevValue = values[len(values)-1]
-		prevTimestamp = timestamps[len(timestamps)-1]
+		return float64(timestamps[len(timestamps)-1]-timestamps[0]) * 1e-3
 	}
-	dv := vEnd - prevValue
-	dt := tEnd - prevTimestamp
-	return dv / (float64(dt) / 1000)
+	if len(timestamps) == 0 {
+		return nan
+	}
+	return float64(timestamps[len(timestamps)-1]-rfa.prevTimestamp) * 1e-3
+}
+
+func rollupScrapeInterval(rfa *rollupFuncArg) float64 {
+	// Average gap between adjacent points: (last - first) * 1e-3 / gaps.
+	ts, n := rfa.timestamps, len(rfa.timestamps)
+	havePrev := !math.IsNaN(rfa.prevValue)
+	if n == 0 || (n == 1 && !havePrev) {
+		return nan
+	}
+	first, gaps := ts[0], n-1
+	if havePrev {
+		// rfa.prevTimestamp serves as the starting point, adding one more gap.
+		first, gaps = rfa.prevTimestamp, n
+	}
+	return float64(ts[n-1]-first) * 1e-3 / float64(gaps)
 }

 func rollupChanges(rfa *rollupFuncArg) float64 {
-	// There is no need in handling NaNs here, since they must be cleanup up
+	// There is no need in handling NaNs here, since they must be cleaned up
 	// before calling rollup funcs.
 	values := rfa.values
-	if len(values) == 0 {
-		return nan
-	}
-	n := 0
 	prevValue := rfa.prevValue
+	n := 0
 	if math.IsNaN(prevValue) {
+		if len(values) == 0 {
+			return nan
+		}
 		prevValue = values[0]
+		values = values[1:]
+		n++
 	}
 	for _, v := range values {
 		if v != prevValue {
@@ -615,11 +821,14 @@ func rollupChanges(rfa *rollupFuncArg) float64 {
 }

 func rollupResets(rfa *rollupFuncArg) float64 {
-	// There is no need in handling NaNs here, since they must be cleanup up
+	// There is no need in handling NaNs here, since they must be cleaned up
 	// before calling rollup funcs.
 	values := rfa.values
 	if len(values) == 0 {
-		return nan
+		if math.IsNaN(rfa.prevValue) {
+			return nan
+		}
+		return 0
 	}
 	prevValue := rfa.prevValue
 	if math.IsNaN(prevValue) {
@@ -627,7 +836,7 @@ func rollupResets(rfa *rollupFuncArg) float64 {
 		values = values[1:]
 	}
 	if len(values) == 0 {
-		return nan
+		return 0
 	}
 	n := 0
 	for _, v := range values {
@@ -646,7 +855,7 @@ func rollupFirst(rfa *rollupFuncArg) float64 {
 		return v
 	}

-	// There is no need in handling NaNs here, since they must be cleanup up
+	// There is no need in handling NaNs here, since they must be cleaned up
 	// before calling rollup funcs.
 	values := rfa.values
 	if len(values) == 0 {
@@ -655,24 +864,27 @@ func rollupFirst(rfa *rollupFuncArg) float64 {
 	return values[0]
 }

-var rollupDefault = rollupFirst
+var rollupDefault = rollupLast

 func rollupLast(rfa *rollupFuncArg) float64 {
-	// There is no need in handling NaNs here, since they must be cleanup up
+	// There is no need in handling NaNs here, since they must be cleaned up
 	// before calling rollup funcs.
 	values := rfa.values
 	if len(values) == 0 {
-		return nan
+		return rfa.prevValue
 	}
 	return values[len(values)-1]
 }

 func rollupDistinct(rfa *rollupFuncArg) float64 {
-	// There is no need in handling NaNs here, since they must be cleanup up
+	// There is no need in handling NaNs here, since they must be cleaned up
 	// before calling rollup funcs.
 	values := rfa.values
 	if len(values) == 0 {
-		return nan
+		if math.IsNaN(rfa.prevValue) {
+			return nan
+		}
+		return 0
 	}
 	m := make(map[float64]struct{})
 	for _, v := range values {
@@ -684,12 +896,15 @@ func rollupDistinct(rfa *rollupFuncArg) float64 {
 func rollupIntegrate(rfa *rollupFuncArg) float64 {
 	prevTimestamp := rfa.prevTimestamp

-	// There is no need in handling NaNs here, since they must be cleanup up
+	// There is no need in handling NaNs here, since they must be cleaned up
 	// before calling rollup funcs.
 	values := rfa.values
 	timestamps := rfa.timestamps
 	if len(values) == 0 {
-		return nan
+		if math.IsNaN(rfa.prevValue) {
+			return nan
+		}
+		return 0
 	}
 	prevValue := rfa.prevValue
 	if math.IsNaN(prevValue) {
@@ -699,7 +914,7 @@ func rollupIntegrate(rfa *rollupFuncArg) float64 {
 		timestamps = timestamps[1:]
 	}
 	if len(values) == 0 {
-		return nan
+		return 0
 	}

 	var sum float64
--- a/app/vmselect/promql/rollup_result_cache.go
+++ b/app/vmselect/promql/rollup_result_cache.go
@@ -2,21 +2,26 @@ package promql

 import (
 	"crypto/rand"
+	"flag"
 	"fmt"
-	"runtime"
 	"sync"
 	"sync/atomic"
 	"time"

+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/memory"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/workingsetcache"
 	"github.com/VictoriaMetrics/fastcache"
 	"github.com/VictoriaMetrics/metrics"
 )

+var disableCache = flag.Bool("search.disableCache", false, "Whether to disable response caching. This may be useful during data backfilling")
+
 var rollupResultCacheV = &rollupResultCache{
-	fastcache.New(1024 * 1024), // This is a cache for testing.
+	c: workingsetcache.New(1024*1024, time.Hour), // This is a cache for testing.
 }
 var rollupResultCachePath string

@@ -37,16 +42,23 @@ var (
 )

 // InitRollupResultCache initializes the rollupResult cache
+//
+// If cachePath is empty, the cache is neither loaded from nor saved to disk.
 func InitRollupResultCache(cachePath string) {
 	rollupResultCachePath = cachePath
 	startTime := time.Now()
-	var c *fastcache.Cache
+	cacheSize := getRollupResultCacheSize()
+	var c *workingsetcache.Cache
 	if len(rollupResultCachePath) > 0 {
 		logger.Infof("loading rollupResult cache from %q...", rollupResultCachePath)
-		c = fastcache.LoadFromFileOrNew(rollupResultCachePath, getRollupResultCacheSize())
+		c = workingsetcache.Load(rollupResultCachePath, cacheSize, time.Hour)
 	} else {
-		c = fastcache.New(getRollupResultCacheSize())
+		c = workingsetcache.New(cacheSize, time.Hour)
 	}
+	if *disableCache {
+		c.Reset()
+	}
+
 	stats := &fastcache.Stats{}
 	var statsLock sync.Mutex
 	var statsLastUpdate time.Time
@@ -64,7 +76,7 @@ func InitRollupResultCache(cachePath string) {
 		return stats
 	}
 	if len(rollupResultCachePath) > 0 {
-		logger.Infof("loaded rollupResult cache from %q in %s; entriesCount: %d, bytesSize: %d",
+		logger.Infof("loaded rollupResult cache from %q in %s; entriesCount: %d, sizeBytes: %d",
 			rollupResultCachePath, time.Since(startTime), fcs().EntriesCount, fcs().BytesSize)
 	}

@@ -89,25 +101,28 @@ func InitRollupResultCache(cachePath string) {
 // StopRollupResultCache closes the rollupResult cache.
 func StopRollupResultCache() {
 	if len(rollupResultCachePath) == 0 {
-		rollupResultCacheV.c.Reset()
+		rollupResultCacheV.c.Stop()
+		rollupResultCacheV.c = nil
 		return
 	}
-	gomaxprocs := runtime.GOMAXPROCS(-1)
 	logger.Infof("saving rollupResult cache to %q...", rollupResultCachePath)
 	startTime := time.Now()
-	if err := rollupResultCacheV.c.SaveToFileConcurrent(rollupResultCachePath, gomaxprocs); err != nil {
+	if err := rollupResultCacheV.c.Save(rollupResultCachePath); err != nil {
 		logger.Errorf("cannot close rollupResult cache at %q: %s", rollupResultCachePath, err)
-	} else {
-		var fcs fastcache.Stats
-		rollupResultCacheV.c.UpdateStats(&fcs)
-		rollupResultCacheV.c.Reset()
-		logger.Infof("saved rollupResult cache to %q in %s; entriesCount: %d, bytesSize: %d",
-			rollupResultCachePath, time.Since(startTime), fcs.EntriesCount, fcs.BytesSize)
+		return
 	}
+	var fcs fastcache.Stats
+	rollupResultCacheV.c.UpdateStats(&fcs)
+	rollupResultCacheV.c.Stop()
+	rollupResultCacheV.c = nil
+	logger.Infof("saved rollupResult cache to %q in %s; entriesCount: %d, sizeBytes: %d",
+		rollupResultCachePath, time.Since(startTime), fcs.EntriesCount, fcs.BytesSize)
 }

+// TODO: convert this cache to distributed cache shared among vmselect
+// instances in the cluster.
 type rollupResultCache struct {
-	c *fastcache.Cache
+	c *workingsetcache.Cache
 }

 var rollupResultCacheResets = metrics.NewCounter(`vm_cache_resets_total{type="promql/rollupResult"}`)
@@ -118,8 +133,8 @@ func ResetRollupResultCache() {
 	rollupResultCacheV.c.Reset()
 }

-func (rrc *rollupResultCache) Get(funcName string, ec *EvalConfig, me *metricExpr, window int64) (tss []*timeseries, newStart int64) {
-	if !ec.mayCache() {
+func (rrc *rollupResultCache) Get(funcName string, ec *EvalConfig, me *metricExpr, iafc *incrementalAggrFuncContext, window int64) (tss []*timeseries, newStart int64) {
+	if *disableCache || !ec.mayCache() {
 		return nil, ec.Start
 	}

@@ -127,7 +142,7 @@ func (rrc *rollupResultCache) Get(funcName string, ec *EvalConfig, me *metricExp
 	bb := bbPool.Get()
 	defer bbPool.Put(bb)

-	bb.B = marshalRollupResultCacheKey(bb.B[:0], funcName, me, window, ec.Step)
+	bb.B = marshalRollupResultCacheKey(bb.B[:0], funcName, ec.AuthToken, me, iafc, window, ec.Step)
 	metainfoBuf := rrc.c.Get(nil, bb.B)
 	if len(metainfoBuf) == 0 {
 		return nil, ec.Start
@@ -141,15 +156,23 @@ func (rrc *rollupResultCache) Get(funcName string, ec *EvalConfig, me *metricExp
 		return nil, ec.Start
 	}
 	bb.B = key.Marshal(bb.B[:0])
-	resultBuf := rrc.c.GetBig(nil, bb.B)
-	if len(resultBuf) == 0 {
+	compressedResultBuf := resultBufPool.Get()
+	defer resultBufPool.Put(compressedResultBuf)
+	compressedResultBuf.B = rrc.c.GetBig(compressedResultBuf.B[:0], bb.B)
+	if len(compressedResultBuf.B) == 0 {
 		mi.RemoveKey(key)
 		metainfoBuf = mi.Marshal(metainfoBuf[:0])
-		bb.B = marshalRollupResultCacheKey(bb.B[:0], funcName, me, window, ec.Step)
+		bb.B = marshalRollupResultCacheKey(bb.B[:0], funcName, ec.AuthToken, me, iafc, window, ec.Step)
 		rrc.c.Set(bb.B, metainfoBuf)
 		return nil, ec.Start
 	}
-	tss, err := unmarshalTimeseriesFast(resultBuf)
+	// Decompress into newly allocated byte slice, since tss returned from unmarshalTimeseriesFast
+	// refers to the byte slice, so it cannot be returned to the resultBufPool.
+	resultBuf, err := encoding.DecompressZSTD(nil, compressedResultBuf.B)
+	if err != nil {
+		logger.Panicf("BUG: cannot decompress resultBuf from rollupResultCache: %s; it looks like it was improperly saved", err)
+	}
+	tss, err = unmarshalTimeseriesFast(resultBuf)
 	if err != nil {
 		logger.Panicf("BUG: cannot unmarshal timeseries from rollupResultCache: %s; it looks like it was improperly saved", err)
 	}
@@ -189,8 +212,10 @@ func (rrc *rollupResultCache) Get(funcName string, ec *EvalConfig, me *metricExp
 	return tss, newStart
 }

-func (rrc *rollupResultCache) Put(funcName string, ec *EvalConfig, me *metricExpr, window int64, tss []*timeseries) {
-	if len(tss) == 0 || !ec.mayCache() {
+var resultBufPool bytesutil.ByteBufferPool
+
+func (rrc *rollupResultCache) Put(funcName string, ec *EvalConfig, me *metricExpr, iafc *incrementalAggrFuncContext, window int64, tss []*timeseries) {
+	if *disableCache || len(tss) == 0 || !ec.mayCache() {
 		return
 	}

@@ -220,11 +245,16 @@ func (rrc *rollupResultCache) Put(funcName string, ec *EvalConfig, me *metricExp

 	// Store tss in the cache.
 	maxMarshaledSize := getRollupResultCacheSize() / 4
-	tssMarshaled := marshalTimeseriesFast(tss, maxMarshaledSize, ec.Step)
-	if tssMarshaled == nil {
+	resultBuf := resultBufPool.Get()
+	defer resultBufPool.Put(resultBuf)
+	resultBuf.B = marshalTimeseriesFast(resultBuf.B[:0], tss, maxMarshaledSize, ec.Step)
+	if len(resultBuf.B) == 0 {
 		tooBigRollupResults.Inc()
 		return
 	}
+	compressedResultBuf := resultBufPool.Get()
+	defer resultBufPool.Put(compressedResultBuf)
+	compressedResultBuf.B = encoding.CompressZSTDLevel(compressedResultBuf.B[:0], resultBuf.B, 1)

 	bb := bbPool.Get()
 	defer bbPool.Put(bb)
@@ -233,9 +263,9 @@ func (rrc *rollupResultCache) Put(funcName string, ec *EvalConfig, me *metricExp
 	key.prefix = rollupResultCacheKeyPrefix
 	key.suffix = atomic.AddUint64(&rollupResultCacheKeySuffix, 1)
 	bb.B = key.Marshal(bb.B[:0])
-	rrc.c.SetBig(bb.B, tssMarshaled)
+	rrc.c.SetBig(bb.B, compressedResultBuf.B)

-	bb.B = marshalRollupResultCacheKey(bb.B[:0], funcName, me, window, ec.Step)
+	bb.B = marshalRollupResultCacheKey(bb.B[:0], funcName, ec.AuthToken, me, iafc, window, ec.Step)
 	metainfoBuf := rrc.c.Get(nil, bb.B)
 	var mi rollupResultCacheMetainfo
 	if len(metainfoBuf) > 0 {
@@ -263,10 +293,18 @@ var (
 var tooBigRollupResults = metrics.NewCounter("vm_too_big_rollup_results_total")

 // Increment this value every time the format of the cache changes.
-const rollupResultCacheVersion = 4
+const rollupResultCacheVersion = 6

-func marshalRollupResultCacheKey(dst []byte, funcName string, me *metricExpr, window, step int64) []byte {
+func marshalRollupResultCacheKey(dst []byte, funcName string, at *auth.Token, me *metricExpr, iafc *incrementalAggrFuncContext, window, step int64) []byte {
 	dst = append(dst, rollupResultCacheVersion)
+	if iafc == nil {
+		dst = append(dst, 0)
+	} else {
+		dst = append(dst, 1)
+		dst = iafc.ae.AppendString(dst)
+	}
+	dst = encoding.MarshalUint32(dst, at.AccountID)
+	dst = encoding.MarshalUint32(dst, at.ProjectID)
 	dst = encoding.MarshalUint64(dst, uint64(len(funcName)))
 	dst = append(dst, funcName...)
 	dst = encoding.MarshalInt64(dst, window)
--- a/app/vmselect/promql/rollup_result_cache_test.go
+++ b/app/vmselect/promql/rollup_result_cache_test.go
@@ -3,6 +3,7 @@ package promql
 import (
 	"testing"

+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
 )

@@ -15,6 +16,11 @@ func TestRollupResultCache(t *testing.T) {
 		End:   2000,
 		Step:  200,

+		AuthToken: &auth.Token{
+			AccountID: 333,
+			ProjectID: 843,
+		},
+
 		MayCache: true,
 	}
 	me := &metricExpr{
@@ -23,10 +29,15 @@ func TestRollupResultCache(t *testing.T) {
 			Value: []byte("xxx"),
 		}},
 	}
+	iafc := &incrementalAggrFuncContext{
+		ae: &aggrFuncExpr{
+			Name: "foobar",
+		},
+	}

 	// Try obtaining an empty value.
 	t.Run("empty", func(t *testing.T) {
-		tss, newStart := rollupResultCacheV.Get(funcName, ec, me, window)
+		tss, newStart := rollupResultCacheV.Get(funcName, ec, me, nil, window)
 		if newStart != ec.Start {
 			t.Fatalf("unexpected newStart; got %d; want %d", newStart, ec.Start)
 		}
@@ -36,21 +47,42 @@ func TestRollupResultCache(t *testing.T) {
 	})

 	// Store timeseries overlapping with start
-	t.Run("start-overlap", func(t *testing.T) {
+	t.Run("start-overlap-no-iafc", func(t *testing.T) {
 		ResetRollupResultCache()
 		tss := []*timeseries{
-			&timeseries{
+			{
 				Timestamps: []int64{800, 1000, 1200},
 				Values:     []float64{0, 1, 2},
 			},
 		}
-		rollupResultCacheV.Put(funcName, ec, me, window, tss)
-		tss, newStart := rollupResultCacheV.Get(funcName, ec, me, window)
+		rollupResultCacheV.Put(funcName, ec, me, nil, window, tss)
+		tss, newStart := rollupResultCacheV.Get(funcName, ec, me, nil, window)
 		if newStart != 1400 {
 			t.Fatalf("unexpected newStart; got %d; want %d", newStart, 1400)
 		}
 		tssExpected := []*timeseries{
-			&timeseries{
+			{
+				Timestamps: []int64{1000, 1200},
+				Values:     []float64{1, 2},
+			},
+		}
+		testTimeseriesEqual(t, tss, tssExpected)
+	})
+	t.Run("start-overlap-with-iafc", func(t *testing.T) {
+		ResetRollupResultCache()
+		tss := []*timeseries{
+			{
+				Timestamps: []int64{800, 1000, 1200},
+				Values:     []float64{0, 1, 2},
+			},
+		}
+		rollupResultCacheV.Put(funcName, ec, me, iafc, window, tss)
+		tss, newStart := rollupResultCacheV.Get(funcName, ec, me, iafc, window)
+		if newStart != 1400 {
+			t.Fatalf("unexpected newStart; got %d; want %d", newStart, 1400)
+		}
+		tssExpected := []*timeseries{
+			{
 				Timestamps: []int64{1000, 1200},
 				Values:     []float64{1, 2},
 			},
@@ -62,13 +94,13 @@ func TestRollupResultCache(t *testing.T) {
 	t.Run("end-overlap", func(t *testing.T) {
 		ResetRollupResultCache()
 		tss := []*timeseries{
-			&timeseries{
+			{
 				Timestamps: []int64{1800, 2000, 2200, 2400},
 				Values:     []float64{333, 0, 1, 2},
 			},
 		}
-		rollupResultCacheV.Put(funcName, ec, me, window, tss)
-		tss, newStart := rollupResultCacheV.Get(funcName, ec, me, window)
+		rollupResultCacheV.Put(funcName, ec, me, nil, window, tss)
+		tss, newStart := rollupResultCacheV.Get(funcName, ec, me, nil, window)
 		if newStart != 1000 {
 			t.Fatalf("unexpected newStart; got %d; want %d", newStart, 1000)
 		}
@@ -81,13 +113,13 @@ func TestRollupResultCache(t *testing.T) {
 	t.Run("full-cover", func(t *testing.T) {
 		ResetRollupResultCache()
 		tss := []*timeseries{
-			&timeseries{
+			{
 				Timestamps: []int64{1200, 1400, 1600},
 				Values:     []float64{0, 1, 2},
 			},
 		}
-		rollupResultCacheV.Put(funcName, ec, me, window, tss)
-		tss, newStart := rollupResultCacheV.Get(funcName, ec, me, window)
+		rollupResultCacheV.Put(funcName, ec, me, nil, window, tss)
+		tss, newStart := rollupResultCacheV.Get(funcName, ec, me, nil, window)
 		if newStart != 1000 {
 			t.Fatalf("unexpected newStart; got %d; want %d", newStart, 1000)
 		}
@@ -100,13 +132,13 @@ func TestRollupResultCache(t *testing.T) {
 	t.Run("before-start", func(t *testing.T) {
 		ResetRollupResultCache()
 		tss := []*timeseries{
-			&timeseries{
+			{
 				Timestamps: []int64{200, 400, 600},
 				Values:     []float64{0, 1, 2},
 			},
 		}
-		rollupResultCacheV.Put(funcName, ec, me, window, tss)
-		tss, newStart := rollupResultCacheV.Get(funcName, ec, me, window)
+		rollupResultCacheV.Put(funcName, ec, me, nil, window, tss)
+		tss, newStart := rollupResultCacheV.Get(funcName, ec, me, nil, window)
 		if newStart != 1000 {
 			t.Fatalf("unexpected newStart; got %d; want %d", newStart, 1000)
 		}
@@ -119,13 +151,13 @@ func TestRollupResultCache(t *testing.T) {
 	t.Run("after-end", func(t *testing.T) {
 		ResetRollupResultCache()
 		tss := []*timeseries{
-			&timeseries{
+			{
 				Timestamps: []int64{2200, 2400, 2600},
 				Values:     []float64{0, 1, 2},
 			},
 		}
-		rollupResultCacheV.Put(funcName, ec, me, window, tss)
-		tss, newStart := rollupResultCacheV.Get(funcName, ec, me, window)
+		rollupResultCacheV.Put(funcName, ec, me, nil, window, tss)
+		tss, newStart := rollupResultCacheV.Get(funcName, ec, me, nil, window)
 		if newStart != 1000 {
 			t.Fatalf("unexpected newStart; got %d; want %d", newStart, 1000)
 		}
@@ -138,18 +170,18 @@ func TestRollupResultCache(t *testing.T) {
 	t.Run("bigger-than-start-end", func(t *testing.T) {
 		ResetRollupResultCache()
 		tss := []*timeseries{
-			&timeseries{
+			{
 				Timestamps: []int64{800, 1000, 1200, 1400, 1600, 1800, 2000, 2200},
 				Values:     []float64{0, 1, 2, 3, 4, 5, 6, 7},
 			},
 		}
-		rollupResultCacheV.Put(funcName, ec, me, window, tss)
-		tss, newStart := rollupResultCacheV.Get(funcName, ec, me, window)
+		rollupResultCacheV.Put(funcName, ec, me, nil, window, tss)
+		tss, newStart := rollupResultCacheV.Get(funcName, ec, me, nil, window)
 		if newStart != 2200 {
 			t.Fatalf("unexpected newStart; got %d; want %d", newStart, 2200)
 		}
 		tssExpected := []*timeseries{
-			&timeseries{
+			{
 				Timestamps: []int64{1000, 1200, 1400, 1600, 1800, 2000},
 				Values:     []float64{1, 2, 3, 4, 5, 6},
 			},
@@ -161,18 +193,18 @@ func TestRollupResultCache(t *testing.T) {
 	t.Run("start-end-match", func(t *testing.T) {
 		ResetRollupResultCache()
 		tss := []*timeseries{
-			&timeseries{
+			{
 				Timestamps: []int64{1000, 1200, 1400, 1600, 1800, 2000},
 				Values:     []float64{1, 2, 3, 4, 5, 6},
 			},
 		}
-		rollupResultCacheV.Put(funcName, ec, me, window, tss)
-		tss, newStart := rollupResultCacheV.Get(funcName, ec, me, window)
+		rollupResultCacheV.Put(funcName, ec, me, nil, window, tss)
+		tss, newStart := rollupResultCacheV.Get(funcName, ec, me, nil, window)
 		if newStart != 2200 {
 			t.Fatalf("unexpected newStart; got %d; want %d", newStart, 2200)
 		}
 		tssExpected := []*timeseries{
-			&timeseries{
+			{
 				Timestamps: []int64{1000, 1200, 1400, 1600, 1800, 2000},
 				Values:     []float64{1, 2, 3, 4, 5, 6},
 			},
@@ -191,8 +223,8 @@ func TestRollupResultCache(t *testing.T) {
 			}
 			tss = append(tss, ts)
 		}
-		rollupResultCacheV.Put(funcName, ec, me, window, tss)
-		tssResult, newStart := rollupResultCacheV.Get(funcName, ec, me, window)
+		rollupResultCacheV.Put(funcName, ec, me, nil, window, tss)
+		tssResult, newStart := rollupResultCacheV.Get(funcName, ec, me, nil, window)
 		if newStart != 2200 {
 			t.Fatalf("unexpected newStart; got %d; want %d", newStart, 2200)
 		}
@@ -203,32 +235,32 @@ func TestRollupResultCache(t *testing.T) {
 	t.Run("multi-timeseries", func(t *testing.T) {
 		ResetRollupResultCache()
 		tss1 := []*timeseries{
-			&timeseries{
+			{
 				Timestamps: []int64{800, 1000, 1200},
 				Values:     []float64{0, 1, 2},
 			},
 		}
 		tss2 := []*timeseries{
-			&timeseries{
+			{
 				Timestamps: []int64{1800, 2000, 2200, 2400},
 				Values:     []float64{333, 0, 1, 2},
 			},
 		}
 		tss3 := []*timeseries{
-			&timeseries{
+			{
 				Timestamps: []int64{1200, 1400, 1600},
 				Values:     []float64{0, 1, 2},
 			},
 		}
-		rollupResultCacheV.Put(funcName, ec, me, window, tss1)
-		rollupResultCacheV.Put(funcName, ec, me, window, tss2)
-		rollupResultCacheV.Put(funcName, ec, me, window, tss3)
-		tss, newStart := rollupResultCacheV.Get(funcName, ec, me, window)
+		rollupResultCacheV.Put(funcName, ec, me, nil, window, tss1)
+		rollupResultCacheV.Put(funcName, ec, me, nil, window, tss2)
+		rollupResultCacheV.Put(funcName, ec, me, nil, window, tss3)
+		tss, newStart := rollupResultCacheV.Get(funcName, ec, me, nil, window)
 		if newStart != 1400 {
 			t.Fatalf("unexpected newStart; got %d; want %d", newStart, 1400)
 		}
 		tssExpected := []*timeseries{
-			&timeseries{
+			{
 				Timestamps: []int64{1000, 1200},
 				Values:     []float64{1, 2},
 			},
@@ -249,14 +281,14 @@ func TestMergeTimeseries(t *testing.T) {
 	t.Run("bStart=ec.Start", func(t *testing.T) {
 		a := []*timeseries{}
 		b := []*timeseries{
-			&timeseries{
+			{
 				Timestamps: []int64{1000, 1200, 1400, 1600, 1800, 2000},
 				Values:     []float64{1, 2, 3, 4, 5, 6},
 			},
 		}
 		tss := mergeTimeseries(a, b, 1000, ec)
 		tssExpected := []*timeseries{
-			&timeseries{
+			{
 				Timestamps: []int64{1000, 1200, 1400, 1600, 1800, 2000},
 				Values:     []float64{1, 2, 3, 4, 5, 6},
 			},
@@ -266,14 +298,14 @@ func TestMergeTimeseries(t *testing.T) {
 	t.Run("a-empty", func(t *testing.T) {
 		a := []*timeseries{}
 		b := []*timeseries{
-			&timeseries{
+			{
 				Timestamps: []int64{1400, 1600, 1800, 2000},
 				Values:     []float64{3, 4, 5, 6},
 			},
 		}
 		tss := mergeTimeseries(a, b, bStart, ec)
 		tssExpected := []*timeseries{
-			&timeseries{
+			{
 				Timestamps: []int64{1000, 1200, 1400, 1600, 1800, 2000},
 				Values:     []float64{nan, nan, 3, 4, 5, 6},
 			},
@@ -282,7 +314,7 @@ func TestMergeTimeseries(t *testing.T) {
 	})
 	t.Run("b-empty", func(t *testing.T) {
 		a := []*timeseries{
-			&timeseries{
+			{
 				Timestamps: []int64{1000, 1200},
 				Values:     []float64{2, 1},
 			},
@@ -290,7 +322,7 @@ func TestMergeTimeseries(t *testing.T) {
 		b := []*timeseries{}
 		tss := mergeTimeseries(a, b, bStart, ec)
 		tssExpected := []*timeseries{
-			&timeseries{
+			{
 				Timestamps: []int64{1000, 1200, 1400, 1600, 1800, 2000},
 				Values:     []float64{2, 1, nan, nan, nan, nan},
 			},
@@ -299,20 +331,20 @@ func TestMergeTimeseries(t *testing.T) {
 	})
 	t.Run("non-empty", func(t *testing.T) {
 		a := []*timeseries{
-			&timeseries{
+			{
 				Timestamps: []int64{1000, 1200},
 				Values:     []float64{2, 1},
 			},
 		}
 		b := []*timeseries{
-			&timeseries{
+			{
 				Timestamps: []int64{1400, 1600, 1800, 2000},
 				Values:     []float64{3, 4, 5, 6},
 			},
 		}
 		tss := mergeTimeseries(a, b, bStart, ec)
 		tssExpected := []*timeseries{
-			&timeseries{
+			{
 				Timestamps: []int64{1000, 1200, 1400, 1600, 1800, 2000},
 				Values:     []float64{2, 1, 3, 4, 5, 6},
 			},
@@ -321,14 +353,14 @@ func TestMergeTimeseries(t *testing.T) {
 	})
 	t.Run("non-empty-distinct-metric-names", func(t *testing.T) {
 		a := []*timeseries{
-			&timeseries{
+			{
 				Timestamps: []int64{1000, 1200},
 				Values:     []float64{2, 1},
 			},
 		}
 		a[0].MetricName.MetricGroup = []byte("bar")
 		b := []*timeseries{
-			&timeseries{
+			{
 				Timestamps: []int64{1400, 1600, 1800, 2000},
 				Values:     []float64{3, 4, 5, 6},
 			},
@@ -336,14 +368,14 @@ func TestMergeTimeseries(t *testing.T) {
 		b[0].MetricName.MetricGroup = []byte("foo")
 		tss := mergeTimeseries(a, b, bStart, ec)
 		tssExpected := []*timeseries{
-			&timeseries{
+			{
 				MetricName: storage.MetricName{
 					MetricGroup: []byte("foo"),
 				},
 				Timestamps: []int64{1000, 1200, 1400, 1600, 1800, 2000},
 				Values:     []float64{nan, nan, 3, 4, 5, 6},
 			},
-			&timeseries{
+			{
 				MetricName: storage.MetricName{
 					MetricGroup: []byte("bar"),
 				},
--- a/app/vmselect/promql/rollup_test.go
+++ b/app/vmselect/promql/rollup_test.go
@@ -10,6 +10,79 @@ var (
 	testTimestamps = []int64{5, 15, 24, 36, 49, 60, 78, 80, 97, 115, 120, 130}
 )

+func TestRollupIderivDuplicateTimestamps(t *testing.T) { // checks rollupIderiv on windows with repeated timestamps, no samples, or a single sample
+	rfa := &rollupFuncArg{ // the first two and the last two samples share a timestamp
+		values:     []float64{1, 2, 3, 4, 5},
+		timestamps: []int64{100, 100, 200, 300, 300},
+	}
+	n := rollupIderiv(rfa)
+	if n != 20 {
+		t.Fatalf("unexpected value; got %v; want %v", n, 20)
+	}
+
+	rfa = &rollupFuncArg{ // only two distinct timestamps in the whole window
+		values:     []float64{1, 2, 3, 4, 5},
+		timestamps: []int64{100, 100, 300, 300, 300},
+	}
+	n = rollupIderiv(rfa)
+	if n != 15 {
+		t.Fatalf("unexpected value; got %v; want %v", n, 15)
+	}
+
+	rfa = &rollupFuncArg{ // no samples and a NaN prevValue
+		prevValue:  nan,
+		values:     []float64{},
+		timestamps: []int64{},
+	}
+	n = rollupIderiv(rfa)
+	if !math.IsNaN(n) {
+		t.Fatalf("unexpected value; got %v; want %v", n, nan)
+	}
+
+	rfa = &rollupFuncArg{ // a single sample and a NaN prevValue
+		prevValue:  nan,
+		values:     []float64{15},
+		timestamps: []int64{100},
+	}
+	n = rollupIderiv(rfa)
+	if !math.IsNaN(n) {
+		t.Fatalf("unexpected value; got %v; want %v", n, nan)
+	}
+
+	rfa = &rollupFuncArg{ // a single sample preceded by a sample 10 timestamp units earlier
+		prevTimestamp: 90,
+		prevValue:     10,
+		values:        []float64{15},
+		timestamps:    []int64{100},
+	}
+	n = rollupIderiv(rfa)
+	if n != 500 {
+		t.Fatalf("unexpected value; got %v; want %v", n, 500) // the reported expectation must match the checked value
+	}
+
+	rfa = &rollupFuncArg{ // a single sample with the same timestamp as the previous sample
+		prevTimestamp: 100,
+		prevValue:     10,
+		values:        []float64{15},
+		timestamps:    []int64{100},
+	}
+	n = rollupIderiv(rfa)
+	if n != inf {
+		t.Fatalf("unexpected value; got %v; want %v", n, inf)
+	}
+
+	rfa = &rollupFuncArg{ // two samples, both with the same timestamp as the previous sample
+		prevTimestamp: 100,
+		prevValue:     10,
+		values:        []float64{15, 20},
+		timestamps:    []int64{100, 100},
+	}
+	n = rollupIderiv(rfa)
+	if n != inf {
+		t.Fatalf("unexpected value; got %v; want %v", n, inf)
+	}
+}
+
 func TestRemoveCounterResets(t *testing.T) {
 	removeCounterResets(nil)

@@ -38,19 +111,19 @@ func TestDeltaValues(t *testing.T) {

 	values := []float64{123}
 	deltaValues(values)
-	valuesExpected := []float64{nan}
+	valuesExpected := []float64{0}
 	testRowsEqual(t, values, testTimestamps[:1], valuesExpected, testTimestamps[:1])

 	values = append([]float64{}, testValues...)
 	deltaValues(values)
-	valuesExpected = []float64{-89, 10, -23, 33, -20, 65, -87, 32, -12, 2, 0, nan}
+	valuesExpected = []float64{-89, 10, -23, 33, -20, 65, -87, 32, -12, 2, 0, 0}
 	testRowsEqual(t, values, testTimestamps, valuesExpected, testTimestamps)

 	// remove counter resets
 	values = append([]float64{}, testValues...)
 	removeCounterResets(values)
 	deltaValues(values)
-	valuesExpected = []float64{34, 10, 21, 33, 34, 65, 12, 32, 32, 2, 0, nan}
+	valuesExpected = []float64{34, 10, 21, 33, 34, 65, 12, 32, 32, 2, 0, 0}
 	testRowsEqual(t, values, testTimestamps, valuesExpected, testTimestamps)
 }

@@ -59,13 +132,13 @@ func TestDerivValues(t *testing.T) {

 	values := []float64{123}
 	derivValues(values, testTimestamps[:1])
-	valuesExpected := []float64{nan}
+	valuesExpected := []float64{0}
 	testRowsEqual(t, values, testTimestamps[:1], valuesExpected, testTimestamps[:1])

 	values = append([]float64{}, testValues...)
 	derivValues(values, testTimestamps)
 	valuesExpected = []float64{-8900, 1111.111111111111, -1916.6666666666665, 2538.461538461538, -1818.1818181818182, 3611.111111111111,
-		-43500, 1882.3529411764705, -666.6666666666666, 400, 0, nan}
+		-43500, 1882.3529411764705, -666.6666666666666, 400, 0, 0}
 	testRowsEqual(t, values, testTimestamps, valuesExpected, testTimestamps)

 	// remove counter resets
@@ -73,8 +146,15 @@ func TestDerivValues(t *testing.T) {
 	removeCounterResets(values)
 	derivValues(values, testTimestamps)
 	valuesExpected = []float64{3400, 1111.111111111111, 1750, 2538.461538461538, 3090.909090909091, 3611.111111111111,
-		6000, 1882.3529411764705, 1777.7777777777776, 400, 0, nan}
+		6000, 1882.3529411764705, 1777.7777777777776, 400, 0, 0}
 	testRowsEqual(t, values, testTimestamps, valuesExpected, testTimestamps)
+
+	// duplicate timestamps
+	values = []float64{1, 2, 3, 4, 5, 6, 7}
+	timestamps := []int64{100, 100, 200, 200, 300, 400, 400}
+	derivValues(values, timestamps)
+	valuesExpected = []float64{0, 20, 20, 20, 10, 10, 10}
+	testRowsEqual(t, values, timestamps, valuesExpected, timestamps)
 }

 func testRollupFunc(t *testing.T, funcName string, args []interface{}, meExpected *metricExpr, vExpected float64) {
@@ -102,7 +182,8 @@ func testRollupFunc(t *testing.T, funcName string, args []interface{}, meExpecte
 				t.Fatalf("unexpected value; got %v; want %v", v, vExpected)
 			}
 		} else {
-			if v != vExpected {
+			eps := math.Abs(v - vExpected)
+			if eps > 1e-14 {
 				t.Fatalf("unexpected value; got %v; want %v", v, vExpected)
 			}
 		}
@@ -143,10 +224,10 @@ func TestRollupPredictLinear(t *testing.T) {
 		testRollupFunc(t, "predict_linear", args, &me, vExpected)
 	}

-	f(0e-3, 63.739757761102624)
-	f(50e-3, 50.39682764539959)
-	f(100e-3, 37.053897529696556)
-	f(200e-3, 10.368037298290488)
+	f(0e-3, 30.382432471845043)
+	f(50e-3, 17.03950235614201)
+	f(100e-3, 3.696572240438975)
+	f(200e-3, -22.989287990967092)
 }

 func TestRollupHoltWinters(t *testing.T) {
@@ -189,10 +270,11 @@ func TestRollupNewRollupFuncSuccess(t *testing.T) {
 		testRollupFunc(t, funcName, args, &me, vExpected)
 	}

-	f("default_rollup", 123)
-	f("changes", 10)
+	f("default_rollup", 34)
+	f("changes", 11)
 	f("delta", -89)
-	f("deriv", -712)
+	f("deriv", -266.85860231406065)
+	f("deriv_fast", -712)
 	f("idelta", 0)
 	f("increase", 275)
 	f("irate", 0)
@@ -202,12 +284,16 @@ func TestRollupNewRollupFuncSuccess(t *testing.T) {
 	f("min_over_time", 12)
 	f("max_over_time", 123)
 	f("sum_over_time", 565)
+	f("sum2_over_time", 37951)
+	f("geomean_over_time", 39.33466603189148)
 	f("count_over_time", 12)
 	f("stddev_over_time", 30.752935722554287)
 	f("stdvar_over_time", 945.7430555555555)
 	f("first_over_time", 123)
 	f("last_over_time", 34)
 	f("integrate", 61.0275)
+	f("distinct_over_time", 8)
+	f("ideriv", 0)
 }

 func TestRollupNewRollupFuncError(t *testing.T) {
@@ -259,7 +345,7 @@ func TestRollupNoWindowNoPoints(t *testing.T) {
 		}
 		rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
 		values := rc.Do(nil, testValues, testTimestamps)
-		valuesExpected := []float64{nan, nan, nan, nan, 123}
+		valuesExpected := []float64{nan, nan, nan, nan, nan}
 		timestampsExpected := []int64{0, 1, 2, 3, 4}
 		testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
 	})
@@ -267,14 +353,14 @@ func TestRollupNoWindowNoPoints(t *testing.T) {
 		rc := rollupConfig{
 			Func:   rollupDelta,
 			Start:  120,
-			End:    144,
+			End:    148,
 			Step:   4,
 			Window: 0,
 		}
 		rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
 		values := rc.Do(nil, testValues, testTimestamps)
-		valuesExpected := []float64{2, 2, 0, 0, 0, nan, nan}
-		timestampsExpected := []int64{120, 124, 128, 132, 136, 140, 144}
+		valuesExpected := []float64{2, 0, 0, 0, nan, nan, nan, nan}
+		timestampsExpected := []int64{120, 124, 128, 132, 136, 140, 144, 148}
 		testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
 	})
 }
@@ -290,22 +376,22 @@ func TestRollupWindowNoPoints(t *testing.T) {
 		}
 		rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
 		values := rc.Do(nil, testValues, testTimestamps)
-		valuesExpected := []float64{nan, nan, nan, nan, 123}
+		valuesExpected := []float64{nan, nan, nan, nan, nan}
 		timestampsExpected := []int64{0, 1, 2, 3, 4}
 		testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
 	})
 	t.Run("afterEnd", func(t *testing.T) {
 		rc := rollupConfig{
 			Func:   rollupFirst,
-			Start:  141,
-			End:    171,
+			Start:  161,
+			End:    191,
 			Step:   10,
 			Window: 3,
 		}
 		rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
 		values := rc.Do(nil, testValues, testTimestamps)
-		valuesExpected := []float64{34, nan, nan, nan}
-		timestampsExpected := []int64{141, 151, 161, 171}
+		valuesExpected := []float64{nan, nan, nan, nan}
+		timestampsExpected := []int64{161, 171, 181, 191}
 		testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
 	})
 }
@@ -315,14 +401,14 @@ func TestRollupNoWindowPartialPoints(t *testing.T) {
 		rc := rollupConfig{
 			Func:   rollupFirst,
 			Start:  0,
-			End:    20,
+			End:    25,
 			Step:   5,
 			Window: 0,
 		}
 		rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
 		values := rc.Do(nil, testValues, testTimestamps)
-		valuesExpected := []float64{123, 123, 123, 123, 123}
-		timestampsExpected := []int64{0, 5, 10, 15, 20}
+		valuesExpected := []float64{nan, 123, 123, 123, 34, 34}
+		timestampsExpected := []int64{0, 5, 10, 15, 20, 25}
 		testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
 	})
 	t.Run("afterEnd", func(t *testing.T) {
@@ -335,7 +421,7 @@ func TestRollupNoWindowPartialPoints(t *testing.T) {
 		}
 		rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
 		values := rc.Do(nil, testValues, testTimestamps)
-		valuesExpected := []float64{44, 34, 34, nan}
+		valuesExpected := []float64{12, 44, 34, nan}
 		timestampsExpected := []int64{100, 120, 140, 160}
 		testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
 	})
@@ -349,7 +435,7 @@ func TestRollupNoWindowPartialPoints(t *testing.T) {
 		}
 		rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
 		values := rc.Do(nil, testValues, testTimestamps)
-		valuesExpected := []float64{nan, 123, 54, 44, nan}
+		valuesExpected := []float64{nan, nan, 123, 54, 44}
 		timestampsExpected := []int64{-50, 0, 50, 100, 150}
 		testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
 	})
@@ -366,7 +452,7 @@ func TestRollupWindowPartialPoints(t *testing.T) {
 		}
 		rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
 		values := rc.Do(nil, testValues, testTimestamps)
-		valuesExpected := []float64{123, 123, 34, 34, 44}
+		valuesExpected := []float64{nan, 123, 123, 34, 34}
 		timestampsExpected := []int64{0, 5, 10, 15, 20}
 		testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
 	})
@@ -380,7 +466,7 @@ func TestRollupWindowPartialPoints(t *testing.T) {
 		}
 		rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
 		values := rc.Do(nil, testValues, testTimestamps)
-		valuesExpected := []float64{34, 34, nan, nan}
+		valuesExpected := []float64{44, 34, 34, nan}
 		timestampsExpected := []int64{100, 120, 140, 160}
 		testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
 	})
@@ -394,7 +480,7 @@ func TestRollupWindowPartialPoints(t *testing.T) {
 		}
 		rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
 		values := rc.Do(nil, testValues, testTimestamps)
-		valuesExpected := []float64{54, 44, nan, nan}
+		valuesExpected := []float64{nan, 54, 44, nan}
 		timestampsExpected := []int64{0, 50, 100, 150}
 		testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
 	})
@@ -411,7 +497,7 @@ func TestRollupFuncsNoWindow(t *testing.T) {
 		}
 		rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
 		values := rc.Do(nil, testValues, testTimestamps)
-		valuesExpected := []float64{123, 21, 12, 34, nan}
+		valuesExpected := []float64{nan, 123, 21, 12, 34}
 		timestampsExpected := []int64{0, 40, 80, 120, 160}
 		testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
 	})
@@ -425,7 +511,7 @@ func TestRollupFuncsNoWindow(t *testing.T) {
 		}
 		rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
 		values := rc.Do(nil, testValues, testTimestamps)
-		valuesExpected := []float64{4, 4, 3, 1, nan}
+		valuesExpected := []float64{nan, 4, 4, 3, 1}
 		timestampsExpected := []int64{0, 40, 80, 120, 160}
 		testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
 	})
@@ -439,7 +525,7 @@ func TestRollupFuncsNoWindow(t *testing.T) {
 		}
 		rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
 		values := rc.Do(nil, testValues, testTimestamps)
-		valuesExpected := []float64{21, 12, 32, 34, nan}
+		valuesExpected := []float64{nan, 21, 12, 32, 34}
 		timestampsExpected := []int64{0, 40, 80, 120, 160}
 		testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
 	})
@@ -453,7 +539,7 @@ func TestRollupFuncsNoWindow(t *testing.T) {
 		}
 		rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
 		values := rc.Do(nil, testValues, testTimestamps)
-		valuesExpected := []float64{123, 99, 44, 34, nan}
+		valuesExpected := []float64{nan, 123, 99, 44, 34}
 		timestampsExpected := []int64{0, 40, 80, 120, 160}
 		testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
 	})
@@ -467,7 +553,7 @@ func TestRollupFuncsNoWindow(t *testing.T) {
 		}
 		rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
 		values := rc.Do(nil, testValues, testTimestamps)
-		valuesExpected := []float64{222, 199, 110, 34, nan}
+		valuesExpected := []float64{nan, 222, 199, 110, 34}
 		timestampsExpected := []int64{0, 40, 80, 120, 160}
 		testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
 	})
@@ -481,7 +567,7 @@ func TestRollupFuncsNoWindow(t *testing.T) {
 		}
 		rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
 		values := rc.Do(nil, testValues, testTimestamps)
-		valuesExpected := []float64{-102, -9, 22, 0, nan}
+		valuesExpected := []float64{nan, -102, -9, 22, 0}
 		timestampsExpected := []int64{0, 40, 80, 120, 160}
 		testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
 	})
@@ -495,10 +581,66 @@ func TestRollupFuncsNoWindow(t *testing.T) {
 		}
 		rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
 		values := rc.Do(nil, testValues, testTimestamps)
-		valuesExpected := []float64{33, -87, 0, nan}
+		valuesExpected := []float64{123, 33, -87, 0}
 		timestampsExpected := []int64{10, 50, 90, 130}
 		testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
 	})
+	t.Run("lifetime_1", func(t *testing.T) {
+		rc := rollupConfig{
+			Func:   rollupLifetime,
+			Start:  0,
+			End:    160,
+			Step:   40,
+			Window: 0,
+		}
+		rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
+		values := rc.Do(nil, testValues, testTimestamps)
+		valuesExpected := []float64{nan, 0.031, 0.044, 0.04, 0.01}
+		timestampsExpected := []int64{0, 40, 80, 120, 160}
+		testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
+	})
+	t.Run("lifetime_2", func(t *testing.T) {
+		rc := rollupConfig{
+			Func:   rollupLifetime,
+			Start:  0,
+			End:    160,
+			Step:   40,
+			Window: 200,
+		}
+		rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
+		values := rc.Do(nil, testValues, testTimestamps)
+		valuesExpected := []float64{nan, 0.031, 0.075, 0.115, 0.125}
+		timestampsExpected := []int64{0, 40, 80, 120, 160}
+		testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
+	})
+	t.Run("scrape_interval_1", func(t *testing.T) {
+		rc := rollupConfig{
+			Func:   rollupScrapeInterval,
+			Start:  0,
+			End:    160,
+			Step:   40,
+			Window: 0,
+		}
+		rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
+		values := rc.Do(nil, testValues, testTimestamps)
+		valuesExpected := []float64{nan, 0.010333333333333333, 0.011, 0.013333333333333334, 0.01}
+		timestampsExpected := []int64{0, 40, 80, 120, 160}
+		testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
+	})
+	t.Run("scrape_interval_2", func(t *testing.T) {
+		rc := rollupConfig{
+			Func:   rollupScrapeInterval,
+			Start:  0,
+			End:    160,
+			Step:   40,
+			Window: 80,
+		}
+		rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
+		values := rc.Do(nil, testValues, testTimestamps)
+		valuesExpected := []float64{nan, 0.010333333333333333, 0.010714285714285714, 0.012, 0.0125}
+		timestampsExpected := []int64{0, 40, 80, 120, 160}
+		testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
+	})
 	t.Run("changes", func(t *testing.T) {
 		rc := rollupConfig{
 			Func:   rollupChanges,
@@ -509,10 +651,24 @@ func TestRollupFuncsNoWindow(t *testing.T) {
 		}
 		rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
 		values := rc.Do(nil, testValues, testTimestamps)
-		valuesExpected := []float64{3, 4, 3, 0, nan}
+		valuesExpected := []float64{nan, 4, 4, 3, 0}
 		timestampsExpected := []int64{0, 40, 80, 120, 160}
 		testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
 	})
+	t.Run("changes_small_window", func(t *testing.T) {
+		rc := rollupConfig{
+			Func:   rollupChanges,
+			Start:  0,
+			End:    45,
+			Step:   9,
+			Window: 9,
+		}
+		rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
+		values := rc.Do(nil, testValues, testTimestamps)
+		valuesExpected := []float64{nan, 1, 1, 1, 1, 0}
+		timestampsExpected := []int64{0, 9, 18, 27, 36, 45}
+		testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
+	})
 	t.Run("resets", func(t *testing.T) {
 		rc := rollupConfig{
 			Func:   rollupResets,
@@ -523,7 +679,7 @@ func TestRollupFuncsNoWindow(t *testing.T) {
 		}
 		rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
 		values := rc.Do(nil, testValues, testTimestamps)
-		valuesExpected := []float64{2, 2, 1, 0, nan}
+		valuesExpected := []float64{nan, 2, 2, 1, 0}
 		timestampsExpected := []int64{0, 40, 80, 120, 160}
 		testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
 	})
@@ -537,13 +693,13 @@ func TestRollupFuncsNoWindow(t *testing.T) {
 		}
 		rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
 		values := rc.Do(nil, testValues, testTimestamps)
-		valuesExpected := []float64{55.5, 49.75, 36.666666666666664, 34, nan}
+		valuesExpected := []float64{nan, 55.5, 49.75, 36.666666666666664, 34}
 		timestampsExpected := []int64{0, 40, 80, 120, 160}
 		testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
 	})
 	t.Run("deriv", func(t *testing.T) {
 		rc := rollupConfig{
-			Func:   rollupDeriv,
+			Func:   rollupDerivSlow,
 			Start:  0,
 			End:    160,
 			Step:   40,
@@ -551,7 +707,7 @@ func TestRollupFuncsNoWindow(t *testing.T) {
 		}
 		rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
 		values := rc.Do(nil, testValues, testTimestamps)
-		valuesExpected := []float64{-3290.3225806451615, -204.54545454545456, 550, 0, nan}
+		valuesExpected := []float64{0, -2879.310344827587, 558.0608793686592, 422.84569138276544, 0}
 		timestampsExpected := []int64{0, 40, 80, 120, 160}
 		testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
 	})
@@ -565,7 +721,7 @@ func TestRollupFuncsNoWindow(t *testing.T) {
 		}
 		rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
 		values := rc.Do(nil, testValues, testTimestamps)
-		valuesExpected := []float64{-1916.6666666666665, -43500, 400, 0, nan}
+		valuesExpected := []float64{nan, -1916.6666666666665, -43500, 400, 0}
 		timestampsExpected := []int64{0, 40, 80, 120, 160}
 		testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
 	})
@@ -579,7 +735,7 @@ func TestRollupFuncsNoWindow(t *testing.T) {
 		}
 		rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
 		values := rc.Do(nil, testValues, testTimestamps)
-		valuesExpected := []float64{39.81519810323691, 32.080952292598795, 5.2493385826745405, 0, nan}
+		valuesExpected := []float64{nan, 39.81519810323691, 32.080952292598795, 5.2493385826745405, 5.830951894845301}
 		timestampsExpected := []int64{0, 40, 80, 120, 160}
 		testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
 	})
@@ -593,11 +749,11 @@ func TestRollupFuncsNoWindow(t *testing.T) {
 		}
 		rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
 		values := rc.Do(nil, testValues, testTimestamps)
-		valuesExpected := []float64{4.6035, 4.3934999999999995, 2.166, 0.34, nan}
+		valuesExpected := []float64{nan, 4.6035, 4.3934999999999995, 2.166, 0.34}
 		timestampsExpected := []int64{0, 40, 80, 120, 160}
 		testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
 	})
-	t.Run("distinct", func(t *testing.T) {
+	t.Run("distinct_over_time_1", func(t *testing.T) {
 		rc := rollupConfig{
 			Func:   rollupDistinct,
 			Start:  0,
@@ -607,7 +763,21 @@ func TestRollupFuncsNoWindow(t *testing.T) {
 		}
 		rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
 		values := rc.Do(nil, testValues, testTimestamps)
-		valuesExpected := []float64{4, 4, 3, 1, nan}
+		valuesExpected := []float64{nan, 4, 4, 3, 1}
+		timestampsExpected := []int64{0, 40, 80, 120, 160}
+		testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
+	})
+	t.Run("distinct_over_time_2", func(t *testing.T) {
+		rc := rollupConfig{
+			Func:   rollupDistinct,
+			Start:  0,
+			End:    160,
+			Step:   40,
+			Window: 80,
+		}
+		rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
+		values := rc.Do(nil, testValues, testTimestamps)
+		valuesExpected := []float64{nan, 4, 7, 6, 3}
 		timestampsExpected := []int64{0, 40, 80, 120, 160}
 		testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
 	})
--- a/app/vmselect/promql/timeseries.go
+++ b/app/vmselect/promql/timeseries.go
@@ -4,6 +4,7 @@ import (
 	"fmt"
 	"sort"
 	"strconv"
+	"sync"
 	"unsafe"

 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
@@ -38,11 +39,13 @@ func (ts *timeseries) String() string {
 	return fmt.Sprintf("MetricName=%s, Values=%g, Timestamps=%d", &ts.MetricName, ts.Values, ts.Timestamps)
 }

-func (ts *timeseries) CopyFrom(src *timeseries) {
+func (ts *timeseries) CopyFromShallowTimestamps(src *timeseries) { // resets ts, copies MetricName and Values from src; Timestamps is shared, not copied
 	ts.Reset()
 	ts.MetricName.CopyFrom(&src.MetricName)
 	ts.Values = append(ts.Values[:0], src.Values...)
-	ts.Timestamps = append(ts.Timestamps[:0], src.Timestamps...)
+	ts.Timestamps = src.Timestamps // aliases src's slice, so ts and src refer to the same timestamps
+
+	ts.denyReuse = true // NOTE(review): presumably blocks reuse of ts because Timestamps is shared with src - confirm
 }

 func (ts *timeseries) CopyFromMetricNames(src *timeseries) {
@@ -59,7 +62,21 @@ func (ts *timeseries) CopyShallow(src *timeseries) {
 	ts.denyReuse = true
 }

-func marshalTimeseriesFast(tss []*timeseries, maxSize int, step int64) []byte {
+// getTimeseries returns a timeseries taken from timeseriesPool, or a newly allocated one if the pool is empty.
+func getTimeseries() *timeseries {
+	return timeseriesPool.Get().(*timeseries)
+}
+
+// putTimeseries resets ts and hands it back to timeseriesPool.
+func putTimeseries(ts *timeseries) {
+	ts.Reset()
+	timeseriesPool.Put(ts)
+}
+
+// timeseriesPool backs getTimeseries and putTimeseries; New supplies a fresh object when the pool is empty.
+var timeseriesPool = sync.Pool{New: func() interface{} { return &timeseries{} }}
+
+func marshalTimeseriesFast(dst []byte, tss []*timeseries, maxSize int, step int64) []byte {
 	if len(tss) == 0 {
 		logger.Panicf("BUG: tss cannot be empty")
 	}
@@ -75,13 +92,13 @@ func marshalTimeseriesFast(tss []*timeseries, maxSize int, step int64) []byte {

 	if size > maxSize {
 		// Do not marshal tss, since it would occupy too much space
-		return nil
+		return dst
 	}

 	// Allocate the buffer for the marshaled tss before its' marshaling.
 	// This should reduce memory fragmentation and memory usage.
-	dst := make([]byte, 0, size)
-	dst = marshalFastTimestamps(dst, tss[0].Timestamps)
+	dst = bytesutil.Resize(dst, size)
+	dst = marshalFastTimestamps(dst[:0], tss[0].Timestamps)
 	for _, ts := range tss {
 		dst = ts.marshalFastNoTimestamps(dst)
 	}
--- a/app/vmselect/promql/timeseries_test.go
+++ b/app/vmselect/promql/timeseries_test.go
@@ -74,7 +74,7 @@ func TestTimeseriesMarshalUnmarshalFast(t *testing.T) {

 			tssOrig = append(tssOrig, &ts)
 		}
-		buf := marshalTimeseriesFast(tssOrig, 1e6, 123)
+		buf := marshalTimeseriesFast(nil, tssOrig, 1e6, 123)
 		tssGot, err := unmarshalTimeseriesFast(buf)
 		if err != nil {
 			t.Fatalf("error in unmarshalTimeseriesFast: %s", err)
--- a/app/vmselect/promql/transform.go
+++ b/app/vmselect/promql/transform.go
@@ -4,6 +4,7 @@ import (
 	"fmt"
 	"math"
 	"math/rand"
+	"regexp"
 	"sort"
 	"strconv"
 	"strings"
@@ -61,6 +62,8 @@ var transformFuncs = map[string]transformFunc{
 	"label_keep":         transformLabelKeep,
 	"label_copy":         transformLabelCopy,
 	"label_move":         transformLabelMove,
+	"label_transform":    transformLabelTransform,
+	"label_value":        transformLabelValue,
 	"union":              transformUnion,
 	"":                   transformUnion, // empty func is a synonim to union
 	"keep_last_value":    transformKeepLastValue,
@@ -123,7 +126,8 @@ func newTransformFuncOneArg(tf func(v float64) float64) transformFunc {
 }

 func doTransformValues(arg []*timeseries, tf func(values []float64), fe *funcExpr) ([]*timeseries, error) {
-	keepMetricGroup := transformFuncsKeepMetricGroup[fe.Name]
+	name := strings.ToLower(fe.Name)
+	keepMetricGroup := transformFuncsKeepMetricGroup[name]
 	for _, ts := range arg {
 		if !keepMetricGroup {
 			ts.MetricName.ResetMetricGroup()
@@ -294,21 +298,27 @@ func transformHistogramQuantile(tfa *transformFuncArg) ([]*timeseries, error) {
 		if err != nil {
 			continue
 		}
-		var dst timeseries
-		dst.CopyFrom(ts)
-		dst.MetricName.ResetMetricGroup()
-		dst.MetricName.RemoveTag("le")
-		bb.B = marshalMetricTagsSorted(bb.B[:0], &dst.MetricName)
+		ts.MetricName.ResetMetricGroup()
+		ts.MetricName.RemoveTag("le")
+		bb.B = marshalMetricTagsSorted(bb.B[:0], &ts.MetricName)
 		m[string(bb.B)] = append(m[string(bb.B)], x{
 			le: le,
-			ts: &dst,
+			ts: ts,
 		})
 	}
 	bbPool.Put(bb)

 	// Calculate quantile for each group in m
-	lastNonInf := func(xss []x) float64 {
-		for len(xss) > 0 && math.IsInf(xss[len(xss)-1].le, 0) {
+
+	lastNonInf := func(i int, xss []x) float64 {
+		for len(xss) > 0 {
+			xsLast := xss[len(xss)-1]
+			if xsLast.ts.Values[i] == 0 {
+				return nan
+			}
+			if !math.IsInf(xsLast.le, 0) {
+				break
+			}
 			xss = xss[:len(xss)-1]
 		}
 		if len(xss) == 0 {
@@ -317,42 +327,57 @@ func transformHistogramQuantile(tfa *transformFuncArg) ([]*timeseries, error) {
 		return xss[len(xss)-1].le
 	}
 	quantile := func(i int, phis []float64, xss []x) float64 {
-		vPrev := float64(0)
-		lePrev := float64(0)
 		phi := phis[i]
 		if math.IsNaN(phi) {
 			return nan
 		}
+		// Fix broken buckets.
+		// They are already sorted by le, so their values must be in ascending order,
+		// since the next bucket value includes all the previous buckets.
+		vPrev := float64(0)
+		for _, xs := range xss {
+			v := xs.ts.Values[i]
+			if math.IsNaN(v) || v < vPrev {
+				xs.ts.Values[i] = vPrev
+			} else {
+				vPrev = v
+			}
+		}
+		if len(xss) == 0 {
+			return nan
+		}
 		if phi < 0 {
 			return -inf
 		}
 		if phi > 1 {
 			return inf
 		}
-		vReq := xss[len(xss)-1].ts.Values[i] * phi
+		vLast := xss[len(xss)-1].ts.Values[i]
+		if vLast == 0 {
+			return nan
+		}
+		vReq := vLast * phi
+		vPrev = 0
+		lePrev := float64(0)
 		for _, xs := range xss {
 			v := xs.ts.Values[i]
 			le := xs.le
-			if v <= vPrev {
-				v = vPrev
-				le = lePrev
-			}
 			if v < vReq {
 				vPrev = v
 				lePrev = le
 				continue
 			}
 			if math.IsInf(le, 0) {
-				return lastNonInf(xss)
+				return lastNonInf(i, xss)
 			}
 			if v == vPrev {
 				return lePrev
 			}
 			return lePrev + (le-lePrev)*(vReq-vPrev)/(v-vPrev)
 		}
-		return lastNonInf(xss)
+		return lastNonInf(i, xss)
 	}
-	var rvs []*timeseries
+	rvs := make([]*timeseries, 0, len(m))
 	for _, xss := range m {
 		sort.Slice(xss, func(i, j int) bool {
 			return xss[i].le < xss[j].le
@@ -394,13 +419,6 @@ func runningAvg(a, b float64, idx int) float64 {
 	return a + (b-a)/float64(idx+1)
 }

-func keepLastValue(a, b float64, idx int) float64 {
-	if math.IsNaN(b) {
-		return a
-	}
-	return b
-}
-
 func skipLeadingNaNs(values []float64) []float64 {
 	i := 0
 	for i < len(values) && math.IsNaN(values[i]) {
@@ -641,9 +659,7 @@ func transformUnion(tfa *transformFuncArg) ([]*timeseries, error) {
 				continue
 			}
 			m[string(bb.B)] = true
-			var dst timeseries
-			dst.CopyFrom(ts)
-			rvs = append(rvs, &dst)
+			rvs = append(rvs, ts)
 		}
 	}
 	bbPool.Put(bb)
@@ -816,6 +832,31 @@ func transformLabelJoin(tfa *transformFuncArg) ([]*timeseries, error) {
 	return rvs, nil
 }

+// transformLabelTransform implements label_transform(q, "label", "regex", "replacement").
+func transformLabelTransform(tfa *transformFuncArg) ([]*timeseries, error) {
+	if err := expectTransformArgsNum(tfa.args, 4); err != nil {
+		return nil, err
+	}
+	labelName, err := getString(tfa.args[1], 1)
+	if err != nil {
+		return nil, err
+	}
+	pattern, err := getString(tfa.args[2], 2)
+	if err != nil {
+		return nil, err
+	}
+	repl, err := getString(tfa.args[3], 3)
+	if err != nil {
+		return nil, err
+	}
+	re, err := compileRegexp(pattern)
+	if err != nil {
+		return nil, fmt.Errorf(`cannot compile regex %q: %s`, pattern, err)
+	}
+	// The same label is passed as both the source and the destination of the replacement.
+	return labelReplace(tfa.args[0], labelName, re, labelName, repl)
+}
+
 func transformLabelReplace(tfa *transformFuncArg) ([]*timeseries, error) {
 	args := tfa.args
 	if err := expectTransformArgsNum(args, 5); err != nil {
@@ -842,11 +883,12 @@ func transformLabelReplace(tfa *transformFuncArg) ([]*timeseries, error) {
 	if err != nil {
 		return nil, fmt.Errorf(`cannot compile regex %q: %s`, regex, err)
 	}
+	return labelReplace(args[0], srcLabel, r, dstLabel, replacement)
+}

+func labelReplace(tss []*timeseries, srcLabel string, r *regexp.Regexp, dstLabel, replacement string) ([]*timeseries, error) {
 	replacementBytes := []byte(replacement)
-
-	rvs := args[0]
-	for _, ts := range rvs {
+	for _, ts := range tss {
 		mn := &ts.MetricName
 		dstValue := getDstValue(mn, dstLabel)
 		srcValue := mn.GetTagValue(srcLabel)
@@ -856,6 +898,33 @@ func transformLabelReplace(tfa *transformFuncArg) ([]*timeseries, error) {
 			mn.RemoveTag(dstLabel)
 		}
 	}
+	return tss, nil
+}
+
+// transformLabelValue implements label_value(q, "label"): each series' values are overwritten with the label parsed as a float.
+func transformLabelValue(tfa *transformFuncArg) ([]*timeseries, error) {
+	if err := expectTransformArgsNum(tfa.args, 2); err != nil {
+		return nil, err
+	}
+	label, err := getString(tfa.args[1], 1)
+	if err != nil {
+		return nil, fmt.Errorf("cannot get label name: %s", err)
+	}
+	rvs := tfa.args[0]
+	for _, ts := range rvs {
+		mn := &ts.MetricName
+		mn.ResetMetricGroup()
+		// A label value that does not parse as a float yields NaN for the whole series.
+		num, parseErr := strconv.ParseFloat(string(mn.GetTagValue(label)), 64)
+		if parseErr != nil {
+			num = nan
+		}
+		for i := range ts.Values {
+			ts.Values[i] = num
+		}
+	}
+	// Do not remove timeseries with only NaN values, so `default` could be applied to them:
+	// label_value(q, "label") default 123
 	return rvs, nil
 }

--- a/app/vmstorage/Makefile
+++ b/app/vmstorage/Makefile
@@ -0,0 +1,38 @@
+# All these commands must run from repository root.
+
+run-vmstorage: # creates ./vmstorage-data, then delegates to run-via-docker with it mounted at /vmstorage-data and -retentionPeriod=12
+	mkdir -p vmstorage-data
+	DOCKER_OPTS='-v $(shell pwd)/vmstorage-data:/vmstorage-data' \
+	APP_NAME=vmstorage \
+	ARGS='-retentionPeriod=12' \
+	$(MAKE) run-via-docker
+
+vmstorage: # delegates to app-local with APP_NAME=vmstorage
+	APP_NAME=vmstorage $(MAKE) app-local
+
+vmstorage-race: # same as vmstorage, with RACE=-race
+	APP_NAME=vmstorage RACE=-race $(MAKE) app-local
+
+vmstorage-prod: # delegates to app-via-docker with APP_NAME=vmstorage
+	APP_NAME=vmstorage $(MAKE) app-via-docker
+
+vmstorage-prod-race: # same as vmstorage-prod, with RACE=-race
+	APP_NAME=vmstorage RACE=-race $(MAKE) app-via-docker
+
+vmstorage-pure: # delegates to app-local-pure with APP_NAME=vmstorage
+	APP_NAME=vmstorage $(MAKE) app-local-pure
+
+vmstorage-pure-prod: # delegates to app-via-docker with APP_SUFFIX=-pure and CGO_ENABLED=0 passed via DOCKER_OPTS
+	APP_NAME=vmstorage APP_SUFFIX='-pure' DOCKER_OPTS='--env CGO_ENABLED=0' $(MAKE) app-via-docker
+
+package-vmstorage: # delegates to package-via-docker with APP_NAME=vmstorage
+	APP_NAME=vmstorage $(MAKE) package-via-docker
+
+package-vmstorage-race: # same as package-vmstorage, with RACE=-race
+	APP_NAME=vmstorage RACE=-race $(MAKE) package-via-docker
+
+publish-vmstorage: # delegates to publish-via-docker with APP_NAME=vmstorage
+	APP_NAME=vmstorage $(MAKE) publish-via-docker
+
+publish-vmstorage-race: # same as publish-vmstorage, with RACE=-race
+	APP_NAME=vmstorage RACE=-race $(MAKE) publish-via-docker
--- a/app/vmstorage/README.md
+++ b/app/vmstorage/README.md
@@ -1,5 +1,5 @@
 `vmstorage` performs the following tasks:

- Accepts inserts from `vminsert` and stores them to local storage.
+- Accepts inserts from `vminsert` nodes and stores them to local storage.

- Performs select requests from `vmselect`.
+- Performs select requests from `vmselect` nodes.
--- a/app/vmstorage/deployment/Dockerfile
+++ b/app/vmstorage/deployment/Dockerfile
@@ -0,0 +1,7 @@
+FROM scratch
+COPY --from=local/certs:1.0.2 /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ca-certificates.crt
+COPY bin/vmstorage-prod .
+EXPOSE 8482
+EXPOSE 8400
+EXPOSE 8401
+ENTRYPOINT ["/vmstorage-prod"]
--- a/app/vmstorage/main.go
+++ b/app/vmstorage/main.go
@@ -1,4 +1,4 @@
-package vmstorage
+package main

 import (
 	"flag"
@@ -8,122 +8,88 @@ import (
 	"sync"
 	"time"

-	"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
+	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmstorage/transport"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
-	"github.com/VictoriaMetrics/VictoriaMetrics/lib/syncwg"
 	"github.com/VictoriaMetrics/metrics"
 )

 var (
+	httpListenAddr  = flag.String("httpListenAddr", ":8482", "Address to listen for http connections")
 	retentionPeriod = flag.Int("retentionPeriod", 1, "Retention period in months")
+	storageDataPath = flag.String("storageDataPath", "vmstorage-data", "Path to storage data")
+	vminsertAddr    = flag.String("vminsertAddr", ":8400", "TCP address to accept connections from vminsert services")
+	vmselectAddr    = flag.String("vmselectAddr", ":8401", "TCP address to accept connections from vmselect services")
 	snapshotAuthKey = flag.String("snapshotAuthKey", "", "authKey, which must be passed in query string to /snapshot* pages")
-
-	precisionBits = flag.Int("precisionBits", 64, "The number of precision bits to store per each value. Lower precision bits improves data compression at the cost of precision loss")
-
-	// DataPath is a path to storage data.
-	DataPath = flag.String("storageDataPath", "victoria-metrics-data", "Path to storage data")
 )

-// Init initializes vmstorage.
-func Init() {
-	if err := encoding.CheckPrecisionBits(uint8(*precisionBits)); err != nil {
-		logger.Fatalf("invalid `-precisionBits`: %s", err)
-	}
-	logger.Infof("opening storage at %q with retention period %d months", *DataPath, *retentionPeriod)
+func main() {
+	flag.Parse()
+	buildinfo.Init()
+	logger.Init()
+
+	logger.Infof("opening storage at %q with retention period %d months", *storageDataPath, *retentionPeriod)
 	startTime := time.Now()
-	strg, err := storage.OpenStorage(*DataPath, *retentionPeriod)
+	strg, err := storage.OpenStorage(*storageDataPath, *retentionPeriod)
 	if err != nil {
-		logger.Fatalf("cannot open a storage at %s with retention period %d months: %s", *DataPath, *retentionPeriod, err)
+		logger.Fatalf("cannot open a storage at %s with retention period %d months: %s", *storageDataPath, *retentionPeriod, err)
 	}
-	Storage = strg

 	var m storage.Metrics
-	Storage.UpdateMetrics(&m)
+	strg.UpdateMetrics(&m)
 	tm := &m.TableMetrics
 	partsCount := tm.SmallPartsCount + tm.BigPartsCount
 	blocksCount := tm.SmallBlocksCount + tm.BigBlocksCount
 	rowsCount := tm.SmallRowsCount + tm.BigRowsCount
-	logger.Infof("successfully opened storage %q in %s; partsCount: %d; blocksCount: %d; rowsCount: %d",
-		*DataPath, time.Since(startTime), partsCount, blocksCount, rowsCount)
+	sizeBytes := tm.SmallSizeBytes + tm.BigSizeBytes
+	logger.Infof("successfully opened storage %q in %s; partsCount: %d; blocksCount: %d; rowsCount: %d; sizeBytes: %d",
+		*storageDataPath, time.Since(startTime), partsCount, blocksCount, rowsCount, sizeBytes)

-	registerStorageMetrics(Storage)
-}
+	registerStorageMetrics(strg)

-// Storage is a storage.
-//
-// Every storage call must be wrapped into WG.Add(1) ... WG.Done()
-// for proper graceful shutdown when Stop is called.
-var Storage *storage.Storage
+	srv, err := transport.NewServer(*vminsertAddr, *vmselectAddr, strg)
+	if err != nil {
+		logger.Fatalf("cannot create a server with vminsertAddr=%s, vmselectAddr=%s: %s", *vminsertAddr, *vmselectAddr, err)
+	}

-// WG must be incremented before Storage call.
-//
-// Use syncwg instead of sync, since Add is called from concurrent goroutines.
-var WG syncwg.WaitGroup
+	go srv.RunVMInsert()
+	go srv.RunVMSelect()

-// AddRows adds mrs to the storage.
-func AddRows(mrs []storage.MetricRow) error {
-	WG.Add(1)
-	err := Storage.AddRows(mrs, uint8(*precisionBits))
-	WG.Done()
-	return err
-}
+	requestHandler := newRequestHandler(strg)
+	go func() {
+		httpserver.Serve(*httpListenAddr, requestHandler)
+	}()

-// DeleteMetrics deletes metrics matching tfss.
-//
-// Returns the number of deleted metrics.
-func DeleteMetrics(tfss []*storage.TagFilters) (int, error) {
-	WG.Add(1)
-	n, err := Storage.DeleteMetrics(tfss)
-	WG.Done()
-	return n, err
-}
+	sig := procutil.WaitForSigterm()
+	logger.Infof("service received signal %s", sig)

-// SearchTagKeys searches for tag keys
-func SearchTagKeys(maxTagKeys int) ([]string, error) {
-	WG.Add(1)
-	keys, err := Storage.SearchTagKeys(maxTagKeys)
-	WG.Done()
-	return keys, err
-}
+	logger.Infof("gracefully shutting down the service")
+	startTime = time.Now()
+	srv.MustClose()
+	logger.Infof("successfully shut down the service in %s", time.Since(startTime))

-// SearchTagValues searches for tag values for the given tagKey
-func SearchTagValues(tagKey []byte, maxTagValues int) ([]string, error) {
-	WG.Add(1)
-	values, err := Storage.SearchTagValues(tagKey, maxTagValues)
-	WG.Done()
-	return values, err
-}
-
-// GetSeriesCount returns the number of time series in the storage.
-func GetSeriesCount() (uint64, error) {
-	WG.Add(1)
-	n, err := Storage.GetSeriesCount()
-	WG.Done()
-	return n, err
-}
-
-// Stop stops the vmstorage
-func Stop() {
-	logger.Infof("gracefully closing the storage at %s", *DataPath)
-	startTime := time.Now()
-	WG.WaitAndBlock()
-	Storage.MustClose()
+	logger.Infof("gracefully closing the storage at %s", *storageDataPath)
+	startTime = time.Now()
+	strg.MustClose()
 	logger.Infof("successfully closed the storage in %s", time.Since(startTime))

-	logger.Infof("the storage has been stopped")
+	fs.MustStopDirRemover()
+
+	logger.Infof("the vmstorage has been stopped")
 }

-// RequestHandler is a storage request handler.
-func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
-	path := r.URL.Path
-	prometheusCompatibleResponse := false
-	if path == "/api/v1/admin/tsdb/snapshot" {
-		// Handle Prometheus API - https://prometheus.io/docs/prometheus/latest/querying/api/#snapshot .
-		prometheusCompatibleResponse = true
-		path = "/snapshot/create"
+func newRequestHandler(strg *storage.Storage) httpserver.RequestHandler {
+	return func(w http.ResponseWriter, r *http.Request) bool {
+		return requestHandler(w, r, strg)
 	}
+}
+
+func requestHandler(w http.ResponseWriter, r *http.Request, strg *storage.Storage) bool {
+	path := r.URL.Path
 	if !strings.HasPrefix(path, "/snapshot") {
 		return false
 	}
@@ -137,22 +103,18 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
 	switch path {
 	case "/create":
 		w.Header().Set("Content-Type", "application/json")
-		snapshotPath, err := Storage.CreateSnapshot()
+		snapshotPath, err := strg.CreateSnapshot()
 		if err != nil {
 			msg := fmt.Sprintf("cannot create snapshot: %s", err)
 			logger.Errorf("%s", msg)
 			fmt.Fprintf(w, `{"status":"error","msg":%q}`, msg)
 			return true
 		}
-		if prometheusCompatibleResponse {
-			fmt.Fprintf(w, `{"status":"success","data":{"name":%q}}`, snapshotPath)
-		} else {
-			fmt.Fprintf(w, `{"status":"ok","snapshot":%q}`, snapshotPath)
-		}
+		fmt.Fprintf(w, `{"status":"ok","snapshot":%q}`, snapshotPath)
 		return true
 	case "/list":
 		w.Header().Set("Content-Type", "application/json")
-		snapshots, err := Storage.ListSnapshots()
+		snapshots, err := strg.ListSnapshots()
 		if err != nil {
 			msg := fmt.Sprintf("cannot list snapshots: %s", err)
 			logger.Errorf("%s", msg)
@@ -171,7 +133,7 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
 	case "/delete":
 		w.Header().Set("Content-Type", "application/json")
 		snapshotName := r.FormValue("snapshot")
-		if err := Storage.DeleteSnapshot(snapshotName); err != nil {
+		if err := strg.DeleteSnapshot(snapshotName); err != nil {
 			msg := fmt.Sprintf("cannot delete snapshot %q: %s", snapshotName, err)
 			logger.Errorf("%s", msg)
 			fmt.Fprintf(w, `{"status":"error","msg":%q}`, msg)
@@ -181,7 +143,7 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
 		return true
 	case "/delete_all":
 		w.Header().Set("Content-Type", "application/json")
-		snapshots, err := Storage.ListSnapshots()
+		snapshots, err := strg.ListSnapshots()
 		if err != nil {
 			msg := fmt.Sprintf("cannot list snapshots: %s", err)
 			logger.Errorf("%s", msg)
@@ -189,7 +151,7 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
 			return true
 		}
 		for _, snapshotName := range snapshots {
-			if err := Storage.DeleteSnapshot(snapshotName); err != nil {
+			if err := strg.DeleteSnapshot(snapshotName); err != nil {
 				msg := fmt.Sprintf("cannot delete snapshot %q: %s", snapshotName, err)
 				logger.Errorf("%s", msg)
 				fmt.Fprintf(w, `{"status":"error","msg":%q}`, msg)
@@ -285,6 +247,18 @@ func registerStorageMetrics(strg *storage.Storage) {
 	metrics.NewGauge(`vm_missing_tsids_for_metric_id_total`, func() float64 {
 		return float64(idbm().MissingTSIDsForMetricID)
 	})
+	metrics.NewGauge(`vm_recent_hour_metric_ids_search_calls_total`, func() float64 {
+		return float64(idbm().RecentHourMetricIDsSearchCalls)
+	})
+	metrics.NewGauge(`vm_recent_hour_metric_ids_search_hits_total`, func() float64 {
+		return float64(idbm().RecentHourMetricIDsSearchHits)
+	})
+	metrics.NewGauge(`vm_date_metric_ids_search_calls_total`, func() float64 {
+		return float64(idbm().DateMetricIDsSearchCalls)
+	})
+	metrics.NewGauge(`vm_date_metric_ids_search_hits_total`, func() float64 {
+		return float64(idbm().DateMetricIDsSearchHits)
+	})

 	metrics.NewGauge(`vm_assisted_merges_total{type="storage/small"}`, func() float64 {
 		return float64(tm().SmallAssistedMerges)
@@ -320,6 +294,39 @@ func registerStorageMetrics(strg *storage.Storage) {
 		return float64(idbm().BlocksCount)
 	})

+	metrics.NewGauge(`vm_data_size_bytes{type="storage/big"}`, func() float64 {
+		return float64(tm().BigSizeBytes)
+	})
+	metrics.NewGauge(`vm_data_size_bytes{type="storage/small"}`, func() float64 {
+		return float64(tm().SmallSizeBytes)
+	})
+	metrics.NewGauge(`vm_data_size_bytes{type="indexdb"}`, func() float64 {
+		return float64(idbm().SizeBytes)
+	})
+
+	metrics.NewGauge(`vm_rows_ignored_total{reason="big_timestamp"}`, func() float64 {
+		return float64(m().TooBigTimestampRows)
+	})
+	metrics.NewGauge(`vm_rows_ignored_total{reason="small_timestamp"}`, func() float64 {
+		return float64(m().TooSmallTimestampRows)
+	})
+
+	metrics.NewGauge(`vm_concurrent_addrows_limit_reached_total`, func() float64 {
+		return float64(m().AddRowsConcurrencyLimitReached)
+	})
+	metrics.NewGauge(`vm_concurrent_addrows_limit_timeout_total`, func() float64 {
+		return float64(m().AddRowsConcurrencyLimitTimeout)
+	})
+	metrics.NewGauge(`vm_concurrent_addrows_dropped_rows_total`, func() float64 {
+		return float64(m().AddRowsConcurrencyDroppedRows)
+	})
+	metrics.NewGauge(`vm_concurrent_addrows_capacity`, func() float64 {
+		return float64(m().AddRowsConcurrencyCapacity)
+	})
+	metrics.NewGauge(`vm_concurrent_addrows_current`, func() float64 {
+		return float64(m().AddRowsConcurrencyCurrent)
+	})
+
 	metrics.NewGauge(`vm_rows{type="storage/big"}`, func() float64 {
 		return float64(tm().BigRowsCount)
 	})
@@ -342,6 +349,9 @@ func registerStorageMetrics(strg *storage.Storage) {
 	metrics.NewGauge(`vm_cache_entries{type="storage/date_metricID"}`, func() float64 {
 		return float64(m().DateMetricIDCacheSize)
 	})
+	metrics.NewGauge(`vm_cache_entries{type="storage/hour_metric_ids"}`, func() float64 {
+		return float64(m().HourMetricIDCacheSize)
+	})
 	metrics.NewGauge(`vm_cache_entries{type="storage/bigIndexBlocks"}`, func() float64 {
 		return float64(tm().BigIndexBlocksCacheSize)
 	})
@@ -357,24 +367,30 @@ func registerStorageMetrics(strg *storage.Storage) {
 	metrics.NewGauge(`vm_cache_entries{type="indexdb/tagFilters"}`, func() float64 {
 		return float64(idbm().TagCacheSize)
 	})
+	metrics.NewGauge(`vm_cache_entries{type="indexdb/uselessTagFilters"}`, func() float64 {
+		return float64(idbm().UselessTagFiltersCacheSize)
+	})
 	metrics.NewGauge(`vm_cache_entries{type="storage/regexps"}`, func() float64 {
 		return float64(storage.RegexpCacheSize())
 	})

 	metrics.NewGauge(`vm_cache_size_bytes{type="storage/tsid"}`, func() float64 {
-		return float64(m().TSIDCacheBytesSize)
+		return float64(m().TSIDCacheSizeBytes)
 	})
 	metrics.NewGauge(`vm_cache_size_bytes{type="storage/metricIDs"}`, func() float64 {
-		return float64(m().MetricIDCacheBytesSize)
+		return float64(m().MetricIDCacheSizeBytes)
 	})
 	metrics.NewGauge(`vm_cache_size_bytes{type="storage/metricName"}`, func() float64 {
-		return float64(m().MetricNameCacheBytesSize)
+		return float64(m().MetricNameCacheSizeBytes)
 	})
 	metrics.NewGauge(`vm_cache_size_bytes{type="storage/date_metricID"}`, func() float64 {
-		return float64(m().DateMetricIDCacheBytesSize)
+		return float64(m().DateMetricIDCacheSizeBytes)
 	})
 	metrics.NewGauge(`vm_cache_size_bytes{type="indexdb/tagFilters"}`, func() float64 {
-		return float64(idbm().TagCacheBytesSize)
+		return float64(idbm().TagCacheSizeBytes)
+	})
+	metrics.NewGauge(`vm_cache_size_bytes{type="indexdb/uselessTagFilters"}`, func() float64 {
+		return float64(idbm().UselessTagFiltersCacheSizeBytes)
 	})

 	metrics.NewGauge(`vm_cache_requests_total{type="storage/tsid"}`, func() float64 {
@@ -404,6 +420,9 @@ func registerStorageMetrics(strg *storage.Storage) {
 	metrics.NewGauge(`vm_cache_requests_total{type="indexdb/tagFilters"}`, func() float64 {
 		return float64(idbm().TagCacheRequests)
 	})
+	metrics.NewGauge(`vm_cache_requests_total{type="indexdb/uselessTagFilters"}`, func() float64 {
+		return float64(idbm().UselessTagFiltersCacheRequests)
+	})
 	metrics.NewGauge(`vm_cache_requests_total{type="storage/regexps"}`, func() float64 {
 		return float64(storage.RegexpCacheRequests())
 	})
@@ -435,6 +454,9 @@ func registerStorageMetrics(strg *storage.Storage) {
 	metrics.NewGauge(`vm_cache_misses_total{type="indexdb/tagFilters"}`, func() float64 {
 		return float64(idbm().TagCacheMisses)
 	})
+	metrics.NewGauge(`vm_cache_misses_total{type="indexdb/uselessTagFilters"}`, func() float64 {
+		return float64(idbm().UselessTagFiltersCacheMisses)
+	})
 	metrics.NewGauge(`vm_cache_misses_total{type="storage/regexps"}`, func() float64 {
 		return float64(storage.RegexpCacheMisses())
 	})
--- a/app/vmstorage/transport/server.go
+++ b/app/vmstorage/transport/server.go
@@ -0,0 +1,816 @@
+package transport
+
+import (
+	"flag"
+	"fmt"
+	"io"
+	"net"
+	"sync"
+	"sync/atomic"
+	"time"
+
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/consts"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/handshake"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
+	"github.com/VictoriaMetrics/metrics"
+)
+
+var (
+	maxTagKeysPerSearch   = flag.Int("search.maxTagKeys", 100e3, "The maximum number of tag keys returned per search")
+	maxTagValuesPerSearch = flag.Int("search.maxTagValues", 100e3, "The maximum number of tag values returned per search")
+	maxMetricsPerSearch   = flag.Int("search.maxUniqueTimeseries", 300e3, "The maximum number of unique time series each search can scan")
+
+	precisionBits         = flag.Int("precisionBits", 64, "The number of precision bits to store per each value. Lower precision bits improves data compression at the cost of precision loss")
+	disableRPCCompression = flag.Bool(`rpc.disableCompression`, false, "Disable compression of RPC traffic. This reduces CPU usage at the cost of higher network bandwidth usage")
+)
+
+// Server processes connections from vminsert and vmselect.
+type Server struct {
+	// storage is the backend rows are added to and queries are served from.
+	storage *storage.Storage
+
+	// Listeners for incoming vminsert and vmselect connections.
+	vminsertLN net.Listener
+	vmselectLN net.Listener
+
+	// Wait groups tracking per-connection goroutines.
+	// MustClose waits on both of them before returning.
+	vminsertWG sync.WaitGroup
+	vmselectWG sync.WaitGroup
+
+	// Currently accepted connections, kept so MustClose can close them.
+	vminsertConnsMap connsMap
+	vmselectConnsMap connsMap
+
+	// stopFlag is set to 1 by setIsStopping and read by isStopping.
+	// It is accessed only via sync/atomic.
+	stopFlag uint64
+}
+
+// connsMap is a mutex-protected set of network connections.
+//
+// Init must be called before the first use.
+type connsMap struct {
+	// mu protects m.
+	mu sync.Mutex
+	// m holds the registered connections as keys.
+	m  map[net.Conn]struct{}
+}
+
+// Init allocates the underlying map.
+//
+// It doesn't take cm.mu, so it must be called before cm is shared between goroutines.
+func (cm *connsMap) Init() {
+	cm.m = make(map[net.Conn]struct{})
+}
+
+// Add registers c in cm.
+func (cm *connsMap) Add(c net.Conn) {
+	cm.mu.Lock()
+	defer cm.mu.Unlock()
+
+	cm.m[c] = struct{}{}
+}
+
+// Delete unregisters c from cm. It is a no-op if c isn't registered.
+func (cm *connsMap) Delete(c net.Conn) {
+	cm.mu.Lock()
+	defer cm.mu.Unlock()
+
+	delete(cm.m, c)
+}
+
+// CloseAll closes every connection registered in cm.
+//
+// Close errors are ignored. The connections remain registered until Delete is called for them.
+func (cm *connsMap) CloseAll() {
+	cm.mu.Lock()
+	defer cm.mu.Unlock()
+
+	for conn := range cm.m {
+		_ = conn.Close()
+	}
+}
+
+// NewServer returns new Server.
+//
+// It validates -precisionBits and opens TCP listeners at vminsertAddr and vmselectAddr.
+// The returned Server works with the given storage. Call RunVMInsert and RunVMSelect
+// in order to start accepting connections and MustClose in order to stop the server.
+//
+// No listeners are left open when an error is returned.
+func NewServer(vminsertAddr, vmselectAddr string, storage *storage.Storage) (*Server, error) {
+	// Validate flags before opening any listener, so there is nothing to clean up on failure.
+	// The explicit range check is needed, since the uint8 conversion below silently wraps
+	// out-of-range values (e.g. 320 becomes 64), which would otherwise pass the check.
+	if *precisionBits < 0 || *precisionBits > 255 {
+		return nil, fmt.Errorf("invalid -precisionBits: value %d is out of range", *precisionBits)
+	}
+	if err := encoding.CheckPrecisionBits(uint8(*precisionBits)); err != nil {
+		return nil, fmt.Errorf("invalid -precisionBits: %s", err)
+	}
+
+	vminsertLN, err := netutil.NewTCPListener("vminsert", vminsertAddr)
+	if err != nil {
+		return nil, fmt.Errorf("unable to listen vminsertAddr %s: %s", vminsertAddr, err)
+	}
+	vmselectLN, err := netutil.NewTCPListener("vmselect", vmselectAddr)
+	if err != nil {
+		// Do not leak the already opened vminsert listener.
+		_ = vminsertLN.Close()
+		return nil, fmt.Errorf("unable to listen vmselectAddr %s: %s", vmselectAddr, err)
+	}
+
+	// Set network-level write timeouts to reasonable values in order to protect
+	// from broken networks.
+	// Do not set read timeouts, since they are managed separately -
+	// search for SetReadDeadline in this file.
+	vminsertLN.WriteTimeout = time.Minute
+	vmselectLN.WriteTimeout = time.Minute
+
+	s := &Server{
+		storage: storage,
+
+		vminsertLN: vminsertLN,
+		vmselectLN: vmselectLN,
+	}
+	s.vminsertConnsMap.Init()
+	s.vmselectConnsMap.Init()
+	return s, nil
+}
+
+// RunVMInsert runs a server accepting connections from vminsert.
+//
+// It blocks until the vminsert listener returns a non-temporary error.
+// The function returns silently if the server is stopping at that moment,
+// otherwise it panics. Every accepted connection is processed in its own goroutine.
+func (s *Server) RunVMInsert() {
+	logger.Infof("accepting vminsert conns at %s", s.vminsertLN.Addr())
+	for {
+		c, err := s.vminsertLN.Accept()
+		if err != nil {
+			// Retry on temporary network errors.
+			if pe, ok := err.(net.Error); ok && pe.Temporary() {
+				continue
+			}
+			// The listener is closed by MustClose after the stop flag is set,
+			// so the error is expected during shutdown.
+			if s.isStopping() {
+				return
+			}
+			logger.Panicf("FATAL: cannot process vminsert conns at %s: %s", s.vminsertLN.Addr(), err)
+		}
+		logger.Infof("accepted vminsert conn from %s", c.RemoteAddr())
+
+		// Register the connection before starting the goroutine,
+		// so MustClose can close it and wait for the goroutine.
+		// NOTE(review): a connection accepted concurrently with MustClose may be
+		// registered after CloseAll has already run - TODO confirm this is harmless.
+		vminsertConns.Inc()
+		s.vminsertConnsMap.Add(c)
+		s.vminsertWG.Add(1)
+		go func() {
+			defer func() {
+				s.vminsertConnsMap.Delete(c)
+				vminsertConns.Dec()
+				s.vminsertWG.Done()
+			}()
+
+			// There is no need in response compression, since
+			// vmstorage doesn't send anything back to vminsert.
+			compressionLevel := 0
+			bc, err := handshake.VMInsertServer(c, compressionLevel)
+			if err != nil {
+				if s.isStopping() {
+					// c is closed inside Server.MustClose
+					return
+				}
+				logger.Errorf("cannot perform vminsert handshake with client %q: %s", c.RemoteAddr(), err)
+				_ = c.Close()
+				return
+			}
+			defer func() {
+				// Do not spam logs with per-connection messages during shutdown.
+				if !s.isStopping() {
+					logger.Infof("closing vminsert conn from %s", c.RemoteAddr())
+				}
+				_ = bc.Close()
+			}()
+
+			logger.Infof("processing vminsert conn from %s", c.RemoteAddr())
+			if err := s.processVMInsertConn(bc); err != nil {
+				// Errors are expected during shutdown, since MustClose closes the connection.
+				if s.isStopping() {
+					return
+				}
+				vminsertConnErrors.Inc()
+				logger.Errorf("cannot process vminsert conn from %s: %s", c.RemoteAddr(), err)
+			}
+		}()
+	}
+}
+
+var (
+	vminsertConns      = metrics.NewCounter("vm_vminsert_conns")
+	vminsertConnErrors = metrics.NewCounter("vm_vminsert_conn_errors_total")
+)
+
+// RunVMSelect runs a server accepting connections from vmselect.
+//
+// It blocks until the vmselect listener returns a non-temporary error.
+// The function returns silently if the server is stopping at that moment,
+// otherwise it panics. Every accepted connection is processed in its own goroutine.
+func (s *Server) RunVMSelect() {
+	logger.Infof("accepting vmselect conns at %s", s.vmselectLN.Addr())
+	for {
+		c, err := s.vmselectLN.Accept()
+		if err != nil {
+			// Retry on temporary network errors.
+			if pe, ok := err.(net.Error); ok && pe.Temporary() {
+				continue
+			}
+			// The listener is closed by MustClose after the stop flag is set,
+			// so the error is expected during shutdown.
+			if s.isStopping() {
+				return
+			}
+			logger.Panicf("FATAL: cannot process vmselect conns at %s: %s", s.vmselectLN.Addr(), err)
+		}
+		logger.Infof("accepted vmselect conn from %s", c.RemoteAddr())
+
+		// Register the connection before starting the goroutine,
+		// so MustClose can close it and wait for the goroutine.
+		// NOTE(review): a connection accepted concurrently with MustClose may be
+		// registered after CloseAll has already run - TODO confirm this is harmless.
+		vmselectConns.Inc()
+		s.vmselectConnsMap.Add(c)
+		s.vmselectWG.Add(1)
+		go func() {
+			defer func() {
+				s.vmselectConnsMap.Delete(c)
+				vmselectConns.Dec()
+				s.vmselectWG.Done()
+			}()
+
+			// Compress responses to vmselect even if they already contain compressed blocks.
+			// Responses contain uncompressed metric names, which should compress well
+			// when the response contains high number of time series.
+			// Additionally, recently added metric blocks are usually uncompressed, so the compression
+			// should save network bandwidth.
+			compressionLevel := 1
+			if *disableRPCCompression {
+				compressionLevel = 0
+			}
+			bc, err := handshake.VMSelectServer(c, compressionLevel)
+			if err != nil {
+				if s.isStopping() {
+					// c is closed inside Server.MustClose
+					return
+				}
+				logger.Errorf("cannot perform vmselect handshake with client %q: %s", c.RemoteAddr(), err)
+				_ = c.Close()
+				return
+			}
+
+			defer func() {
+				// Do not spam logs with per-connection messages during shutdown.
+				if !s.isStopping() {
+					logger.Infof("closing vmselect conn from %s", c.RemoteAddr())
+				}
+				_ = bc.Close()
+			}()
+
+			logger.Infof("processing vmselect conn from %s", c.RemoteAddr())
+			if err := s.processVMSelectConn(bc); err != nil {
+				// Errors are expected during shutdown, since MustClose closes the connection.
+				if s.isStopping() {
+					return
+				}
+				vmselectConnErrors.Inc()
+				logger.Errorf("cannot process vmselect conn %s: %s", c.RemoteAddr(), err)
+			}
+		}()
+	}
+}
+
+var (
+	vmselectConns      = metrics.NewCounter("vm_vmselect_conns")
+	vmselectConnErrors = metrics.NewCounter("vm_vmselect_conn_errors_total")
+)
+
+// MustClose gracefully closes the server,
+// so it no longer touches s.storage after returning.
+//
+// It panics if any of the listeners cannot be closed.
+func (s *Server) MustClose() {
+	// Mark the server as stopping, so errors caused by the shutdown
+	// are not reported by the goroutines serving listeners and connections.
+	s.setIsStopping()
+
+	// Stop accepting new connections from vminsert and vmselect.
+	if err := s.vminsertLN.Close(); err != nil {
+		logger.Panicf("FATAL: cannot close vminsert listener: %s", err)
+	}
+	if err := s.vmselectLN.Close(); err != nil {
+		logger.Panicf("FATAL: cannot close vmselect listener: %s", err)
+	}
+
+	// Close existing connections from vminsert, so the goroutines
+	// processing these connections are finished.
+	s.vminsertConnsMap.CloseAll()
+
+	// Close existing connections from vmselect, so the goroutines
+	// processing these connections are finished.
+	s.vmselectConnsMap.CloseAll()
+
+	// Wait until all the goroutines processing vminsert and vmselect conns
+	// are finished.
+	s.vminsertWG.Wait()
+	s.vmselectWG.Wait()
+}
+
+// setIsStopping atomically marks the server as stopping.
+func (s *Server) setIsStopping() {
+	atomic.StoreUint64(&s.stopFlag, 1)
+}
+
+// isStopping returns true after setIsStopping has been called.
+func (s *Server) isStopping() bool {
+	return atomic.LoadUint64(&s.stopFlag) != 0
+}
+
+// processVMInsertConn reads packets with metric rows from r and stores them in s.storage
+// until r is exhausted or an error occurs.
+//
+// Every packet consists of 8-byte size followed by the marshaled storage.MetricRow items.
+// Nil is returned if r returns io.EOF at the packet boundary.
+func (s *Server) processVMInsertConn(r io.Reader) error {
+	var (
+		packet []byte
+		rows   []storage.MetricRow
+	)
+	header := make([]byte, 8)
+	for {
+		switch _, err := io.ReadFull(r, header); {
+		case err == io.EOF:
+			// Remote end gracefully closed the connection.
+			return nil
+		case err != nil:
+			return fmt.Errorf("cannot read packet size: %s", err)
+		}
+		size := encoding.UnmarshalUint64(header)
+		if size > consts.MaxInsertPacketSize {
+			return fmt.Errorf("too big packet size: %d; shouldn't exceed %d", size, consts.MaxInsertPacketSize)
+		}
+		packet = bytesutil.Resize(packet, int(size))
+		if _, err := io.ReadFull(r, packet); err != nil {
+			return fmt.Errorf("cannot read packet with size %d: %s", size, err)
+		}
+		vminsertPacketsRead.Inc()
+
+		// Read metric rows from the packet.
+		rows = rows[:0]
+		rest := packet
+		for len(rest) > 0 {
+			// Extend rows by a single item, reusing the already allocated item if capacity allows.
+			idx := len(rows)
+			if idx < cap(rows) {
+				rows = rows[:idx+1]
+			} else {
+				rows = append(rows, storage.MetricRow{})
+			}
+			var err error
+			rest, err = rows[idx].Unmarshal(rest)
+			if err != nil {
+				return fmt.Errorf("cannot unmarshal MetricRow: %s", err)
+			}
+		}
+		vminsertMetricsRead.Add(len(rows))
+		if err := s.storage.AddRows(rows, uint8(*precisionBits)); err != nil {
+			return fmt.Errorf("cannot store metrics: %s", err)
+		}
+	}
+}
+
+var (
+	vminsertPacketsRead = metrics.NewCounter("vm_vminsert_packets_read_total")
+	vminsertMetricsRead = metrics.NewCounter("vm_vminsert_metrics_read_total")
+)
+
+// processVMSelectConn serves vmselect requests arriving via bc one by one
+// until the remote side closes the connection or an error occurs.
+//
+// The response is flushed to bc after every successfully processed request.
+func (s *Server) processVMSelectConn(bc *handshake.BufferedConn) error {
+	ctx := &vmselectRequestCtx{
+		sizeBuf: make([]byte, 8),
+		bc:      bc,
+	}
+	for {
+		reqErr := s.processVMSelectRequest(ctx)
+
+		// Update the counter before inspecting reqErr, so it is updated for failed requests too.
+		missing := atomic.LoadUint64(&ctx.sr.MissingMetricNamesForMetricID)
+		missingMetricNamesForMetricID.Add(int(missing))
+
+		switch {
+		case reqErr == io.EOF:
+			// Remote client gracefully closed the connection.
+			return nil
+		case reqErr != nil:
+			return fmt.Errorf("cannot process vmselect request: %s", reqErr)
+		}
+		if flushErr := bc.Flush(); flushErr != nil {
+			return fmt.Errorf("cannot flush compressed buffers: %s", flushErr)
+		}
+	}
+}
+
+var missingMetricNamesForMetricID = metrics.NewCounter(`vm_missing_metric_names_for_metric_id_total`)
+
+// vmselectRequestCtx holds per-connection state, which is reused across vmselect requests.
+type vmselectRequestCtx struct {
+	// bc is the connection to vmselect, which requests are read from and responses are written to.
+	bc      *handshake.BufferedConn
+	// sizeBuf is a scratch buffer for reading and writing fixed-size integers.
+	sizeBuf []byte
+	// dataBuf is a scratch buffer for reading and writing variable-sized data.
+	// It is overwritten by the read* and write* helpers except of readUint32 and writeUint64.
+	dataBuf []byte
+
+	// sq is the search query unmarshaled from the request.
+	sq   storage.SearchQuery
+	// tfss are tag filters set up by setupTfss.
+	tfss []*storage.TagFilters
+	// sr is the search state; its MissingMetricNamesForMetricID counter is read
+	// by processVMSelectConn after each request.
+	sr   storage.Search
+}
+
+// readUint32 reads 4 bytes from ctx.bc and returns the uint32 unmarshaled from them.
+//
+// io.EOF is returned as is if nothing could be read; other errors are wrapped.
+func (ctx *vmselectRequestCtx) readUint32() (uint32, error) {
+	ctx.sizeBuf = bytesutil.Resize(ctx.sizeBuf, 4)
+	_, err := io.ReadFull(ctx.bc, ctx.sizeBuf)
+	switch {
+	case err == io.EOF:
+		return 0, err
+	case err != nil:
+		return 0, fmt.Errorf("cannot read uint32: %s", err)
+	}
+	return encoding.UnmarshalUint32(ctx.sizeBuf), nil
+}
+
+// readDataBufBytes reads size-prefixed data from ctx.bc into ctx.dataBuf.
+//
+// The data is prefixed by 8-byte size, which mustn't exceed maxDataSize.
+// io.EOF is returned as is if the size could not be read at all; other errors are wrapped.
+func (ctx *vmselectRequestCtx) readDataBufBytes(maxDataSize int) error {
+	ctx.sizeBuf = bytesutil.Resize(ctx.sizeBuf, 8)
+	_, err := io.ReadFull(ctx.bc, ctx.sizeBuf)
+	switch {
+	case err == io.EOF:
+		return err
+	case err != nil:
+		return fmt.Errorf("cannot read data size: %s", err)
+	}
+
+	size := encoding.UnmarshalUint64(ctx.sizeBuf)
+	if limit := uint64(maxDataSize); size > limit {
+		return fmt.Errorf("too big data size: %d; it mustn't exceed %d bytes", size, maxDataSize)
+	}
+
+	ctx.dataBuf = bytesutil.Resize(ctx.dataBuf, int(size))
+	if size > 0 {
+		if _, err := io.ReadFull(ctx.bc, ctx.dataBuf); err != nil {
+			return fmt.Errorf("cannot read data with size %d: %s", size, err)
+		}
+	}
+	return nil
+}
+
+// readBool reads a single byte from ctx.bc into ctx.dataBuf and returns true if it is non-zero.
+//
+// io.EOF is returned as is; other errors are wrapped.
+func (ctx *vmselectRequestCtx) readBool() (bool, error) {
+	ctx.dataBuf = bytesutil.Resize(ctx.dataBuf, 1)
+	_, err := io.ReadFull(ctx.bc, ctx.dataBuf)
+	if err == io.EOF {
+		return false, err
+	}
+	if err != nil {
+		return false, fmt.Errorf("cannot read bool: %s", err)
+	}
+	return ctx.dataBuf[0] != 0, nil
+}
+
+// writeDataBufBytes writes ctx.dataBuf to ctx.bc prefixed by its 8-byte size.
+//
+// Only the size is written if ctx.dataBuf is empty.
+func (ctx *vmselectRequestCtx) writeDataBufBytes() error {
+	n := len(ctx.dataBuf)
+	if err := ctx.writeUint64(uint64(n)); err != nil {
+		return fmt.Errorf("cannot write data size: %s", err)
+	}
+	if n > 0 {
+		if _, err := ctx.bc.Write(ctx.dataBuf); err != nil {
+			return fmt.Errorf("cannot write data with size %d: %s", n, err)
+		}
+	}
+	return nil
+}
+
+// writeString writes size-prefixed s to ctx.bc.
+//
+// It overwrites ctx.dataBuf with the contents of s.
+func (ctx *vmselectRequestCtx) writeString(s string) error {
+	ctx.dataBuf = append(ctx.dataBuf[:0], s...)
+	return ctx.writeDataBufBytes()
+}
+
+// writeUint64 marshals n into ctx.sizeBuf and writes it to ctx.bc.
+func (ctx *vmselectRequestCtx) writeUint64(n uint64) error {
+	ctx.sizeBuf = encoding.MarshalUint64(ctx.sizeBuf[:0], n)
+	_, err := ctx.bc.Write(ctx.sizeBuf)
+	if err == nil {
+		return nil
+	}
+	return fmt.Errorf("cannot write uint64 %d: %s", n, err)
+}
+
+const maxRPCNameSize = 128
+
+var zeroTime time.Time
+
+// processVMSelectRequest reads a single request from ctx.bc and dispatches it
+// to the handler matching the rpcName at the start of the request.
+//
+// io.EOF is returned as is if the connection is closed before the rpcName is read.
+func (s *Server) processVMSelectRequest(ctx *vmselectRequestCtx) error {
+	// Read rpcName
+	// Do not set deadline on reading rpcName, since it may take a
+	// lot of time for idle connection.
+	err := ctx.readDataBufBytes(maxRPCNameSize)
+	if err == io.EOF {
+		// Remote client gracefully closed the connection.
+		return err
+	}
+	if err != nil {
+		return fmt.Errorf("cannot read rpcName: %s", err)
+	}
+
+	// Limit the time required for reading request args.
+	argsDeadline := time.Now().Add(5 * time.Second)
+	if err := ctx.bc.SetReadDeadline(argsDeadline); err != nil {
+		return fmt.Errorf("cannot set read deadline for reading request args: %s", err)
+	}
+	// Drop the deadline once the request is processed, so the next rpcName can be awaited indefinitely.
+	defer func() {
+		_ = ctx.bc.SetReadDeadline(zeroTime)
+	}()
+
+	switch string(ctx.dataBuf) {
+	case "search_v3":
+		return s.processVMSelectSearchQuery(ctx)
+	case "labels":
+		return s.processVMSelectLabels(ctx)
+	case "labelValues":
+		return s.processVMSelectLabelValues(ctx)
+	case "labelEntries":
+		return s.processVMSelectLabelEntries(ctx)
+	case "seriesCount":
+		return s.processVMSelectSeriesCount(ctx)
+	case "deleteMetrics_v2":
+		return s.processVMSelectDeleteMetrics(ctx)
+	}
+	return fmt.Errorf("unsupported rpcName: %q", ctx.dataBuf)
+}
+
+const maxTagFiltersSize = 64 * 1024
+
+// processVMSelectDeleteMetrics reads a marshaled SearchQuery from ctx, deletes
+// the matching metrics from s.storage and sends the result to vmselect.
+//
+// The response consists of an error message (empty on success), which is followed
+// by the number of deleted metrics on success.
+//
+// Errors from tag filters setup and from the storage are sent to vmselect, while nil is returned.
+// A non-nil error is returned only if the request cannot be read or the response cannot be written.
+func (s *Server) processVMSelectDeleteMetrics(ctx *vmselectRequestCtx) error {
+	vmselectDeleteMetricsRequests.Inc()
+
+	// Read request
+	if err := ctx.readDataBufBytes(maxTagFiltersSize); err != nil {
+		return fmt.Errorf("cannot read SearchQuery: %s", err)
+	}
+	tail, err := ctx.sq.Unmarshal(ctx.dataBuf)
+	if err != nil {
+		return fmt.Errorf("cannot unmarshal SearchQuery: %s", err)
+	}
+	if len(tail) > 0 {
+		return fmt.Errorf("unexpected non-zero tail left after unmarshaling SearchQuery: (len=%d) %q", len(tail), tail)
+	}
+
+	// Setup ctx.tfss
+	if err := ctx.setupTfss(); err != nil {
+		// Send the error message to vmselect.
+		errMsg := err.Error()
+		if err := ctx.writeString(errMsg); err != nil {
+			return fmt.Errorf("cannot send error message: %s", err)
+		}
+		return nil
+	}
+
+	// Delete the given metrics.
+	deletedCount, err := s.storage.DeleteMetrics(ctx.tfss)
+	if err != nil {
+		// Send the error message to vmselect.
+		if err := ctx.writeString(err.Error()); err != nil {
+			return fmt.Errorf("cannot send error message: %s", err)
+		}
+		return nil
+	}
+
+	// Send an empty error message to vmselect.
+	if err := ctx.writeString(""); err != nil {
+		return fmt.Errorf("cannot send empty error message: %s", err)
+	}
+	// Send deletedCount to vmselect.
+	if err := ctx.writeUint64(uint64(deletedCount)); err != nil {
+		return fmt.Errorf("cannot send deletedCount=%d: %s", deletedCount, err)
+	}
+	return nil
+}
+
+// processVMSelectLabels reads accountID and projectID from ctx, searches for
+// at most -search.maxTagKeys tag keys in s.storage and sends them to vmselect.
+//
+// The response consists of an error message (empty on success), which is followed
+// by the found labels and an empty string as 'end of response' marker on success.
+//
+// Search errors are sent to vmselect, while nil is returned.
+// A non-nil error is returned only if the request cannot be read or the response cannot be written.
+func (s *Server) processVMSelectLabels(ctx *vmselectRequestCtx) error {
+	vmselectLabelsRequests.Inc()
+
+	// Read request
+	accountID, err := ctx.readUint32()
+	if err != nil {
+		return fmt.Errorf("cannot read accountID: %s", err)
+	}
+	projectID, err := ctx.readUint32()
+	if err != nil {
+		return fmt.Errorf("cannot read projectID: %s", err)
+	}
+
+	// Search for tag keys
+	labels, err := s.storage.SearchTagKeys(accountID, projectID, *maxTagKeysPerSearch)
+	if err != nil {
+		// Send the error message to vmselect.
+		errMsg := fmt.Sprintf("error during labels search: %s", err)
+		if err := ctx.writeString(errMsg); err != nil {
+			return fmt.Errorf("cannot send error message: %s", err)
+		}
+		return nil
+	}
+
+	// Send an empty error message to vmselect.
+	if err := ctx.writeString(""); err != nil {
+		return fmt.Errorf("cannot send empty error message: %s", err)
+	}
+
+	// Send labels to vmselect
+	for _, label := range labels {
+		if len(label) == 0 {
+			// Do this substitution in order to prevent clashing with 'end of response' marker.
+			label = "__name__"
+		}
+		if err := ctx.writeString(label); err != nil {
+			return fmt.Errorf("cannot write label %q: %s", label, err)
+		}
+	}
+
+	// Send 'end of response' marker
+	if err := ctx.writeString(""); err != nil {
+		// Include the underlying error, so the failure cause isn't lost.
+		return fmt.Errorf("cannot send 'end of response' marker: %s", err)
+	}
+	return nil
+}
+
+const maxLabelValueSize = 16 * 1024
+
+// processVMSelectLabelValues reads accountID, projectID and labelName from ctx, searches for
+// at most -search.maxTagValues values of the given label in s.storage and sends them to vmselect.
+//
+// Search errors are sent to vmselect, while nil is returned.
+func (s *Server) processVMSelectLabelValues(ctx *vmselectRequestCtx) error {
+	vmselectLabelValuesRequests.Inc()
+
+	// Read request
+	accountID, err := ctx.readUint32()
+	if err != nil {
+		return fmt.Errorf("cannot read accountID: %s", err)
+	}
+	projectID, err := ctx.readUint32()
+	if err != nil {
+		return fmt.Errorf("cannot read projectID: %s", err)
+	}
+	if err := ctx.readDataBufBytes(maxLabelValueSize); err != nil {
+		return fmt.Errorf("cannot read labelName: %s", err)
+	}
+	// name shares memory with ctx.dataBuf, so it must be used before the next write to ctx.
+	name := ctx.dataBuf
+
+	// Search for tag values
+	values, searchErr := s.storage.SearchTagValues(accountID, projectID, name, *maxTagValuesPerSearch)
+	if searchErr != nil {
+		// Send the error message to vmselect.
+		msg := fmt.Sprintf("error during label values search for labelName=%q: %s", name, searchErr)
+		if sendErr := ctx.writeString(msg); sendErr != nil {
+			return fmt.Errorf("cannot send error message: %s", sendErr)
+		}
+		return nil
+	}
+
+	// Send an empty error message to vmselect.
+	if sendErr := ctx.writeString(""); sendErr != nil {
+		return fmt.Errorf("cannot send empty error message: %s", sendErr)
+	}
+
+	return writeLabelValues(ctx, values)
+}
+
+// writeLabelValues writes labelValues to ctx followed by an empty string,
+// which marks the end of the values list.
+//
+// Empty values are skipped, so they cannot be confused with the end marker.
+// The first write error is returned with the underlying cause attached.
+func writeLabelValues(ctx *vmselectRequestCtx, labelValues []string) error {
+	for _, labelValue := range labelValues {
+		if len(labelValue) == 0 {
+			// Skip empty label values, since they have no sense for prometheus.
+			continue
+		}
+		if err := ctx.writeString(labelValue); err != nil {
+			return fmt.Errorf("cannot write labelValue %q: %s", labelValue, err)
+		}
+	}
+	// Send 'end of label values' marker
+	if err := ctx.writeString(""); err != nil {
+		// Keep the underlying cause in the returned error.
+		return fmt.Errorf("cannot send 'end of response' marker: %s", err)
+	}
+	return nil
+}
+
+// processVMSelectLabelEntries serves a label entries request from vmselect.
+//
+// It reads accountID and projectID from ctx, calls s.storage.SearchTagEntries
+// with *maxTagKeysPerSearch and *maxTagValuesPerSearch and writes the
+// response: an error message string (empty on success), then for every entry
+// its label name followed by its values (see writeLabelValues), then an empty
+// string acting as the 'end of response' marker.
+//
+// A failed search is reported to vmselect through the error message and nil
+// is returned. A non-nil error is returned only when the request cannot be
+// read or the response cannot be written.
+func (s *Server) processVMSelectLabelEntries(ctx *vmselectRequestCtx) error {
+	vmselectLabelEntriesRequests.Inc()
+
+	// Read request
+	accountID, err := ctx.readUint32()
+	if err != nil {
+		return fmt.Errorf("cannot read accountID: %s", err)
+	}
+	projectID, err := ctx.readUint32()
+	if err != nil {
+		return fmt.Errorf("cannot read projectID: %s", err)
+	}
+
+	// Perform the request
+	labelEntries, err := s.storage.SearchTagEntries(accountID, projectID, *maxTagKeysPerSearch, *maxTagValuesPerSearch)
+	if err != nil {
+		// Send the error message to vmselect.
+		errMsg := fmt.Sprintf("error during label entries search: %s", err)
+		if err := ctx.writeString(errMsg); err != nil {
+			return fmt.Errorf("cannot send error message: %s", err)
+		}
+		return nil
+	}
+
+	// Send an empty error message to vmselect.
+	if err := ctx.writeString(""); err != nil {
+		return fmt.Errorf("cannot send empty error message: %s", err)
+	}
+
+	// Send labelEntries to vmselect
+	for i := range labelEntries {
+		e := &labelEntries[i]
+		label := e.Key
+		if label == "" {
+			// Do this substitution in order to prevent clashing with 'end of response' marker.
+			label = "__name__"
+		}
+		if err := ctx.writeString(label); err != nil {
+			return fmt.Errorf("cannot write label %q: %s", label, err)
+		}
+		if err := writeLabelValues(ctx, e.Values); err != nil {
+			return fmt.Errorf("cannot write label values for %q: %s", label, err)
+		}
+	}
+
+	// Send 'end of response' marker
+	if err := ctx.writeString(""); err != nil {
+		// Keep the underlying cause in the returned error, like every other write failure above.
+		return fmt.Errorf("cannot send 'end of response' marker: %s", err)
+	}
+	return nil
+}
+
+// processVMSelectSeriesCount serves a series count request from vmselect.
+//
+// The request carries accountID and projectID. The response is an error
+// message string (empty on success) followed, on success only, by the count
+// returned from s.storage.GetSeriesCount written via ctx.writeUint64.
+// A failed lookup is reported to vmselect and nil is returned.
+func (s *Server) processVMSelectSeriesCount(ctx *vmselectRequestCtx) error {
+	vmselectSeriesCountRequests.Inc()
+
+	// Decode the request fields in wire order.
+	account, err := ctx.readUint32()
+	if err != nil {
+		return fmt.Errorf("cannot read accountID: %s", err)
+	}
+	project, err := ctx.readUint32()
+	if err != nil {
+		return fmt.Errorf("cannot read projectID: %s", err)
+	}
+
+	seriesCount, countErr := s.storage.GetSeriesCount(account, project)
+	if countErr != nil {
+		// Report the failed lookup to vmselect instead of failing the request handler.
+		failure := fmt.Sprintf("error during obtaining series count: %s", countErr)
+		if sendErr := ctx.writeString(failure); sendErr != nil {
+			return fmt.Errorf("cannot send error message: %s", sendErr)
+		}
+		return nil
+	}
+
+	// An empty error message precedes the count on success.
+	if sendErr := ctx.writeString(""); sendErr != nil {
+		return fmt.Errorf("cannot send empty error message: %s", sendErr)
+	}
+	if sendErr := ctx.writeUint64(seriesCount); sendErr != nil {
+		return fmt.Errorf("cannot write series count to vmselect: %s", sendErr)
+	}
+	return nil
+}
+
+// maxSearchQuerySize is the maximum size of SearchQuery packet in bytes.
+const maxSearchQuerySize = 1024 * 1024
+
+// processVMSelectSearchQuery serves a search request from vmselect.
+//
+// The request consists of a marshaled SearchQuery of up to maxSearchQuerySize
+// bytes followed by a `fetchData` bool, which is passed through to
+// ctx.sr.Init. The response is an error message string (empty on success),
+// then every found MetricBlock in marshaled form, then an empty string acting
+// as the 'end of response' marker.
+//
+// Failures in setting up the tag filters or initializing the search are
+// reported to vmselect through the error message and nil is returned.
+// A non-nil error is returned when the request cannot be read or decoded,
+// when the response cannot be written, or when the search fails after the
+// empty error message has already been sent.
+func (s *Server) processVMSelectSearchQuery(ctx *vmselectRequestCtx) error {
+	vmselectSearchQueryRequests.Inc()
+
+	// Read search query.
+	if err := ctx.readDataBufBytes(maxSearchQuerySize); err != nil {
+		return fmt.Errorf("cannot read searchQuery: %s", err)
+	}
+	tail, err := ctx.sq.Unmarshal(ctx.dataBuf)
+	if err != nil {
+		return fmt.Errorf("cannot unmarshal SearchQuery: %s", err)
+	}
+	if len(tail) > 0 {
+		return fmt.Errorf("unexpected non-zero tail left after unmarshaling SearchQuery: (len=%d) %q", len(tail), tail)
+	}
+	fetchData, err := ctx.readBool()
+	if err != nil {
+		return fmt.Errorf("cannot read `fetchData` bool: %s", err)
+	}
+
+	// Setup search.
+	if err := ctx.setupTfss(); err != nil {
+		// Send the error message to vmselect.
+		errMsg := err.Error()
+		if err := ctx.writeString(errMsg); err != nil {
+			return fmt.Errorf("cannot send error message: %s", err)
+		}
+		return nil
+	}
+	tr := storage.TimeRange{
+		MinTimestamp: ctx.sq.MinTimestamp,
+		MaxTimestamp: ctx.sq.MaxTimestamp,
+	}
+	ctx.sr.Init(s.storage, ctx.tfss, tr, fetchData, *maxMetricsPerSearch)
+	defer ctx.sr.MustClose()
+	if err := ctx.sr.Error(); err != nil {
+		// Send the error message to vmselect.
+		errMsg := fmt.Sprintf("search error: %s", err)
+		if err := ctx.writeString(errMsg); err != nil {
+			return fmt.Errorf("cannot send error message: %s", err)
+		}
+		return nil
+	}
+
+	// Send empty error message to vmselect.
+	if err := ctx.writeString(""); err != nil {
+		return fmt.Errorf("cannot send empty error message: %s", err)
+	}
+
+	// Send found blocks to vmselect.
+	for ctx.sr.NextMetricBlock() {
+		mb := ctx.sr.MetricBlock
+
+		vmselectMetricBlocksRead.Inc()
+		vmselectMetricRowsRead.Add(mb.Block.RowsCount())
+
+		// Reuse ctx.dataBuf as the marshaling buffer for every block.
+		ctx.dataBuf = mb.Marshal(ctx.dataBuf[:0])
+		if err := ctx.writeDataBufBytes(); err != nil {
+			return fmt.Errorf("cannot send MetricBlock: %s", err)
+		}
+	}
+	// The empty error message has already been sent, so a late search error
+	// is returned to the caller rather than written to vmselect.
+	if err := ctx.sr.Error(); err != nil {
+		return fmt.Errorf("search error: %s", err)
+	}
+
+	// Send 'end of response' marker
+	if err := ctx.writeString(""); err != nil {
+		// Keep the underlying cause in the returned error, like every other write failure above.
+		return fmt.Errorf("cannot send 'end of response' marker: %s", err)
+	}
+	return nil
+}
+
+// Counters for vmselect traffic.
+//
+// The *Requests counters visible in this file are incremented once at the
+// start of the matching processVMSelect* handler. vmselectMetricBlocksRead and
+// vmselectMetricRowsRead are updated in processVMSelectSearchQuery for every
+// MetricBlock sent to vmselect: the former by one, the latter by the number
+// of rows in the block.
+var (
+	vmselectDeleteMetricsRequests = metrics.NewCounter("vm_vmselect_delete_metrics_requests_total")
+	vmselectLabelsRequests        = metrics.NewCounter("vm_vmselect_labels_requests_total")
+	vmselectLabelValuesRequests   = metrics.NewCounter("vm_vmselect_label_values_requests_total")
+	vmselectLabelEntriesRequests  = metrics.NewCounter("vm_vmselect_label_entries_requests_total")
+	vmselectSeriesCountRequests   = metrics.NewCounter("vm_vmselect_series_count_requests_total")
+	vmselectSearchQueryRequests   = metrics.NewCounter("vm_vmselect_search_query_requests_total")
+	vmselectMetricBlocksRead      = metrics.NewCounter("vm_vmselect_metric_blocks_read_total")
+	vmselectMetricRowsRead        = metrics.NewCounter("vm_vmselect_metric_rows_read_total")
+)
+
+// setupTfss rebuilds ctx.tfss from ctx.sq.TagFilterss.
+//
+// One *storage.TagFilters is prepared per entry of ctx.sq.TagFilterss. Entries
+// left in ctx.tfss by previous calls are reused: each one is Reset with the
+// AccountID and ProjectID of the query and refilled via Add.
+//
+// An error is returned if any tag filter is rejected by Add; ctx.tfss is left
+// unassigned in that case.
+func (ctx *vmselectRequestCtx) setupTfss() error {
+	tfss := ctx.tfss[:0]
+	for _, tagFilters := range ctx.sq.TagFilterss {
+		if len(tfss) < cap(tfss) {
+			tfss = tfss[:len(tfss)+1]
+		} else {
+			tfss = append(tfss, nil)
+		}
+		tfs := tfss[len(tfss)-1]
+		if tfs == nil {
+			// Slots between len and cap are not guaranteed to hold a value:
+			// when append grows the backing array, the extra capacity is
+			// zeroed, so reslicing into it yields nil pointers.
+			tfs = &storage.TagFilters{}
+			tfss[len(tfss)-1] = tfs
+		}
+		tfs.Reset(ctx.sq.AccountID, ctx.sq.ProjectID)
+		for i := range tagFilters {
+			tf := &tagFilters[i]
+			if err := tfs.Add(tf.Key, tf.Value, tf.IsNegative, tf.IsRegexp); err != nil {
+				return fmt.Errorf("cannot parse tag filter %s: %s", tf, err)
+			}
+		}
+	}
+	ctx.tfss = tfss
+	return nil
+}
--- a/dashboards/vminsert.json
+++ b/dashboards/vminsert.json
--- a/dashboards/vmselect.json
+++ b/dashboards/vmselect.json
--- a/dashboards/vmstorage.json
+++ b/dashboards/vmstorage.json
--- a/deployment/docker/Makefile
+++ b/deployment/docker/Makefile
@@ -1,5 +1,7 @@
-DOCKER_NAMESPACE := valyala
-BUILDER_IMAGE := local/builder:go1.12.5
+# All these commands must run from repository root.
+
+DOCKER_NAMESPACE := victoriametrics
+BUILDER_IMAGE := local/builder:go1.13.0
 CERTS_IMAGE := local/certs:1.0.2

 package-certs:
@@ -18,8 +20,10 @@ app-via-docker: package-certs package-builder
 		-w /VictoriaMetrics \
 		--mount type=bind,src="$(shell pwd)/gocache-for-docker",dst=/gocache \
 		--env GOCACHE=/gocache \
+		--env GO111MODULE=on \
+		$(DOCKER_OPTS) \
 		$(BUILDER_IMAGE) \
-		go build $(RACE) -mod=vendor -ldflags "-s -w -extldflags '-static' $(GO_BUILDINFO)" -tags 'netgo osusergo' -o bin/$(APP_NAME)-prod $(PKG_PREFIX)/app/$(APP_NAME)
+		go build $(RACE) -mod=vendor -ldflags "-s -w -extldflags '-static' $(GO_BUILDINFO)" -tags 'netgo osusergo' -o bin/$(APP_NAME)$(APP_SUFFIX)-prod $(PKG_PREFIX)/app/$(APP_NAME)

 package-via-docker:
 	(docker image ls --format '{{.Repository}}:{{.Tag}}' | grep -q '$(DOCKER_NAMESPACE)/$(APP_NAME):$(PKG_TAG)$(RACE)') || (\
--- a/deployment/docker/builder/Dockerfile
+++ b/deployment/docker/builder/Dockerfile
@@ -1 +1,2 @@
-FROM golang:1.12.5
+FROM golang:1.13.0
+STOPSIGNAL SIGINT
--- a/deployment/docker/docker-compose.yml
+++ b/deployment/docker/docker-compose.yml
@@ -0,0 +1,65 @@
+version: '3.5'
+services:
+  prometheus:
+    container_name: prometheus
+    image: prom/prometheus:v2.3.2
+    depends_on:
+    - "vminsert"
+    - "vmselect"
+    ports:
+      - 9090:9090
+    volumes:
+      - promdata:/prometheus
+      - ./prometheus.yml:/etc/prometheus/prometheus.yml
+    command:
+      - '--config.file=/etc/prometheus/prometheus.yml'
+      - '--storage.tsdb.path=/prometheus'
+      - '--web.console.libraries=/usr/share/prometheus/console_libraries'
+      - '--web.console.templates=/usr/share/prometheus/consoles'
+    networks:
+      - docker_net
+    restart: always
+  vmstorage:
+    container_name: vmstorage
+    image: victoriametrics/vmstorage:v1.27.1-cluster
+    ports:
+      - 8482:8482
+      - 8400:8400
+      - 8401:8401
+    volumes:
+      - strgdata:/storage
+    command:
+      - '--storageDataPath=/storage'
+    networks:
+    - docker_net
+    restart: always
+  vminsert:
+    container_name: vminsert
+    image: victoriametrics/vminsert:v1.27.1-cluster
+    depends_on:
+      - "vmstorage"
+    command:
+      - '--storageNode=vmstorage:8400'
+    ports:
+      - 8480:8480
+    networks:
+      - docker_net
+    restart: always
+  vmselect:
+    container_name: vmselect
+    image: victoriametrics/vmselect:v1.27.1-cluster
+    depends_on:
+      - "vmstorage"
+    command:
+      - '--storageNode=vmstorage:8401'
+    ports:
+      - 8481:8481
+    networks:
+      - docker_net
+    restart: always
+volumes:
+  promdata: {}
+  strgdata: {}
+networks:
+  docker_net:
+    driver: bridge
--- a/deployment/docker/prometheus.yml
+++ b/deployment/docker/prometheus.yml
@@ -0,0 +1,23 @@
+global:
+  scrape_interval:     10s
+  evaluation_interval: 10s
+
+remote_write:
+  - url: "http://vminsert:8480/insert/0/prometheus/"
+
+scrape_configs:
+  - job_name: 'prometheus'
+    static_configs:
+      - targets: ['prometheus:9090']
+
+  - job_name: 'vminsert'
+    static_configs:
+      - targets: ['vminsert:8480']
+
+  - job_name: 'vmselect'
+    static_configs:
+      - targets: ['vmselect:8481']
+
+  - job_name: 'vmstorage'
+    static_configs:
+      - targets: ['vmstorage:8482']
--- a/deployment/k8s/helm/Makefile
+++ b/deployment/k8s/helm/Makefile
@@ -0,0 +1,26 @@
+# All these commands must run from repository root.
+
+HELM_PROJECT=victoria-metrics
+HELM_PATH=deployment/k8s/helm/${HELM_PROJECT}
+HELM_APP_VERSION=1.0
+
+# None of the helm-* targets produces a file. Declaring them phony keeps them
+# runnable even if a file or directory with the same name appears.
+.PHONY: helm-init helm-install helm-install-dev helm-upgrade helm-upgrade-dev helm-delete helm-delete-dev
+
+# Expands to nothing when ENV is set; otherwise aborts with a clear message
+# instead of handing an empty release name to helm.
+helm-require-env = $(if $(strip $(ENV)),,$(error ENV must be set for target $@. Example: ENV=dev make $@))
+
+helm-init:
+	@helm init
+
+# Install the chart as release $(ENV).
+helm-install:
+	$(helm-require-env)
+	helm install $(HELM_PATH) -n $(ENV)
+
+helm-install-dev:
+	ENV=dev $(MAKE) helm-install
+
+# Upgrade release $(ENV) from the chart.
+helm-upgrade:
+	$(helm-require-env)
+	helm upgrade $(ENV) $(HELM_PATH)
+
+helm-upgrade-dev:
+	ENV=dev $(MAKE) helm-upgrade
+
+# Delete and purge release $(ENV).
+helm-delete:
+	$(helm-require-env)
+	helm del --purge $(ENV)
+
+helm-delete-dev:
+	ENV=dev $(MAKE) helm-delete
--- a/deployment/k8s/helm/README.md
+++ b/deployment/k8s/helm/README.md
@@ -0,0 +1,37 @@
+### Victoria metrics helm chart
+
+#### Create cluster from chart
+
+```bash
+$ ENV=<env> make helm-install
+```
+
+For the DEV environment:
+
+```bash
+$ make helm-install-dev
+```
+
+#### Upgrade cluster from chart
+
+```bash
+$ ENV=<env> make helm-upgrade
+```
+
+For the DEV environment:
+
+```bash
+$ make helm-upgrade-dev
+```
+
+#### Delete chart from cluster
+
+```bash
+$ ENV=<env> make helm-delete
+```
+
+For the DEV environment:
+
+```bash
+$ make helm-delete-dev
+```
--- a/deployment/k8s/helm/victoria-metrics/.helmignore
+++ b/deployment/k8s/helm/victoria-metrics/.helmignore
@@ -0,0 +1,22 @@
+# Patterns to ignore when building packages.
+# This supports shell glob matching, relative path matching, and
+# negation (prefixed with !). Only one pattern per line.
+.DS_Store
+# Common VCS dirs
+.git/
+.gitignore
+.bzr/
+.bzrignore
+.hg/
+.hgignore
+.svn/
+# Common backup files
+*.swp
+*.bak
+*.tmp
+*~
+# Various IDEs
+.project
+.idea/
+*.tmproj
+.vscode/
--- a/deployment/k8s/helm/victoria-metrics/Chart.yaml
+++ b/deployment/k8s/helm/victoria-metrics/Chart.yaml
@@ -0,0 +1,5 @@
+apiVersion: v1
+appVersion: 1.26.0
+description: Victoria Metrics Helm chart for Kubernetes
+name: victoria-metrics
+version: 0.2.0
--- a/deployment/k8s/helm/victoria-metrics/README.md
+++ b/deployment/k8s/helm/victoria-metrics/README.md
@@ -0,0 +1,8 @@
+# Victoria Metrics
+
+## TL;DR;
+
+1. Install helm chart. Check the output.
+2. Specify Remote Write URL in Prometheus. 
+3. Configure Grafana's Prometheus Data Source.
+
--- a/deployment/k8s/helm/victoria-metrics/templates/NOTES.txt
+++ b/deployment/k8s/helm/victoria-metrics/templates/NOTES.txt
@@ -0,0 +1,76 @@
+{{ if .Values.vminsert.enabled }}
+Write API:
+
+The Victoria Metrics write api can be accessed via port {{ .Values.vminsert.service.servicePort }} on the following DNS name from within your cluster:
+{{ template "victoria-metrics.vminsert.fullname" . }}.{{ .Release.Namespace }}.svc.{{ .Values.clusterDomainSuffix }}
+
+Get the Victoria Metrics insert service URL by running these commands in the same shell:
+{{- if contains "NodePort" .Values.vminsert.service.type }}
+  export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ template "victoria-metrics.vminsert.fullname" . }})
+  export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}")
+  echo http://$NODE_IP:$NODE_PORT
+{{- else if contains "LoadBalancer" .Values.vminsert.service.type }}
+  NOTE: It may take a few minutes for the LoadBalancer IP to be available.
+        You can watch its status by running 'kubectl get svc --namespace {{ .Release.Namespace }} -w {{ template "victoria-metrics.vminsert.fullname" . }}'
+
+  export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ template "victoria-metrics.vminsert.fullname" . }} -o jsonpath='{.status.loadBalancer.ingress[0].ip}')
+  echo http://$SERVICE_IP:{{ .Values.vminsert.service.servicePort }}
+{{- else if contains "ClusterIP"  .Values.vminsert.service.type }}
+  export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app={{ .Values.vminsert.name }}" -o jsonpath="{.items[0].metadata.name}")
+  kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME {{ .Values.vminsert.service.servicePort }}
+{{- end }}
+
+You need to update your Prometheus configuration file and add the following lines to it:
+
+prometheus.yml
+```yaml
+remote_write:
+  - url: "http://<insert-service>/insert/0/prometheus/"
+
+```
+
+for e.g. inside the kubernetes cluster:
+```yaml
+remote_write:
+  - url: "http://{{ template "victoria-metrics.vminsert.fullname" . }}.{{ .Release.Namespace }}.svc.{{ .Values.clusterDomainSuffix }}:{{ .Values.vminsert.service.servicePort }}/insert/0/prometheus/"
+
+```
+{{- end }}
+
+{{- if .Values.vmselect.enabled }}
+Read API:
+
+The Victoria Metrics read api can be accessed via port {{ .Values.vmselect.service.servicePort }} on the following DNS name from within your cluster:
+{{ template "victoria-metrics.vmselect.fullname" . }}.{{ .Release.Namespace }}.svc.{{ .Values.clusterDomainSuffix }}
+
+Get the Victoria Metrics select service URL by running these commands in the same shell:
+{{- /* Every command in this stanza must address the vmselect service, not vminsert. */}}
+{{- if contains "NodePort" .Values.vmselect.service.type }}
+  export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ template "victoria-metrics.vmselect.fullname" . }})
+  export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}")
+  echo http://$NODE_IP:$NODE_PORT
+{{- else if contains "LoadBalancer" .Values.vmselect.service.type }}
+  NOTE: It may take a few minutes for the LoadBalancer IP to be available.
+        You can watch its status by running 'kubectl get svc --namespace {{ .Release.Namespace }} -w {{ template "victoria-metrics.vmselect.fullname" . }}'
+
+  export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ template "victoria-metrics.vmselect.fullname" . }} -o jsonpath='{.status.loadBalancer.ingress[0].ip}')
+  echo http://$SERVICE_IP:{{ .Values.vmselect.service.servicePort }}
+{{- else if contains "ClusterIP"  .Values.vmselect.service.type }}
+  export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app={{ .Values.vmselect.name }}" -o jsonpath="{.items[0].metadata.name}")
+  kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME {{ .Values.vmselect.service.servicePort }}
+{{- end }}
+
+You need to specify the select service URL in your Grafana:
+ NOTE: you need to use the Prometheus Data Source
+
+Input for URL field in Grafana
+
+```
+http://<select-service>/select/0/prometheus/
+```
+
+for e.g. inside the kubernetes cluster:
+```
+http://{{ template "victoria-metrics.vmselect.fullname" . }}.{{ .Release.Namespace }}.svc.{{ .Values.clusterDomainSuffix }}:{{ .Values.vmselect.service.servicePort }}/select/0/prometheus/
+```
+{{- end }}
+
--- a/deployment/k8s/helm/victoria-metrics/templates/_helpers.tpl
+++ b/deployment/k8s/helm/victoria-metrics/templates/_helpers.tpl
@@ -0,0 +1,128 @@
+{{/* vim: set filetype=mustache: */}}
+{{/*
+Expand the name of the chart.
+*/}}
+{{- define "victoria-metrics.name" -}}
+{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}}
+{{- end -}}
+
+{{/*
+Create chart name and version as used by the chart label.
+*/}}
+{{- define "victoria-metrics.chart" -}}
+{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}}
+{{- end -}}
+
+{{/*
+Create unified labels for victoria-metrics components
+*/}}
+{{- define "victoria-metrics.common.matchLabels" -}}
+app.kubernetes.io/name: {{ include "victoria-metrics.name" . }}
+app.kubernetes.io/instance: {{ .Release.Name }}
+{{- end -}}
+
+{{- define "victoria-metrics.common.metaLabels" -}}
+helm.sh/chart: {{ include "victoria-metrics.chart" . }}
+app.kubernetes.io/managed-by: {{ .Release.Service }}
+{{- end -}}
+
+{{- define "victoria-metrics.vmstorage.labels" -}}
+{{ include "victoria-metrics.vmstorage.matchLabels" . }}
+{{ include "victoria-metrics.common.metaLabels" . }}
+{{- end -}}
+
+{{- define "victoria-metrics.vmstorage.matchLabels" -}}
+app: {{ .Values.vmstorage.name }}
+{{ include "victoria-metrics.common.matchLabels" . }}
+{{- end -}}
+
+{{- define "victoria-metrics.vmselect.labels" -}}
+{{ include "victoria-metrics.vmselect.matchLabels" . }}
+{{ include "victoria-metrics.common.metaLabels" . }}
+{{- end -}}
+
+{{- define "victoria-metrics.vmselect.matchLabels" -}}
+app: {{ .Values.vmselect.name }}
+{{ include "victoria-metrics.common.matchLabels" . }}
+{{- end -}}
+
+{{- define "victoria-metrics.vminsert.labels" -}}
+{{ include "victoria-metrics.vminsert.matchLabels" . }}
+{{ include "victoria-metrics.common.metaLabels" . }}
+{{- end -}}
+
+{{- define "victoria-metrics.vminsert.matchLabels" -}}
+app: {{ .Values.vminsert.name }}
+{{ include "victoria-metrics.common.matchLabels" . }}
+{{- end -}}
+
+{{/*
+Create a fully qualified vmstorage name.
+We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
+*/}}
+{{- define "victoria-metrics.vmstorage.fullname" -}}
+{{- if .Values.vmstorage.fullnameOverride -}}
+{{- .Values.vmstorage.fullnameOverride | trunc 63 | trimSuffix "-" -}}
+{{- else -}}
+{{- $name := default .Chart.Name .Values.nameOverride -}}
+{{- if contains $name .Release.Name -}}
+{{- printf "%s-%s" .Release.Name .Values.vmstorage.name | trunc 63 | trimSuffix "-" -}}
+{{- else -}}
+{{- printf "%s-%s-%s" .Release.Name $name .Values.vmstorage.name | trunc 63 | trimSuffix "-" -}}
+{{- end -}}
+{{- end -}}
+{{- end -}}
+
+{{/*
+Create a fully qualified vmselect name.
+We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
+*/}}
+{{- define "victoria-metrics.vmselect.fullname" -}}
+{{- if .Values.vmselect.fullnameOverride -}}
+{{- .Values.vmselect.fullnameOverride | trunc 63 | trimSuffix "-" -}}
+{{- else -}}
+{{- $name := default .Chart.Name .Values.nameOverride -}}
+{{- if contains $name .Release.Name -}}
+{{- printf "%s-%s" .Release.Name .Values.vmselect.name | trunc 63 | trimSuffix "-" -}}
+{{- else -}}
+{{- printf "%s-%s-%s" .Release.Name $name .Values.vmselect.name | trunc 63 | trimSuffix "-" -}}
+{{- end -}}
+{{- end -}}
+{{- end -}}
+
+{{/*
+Create a fully qualified vminsert name.
+We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
+*/}}
+{{- define "victoria-metrics.vminsert.fullname" -}}
+{{- if .Values.vminsert.fullnameOverride -}}
+{{- .Values.vminsert.fullnameOverride | trunc 63 | trimSuffix "-" -}}
+{{- else -}}
+{{- $name := default .Chart.Name .Values.nameOverride -}}
+{{- if contains $name .Release.Name -}}
+{{- printf "%s-%s" .Release.Name .Values.vminsert.name | trunc 63 | trimSuffix "-" -}}
+{{- else -}}
+{{- printf "%s-%s-%s" .Release.Name $name .Values.vminsert.name | trunc 63 | trimSuffix "-" -}}
+{{- end -}}
+{{- end -}}
+{{- end -}}
+
+{{- define "victoria-metrics.vminsert.vmstorage-pod-fqdn" -}}
+{{- $pod := include "victoria-metrics.vmstorage.fullname" . -}}
+{{- $svc := include "victoria-metrics.vmstorage.fullname" . -}}
+{{- $namespace := .Release.Namespace -}}
+{{- $dnsSuffix := .Values.clusterDomainSuffix -}}
+{{- range $i := until (.Values.vmstorage.replicaCount | int) -}}
+{{- printf "- --storageNode=%s-%d.%s.%s.svc.%s:8400\n" $pod $i $svc $namespace $dnsSuffix -}}
+{{- end -}}
+{{- end -}}
+
+{{- define "victoria-metrics.vmselect.vmstorage-pod-fqdn" -}}
+{{- $pod := include "victoria-metrics.vmstorage.fullname" . -}}
+{{- $svc := include "victoria-metrics.vmstorage.fullname" . -}}
+{{- $namespace := .Release.Namespace -}}
+{{- $dnsSuffix := .Values.clusterDomainSuffix -}}
+{{- range $i := until (.Values.vmstorage.replicaCount | int) -}}
+{{- printf "- --storageNode=%s-%d.%s.%s.svc.%s:8401\n" $pod $i $svc $namespace $dnsSuffix -}}
+{{- end -}}
+{{- end -}}
--- a/deployment/k8s/helm/victoria-metrics/templates/vminsert-deployment.yaml
+++ b/deployment/k8s/helm/victoria-metrics/templates/vminsert-deployment.yaml
@@ -0,0 +1,71 @@
+{{- if .Values.vminsert.enabled -}}
+apiVersion: extensions/v1beta1
+kind: Deployment
+metadata:
+  labels:
+    {{- include "victoria-metrics.vminsert.labels" . | nindent 4 }}
+  name: {{ template "victoria-metrics.vminsert.fullname" . }}
+spec:
+  selector:
+    matchLabels:
+      {{- include "victoria-metrics.vminsert.matchLabels" . | nindent 6 }}
+  replicas: {{ .Values.vminsert.replicaCount }}
+  template:
+    metadata:
+    {{- if .Values.vminsert.podAnnotations }}
+      annotations:
+{{ toYaml .Values.vminsert.podAnnotations | indent 8 }}
+    {{- end }}
+      labels:
+        {{- include "victoria-metrics.vminsert.labels" . | nindent 8 }}
+    spec:
+{{- if .Values.vminsert.priorityClassName }}
+      priorityClassName: "{{ .Values.vminsert.priorityClassName }}"
+{{- end }}
+      containers:
+        - name: {{ template "victoria-metrics.name" . }}-{{ .Values.vminsert.name }}
+          image: "{{ .Values.vminsert.image.repository }}:{{ .Values.vminsert.image.tag }}"
+          imagePullPolicy: "{{ .Values.vminsert.image.pullPolicy }}"
+          args:
+          {{- include "victoria-metrics.vminsert.vmstorage-pod-fqdn" . | nindent 12 }}
+          {{- range $key, $value := .Values.vminsert.extraArgs }}
+            - --{{ $key }}={{ $value }}
+          {{- end }}
+          ports:
+            - name: http
+              containerPort: 8480
+          readinessProbe:
+            httpGet:
+              path: /health
+              port: http
+            initialDelaySeconds: 5
+            periodSeconds: 15
+          livenessProbe:
+            tcpSocket:
+              port: http
+            initialDelaySeconds: 5
+            periodSeconds: 15
+            timeoutSeconds: 5
+          resources:
+{{ toYaml .Values.vminsert.resources | indent 12 }}
+    {{- /* toYaml starts at column 0 so that `indent 8` aligns every list item, not only the first one. */}}
+    {{- if .Values.imagePullSecrets }}
+      imagePullSecrets:
+{{ toYaml .Values.imagePullSecrets | indent 8 }}
+    {{- end }}
+    {{- if .Values.vminsert.nodeSelector }}
+      nodeSelector:
+{{ toYaml .Values.vminsert.nodeSelector | indent 8 }}
+    {{- end }}
+    {{- if .Values.vminsert.securityContext }}
+      securityContext:
+{{ toYaml .Values.vminsert.securityContext | indent 8 }}
+    {{- end }}
+    {{- if .Values.vminsert.tolerations }}
+      tolerations:
+{{ toYaml .Values.vminsert.tolerations | indent 8 }}
+    {{- end }}
+    {{- if .Values.vminsert.affinity }}
+      affinity:
+{{ toYaml .Values.vminsert.affinity | indent 8 }}
+    {{- end }}
+{{- end }}
--- a/deployment/k8s/helm/victoria-metrics/templates/vminsert-ingress.yaml
+++ b/deployment/k8s/helm/victoria-metrics/templates/vminsert-ingress.yaml
@@ -0,0 +1,29 @@
+{{- /*
+Ingress for vminsert. Rendered only when both vminsert and its ingress are enabled.
+One rule is emitted per entry of .Values.vminsert.ingress.hosts (fields: name, path).
+*/ -}}
+{{- if and .Values.vminsert.enabled .Values.vminsert.ingress.enabled }}
+apiVersion: extensions/v1beta1
+kind: Ingress
+metadata:
+{{- if .Values.vminsert.ingress.annotations }}
+  annotations:
+{{ toYaml .Values.vminsert.ingress.annotations | indent 4 }}
+{{- end }}
+  labels:
+  {{- include "victoria-metrics.vminsert.labels" . | nindent 4 }}
+{{- /* Guarded: an empty extraLabels map would otherwise render a stray "{}" line inside labels. */}}
+{{- if .Values.vminsert.ingress.extraLabels }}
+{{ toYaml .Values.vminsert.ingress.extraLabels | indent 4 }}
+{{- end }}
+  name: {{ template "victoria-metrics.vminsert.fullname" . }}
+spec:
+  rules:
+  {{- $serviceName := include "victoria-metrics.vminsert.fullname" . }}
+  {{- range .Values.vminsert.ingress.hosts }}
+  - host: {{ .name }}
+    http:
+      paths:
+        - path: {{ .path }}
+          backend:
+            serviceName: {{ $serviceName }}
+            servicePort: http
+  {{- end -}}
+{{- /* tls is a field of spec, so it is indented by two spaces and its items by four. */}}
+{{- if .Values.vminsert.ingress.tls }}
+  tls:
+{{ toYaml .Values.vminsert.ingress.tls | indent 4 }}
+{{- end -}}
+{{- end -}}
--- a/deployment/k8s/helm/victoria-metrics/templates/vminsert-service-monitor.yaml
+++ b/deployment/k8s/helm/victoria-metrics/templates/vminsert-service-monitor.yaml
@@ -0,0 +1,30 @@
+{{- if and .Values.vminsert.enabled .Values.vminsert.serviceMonitor.enabled -}}
+apiVersion: monitoring.coreos.com/v1
+kind: ServiceMonitor
+metadata:
+  {{- if .Values.vminsert.serviceMonitor.annotations }}
+  annotations:
+{{ toYaml .Values.vminsert.serviceMonitor.annotations | indent 4 }}
+  {{- end }}
+  labels:
+  {{- include "victoria-metrics.vminsert.labels" . | nindent 4 }}
+  {{- if .Values.vminsert.serviceMonitor.extraLabels }}
+{{ toYaml .Values.vminsert.serviceMonitor.extraLabels | indent 4 }}
+  {{- end }}
+  name: {{ template "victoria-metrics.vminsert.fullname" . }}
+spec:
+  namespaceSelector:
+    matchNames:
+      - {{ .Release.Namespace }}
+  selector:
+    matchLabels:
+      {{- include "victoria-metrics.vminsert.matchLabels" . | nindent 6 }} 
+  endpoints:
+    - port: http
+      {{- if .Values.vminsert.serviceMonitor.interval }}
+      interval: {{ .Values.vminsert.serviceMonitor.interval }}
+      {{- end }}
+      {{- if .Values.vminsert.serviceMonitor.scrapeTimeout }}
+      scrapeTimeout: {{ .Values.vminsert.serviceMonitor.scrapeTimeout }}
+      {{- end }}
+  {{- end }}
--- a/deployment/k8s/helm/victoria-metrics/templates/vminsert-service.yaml
+++ b/deployment/k8s/helm/victoria-metrics/templates/vminsert-service.yaml
@@ -0,0 +1,40 @@
+{{- if .Values.vminsert.enabled -}}
+apiVersion: v1
+kind: Service
+metadata:
+{{- if .Values.vminsert.service.annotations }}
+  annotations:
+{{ toYaml .Values.vminsert.service.annotations | indent 4}}
+{{- end }}
+  labels:
+    {{- include "victoria-metrics.vminsert.labels" . | nindent 4 }}
+{{- if .Values.vminsert.service.labels }}
+{{ toYaml .Values.vminsert.service.labels | indent 4}}
+{{- end }}
+  name: {{ template "victoria-metrics.vminsert.fullname" . }}
+spec:
+{{- if .Values.vminsert.service.clusterIP }}
+  clusterIP: {{ .Values.vminsert.service.clusterIP }}
+{{- end }}
+{{- if .Values.vminsert.service.externalIPs }}
+  externalIPs:
+{{ toYaml .Values.vminsert.service.externalIPs | indent 4 }}
+{{- end }}
+{{- if .Values.vminsert.service.loadBalancerIP }}
+  loadBalancerIP: {{ .Values.vminsert.service.loadBalancerIP }}
+{{- end }}
+{{- if .Values.vminsert.service.loadBalancerSourceRanges }}
+  loadBalancerSourceRanges:
+  {{- range $cidr := .Values.vminsert.service.loadBalancerSourceRanges }}
+    - {{ $cidr }}
+  {{- end }}
+{{- end }}
+  ports:
+    - name: http
+      port: {{ .Values.vminsert.service.servicePort }}
+      protocol: TCP
+      targetPort: http
+  selector:
+    {{- include "victoria-metrics.vminsert.matchLabels" . | nindent 4 }}
+  type: "{{ .Values.vminsert.service.type }}"
+{{- end }}
--- a/deployment/k8s/helm/victoria-metrics/templates/vmselect-deployment.yaml
+++ b/deployment/k8s/helm/victoria-metrics/templates/vmselect-deployment.yaml
@@ -0,0 +1,83 @@
+{{- if and .Values.vmselect.enabled (not .Values.vmselect.statefulSet.enabled) -}}
+apiVersion: extensions/v1beta1
+kind: Deployment
+metadata:
+  labels:
+    {{- include "victoria-metrics.vmselect.labels" . | nindent 4 }}
+  name: {{ template "victoria-metrics.vmselect.fullname" . }}
+spec:
+  selector:
+    matchLabels:
+      {{- include "victoria-metrics.vmselect.matchLabels" . | nindent 6 }}
+  replicas: {{ .Values.vmselect.replicaCount }}
+  template:
+    metadata:
+    {{- if .Values.vmselect.podAnnotations }}
+      annotations:
+{{ toYaml .Values.vmselect.podAnnotations | indent 8 }}
+    {{- end }}
+      labels:
+        {{- include "victoria-metrics.vmselect.labels" . | nindent 8 }}
+    spec:
+{{- if .Values.vmselect.priorityClassName }}
+      priorityClassName: "{{ .Values.vmselect.priorityClassName }}"
+{{- end }}
+      containers:
+        - name: {{ template "victoria-metrics.name" . }}-{{ .Values.vmselect.name }}
+          image: "{{ .Values.vmselect.image.repository }}:{{ .Values.vmselect.image.tag }}"
+          imagePullPolicy: "{{ .Values.vmselect.image.pullPolicy }}"
+          args:
+            - {{ printf "%s=%s" "--cacheDataPath" .Values.vmselect.cacheMountPath | quote}}
+          {{- include "victoria-metrics.vmselect.vmstorage-pod-fqdn" . | nindent 12 }}
+          {{- range $key, $value := .Values.vmselect.extraArgs }}
+            - --{{ $key }}={{ $value }}
+          {{- end }}
+          ports:
+            - name: http
+              containerPort: 8481
+          readinessProbe:
+            httpGet:
+              path: /health
+              port: http
+            initialDelaySeconds: 5
+            periodSeconds: 15
+          livenessProbe:
+            tcpSocket:
+              port: http
+            initialDelaySeconds: 5
+            periodSeconds: 15
+            timeoutSeconds: 5
+          volumeMounts:
+            - mountPath: {{ .Values.vmselect.cacheMountPath }}
+              name: cache-volume
+          resources:
+{{ toYaml .Values.vmselect.resources | indent 12 }}
+    {{- /* toYaml starts at column 0 so that `indent 8` aligns every list item, not only the first one. */}}
+    {{- if .Values.imagePullSecrets }}
+      imagePullSecrets:
+{{ toYaml .Values.imagePullSecrets | indent 8 }}
+    {{- end }}
+    {{- if .Values.vmselect.nodeSelector }}
+      nodeSelector:
+{{ toYaml .Values.vmselect.nodeSelector | indent 8 }}
+    {{- end }}
+    {{- if .Values.vmselect.securityContext }}
+      securityContext:
+{{ toYaml .Values.vmselect.securityContext | indent 8 }}
+    {{- end }}
+    {{- if .Values.vmselect.tolerations }}
+      tolerations:
+{{ toYaml .Values.vmselect.tolerations | indent 8 }}
+    {{- end }}
+    {{- if .Values.vmselect.affinity }}
+      affinity:
+{{ toYaml .Values.vmselect.affinity | indent 8 }}
+    {{- end }}
+      volumes:
+        - name: cache-volume
+        {{- if .Values.vmselect.persistentVolume.enabled }}
+          persistentVolumeClaim:
+            claimName: {{ if .Values.vmselect.persistentVolume.existingClaim }}{{ .Values.vmselect.persistentVolume.existingClaim }}{{- else }}{{ template "victoria-metrics.vmselect.fullname" . }}{{- end }}
+            {{- else }}
+          emptyDir: {}
+            {{- end -}}
+{{- end }}
--- a/deployment/k8s/helm/victoria-metrics/templates/vmselect-ingress.yaml
+++ b/deployment/k8s/helm/victoria-metrics/templates/vmselect-ingress.yaml
@@ -0,0 +1,35 @@
+{{- /* Ingress routing the configured hosts/paths to the vmselect Service; rendered only when both vmselect and its ingress are enabled. */ -}}
+{{- if and .Values.vmselect.enabled .Values.vmselect.ingress.enabled }}
+apiVersion: extensions/v1beta1
+kind: Ingress
+metadata:
+{{- if .Values.vmselect.ingress.annotations }}
+  annotations:
+{{ toYaml .Values.vmselect.ingress.annotations | indent 4 }}
+{{- end }}
+  labels:
+  {{- include "victoria-metrics.vmselect.labels" . | nindent 4 }}
+  {{- /* extraLabels is optional: render it only when set, so an unset or empty value does not put "null" or "{}" inside the labels map. */}}
+  {{- with .Values.vmselect.ingress.extraLabels }}
+  {{- toYaml . | nindent 4 }}
+  {{- end }}
+  name: {{ template "victoria-metrics.vmselect.fullname" . }}
+spec:
+  rules:
+  {{- /* Resolve the backend Service name before the loop: inside range the dot is the current host entry, not the chart root. */}}
+  {{- $serviceName := include "victoria-metrics.vmselect.fullname" . }}
+  {{- range .Values.vmselect.ingress.hosts }}
+  - host: {{ .name }}
+    http:
+      paths:
+        - path: {{ .path }}
+          backend:
+            serviceName: {{ $serviceName }}
+            servicePort: http
+  {{- end }}
+  {{- /* tls is a field of spec, so it is indented under it; the toYaml line starts at column 0 so indent applies equally to every rendered line. */}}
+  {{- if .Values.vmselect.ingress.tls }}
+  tls:
+{{ toYaml .Values.vmselect.ingress.tls | indent 4 }}
+  {{- end }}
+{{- end -}}
--- a/deployment/k8s/helm/victoria-metrics/templates/vmselect-service-headless.yaml
+++ b/deployment/k8s/helm/victoria-metrics/templates/vmselect-service-headless.yaml
@@ -0,0 +1,27 @@
+{{- /* Headless Service for vmselect; rendered only together with the StatefulSet variant, which references this name as its serviceName. */ -}}
+{{- if and .Values.vmselect.enabled .Values.vmselect.statefulSet.enabled -}}
+{{- $svc := .Values.vmselect.statefulSet.service -}}
+apiVersion: v1
+kind: Service
+metadata:
+  {{- with $svc.annotations }}
+  annotations:
+    {{- toYaml . | nindent 4 }}
+  {{- end }}
+  labels:
+    {{- include "victoria-metrics.vmselect.labels" . | nindent 4 }}
+    {{- with $svc.labels }}
+    {{- toYaml . | nindent 4 }}
+    {{- end }}
+  name: {{ include "victoria-metrics.vmselect.fullname" . }}
+spec:
+  {{- /* clusterIP: None is what makes this Service headless. */}}
+  clusterIP: None
+  ports:
+    - name: http
+      port: {{ $svc.servicePort }}
+      protocol: TCP
+      targetPort: http
+  selector:
+    {{- include "victoria-metrics.vmselect.matchLabels" . | nindent 4 }}
+{{- end -}}
--- a/deployment/k8s/helm/victoria-metrics/templates/vmselect-service-monitor.yaml
+++ b/deployment/k8s/helm/victoria-metrics/templates/vmselect-service-monitor.yaml
@@ -0,0 +1,33 @@
+{{- /* ServiceMonitor (monitoring.coreos.com/v1) selecting the vmselect Service; rendered only when both vmselect and its serviceMonitor are enabled. */ -}}
+{{- if and .Values.vmselect.enabled .Values.vmselect.serviceMonitor.enabled -}}
+apiVersion: monitoring.coreos.com/v1
+kind: ServiceMonitor
+metadata:
+  {{- with .Values.vmselect.serviceMonitor.annotations }}
+  annotations:
+    {{- toYaml . | nindent 4 }}
+  {{- end }}
+  labels:
+    {{- include "victoria-metrics.vmselect.labels" . | nindent 4 }}
+    {{- with .Values.vmselect.serviceMonitor.extraLabels }}
+    {{- toYaml . | nindent 4 }}
+    {{- end }}
+  name: {{ template "victoria-metrics.vmselect.fullname" . }}
+spec:
+  {{- /* Restrict the selector to the namespace this release is installed into. */}}
+  namespaceSelector:
+    matchNames:
+      - {{ .Release.Namespace }}
+  selector:
+    matchLabels:
+      {{- include "victoria-metrics.vmselect.matchLabels" . | nindent 6 }}
+  endpoints:
+    - port: http
+      {{- /* interval and scrapeTimeout are optional and are emitted only when set in values. */}}
+      {{- with .Values.vmselect.serviceMonitor.interval }}
+      interval: {{ . }}
+      {{- end }}
+      {{- with .Values.vmselect.serviceMonitor.scrapeTimeout }}
+      scrapeTimeout: {{ . }}
+      {{- end }}
+{{- end }}
--- a/deployment/k8s/helm/victoria-metrics/templates/vmselect-service.yaml
+++ b/deployment/k8s/helm/victoria-metrics/templates/vmselect-service.yaml
@@ -0,0 +1,43 @@
+{{- /* Regular Service for vmselect; rendered only when vmselect is enabled and the StatefulSet variant is not. */ -}}
+{{- if and .Values.vmselect.enabled (not .Values.vmselect.statefulSet.enabled) -}}
+{{- $svc := .Values.vmselect.service -}}
+apiVersion: v1
+kind: Service
+metadata:
+  {{- with $svc.annotations }}
+  annotations:
+    {{- toYaml . | nindent 4 }}
+  {{- end }}
+  labels:
+    {{- include "victoria-metrics.vmselect.labels" . | nindent 4 }}
+    {{- with $svc.labels }}
+    {{- toYaml . | nindent 4 }}
+    {{- end }}
+  name: {{ include "victoria-metrics.vmselect.fullname" . }}
+spec:
+  {{- /* The addressing fields below are optional; each one is emitted only when it is set in values. */}}
+  {{- with $svc.clusterIP }}
+  clusterIP: {{ . }}
+  {{- end }}
+  {{- with $svc.externalIPs }}
+  externalIPs:
+    {{- toYaml . | nindent 4 }}
+  {{- end }}
+  {{- with $svc.loadBalancerIP }}
+  loadBalancerIP: {{ . }}
+  {{- end }}
+  {{- with $svc.loadBalancerSourceRanges }}
+  loadBalancerSourceRanges:
+    {{- range . }}
+    - {{ . }}
+    {{- end }}
+  {{- end }}
+  ports:
+    - name: http
+      port: {{ $svc.servicePort }}
+      protocol: TCP
+      targetPort: http
+  selector:
+    {{- include "victoria-metrics.vmselect.matchLabels" . | nindent 4 }}
+  type: "{{ $svc.type }}"
+{{- end }}
--- a/deployment/k8s/helm/victoria-metrics/templates/vmselect-statefulset.yaml
+++ b/deployment/k8s/helm/victoria-metrics/templates/vmselect-statefulset.yaml
@@ -0,0 +1,109 @@
+{{- /* StatefulSet variant of vmselect; rendered only when both vmselect and vmselect.statefulSet are enabled. */ -}}
+{{- if and .Values.vmselect.enabled .Values.vmselect.statefulSet.enabled -}}
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+  labels:
+    {{- include "victoria-metrics.vmselect.labels" . | nindent 4 }}
+  name: {{ template "victoria-metrics.vmselect.fullname" . }}
+spec:
+  {{- /* serviceName points at the Service that carries the same full name as this StatefulSet. */}}
+  serviceName: {{ template "victoria-metrics.vmselect.fullname" . }}
+  selector:
+    matchLabels:
+      {{- include "victoria-metrics.vmselect.matchLabels" . | nindent 6 }}
+  replicas: {{ .Values.vmselect.replicaCount }}
+  podManagementPolicy: {{ .Values.vmselect.podManagementPolicy }}
+  template:
+    metadata:
+    {{- if .Values.vmselect.podAnnotations }}
+      annotations:
+{{ toYaml .Values.vmselect.podAnnotations | indent 8 }}
+    {{- end }}
+      labels:
+        {{- include "victoria-metrics.vmselect.labels" . | nindent 8 }}
+    spec:
+{{- if .Values.vmselect.priorityClassName }}
+      priorityClassName: "{{ .Values.vmselect.priorityClassName }}"
+{{- end }}
+      containers:
+        - name: {{ template "victoria-metrics.name" . }}-{{ .Values.vmselect.name }}
+          image: "{{ .Values.vmselect.image.repository }}:{{ .Values.vmselect.image.tag }}"
+          imagePullPolicy: "{{ .Values.vmselect.image.pullPolicy }}"
+          args:
+            {{- /* --cacheDataPath is set to the same path at which cache-volume is mounted below. */}}
+            - {{ printf "%s=%s" "--cacheDataPath" .Values.vmselect.cacheMountPath | quote}}
+          {{- include "victoria-metrics.vmselect.vmstorage-pod-fqdn" . | nindent 12 }}
+          {{- range $key, $value := .Values.vmselect.extraArgs }}
+            - --{{ $key }}={{ $value }}
+          {{- end }}
+          ports:
+            - name: http
+              containerPort: 8481
+          readinessProbe:
+            httpGet:
+              path: /health
+              port: http
+            initialDelaySeconds: 5
+            periodSeconds: 15
+          livenessProbe:
+            tcpSocket:
+              port: http
+            initialDelaySeconds: 5
+            periodSeconds: 15
+            timeoutSeconds: 5
+          volumeMounts:
+            - mountPath: {{ .Values.vmselect.cacheMountPath }}
+              name: cache-volume
+          resources:
+{{ toYaml .Values.vmselect.resources | indent 12 }}
+    {{- /* Every toYaml line starts at column 0 so that indent shifts all rendered lines equally; leading spaces would shift only the first line and break multi-entry values. */}}
+    {{- if .Values.imagePullSecrets }}
+      imagePullSecrets:
+{{ toYaml .Values.imagePullSecrets | indent 8 }}
+    {{- end }}
+    {{- if .Values.vmselect.nodeSelector }}
+      nodeSelector:
+{{ toYaml .Values.vmselect.nodeSelector | indent 8 }}
+    {{- end }}
+    {{- if .Values.vmselect.securityContext }}
+      securityContext:
+{{ toYaml .Values.vmselect.securityContext | indent 8 }}
+    {{- end }}
+    {{- if .Values.vmselect.tolerations }}
+      tolerations:
+{{ toYaml .Values.vmselect.tolerations | indent 8 }}
+    {{- end }}
+    {{- if .Values.vmselect.affinity }}
+      affinity:
+{{ toYaml .Values.vmselect.affinity | indent 8 }}
+    {{- end }}
+{{- /* cache-volume comes from a volumeClaimTemplate (a field of the StatefulSet spec) when persistentVolume is enabled, otherwise from an emptyDir in the pod spec. */}}
+{{- if .Values.vmselect.persistentVolume.enabled }}
+  volumeClaimTemplates:
+    - metadata:
+        name: cache-volume
+        {{- if .Values.vmselect.persistentVolume.annotations }}
+        annotations:
+{{ toYaml .Values.vmselect.persistentVolume.annotations | indent 10 }}
+        {{- end }}
+      spec:
+        accessModes:
+{{ toYaml .Values.vmselect.persistentVolume.accessModes | indent 10 }}
+        resources:
+          requests:
+            storage: "{{ .Values.vmselect.persistentVolume.size }}"
+        {{- /* storageClass "-" is rendered as an explicit empty storageClassName; when storageClass is unset the field is omitted. */}}
+        {{- if .Values.vmselect.persistentVolume.storageClass }}
+        {{- if (eq "-" .Values.vmselect.persistentVolume.storageClass) }}
+        storageClassName: ""
+        {{- else }}
+        storageClassName: "{{ .Values.vmselect.persistentVolume.storageClass }}"
+        {{- end }}
+        {{- end }}
+{{- else }}
+      volumes:
+        - name: cache-volume
+          emptyDir: {}
+{{- end }}
+{{- end }}
--- a/deployment/k8s/helm/victoria-metrics/templates/vmstorage-service-monitor.yaml
+++ b/deployment/k8s/helm/victoria-metrics/templates/vmstorage-service-monitor.yaml
@@ -0,0 +1,33 @@
+{{- /* ServiceMonitor (monitoring.coreos.com/v1) selecting by the vmstorage matchLabels; rendered only when both vmstorage and its serviceMonitor are enabled. */ -}}
+{{- if and .Values.vmstorage.enabled .Values.vmstorage.serviceMonitor.enabled -}}
+apiVersion: monitoring.coreos.com/v1
+kind: ServiceMonitor
+metadata:
+  {{- with .Values.vmstorage.serviceMonitor.annotations }}
+  annotations:
+    {{- toYaml . | nindent 4 }}
+  {{- end }}
+  labels:
+    {{- include "victoria-metrics.vmstorage.labels" . | nindent 4 }}
+    {{- with .Values.vmstorage.serviceMonitor.extraLabels }}
+    {{- toYaml . | nindent 4 }}
+    {{- end }}
+  name: {{ template "victoria-metrics.vmstorage.fullname" . }}
+spec:
+  {{- /* Restrict the selector to the namespace this release is installed into. */}}
+  namespaceSelector:
+    matchNames:
+      - {{ .Release.Namespace }}
+  selector:
+    matchLabels:
+      {{- include "victoria-metrics.vmstorage.matchLabels" . | nindent 6 }}
+  endpoints:
+    - port: http
+      {{- /* interval and scrapeTimeout are optional and are emitted only when set in values. */}}
+      {{- with .Values.vmstorage.serviceMonitor.interval }}
+      interval: {{ . }}
+      {{- end }}
+      {{- with .Values.vmstorage.serviceMonitor.scrapeTimeout }}
+      scrapeTimeout: {{ . }}
+      {{- end }}
+{{- end }}
--- a/Show More
+++ b/Show More