lib/fs: add MustStopDirRemover for waiting until pending directories are removed on graceful shutdown

This patch is mainly required for laggy NFS. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/162
lib/storage: typo fix
2026-06-07 10:56:50 +03:00 · 2019-09-05 11:13:17 +03:00 · 2019-09-04 19:58:01 +03:00 · 2019-09-04 19:58:01 +03:00 · 2019-09-04 18:40:39 +03:00 · 2019-09-04 18:13:45 +03:00
160 changed files with 5840 additions and 1345 deletions
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -0,0 +1,42 @@
+name: main
+on:
+  - push
+jobs:
+  build:
+    name: Build
+    runs-on: ubuntu-latest
+    steps:
+      - name: Setup Go
+        uses: actions/setup-go@v1
+        with:
+          go-version: 1.12
+        id: go
+      - name: Code checkout
+        uses: actions/checkout@v1
+      - name: Dependencies
+        env:
+          GO111MODULE: off
+        run: |
+          go get -v golang.org/x/lint/golint
+          go get -u github.com/kisielk/errcheck
+      - name: Build
+        env:
+          GO111MODULE: on
+        run: |
+            export PATH=$PATH:$(go env GOPATH)/bin # temporary fix. See https://github.com/actions/setup-go/issues/14
+            make check-all
+            git diff --exit-code
+            make test-full
+            make test-pure
+            make victoria-metrics
+            make victoria-metrics-pure
+            make victoria-metrics-arm
+            make victoria-metrics-arm64
+            GOOS=freebsd go build -mod=vendor ./app/victoria-metrics
+            GOOS=darwin go build -mod=vendor ./app/victoria-metrics
+      - name: Publish coverage
+        uses: codecov/codecov-action@v1.0.0
+        with:
+          token: ${{secrets.CODECOV_TOKEN}}
+          file: ./coverage.txt
+
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,26 +0,0 @@
-language: go
-
-go:
-  - 1.12.x
-
-install: make
-
-env:
-  - GO111MODULE=on
-
-before_install:
-  - GO111MODULE=off go get -v golang.org/x/lint/golint
-  - GO111MODULE=off go get -u github.com/kisielk/errcheck
-
-script:
-  - make check_all
-  - git diff --exit-code
-  - make test-full
-  - make test-pure
-  - make victoria-metrics
-  - make victoria-metrics-pure
-  - make victoria-metrics-arm
-  - make victoria-metrics-arm64
-
-after_success:
-  - bash <(curl -s https://codecov.io/bash)
--- a/10
+++ b/10
@@ -1,7 +1,7 @@
 PKG_PREFIX := github.com/VictoriaMetrics/VictoriaMetrics

 BUILDINFO_TAG ?= $(shell echo $$(git describe --long --all | tr '/' '-')$$( \
-	      git diff-index --quiet HEAD -- || echo '-dirty-'$$(git diff-index -u HEAD | sha1sum | grep -oP '^.{8}')))
+	      git diff-index --quiet HEAD -- || echo '-dirty-'$$(git diff-index -u HEAD | openssl sha1 | cut -c 10-17)))

 PKG_TAG ?= $(shell git tag -l --points-at HEAD)
 ifeq ($(PKG_TAG),)
@@ -50,7 +50,7 @@ errcheck: install-errcheck
 install-errcheck:
 	which errcheck || GO111MODULE=off go get -u github.com/kisielk/errcheck

-check_all: fmt vet lint errcheck golangci-lint
+check-all: fmt vet lint errcheck golangci-lint

 test:
 	GO111MODULE=on go test -tags=integration -mod=vendor ./lib/... ./app/...
@@ -75,6 +75,12 @@ vendor-update:
 	GO111MODULE=on go mod tidy
 	GO111MODULE=on go mod vendor

+app-local:
+	CGO_ENABLED=1 GO111MODULE=on go build $(RACE) -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/$(APP_NAME)$(RACE) $(PKG_PREFIX)/app/$(APP_NAME)
+
+app-local-pure:
+	CGO_ENABLED=0 GO111MODULE=on go build $(RACE) -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/$(APP_NAME)-pure$(RACE) $(PKG_PREFIX)/app/$(APP_NAME)
+
 quicktemplate-gen: install-qtc
 	qtc

--- a/README.md
+++ b/README.md
@@ -2,7 +2,7 @@
 [![Slack](https://img.shields.io/badge/join%20slack-%23victoriametrics-brightgreen.svg)](http://slack.victoriametrics.com/)
 [![GitHub license](https://img.shields.io/github/license/VictoriaMetrics/VictoriaMetrics.svg)](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/LICENSE)
 [![Go Report](https://goreportcard.com/badge/github.com/VictoriaMetrics/VictoriaMetrics)](https://goreportcard.com/report/github.com/VictoriaMetrics/VictoriaMetrics)
-[![Build Status](https://travis-ci.org/VictoriaMetrics/VictoriaMetrics.svg?branch=master)](https://travis-ci.org/VictoriaMetrics/VictoriaMetrics)
+[![Build Status](https://github.com/VictoriaMetrics/VictoriaMetrics/workflows/main/badge.svg)](https://github.com/VictoriaMetrics/VictoriaMetrics/actions)
 [![codecov](https://codecov.io/gh/VictoriaMetrics/VictoriaMetrics/branch/master/graph/badge.svg)](https://codecov.io/gh/VictoriaMetrics/VictoriaMetrics)

 <img alt="Victoria Metrics" src="logo.png">
@@ -21,7 +21,7 @@ Cluster version is available [here](https://github.com/VictoriaMetrics/VictoriaM

 * Supports [Prometheus querying API](https://prometheus.io/docs/prometheus/latest/querying/api/), so it can be used as Prometheus drop-in replacement in Grafana.
  Additionally, VictoriaMetrics extends PromQL with opt-in [useful features](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/ExtendedPromQL).
-* Global query view. Multiple Prometheus instances may write data into VictoriaMetrics. Later this data may be used in a single query.
+* Supports global query view. Multiple Prometheus instances may write data into VictoriaMetrics. Later this data may be used in a single query.
 * High performance and good scalability for both [inserts](https://medium.com/@valyala/high-cardinality-tsdb-benchmarks-victoriametrics-vs-timescaledb-vs-influxdb-13e6ee64dd6b)
  and [selects](https://medium.com/@valyala/when-size-matters-benchmarking-victoriametrics-vs-timescale-and-influxdb-6035811952d4).
  [Outperforms InfluxDB and TimescaleDB by up to 20x](https://medium.com/@valyala/measuring-vertical-scalability-for-time-series-databases-in-google-cloud-92550d78d8ae).
@@ -38,13 +38,14 @@ Cluster version is available [here](https://github.com/VictoriaMetrics/VictoriaM
  * All the data is stored in a single directory pointed by `-storageDataPath` flag.
  * Easy backups from [instant snapshots](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282).
 * Storage is protected from corruption on unclean shutdown (i.e. hardware reset or `kill -9`) thanks to [the storage architecture](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282).
-* Supports metrics' ingestion and backfilling via the following protocols:
+* Supports metrics' ingestion and [backfilling](#backfilling) via the following protocols:
  * [Prometheus remote write API](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#remote_write)
  * [InfluxDB line protocol](https://docs.influxdata.com/influxdb/v1.7/write_protocols/line_protocol_tutorial/)
  * [Graphite plaintext protocol](https://graphite.readthedocs.io/en/latest/feeding-carbon.html) with [tags](https://graphite.readthedocs.io/en/latest/tags.html#carbon)
    if `-graphiteListenAddr` is set.
  * [OpenTSDB put message](http://opentsdb.net/docs/build/html/api_telnet/put.html) if `-opentsdbListenAddr` is set.
-* Ideally works with big amounts of time series data from Kubernetes, IoT sensors, connected cars and industrial telemetry.
+  * [HTTP OpenTSDB /api/put requests](http://opentsdb.net/docs/build/html/api_http/put.html) if `-opentsdbHTTPListenAddr` is set.
+* Ideally works with big amounts of time series data from Kubernetes, IoT sensors, connected cars, industrial telemetry and various Enterprise workloads.
 * Has open source [cluster version](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/cluster).


@@ -86,6 +87,8 @@ Cluster version is available [here](https://github.com/VictoriaMetrics/VictoriaM
  - [Tuning](#tuning)
  - [Monitoring](#monitoring)
  - [Troubleshooting](#troubleshooting)
+  - [Backfilling](#backfilling)
+  - [Profiling](#profiling)
 - [Roadmap](#roadmap)
 - [Contacts](#contacts)
 - [Community and contributions](#community-and-contributions)
@@ -108,7 +111,8 @@ The following command-line flags are used the most:
 * `-retentionPeriod` - retention period in months for the data. Older data is automatically deleted.
 * `-httpListenAddr` - TCP address to listen to for http requests. By default, it listens port `8428` on all the network interfaces.
 * `-graphiteListenAddr` - TCP and UDP address to listen to for Graphite data. By default, it is disabled.
-* `-opentsdbListenAddr` - TCP and UDP address to listen to for OpenTSDB data. By default, it is disabled.
+* `-opentsdbListenAddr` - TCP and UDP address to listen to for OpenTSDB data over telnet protocol. By default, it is disabled.
+* `-opentsdbHTTPListenAddr` - TCP address to listen to for HTTP OpenTSDB data over `/api/put`. By default, it is disabled.

 Pass `-help` to see all the available flags with description and default values.

@@ -124,7 +128,7 @@ remote_write:
  - url: http://<victoriametrics-addr>:8428/api/v1/write
    queue_config:
      max_samples_per_send: 10000
-      max_shards: 100
+      max_shards: 30
 ```

 Substitute `<victoriametrics-addr>` with the hostname or IP address of VictoriaMetrics.
@@ -182,6 +186,9 @@ Follow the following steps during the upgrade:
 2) Wait until the process stops. This can take a few seconds.
 3) Start the upgraded VictoriaMetrics.

+Prometheus doesn't drop data during VictoriaMetrics restart.
+See [this article](https://grafana.com/blog/2019/03/25/whats-new-in-prometheus-2.8-wal-based-remote-write/) for details.
+

 ### How to apply new config to VictoriaMetrics?

@@ -191,6 +198,9 @@ VictoriaMetrics must be restarted for applying new config:
 2) Wait until the process stops. This can take a few seconds.
 3) Start VictoriaMetrics with the new config.

+Prometheus doesn't drop data during VictoriaMetrics restart.
+See [this article](https://grafana.com/blog/2019/03/25/whats-new-in-prometheus-2.8-wal-based-remote-write/) for details.
+

 ### How to send data from InfluxDB-compatible agents such as [Telegraf](https://www.influxdata.com/time-series-platform/telegraf/)?

@@ -205,7 +215,8 @@ For instance, put the following lines into `Telegraf` config, so it sends data t
 Do not forget substituting `<victoriametrics-addr>` with the real address where VictoriaMetrics runs.

 VictoriaMetrics maps Influx data using the following rules:
-* [`db` query arg](https://docs.influxdata.com/influxdb/v1.7/tools/api/#write-http-endpoint) is mapped into `db` label value.
+* [`db` query arg](https://docs.influxdata.com/influxdb/v1.7/tools/api/#write-http-endpoint) is mapped into `db` label value
+  unless `db` tag exists in the Influx line.
 * Field names are mapped to time series names prefixed with `{measurement}{separator}` value,
  where `{separator}` equals to `_` by default. It can be changed with `-influxMeasurementFieldSeparator` command-line flag.
  See also `-influxSkipSingleField` command-line flag.
@@ -221,8 +232,8 @@ foo,tag1=value1,tag2=value2 field1=12,field2=40
 is converted into the following Prometheus data points:

 ```
-foo.field1{tag1="value1", tag2="value2"} 12
-foo.field2{tag1="value1", tag2="value2"} 40
+foo_field1{tag1="value1", tag2="value2"} 12
+foo_field2{tag1="value1", tag2="value2"} 40
 ```

 Example for writing data with [Influx line protocol](https://docs.influxdata.com/influxdb/v1.7/write_protocols/line_protocol_tutorial/)
@@ -236,7 +247,7 @@ An arbitrary number of lines delimited by '\n' may be sent in a single request.
 After that the data may be read via [/api/v1/export](#how-to-export-time-series) endpoint:

 ```
-curl -G 'http://localhost:8428/api/v1/export' --data-urlencode 'match={__name__!=""}'
+curl -G 'http://localhost:8428/api/v1/export' -d 'match={__name__!=""}'
 ```

 The `/api/v1/export` endpoint should return the following response:
@@ -246,6 +257,9 @@ The `/api/v1/export` endpoint should return the following response:
 {"metric":{"__name__":"measurement.field2","tag1":"value1","tag2":"value2"},"values":[1.23],"timestamps":[1560272508147]}
 ```

+Note that Influx line protocol expects [timestamps in *nanoseconds* by default](https://docs.influxdata.com/influxdb/v1.7/write_protocols/line_protocol_tutorial/#timestamp),
+while VictoriaMetrics stores them with *milliseconds* precision.
+

 ### How to send data from Graphite-compatible agents such as [StatsD](https://github.com/etsy/statsd)?

@@ -271,7 +285,7 @@ An arbitrary number of lines delimited by `\n` may be sent in one go.
 After that the data may be read via [/api/v1/export](#how-to-export-time-series) endpoint:

 ```
-curl -G 'http://localhost:8428/api/v1/export' --data-urlencode 'match={__name__!=""}'
+curl -G 'http://localhost:8428/api/v1/export' -d 'match={__name__!=""}'
 ```

 The `/api/v1/export` endpoint should return the following response:
@@ -291,8 +305,13 @@ or via [go-graphite/carbonapi](https://github.com/go-graphite/carbonapi/blob/mas

 ### How to send data from OpenTSDB-compatible agents?

+VictoriaMetrics supports [telnet put protocol](http://opentsdb.net/docs/build/html/api_telnet/put.html)
+and [HTTP /api/put requests](http://opentsdb.net/docs/build/html/api_http/put.html) for ingesting OpenTSDB data.
+
+#### Sending data via `telnet put` protocol
+
 1) Enable OpenTSDB receiver in VictoriaMetrics by setting `-opentsdbListenAddr` command line flag. For instance,
-the following command will enable OpenTSDB receiver in VictoriaMetrics on TCP and UDP port `4242`:
+the following command enables OpenTSDB receiver in VictoriaMetrics on TCP and UDP port `4242`:

 ```
 /path/to/victoria-metrics-prod -opentsdbListenAddr=:4242
@@ -311,7 +330,7 @@ An arbitrary number of lines delimited by `\n` may be sent in one go.
 After that the data may be read via [/api/v1/export](#how-to-export-time-series) endpoint:

 ```
-curl -G 'http://localhost:8428/api/v1/export' --data-urlencode 'match={__name__!=""}'
+curl -G 'http://localhost:8428/api/v1/export' -d 'match={__name__!=""}'
 ```

 The `/api/v1/export` endpoint should return the following response:
@@ -321,6 +340,44 @@ The `/api/v1/export` endpoint should return the following response:
 ```


+#### Sending OpenTSDB data via HTTP `/api/put` requests
+
+1) Enable HTTP server for OpenTSDB `/api/put` requests by setting `-opentsdbHTTPListenAddr` command line flag. For instance,
+the following command enables OpenTSDB HTTP server on port `4242`:
+
+```
+/path/to/victoria-metrics-prod -opentsdbHTTPListenAddr=:4242
+```
+
+2) Send data to the given address from OpenTSDB-compatible agents.
+
+Example for writing a single data point:
+
+```
+curl -H 'Content-Type: application/json' -d '{"metric":"x.y.z","value":45.34,"tags":{"t1":"v1","t2":"v2"}}' http://localhost:4242/api/put
+```
+
+Example for writing multiple data points in a single request:
+
+```
+curl -H 'Content-Type: application/json' -d '[{"metric":"foo","value":45.34},{"metric":"bar","value":43}]' http://localhost:4242/api/put
+```
+
+After that the data may be read via [/api/v1/export](#how-to-export-time-series) endpoint:
+
+```
+curl -G 'http://localhost:8428/api/v1/export' -d 'match[]=x.y.z' -d 'match[]=foo' -d 'match[]=bar'
+```
+
+The `/api/v1/export` endpoint should return the following response:
+
+```
+{"metric":{"__name__":"foo"},"values":[45.34],"timestamps":[1566464846000]}
+{"metric":{"__name__":"bar"},"values":[43],"timestamps":[1566464846000]}
+{"metric":{"__name__":"x.y.z","t1":"v1","t2":"v2"},"values":[45.34],"timestamps":[1566464763000]}
+```
+
+
 ### How to build from sources

 We recommend using either [binary releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases) or
@@ -427,6 +484,9 @@ where `<timeseries_selector_for_delete>` may contain any [time series selector](
 for metrics to delete. After that all the time series matching the given selector are deleted. Storage space for
 the deleted time series isn't freed instantly - it is freed during subsequent merges of data files.

+It is recommended verifying which metrics will be deleted with the call to `http://<victoria-metrics-addr>:8428/api/v1/series?match[]=<timeseries_selector_for_delete>`
+before actually deleting the metrics.
+

 ### How to export time series?

@@ -524,7 +584,7 @@ kill -HUP `pidof prometheus`


 If you have Prometheus HA pairs with replicas `r1` and `r2` in each pair, then configure each `r1`
-to write data to `<victoriametrics-addr-1`, while each `r2` should write data to `victoriametrics-addr-2`.
+to write data to `victoriametrics-addr-1`, while each `r2` should write data to `victoriametrics-addr-2`.


 ### Multiple retentions
@@ -634,6 +694,35 @@ The most interesting metrics are:
  of data loss stored in the broken parts. In the future, `vmrecover` tool will be created
  for automatic recovering from such errors.

+
+### Backfilling
+
+Make sure that configured `-retentionPeriod` covers timestamps for the backfilled data.
+
+It is recommended disabling query cache with `-search.disableCache` command-line flag when writing
+historical data with timestamps from the past, since the cache assumes that the data is written with
+the current timestamps. Query cache can be enabled after the backfilling is complete.
+
+
+### Profiling
+
+VictoriaMetrics provides handlers for collecting the following [Go profiles](https://blog.golang.org/profiling-go-programs):
+
+- Memory profile. It can be collected with the following command:
+```
+curl -s http://<victoria-metrics-host>:8428/debug/pprof/heap > mem.pprof
+```
+
+- CPU profile. It can be collected with the following command:
+```
+curl -s http://<victoria-metrics-host>:8428/debug/pprof/profile > cpu.pprof
+```
+
+The command for collecting CPU profile waits for 30 seconds before returning.
+
+The collected profiles may be analyzed with [go tool pprof](https://github.com/google/pprof).
+
+
 ## Roadmap

 - [ ] Replication [#118](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/118)
--- a/app/victoria-metrics/Makefile
+++ b/app/victoria-metrics/Makefile
@@ -1,7 +1,7 @@
 # All these commands must run from repository root.

 victoria-metrics:
-	GO111MODULE=on go build -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/victoria-metrics ./app/victoria-metrics
+	APP_NAME=victoria-metrics $(MAKE) app-local

 victoria-metrics-prod:
 	APP_NAME=victoria-metrics $(MAKE) app-via-docker
@@ -33,7 +33,7 @@ victoria-metrics-arm64-prod:
 	APP_NAME=victoria-metrics APP_SUFFIX='-arm64' DOCKER_OPTS='--env CGO_ENABLED=0 --env GOARCH=arm64' $(MAKE) app-via-docker

 victoria-metrics-pure:
-	GO111MODULE=on CGO_ENABLED=0 go build -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/victoria-metrics-pure ./app/victoria-metrics
+	APP_NAME=victoria-metrics $(MAKE) app-local-pure

 victoria-metrics-pure-prod:
 	APP_NAME=victoria-metrics APP_SUFFIX='-pure' DOCKER_OPTS='--env CGO_ENABLED=0' $(MAKE) app-via-docker
--- a/app/victoria-metrics/main.go
+++ b/app/victoria-metrics/main.go
@@ -9,6 +9,7 @@ import (
 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect"
 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmstorage"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
@@ -43,6 +44,8 @@ func main() {
 	vmstorage.Stop()
 	vmselect.Stop()

+	fs.MustStopDirRemover()
+
 	logger.Infof("the VictoriaMetrics has been stopped in %s", time.Since(startTime))
 }

--- a/app/victoria-metrics/main_test.go
+++ b/app/victoria-metrics/main_test.go
@@ -21,6 +21,7 @@ import (
 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert"
 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect"
 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmstorage"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
 )
@@ -92,7 +93,7 @@ func setUp() {

 func processFlags() {
 	flag.Parse()
-	for _, fs := range []struct {
+	for _, fv := range []struct {
 		flag  string
 		value string
 	}{
@@ -103,8 +104,8 @@ func processFlags() {
 		{flag: "loggerLevel", value: testLogLevel},
 	} {
 		// panics if flag doesn't exist
-		if err := flag.Lookup(fs.flag).Value.Set(fs.value); err != nil {
-			log.Fatalf("unable to set %q with value %q, err: %v", fs.flag, fs.value, err)
+		if err := flag.Lookup(fv.flag).Value.Set(fv.value); err != nil {
+			log.Fatalf("unable to set %q with value %q, err: %v", fv.flag, fv.value, err)
 		}
 	}
 }
@@ -121,13 +122,14 @@ func waitFor(timeout time.Duration, f func() bool) error {
 }

 func tearDown() {
-	vminsert.Stop()
-	vmstorage.Stop()
-	vmselect.Stop()
 	if err := httpserver.Stop(*httpListenAddr); err != nil {
 		log.Fatalf("cannot stop the webservice: %s", err)
 	}
-	os.RemoveAll(storagePath)
+	vminsert.Stop()
+	vmstorage.Stop()
+	vmselect.Stop()
+	fs.MustRemoveAll(storagePath)
+	fs.MustStopDirRemover()
 }

 func TestWriteRead(t *testing.T) {
--- a/app/vminsert/common/gzip_reader.go
+++ b/app/vminsert/common/gzip_reader.go
@@ -0,0 +1,30 @@
+package common
+
+import (
+	"compress/gzip"
+	"io"
+	"sync"
+)
+
+// GetGzipReader returns new gzip reader from the pool.
+//
+// Return back the gzip reader when it no longer needed with PutGzipReader.
+func GetGzipReader(r io.Reader) (*gzip.Reader, error) {
+	v := gzipReaderPool.Get()
+	if v == nil {
+		return gzip.NewReader(r)
+	}
+	zr := v.(*gzip.Reader)
+	if err := zr.Reset(r); err != nil {
+		return nil, err
+	}
+	return zr, nil
+}
+
+// PutGzipReader returns back gzip reader obtained via GetGzipReader.
+func PutGzipReader(zr *gzip.Reader) {
+	_ = zr.Close()
+	gzipReaderPool.Put(zr)
+}
+
+var gzipReaderPool sync.Pool
--- a/app/vminsert/common/insert_ctx.go
+++ b/app/vminsert/common/insert_ctx.go
@@ -2,9 +2,11 @@ package common

 import (
 	"fmt"
+	"net/http"

 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmstorage"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
 )
@@ -99,7 +101,10 @@ func (ctx *InsertCtx) AddLabel(name, value string) {
 // FlushBufs flushes buffered rows to the underlying storage.
 func (ctx *InsertCtx) FlushBufs() error {
 	if err := vmstorage.AddRows(ctx.mrs); err != nil {
-		return fmt.Errorf("cannot store metrics: %s", err)
+		return &httpserver.ErrorWithStatusCode{
+			Err:        fmt.Errorf("cannot store metrics: %s", err),
+			StatusCode: http.StatusServiceUnavailable,
+		}
 	}
 	return nil
 }
--- a/app/vminsert/concurrencylimiter/concurrencylimiter.go
+++ b/app/vminsert/concurrencylimiter/concurrencylimiter.go
@@ -3,9 +3,11 @@ package concurrencylimiter
 import (
 	"flag"
 	"fmt"
+	"net/http"
 	"runtime"
 	"time"

+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/timerpool"
 	"github.com/VictoriaMetrics/metrics"
 )
@@ -32,6 +34,17 @@ func Init() {
 func Do(f func() error) error {
 	// Limit the number of conurrent f calls in order to prevent from excess
 	// memory usage and CPU trashing.
+	select {
+	case ch <- struct{}{}:
+		err := f()
+		<-ch
+		return err
+	default:
+	}
+
+	// All the workers are busy.
+	// Sleep for up to waitDuration.
+	concurrencyLimitReached.Inc()
 	t := timerpool.Get(waitDuration)
 	select {
 	case ch <- struct{}{}:
@@ -41,9 +54,22 @@ func Do(f func() error) error {
 		return err
 	case <-t.C:
 		timerpool.Put(t)
-		concurrencyLimitErrors.Inc()
-		return fmt.Errorf("the server is overloaded with %d concurrent inserts; either increase -maxConcurrentInserts or reduce the load", cap(ch))
+		concurrencyLimitTimeout.Inc()
+		return &httpserver.ErrorWithStatusCode{
+			Err:        fmt.Errorf("the server is overloaded with %d concurrent inserts; either increase -maxConcurrentInserts or reduce the load", cap(ch)),
+			StatusCode: http.StatusServiceUnavailable,
+		}
 	}
 }

-var concurrencyLimitErrors = metrics.NewCounter(`vm_concurrency_limit_errors_total`)
+var (
+	concurrencyLimitReached = metrics.NewCounter(`vm_concurrent_insert_limit_reached_total`)
+	concurrencyLimitTimeout = metrics.NewCounter(`vm_concurrent_insert_limit_timeout_total`)
+
+	_ = metrics.NewGauge(`vm_concurrent_insert_capacity`, func() float64 {
+		return float64(cap(ch))
+	})
+	_ = metrics.NewGauge(`vm_concurrent_insert_current`, func() float64 {
+		return float64(len(ch))
+	})
+)
--- a/app/vminsert/graphite/parser.go
+++ b/app/vminsert/graphite/parser.go
@@ -4,6 +4,8 @@ import (
 	"fmt"
 	"strings"

+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
+	"github.com/VictoriaMetrics/metrics"
 	"github.com/valyala/fastjson/fastfloat"
 )

@@ -34,13 +36,8 @@ func (rs *Rows) Reset() {
 // See https://graphite.readthedocs.io/en/latest/feeding-carbon.html#the-plaintext-protocol
 //
 // s must be unchanged until rs is in use.
-func (rs *Rows) Unmarshal(s string) error {
-	var err error
-	rs.Rows, rs.tagsPool, err = unmarshalRows(rs.Rows[:0], s, rs.tagsPool[:0])
-	if err != nil {
-		return err
-	}
-	return err
+func (rs *Rows) Unmarshal(s string) {
+	rs.Rows, rs.tagsPool = unmarshalRows(rs.Rows[:0], s, rs.tagsPool[:0])
 }

 // Row is a single graphite row.
@@ -83,6 +80,9 @@ func (r *Row) unmarshal(s string, tagsPool []Tag) ([]Tag, error) {
 		tags := tagsPool[tagsStart:]
 		r.Tags = tags[:len(tags):len(tags)]
 	}
+	if len(r.Metric) == 0 {
+		return tagsPool, fmt.Errorf("metric cannot be empty")
+	}

 	n = strings.IndexByte(tail, ' ')
 	if n < 0 {
@@ -95,41 +95,46 @@ func (r *Row) unmarshal(s string, tagsPool []Tag) ([]Tag, error) {
 	return tagsPool, nil
 }

-func unmarshalRows(dst []Row, s string, tagsPool []Tag) ([]Row, []Tag, error) {
+func unmarshalRows(dst []Row, s string, tagsPool []Tag) ([]Row, []Tag) {
 	for len(s) > 0 {
 		n := strings.IndexByte(s, '\n')
-		if n == 0 {
-			// Skip empty line
-			s = s[1:]
-			continue
-		}
-		if cap(dst) > len(dst) {
-			dst = dst[:len(dst)+1]
-		} else {
-			dst = append(dst, Row{})
-		}
-		r := &dst[len(dst)-1]
 		if n < 0 {
 			// The last line.
-			var err error
-			tagsPool, err = r.unmarshal(s, tagsPool)
-			if err != nil {
-				err = fmt.Errorf("cannot unmarshal Graphite line %q: %s", s, err)
-				return dst, tagsPool, err
-			}
-			return dst, tagsPool, nil
-		}
-		var err error
-		tagsPool, err = r.unmarshal(s[:n], tagsPool)
-		if err != nil {
-			err = fmt.Errorf("cannot unmarshal Graphite line %q: %s", s[:n], err)
-			return dst, tagsPool, err
+			return unmarshalRow(dst, s, tagsPool)
 		}
+		dst, tagsPool = unmarshalRow(dst, s[:n], tagsPool)
 		s = s[n+1:]
 	}
-	return dst, tagsPool, nil
+	return dst, tagsPool
 }

+func unmarshalRow(dst []Row, s string, tagsPool []Tag) ([]Row, []Tag) {
+	if len(s) > 0 && s[len(s)-1] == '\r' {
+		s = s[:len(s)-1]
+	}
+	if len(s) == 0 {
+		// Skip empty line
+		return dst, tagsPool
+	}
+
+	if cap(dst) > len(dst) {
+		dst = dst[:len(dst)+1]
+	} else {
+		dst = append(dst, Row{})
+	}
+	r := &dst[len(dst)-1]
+	var err error
+	tagsPool, err = r.unmarshal(s, tagsPool)
+	if err != nil {
+		dst = dst[:len(dst)-1]
+		logger.Errorf("cannot unmarshal Graphite line %q: %s", s, err)
+		invalidLines.Inc()
+	}
+	return dst, tagsPool
+}
+
+var invalidLines = metrics.NewCounter(`vm_rows_invalid_total{type="graphite"}`)
+
 func unmarshalTags(dst []Tag, s string) ([]Tag, error) {
 	for {
 		if cap(dst) > len(dst) {
@@ -145,12 +150,20 @@ func unmarshalTags(dst []Tag, s string) ([]Tag, error) {
 			if err := tag.unmarshal(s); err != nil {
 				return dst[:len(dst)-1], err
 			}
+			if len(tag.Key) == 0 || len(tag.Value) == 0 {
+				// Skip empty tag
+				dst = dst[:len(dst)-1]
+			}
 			return dst, nil
 		}
 		if err := tag.unmarshal(s[:n]); err != nil {
 			return dst[:len(dst)-1], err
 		}
 		s = s[n+1:]
+		if len(tag.Key) == 0 || len(tag.Value) == 0 {
+			// Skip empty tag
+			dst = dst[:len(dst)-1]
+		}
 	}
 }

@@ -172,9 +185,6 @@ func (t *Tag) unmarshal(s string) error {
 		return fmt.Errorf("missing tag value for %q", s)
 	}
 	t.Key = s[:n]
-	if len(t.Key) == 0 {
-		return fmt.Errorf("tag key cannot be empty for %q", s)
-	}
 	t.Value = s[n+1:]
 	return nil
 }
--- a/app/vminsert/graphite/parser_test.go
+++ b/app/vminsert/graphite/parser_test.go
@@ -9,45 +9,42 @@ func TestRowsUnmarshalFailure(t *testing.T) {
 	f := func(s string) {
 		t.Helper()
 		var rows Rows
-		if err := rows.Unmarshal(s); err == nil {
-			t.Fatalf("expecting non-nil error when parsing %q", s)
+		rows.Unmarshal(s)
+		if len(rows.Rows) != 0 {
+			t.Fatalf("unexpected number of rows parsed; got %d; want 0", len(rows.Rows))
 		}

 		// Try again
-		if err := rows.Unmarshal(s); err == nil {
-			t.Fatalf("expecting non-nil error when parsing %q", s)
+		rows.Unmarshal(s)
+		if len(rows.Rows) != 0 {
+			t.Fatalf("unexpected number of rows parsed; got %d; want 0", len(rows.Rows))
 		}
 	}

+	// Missing metric
+	f(" 123 455")
+
 	// Missing value
 	f("aaa")

-	// Invalid multiline
-	f("aaa\nbbb 123 34")
-
 	// missing tag
 	f("aa; 12 34")

 	// missing tag value
 	f("aa;bb 23 34")
-	f("aa;=dsd 234 45")
 }

 func TestRowsUnmarshalSuccess(t *testing.T) {
 	f := func(s string, rowsExpected *Rows) {
 		t.Helper()
 		var rows Rows
-		if err := rows.Unmarshal(s); err != nil {
-			t.Fatalf("cannot unmarshal %q: %s", s, err)
-		}
+		rows.Unmarshal(s)
 		if !reflect.DeepEqual(rows.Rows, rowsExpected.Rows) {
 			t.Fatalf("unexpected rows;\ngot\n%+v;\nwant\n%+v", rows.Rows, rowsExpected.Rows)
 		}

 		// Try unmarshaling again
-		if err := rows.Unmarshal(s); err != nil {
-			t.Fatalf("cannot unmarshal %q: %s", s, err)
-		}
+		rows.Unmarshal(s)
 		if !reflect.DeepEqual(rows.Rows, rowsExpected.Rows) {
 			t.Fatalf("unexpected rows;\ngot\n%+v;\nwant\n%+v", rows.Rows, rowsExpected.Rows)
 		}
@@ -60,7 +57,9 @@ func TestRowsUnmarshalSuccess(t *testing.T) {

 	// Empty line
 	f("", &Rows{})
+	f("\r", &Rows{})
 	f("\n\n", &Rows{})
+	f("\n\r\n", &Rows{})

 	// Single line
 	f("foobar -123.456 789", &Rows{
@@ -98,7 +97,8 @@ func TestRowsUnmarshalSuccess(t *testing.T) {
 			Timestamp: 2,
 		}},
 	})
-	f("foo;bar=baz;aa=;x=y 1 2", &Rows{
+	// Empty tags
+	f("foo;bar=baz;aa=;x=y;=z 1 2", &Rows{
 		Rows: []Row{{
 			Metric: "foo",
 			Tags: []Tag{
@@ -106,10 +106,6 @@ func TestRowsUnmarshalSuccess(t *testing.T) {
 					Key:   "bar",
 					Value: "baz",
 				},
-				{
-					Key:   "aa",
-					Value: "",
-				},
 				{
 					Key:   "x",
 					Value: "y",
@@ -139,4 +135,20 @@ func TestRowsUnmarshalSuccess(t *testing.T) {
 			},
 		},
 	})
+
+	// Multi lines with invalid line
+	f("foo 0.3 2\naaa\nbar.baz 0.34 43\n", &Rows{
+		Rows: []Row{
+			{
+				Metric:    "foo",
+				Value:     0.3,
+				Timestamp: 2,
+			},
+			{
+				Metric:    "bar.baz",
+				Value:     0.34,
+				Timestamp: 43,
+			},
+		},
+	})
 }
--- a/app/vminsert/graphite/parser_timing_test.go
+++ b/app/vminsert/graphite/parser_timing_test.go
@@ -16,8 +16,9 @@ cpu.usage_irq 0.34432 1234556768
 	b.RunParallel(func(pb *testing.PB) {
 		var rows Rows
 		for pb.Next() {
-			if err := rows.Unmarshal(s); err != nil {
-				panic(fmt.Errorf("cannot unmarshal %q: %s", s, err))
+			rows.Unmarshal(s)
+			if len(rows.Rows) != 4 {
+				panic(fmt.Errorf("unexpected number of rows unmarshaled: got %d; want 4", len(rows.Rows)))
 			}
 		}
 	})
--- a/app/vminsert/graphite/request_handler.go
+++ b/app/vminsert/graphite/request_handler.go
@@ -85,11 +85,7 @@ func (ctx *pushCtx) Read(r io.Reader) bool {
 			return false
 		}
 	}
-	if err := ctx.Rows.Unmarshal(bytesutil.ToUnsafeString(ctx.reqBuf)); err != nil {
-		graphiteUnmarshalErrors.Inc()
-		ctx.err = fmt.Errorf("cannot unmarshal graphite plaintext protocol data with size %d: %s", len(ctx.reqBuf), err)
-		return false
-	}
+	ctx.Rows.Unmarshal(bytesutil.ToUnsafeString(ctx.reqBuf))

 	// Fill missing timestamps with the current timestamp rounded to seconds.
 	currentTimestamp := time.Now().Unix()
@@ -136,9 +132,8 @@ func (ctx *pushCtx) reset() {
 }

 var (
-	graphiteReadCalls       = metrics.NewCounter(`vm_read_calls_total{name="graphite"}`)
-	graphiteReadErrors      = metrics.NewCounter(`vm_read_errors_total{name="graphite"}`)
-	graphiteUnmarshalErrors = metrics.NewCounter(`vm_unmarshal_errors_total{name="graphite"}`)
+	graphiteReadCalls  = metrics.NewCounter(`vm_read_calls_total{name="graphite"}`)
+	graphiteReadErrors = metrics.NewCounter(`vm_read_errors_total{name="graphite"}`)
 )

 func getPushCtx() *pushCtx {
--- a/app/vminsert/influx/parser.go
+++ b/app/vminsert/influx/parser.go
@@ -4,6 +4,8 @@ import (
 	"fmt"
 	"strings"

+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
+	"github.com/VictoriaMetrics/metrics"
 	"github.com/valyala/fastjson/fastfloat"
 )

@@ -41,13 +43,8 @@ func (rs *Rows) Reset() {
 // See https://docs.influxdata.com/influxdb/v1.7/write_protocols/line_protocol_tutorial/
 //
 // s must be unchanged until rs is in use.
-func (rs *Rows) Unmarshal(s string) error {
-	var err error
-	rs.Rows, rs.tagsPool, rs.fieldsPool, err = unmarshalRows(rs.Rows[:0], s, rs.tagsPool[:0], rs.fieldsPool[:0])
-	if err != nil {
-		return err
-	}
-	return err
+func (rs *Rows) Unmarshal(s string) {
+	rs.Rows, rs.tagsPool, rs.fieldsPool = unmarshalRows(rs.Rows[:0], s, rs.tagsPool[:0], rs.fieldsPool[:0])
 }

 // Row is a single influx row.
@@ -65,9 +62,8 @@ func (r *Row) reset() {
 	r.Timestamp = 0
 }

-func (r *Row) unmarshal(s string, tagsPool []Tag, fieldsPool []Field) ([]Tag, []Field, error) {
+func (r *Row) unmarshal(s string, tagsPool []Tag, fieldsPool []Field, noEscapeChars bool) ([]Tag, []Field, error) {
 	r.reset()
-	noEscapeChars := strings.IndexByte(s, '\\') < 0
 	n := nextUnescapedChar(s, ' ', noEscapeChars)
 	if n < 0 {
 		return tagsPool, fieldsPool, fmt.Errorf("cannot find Whitespace I in %q", s)
@@ -141,9 +137,6 @@ func (tag *Tag) unmarshal(s string, noEscapeChars bool) error {
 		return fmt.Errorf("missing tag value for %q", s)
 	}
 	tag.Key = unescapeTagValue(s[:n], noEscapeChars)
-	if len(tag.Key) == 0 {
-		return fmt.Errorf("tag key cannot be empty")
-	}
 	tag.Value = unescapeTagValue(s[n+1:], noEscapeChars)
 	return nil
 }
@@ -177,41 +170,51 @@ func (f *Field) unmarshal(s string, noEscapeChars, hasQuotedFields bool) error {
 	return nil
 }

-func unmarshalRows(dst []Row, s string, tagsPool []Tag, fieldsPool []Field) ([]Row, []Tag, []Field, error) {
+func unmarshalRows(dst []Row, s string, tagsPool []Tag, fieldsPool []Field) ([]Row, []Tag, []Field) {
+	noEscapeChars := strings.IndexByte(s, '\\') < 0
 	for len(s) > 0 {
 		n := strings.IndexByte(s, '\n')
-		if n == 0 {
-			// Skip empty line
-			s = s[1:]
-			continue
-		}
-		if cap(dst) > len(dst) {
-			dst = dst[:len(dst)+1]
-		} else {
-			dst = append(dst, Row{})
-		}
-		r := &dst[len(dst)-1]
 		if n < 0 {
 			// The last line.
-			var err error
-			tagsPool, fieldsPool, err = r.unmarshal(s, tagsPool, fieldsPool)
-			if err != nil {
-				err = fmt.Errorf("cannot unmarshal Influx line %q: %s", s, err)
-				return dst, tagsPool, fieldsPool, err
-			}
-			return dst, tagsPool, fieldsPool, nil
-		}
-		var err error
-		tagsPool, fieldsPool, err = r.unmarshal(s[:n], tagsPool, fieldsPool)
-		if err != nil {
-			err = fmt.Errorf("cannot unmarshal Influx line %q: %s", s[:n], err)
-			return dst, tagsPool, fieldsPool, err
+			return unmarshalRow(dst, s, tagsPool, fieldsPool, noEscapeChars)
 		}
+		dst, tagsPool, fieldsPool = unmarshalRow(dst, s[:n], tagsPool, fieldsPool, noEscapeChars)
 		s = s[n+1:]
 	}
-	return dst, tagsPool, fieldsPool, nil
+	return dst, tagsPool, fieldsPool
 }

+func unmarshalRow(dst []Row, s string, tagsPool []Tag, fieldsPool []Field, noEscapeChars bool) ([]Row, []Tag, []Field) {
+	if len(s) > 0 && s[len(s)-1] == '\r' {
+		s = s[:len(s)-1]
+	}
+	if len(s) == 0 {
+		// Skip empty line
+		return dst, tagsPool, fieldsPool
+	}
+	if s[0] == '#' {
+		// Skip comment
+		return dst, tagsPool, fieldsPool
+	}
+
+	if cap(dst) > len(dst) {
+		dst = dst[:len(dst)+1]
+	} else {
+		dst = append(dst, Row{})
+	}
+	r := &dst[len(dst)-1]
+	var err error
+	tagsPool, fieldsPool, err = r.unmarshal(s, tagsPool, fieldsPool, noEscapeChars)
+	if err != nil {
+		dst = dst[:len(dst)-1]
+		logger.Errorf("cannot unmarshal Influx line %q: %s; skipping it", s, err)
+		invalidLines.Inc()
+	}
+	return dst, tagsPool, fieldsPool
+}
+
+var invalidLines = metrics.NewCounter(`vm_rows_invalid_total{type="influx"}`)
+
 func unmarshalTags(dst []Tag, s string, noEscapeChars bool) ([]Tag, error) {
 	for {
 		if cap(dst) > len(dst) {
@@ -223,14 +226,22 @@ func unmarshalTags(dst []Tag, s string, noEscapeChars bool) ([]Tag, error) {
 		n := nextUnescapedChar(s, ',', noEscapeChars)
 		if n < 0 {
 			if err := tag.unmarshal(s, noEscapeChars); err != nil {
-				return dst, err
+				return dst[:len(dst)-1], err
+			}
+			if len(tag.Key) == 0 || len(tag.Value) == 0 {
+				// Skip empty tag
+				dst = dst[:len(dst)-1]
 			}
 			return dst, nil
 		}
 		if err := tag.unmarshal(s[:n], noEscapeChars); err != nil {
-			return dst, err
+			return dst[:len(dst)-1], err
 		}
 		s = s[n+1:]
+		if len(tag.Key) == 0 || len(tag.Value) == 0 {
+			// Skip empty tag
+			dst = dst[:len(dst)-1]
+		}
 	}
 }

--- a/app/vminsert/influx/parser_test.go
+++ b/app/vminsert/influx/parser_test.go
@@ -74,13 +74,15 @@ func TestRowsUnmarshalFailure(t *testing.T) {
 	f := func(s string) {
 		t.Helper()
 		var rows Rows
-		if err := rows.Unmarshal(s); err == nil {
-			t.Fatalf("expecting non-nil error when parsing %q", s)
+		rows.Unmarshal(s)
+		if len(rows.Rows) != 0 {
+			t.Fatalf("expecting zero rows; got %d rows", len(rows.Rows))
 		}

 		// Try again
-		if err := rows.Unmarshal(s); err == nil {
-			t.Fatalf("expecting non-nil error when parsing %q", s)
+		rows.Unmarshal(s)
+		if len(rows.Rows) != 0 {
+			t.Fatalf("expecting zero rows; got %d rows", len(rows.Rows))
 		}
 	}

@@ -94,12 +96,8 @@ func TestRowsUnmarshalFailure(t *testing.T) {
 	// Missing tag value
 	f("foo,bar")
 	f("foo,bar baz")
-	f("foo,bar= baz")
 	f("foo,bar=123, 123")

-	// Missing tag name
-	f("foo,=bar baz=234")
-
 	// Missing field value
 	f("foo bar")
 	f("foo bar=")
@@ -122,17 +120,13 @@ func TestRowsUnmarshalSuccess(t *testing.T) {
 	f := func(s string, rowsExpected *Rows) {
 		t.Helper()
 		var rows Rows
-		if err := rows.Unmarshal(s); err != nil {
-			t.Fatalf("cannot unmarshal %q: %s", s, err)
-		}
+		rows.Unmarshal(s)
 		if !reflect.DeepEqual(rows.Rows, rowsExpected.Rows) {
 			t.Fatalf("unexpected rows;\ngot\n%+v;\nwant\n%+v", rows.Rows, rowsExpected.Rows)
 		}

 		// Try unmarshaling again
-		if err := rows.Unmarshal(s); err != nil {
-			t.Fatalf("cannot unmarshal %q: %s", s, err)
-		}
+		rows.Unmarshal(s)
 		if !reflect.DeepEqual(rows.Rows, rowsExpected.Rows) {
 			t.Fatalf("unexpected rows;\ngot\n%+v;\nwant\n%+v", rows.Rows, rowsExpected.Rows)
 		}
@@ -146,6 +140,12 @@ func TestRowsUnmarshalSuccess(t *testing.T) {
 	// Empty line
 	f("", &Rows{})
 	f("\n\n", &Rows{})
+	f("\n\r\n", &Rows{})
+
+	// Comment
+	f("\n# foobar\n", &Rows{})
+	f("#foobar baz", &Rows{})
+	f("#foobar baz\n#sss", &Rows{})

 	// Minimal line without tags and timestamp
 	f("foo bar=123", &Rows{
@@ -157,6 +157,15 @@ func TestRowsUnmarshalSuccess(t *testing.T) {
 			}},
 		}},
 	})
+	f("# comment\nfoo bar=123\r\n#comment2 sdsf dsf", &Rows{
+		Rows: []Row{{
+			Measurement: "foo",
+			Fields: []Field{{
+				Key:   "bar",
+				Value: 123,
+			}},
+		}},
+	})
 	f("foo bar=123\n", &Rows{
 		Rows: []Row{{
 			Measurement: "foo",
@@ -216,7 +225,7 @@ func TestRowsUnmarshalSuccess(t *testing.T) {
 	})

 	// Line with empty tag values
-	f("foo,tag1=xyz,tagN=,tag2=43as bar=123", &Rows{
+	f("foo,tag1=xyz,tagN=,tag2=43as,=xxx bar=123", &Rows{
 		Rows: []Row{{
 			Measurement: "foo",
 			Tags: []Tag{
@@ -224,10 +233,6 @@ func TestRowsUnmarshalSuccess(t *testing.T) {
 					Key:   "tag1",
 					Value: "xyz",
 				},
-				{
-					Key:   "tagN",
-					Value: "",
-				},
 				{
 					Key:   "tag2",
 					Value: "43as",
@@ -309,11 +314,11 @@ func TestRowsUnmarshalSuccess(t *testing.T) {
 	})

 	// Escape chars
-	f(`fo\,bar\=baz,x\==\\a\,\=\q\  \\\a\=\,=4.34`, &Rows{
+	f(`fo\,bar\=baz,x\=\b=\\a\,\=\q\  \\\a\=\,=4.34`, &Rows{
 		Rows: []Row{{
 			Measurement: `fo,bar=baz`,
 			Tags: []Tag{{
-				Key:   `x=`,
+				Key:   `x=\b`,
 				Value: `\a,=\q `,
 			}},
 			Fields: []Field{{
@@ -348,6 +353,34 @@ func TestRowsUnmarshalSuccess(t *testing.T) {
 			},
 		},
 	})
+
+	// Multiple lines with invalid line in the middle.
+	f("foo,tag=xyz field=1.23 48934\n"+
+		"invalid line\n"+
+		"bar x=-1i\n\n", &Rows{
+		Rows: []Row{
+			{
+				Measurement: "foo",
+				Tags: []Tag{{
+					Key:   "tag",
+					Value: "xyz",
+				}},
+				Fields: []Field{{
+					Key:   "field",
+					Value: 1.23,
+				}},
+				Timestamp: 48934,
+			},
+			{
+				Measurement: "bar",
+				Fields: []Field{{
+					Key:   "x",
+					Value: -1,
+				}},
+			},
+		},
+	})
+
 	// No newline after the second line.
 	// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/82
 	f("foo,tag=xyz field=1.23 48934\n"+
@@ -374,4 +407,24 @@ func TestRowsUnmarshalSuccess(t *testing.T) {
 			},
 		},
 	})
+
+	f("x,y=z,g=p:\\ \\ 5432\\,\\ gp\\ mon\\ [lol]\\ con10\\ cmd5\\ SELECT f=1", &Rows{
+		Rows: []Row{{
+			Measurement: "x",
+			Tags: []Tag{
+				{
+					Key:   "y",
+					Value: "z",
+				},
+				{
+					Key:   "g",
+					Value: "p:  5432, gp mon [lol] con10 cmd5 SELECT",
+				},
+			},
+			Fields: []Field{{
+				Key:   "f",
+				Value: 1,
+			}},
+		}},
+	})
 }
--- a/app/vminsert/influx/parser_timing_test.go
+++ b/app/vminsert/influx/parser_timing_test.go
@@ -6,14 +6,19 @@ import (
 )

 func BenchmarkRowsUnmarshal(b *testing.B) {
-	s := `cpu usage_user=1.23,usage_system=4.34,usage_iowait=0.1112 1234556768`
+	s := `cpu usage_user=1.23,usage_system=4.34,usage_iowait=0.1112 1234556768
+cpu usage_user=1.23,usage_system=4.34,usage_iowait=0.1112 123455676344
+aaa usage_user=1.23,usage_system=4.34,usage_iowait=0.1112 123455676344
+bbb usage_user=1.23,usage_system=4.34,usage_iowait=0.1112 123455676344
+`
 	b.SetBytes(int64(len(s)))
 	b.ReportAllocs()
 	b.RunParallel(func(pb *testing.PB) {
 		var rows Rows
 		for pb.Next() {
-			if err := rows.Unmarshal(s); err != nil {
-				panic(fmt.Errorf("cannot unmarshal %q: %s", s, err))
+			rows.Unmarshal(s)
+			if len(rows.Rows) != 4 {
+				panic(fmt.Errorf("unexpected number of rows parsed; got %d; want 4", len(rows.Rows)))
 			}
 		}
 	})
--- a/app/vminsert/influx/request_handler.go
+++ b/app/vminsert/influx/request_handler.go
@@ -1,7 +1,6 @@
 package influx

 import (
-	"compress/gzip"
 	"flag"
 	"fmt"
 	"io"
@@ -41,11 +40,11 @@ func insertHandlerInternal(req *http.Request) error {

 	r := req.Body
 	if req.Header.Get("Content-Encoding") == "gzip" {
-		zr, err := getGzipReader(r)
+		zr, err := common.GetGzipReader(r)
 		if err != nil {
 			return fmt.Errorf("cannot read gzipped influx line protocol data: %s", err)
 		}
-		defer putGzipReader(zr)
+		defer common.PutGzipReader(zr)
 		r = zr
 	}

@@ -91,11 +90,17 @@ func (ctx *pushCtx) InsertRows(db string) error {
 	for i := range rows {
 		r := &rows[i]
 		ic.Labels = ic.Labels[:0]
-		ic.AddLabel("db", db)
+		hasDBLabel := false
 		for j := range r.Tags {
 			tag := &r.Tags[j]
+			if tag.Key == "db" {
+				hasDBLabel = true
+			}
 			ic.AddLabel(tag.Key, tag.Value)
 		}
+		if len(db) > 0 && !hasDBLabel {
+			ic.AddLabel("db", db)
+		}
 		ctx.metricNameBuf = storage.MarshalMetricNameRaw(ctx.metricNameBuf[:0], ic.Labels)
 		ctx.metricGroupBuf = append(ctx.metricGroupBuf[:0], r.Measurement...)
 		skipFieldKey := len(r.Fields) == 1 && *skipSingleField
@@ -120,25 +125,6 @@ func (ctx *pushCtx) InsertRows(db string) error {
 	return ic.FlushBufs()
 }

-func getGzipReader(r io.Reader) (*gzip.Reader, error) {
-	v := gzipReaderPool.Get()
-	if v == nil {
-		return gzip.NewReader(r)
-	}
-	zr := v.(*gzip.Reader)
-	if err := zr.Reset(r); err != nil {
-		return nil, err
-	}
-	return zr, nil
-}
-
-func putGzipReader(zr *gzip.Reader) {
-	_ = zr.Close()
-	gzipReaderPool.Put(zr)
-}
-
-var gzipReaderPool sync.Pool
-
 func (ctx *pushCtx) Read(r io.Reader, tsMultiplier int64) bool {
 	if ctx.err != nil {
 		return false
@@ -151,11 +137,7 @@ func (ctx *pushCtx) Read(r io.Reader, tsMultiplier int64) bool {
 		}
 		return false
 	}
-	if err := ctx.Rows.Unmarshal(bytesutil.ToUnsafeString(ctx.reqBuf)); err != nil {
-		influxUnmarshalErrors.Inc()
-		ctx.err = fmt.Errorf("cannot unmarshal influx line protocol data with size %d: %s", len(ctx.reqBuf), err)
-		return false
-	}
+	ctx.Rows.Unmarshal(bytesutil.ToUnsafeString(ctx.reqBuf))

 	// Adjust timestamps according to tsMultiplier
 	currentTs := time.Now().UnixNano() / 1e6
@@ -184,9 +166,8 @@ func (ctx *pushCtx) Read(r io.Reader, tsMultiplier int64) bool {
 }

 var (
-	influxReadCalls       = metrics.NewCounter(`vm_read_calls_total{name="influx"}`)
-	influxReadErrors      = metrics.NewCounter(`vm_read_errors_total{name="influx"}`)
-	influxUnmarshalErrors = metrics.NewCounter(`vm_unmarshal_errors_total{name="influx"}`)
+	influxReadCalls  = metrics.NewCounter(`vm_read_calls_total{name="influx"}`)
+	influxReadErrors = metrics.NewCounter(`vm_read_errors_total{name="influx"}`)
 )

 type pushCtx struct {
--- a/app/vminsert/main.go
+++ b/app/vminsert/main.go
@@ -10,19 +10,25 @@ import (
 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/graphite"
 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/influx"
 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/opentsdb"
+	"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/opentsdbhttp"
 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/prometheus"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
 	"github.com/VictoriaMetrics/metrics"
 )

 var (
-	graphiteListenAddr   = flag.String("graphiteListenAddr", "", "TCP and UDP address to listen for Graphite plaintext data. Usually :2003 must be set. Doesn't work if empty")
-	opentsdbListenAddr   = flag.String("opentsdbListenAddr", "", "TCP and UDP address to listen for OpentTSDB put messages. Usually :4242 must be set. Doesn't work if empty")
-	maxInsertRequestSize = flag.Int("maxInsertRequestSize", 32*1024*1024, "The maximum size of a single insert request in bytes")
+	graphiteListenAddr     = flag.String("graphiteListenAddr", "", "TCP and UDP address to listen for Graphite plaintext data. Usually :2003 must be set. Doesn't work if empty")
+	opentsdbListenAddr     = flag.String("opentsdbListenAddr", "", "TCP and UDP address to listen for OpentTSDB put messages. Usually :4242 must be set. Doesn't work if empty")
+	opentsdbHTTPListenAddr = flag.String("opentsdbHTTPListenAddr", "", "TCP address to listen for OpentTSDB HTTP put requests. Usually :4242 must be set. Doesn't work if empty")
+	maxInsertRequestSize   = flag.Int("maxInsertRequestSize", 32*1024*1024, "The maximum size of a single insert request in bytes")
+	maxLabelsPerTimeseries = flag.Int("maxLabelsPerTimeseries", 30, "The maximum number of labels accepted per time series. Superflouos labels are dropped")
 )

 // Init initializes vminsert.
 func Init() {
+	storage.SetMaxLabelsPerTimeseries(*maxLabelsPerTimeseries)
+
 	concurrencylimiter.Init()
 	if len(*graphiteListenAddr) > 0 {
 		go graphite.Serve(*graphiteListenAddr)
@@ -30,6 +36,9 @@ func Init() {
 	if len(*opentsdbListenAddr) > 0 {
 		go opentsdb.Serve(*opentsdbListenAddr)
 	}
+	if len(*opentsdbHTTPListenAddr) > 0 {
+		go opentsdbhttp.Serve(*opentsdbHTTPListenAddr, int64(*maxInsertRequestSize))
+	}
 }

 // Stop stops vminsert.
@@ -40,6 +49,9 @@ func Stop() {
 	if len(*opentsdbListenAddr) > 0 {
 		opentsdb.Stop()
 	}
+	if len(*opentsdbHTTPListenAddr) > 0 {
+		opentsdbhttp.Stop()
+	}
 }

 // RequestHandler is a handler for Prometheus remote storage write API
--- a/app/vminsert/opentsdb/parser.go
+++ b/app/vminsert/opentsdb/parser.go
@@ -4,6 +4,8 @@ import (
 	"fmt"
 	"strings"

+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
+	"github.com/VictoriaMetrics/metrics"
 	"github.com/valyala/fastjson/fastfloat"
 )

@@ -34,13 +36,8 @@ func (rs *Rows) Reset() {
 // See http://opentsdb.net/docs/build/html/api_telnet/put.html
 //
 // s must be unchanged until rs is in use.
-func (rs *Rows) Unmarshal(s string) error {
-	var err error
-	rs.Rows, rs.tagsPool, err = unmarshalRows(rs.Rows[:0], s, rs.tagsPool[:0])
-	if err != nil {
-		return err
-	}
-	return err
+func (rs *Rows) Unmarshal(s string) {
+	rs.Rows, rs.tagsPool = unmarshalRows(rs.Rows[:0], s, rs.tagsPool[:0])
 }

 // Row is a single OpenTSDB row.
@@ -69,6 +66,9 @@ func (r *Row) unmarshal(s string, tagsPool []Tag) ([]Tag, error) {
 		return tagsPool, fmt.Errorf("cannot find whitespace between metric and timestamp in %q", s)
 	}
 	r.Metric = s[:n]
+	if len(r.Metric) == 0 {
+		return tagsPool, fmt.Errorf("metric cannot be empty")
+	}
 	tail := s[n+1:]
 	n = strings.IndexByte(tail, ' ')
 	if n < 0 {
@@ -92,41 +92,46 @@ func (r *Row) unmarshal(s string, tagsPool []Tag) ([]Tag, error) {
 	return tagsPool, nil
 }

-func unmarshalRows(dst []Row, s string, tagsPool []Tag) ([]Row, []Tag, error) {
+func unmarshalRows(dst []Row, s string, tagsPool []Tag) ([]Row, []Tag) {
 	for len(s) > 0 {
 		n := strings.IndexByte(s, '\n')
-		if n == 0 {
-			// Skip empty line
-			s = s[1:]
-			continue
-		}
-		if cap(dst) > len(dst) {
-			dst = dst[:len(dst)+1]
-		} else {
-			dst = append(dst, Row{})
-		}
-		r := &dst[len(dst)-1]
 		if n < 0 {
 			// The last line.
-			var err error
-			tagsPool, err = r.unmarshal(s, tagsPool)
-			if err != nil {
-				err = fmt.Errorf("cannot unmarshal OpenTSDB line %q: %s", s, err)
-				return dst, tagsPool, err
-			}
-			return dst, tagsPool, nil
-		}
-		var err error
-		tagsPool, err = r.unmarshal(s[:n], tagsPool)
-		if err != nil {
-			err = fmt.Errorf("cannot unmarshal OpenTSDB line %q: %s", s[:n], err)
-			return dst, tagsPool, err
+			return unmarshalRow(dst, s, tagsPool)
 		}
+		dst, tagsPool = unmarshalRow(dst, s[:n], tagsPool)
 		s = s[n+1:]
 	}
-	return dst, tagsPool, nil
+	return dst, tagsPool
 }

+func unmarshalRow(dst []Row, s string, tagsPool []Tag) ([]Row, []Tag) {
+	if len(s) > 0 && s[len(s)-1] == '\r' {
+		s = s[:len(s)-1]
+	}
+	if len(s) == 0 {
+		// Skip empty line
+		return dst, tagsPool
+	}
+
+	if cap(dst) > len(dst) {
+		dst = dst[:len(dst)+1]
+	} else {
+		dst = append(dst, Row{})
+	}
+	r := &dst[len(dst)-1]
+	var err error
+	tagsPool, err = r.unmarshal(s, tagsPool)
+	if err != nil {
+		dst = dst[:len(dst)-1]
+		logger.Errorf("cannot unmarshal OpenTSDB line %q: %s", s, err)
+		invalidLines.Inc()
+	}
+	return dst, tagsPool
+}
+
+var invalidLines = metrics.NewCounter(`vm_rows_invalid_total{type="opentsdb"}`)
+
 func unmarshalTags(dst []Tag, s string) ([]Tag, error) {
 	for {
 		if cap(dst) > len(dst) {
@@ -142,12 +147,20 @@ func unmarshalTags(dst []Tag, s string) ([]Tag, error) {
 			if err := tag.unmarshal(s); err != nil {
 				return dst[:len(dst)-1], err
 			}
+			if len(tag.Key) == 0 || len(tag.Value) == 0 {
+				// Skip empty tag
+				dst = dst[:len(dst)-1]
+			}
 			return dst, nil
 		}
 		if err := tag.unmarshal(s[:n]); err != nil {
 			return dst[:len(dst)-1], err
 		}
 		s = s[n+1:]
+		if len(tag.Key) == 0 || len(tag.Value) == 0 {
+			// Skip empty tag
+			dst = dst[:len(dst)-1]
+		}
 	}
 }

@@ -169,9 +182,6 @@ func (t *Tag) unmarshal(s string) error {
 		return fmt.Errorf("missing tag value for %q", s)
 	}
 	t.Key = s[:n]
-	if len(t.Key) == 0 {
-		return fmt.Errorf("tag key cannot be empty for %q", s)
-	}
 	t.Value = s[n+1:]
 	return nil
 }
--- a/app/vminsert/opentsdb/parser_test.go
+++ b/app/vminsert/opentsdb/parser_test.go
@@ -9,19 +9,24 @@ func TestRowsUnmarshalFailure(t *testing.T) {
 	f := func(s string) {
 		t.Helper()
 		var rows Rows
-		if err := rows.Unmarshal(s); err == nil {
-			t.Fatalf("expecting non-nil error when parsing %q", s)
+		rows.Unmarshal(s)
+		if len(rows.Rows) != 0 {
+			t.Fatalf("unexpected number of rows parsed; got %d; want 0", len(rows.Rows))
 		}

 		// Try again
-		if err := rows.Unmarshal(s); err == nil {
-			t.Fatalf("expecting non-nil error when parsing %q", s)
+		rows.Unmarshal(s)
+		if len(rows.Rows) != 0 {
+			t.Fatalf("unexpected number of rows parsed; got %d; want 0", len(rows.Rows))
 		}
 	}

 	// Missing put prefix
 	f("xx")

+	// Missing metric
+	f("put  111 34")
+
 	// Missing timestamp
 	f("put aaa")

@@ -42,26 +47,19 @@ func TestRowsUnmarshalFailure(t *testing.T) {

 	// Invalid tag
 	f("put aaa 123 4.5 foo")
-	f("put aaa 123 4.5 =")
-	f("put aaa 123 4.5 =foo")
-	f("put aaa 123 4.5 =foo a=b")
 }

 func TestRowsUnmarshalSuccess(t *testing.T) {
 	f := func(s string, rowsExpected *Rows) {
 		t.Helper()
 		var rows Rows
-		if err := rows.Unmarshal(s); err != nil {
-			t.Fatalf("cannot unmarshal %q: %s", s, err)
-		}
+		rows.Unmarshal(s)
 		if !reflect.DeepEqual(rows.Rows, rowsExpected.Rows) {
 			t.Fatalf("unexpected rows;\ngot\n%+v;\nwant\n%+v", rows.Rows, rowsExpected.Rows)
 		}

 		// Try unmarshaling again
-		if err := rows.Unmarshal(s); err != nil {
-			t.Fatalf("cannot unmarshal %q: %s", s, err)
-		}
+		rows.Unmarshal(s)
 		if !reflect.DeepEqual(rows.Rows, rowsExpected.Rows) {
 			t.Fatalf("unexpected rows;\ngot\n%+v;\nwant\n%+v", rows.Rows, rowsExpected.Rows)
 		}
@@ -74,7 +72,9 @@ func TestRowsUnmarshalSuccess(t *testing.T) {

 	// Empty line
 	f("", &Rows{})
+	f("\r", &Rows{})
 	f("\n\n", &Rows{})
+	f("\n\r\n", &Rows{})

 	// Single line
 	f("put foobar 789 -123.456 a=b", &Rows{
@@ -88,17 +88,13 @@ func TestRowsUnmarshalSuccess(t *testing.T) {
 			}},
 		}},
 	})
-	// Empty tag value
-	f("put foobar 789 -123.456 a= b=c", &Rows{
+	// Empty tag
+	f("put foobar 789 -123.456 a= b=c =d", &Rows{
 		Rows: []Row{{
 			Metric:    "foobar",
 			Value:     -123.456,
 			Timestamp: 789,
 			Tags: []Tag{
-				{
-					Key:   "a",
-					Value: "",
-				},
 				{
 					Key:   "b",
 					Value: "c",
@@ -200,4 +196,27 @@ func TestRowsUnmarshalSuccess(t *testing.T) {
 			},
 		},
 	})
+	// Multi lines with invalid line
+	f("put foo 2 0.3 a=b\naaa bbb\nput bar.baz 43 0.34 a=b\n", &Rows{
+		Rows: []Row{
+			{
+				Metric:    "foo",
+				Value:     0.3,
+				Timestamp: 2,
+				Tags: []Tag{{
+					Key:   "a",
+					Value: "b",
+				}},
+			},
+			{
+				Metric:    "bar.baz",
+				Value:     0.34,
+				Timestamp: 43,
+				Tags: []Tag{{
+					Key:   "a",
+					Value: "b",
+				}},
+			},
+		},
+	})
 }
--- a/app/vminsert/opentsdb/parser_timing_test.go
+++ b/app/vminsert/opentsdb/parser_timing_test.go
@@ -6,18 +6,19 @@ import (
 )

 func BenchmarkRowsUnmarshal(b *testing.B) {
-	s := `cpu.usage_user 1234556768 1.23 a=b
-cpu.usage_system 1234556768 23.344 a=b
-cpu.usage_iowait 1234556769 3.3443 a=b
-cpu.usage_irq 1234556768 0.34432 a=b
+	s := `put cpu.usage_user 1234556768 1.23 a=b
+put cpu.usage_system 1234556768 23.344 a=b
+put cpu.usage_iowait 1234556769 3.3443 a=b
+put cpu.usage_irq 1234556768 0.34432 a=b
 `
 	b.SetBytes(int64(len(s)))
 	b.ReportAllocs()
 	b.RunParallel(func(pb *testing.PB) {
 		var rows Rows
 		for pb.Next() {
-			if err := rows.Unmarshal(s); err != nil {
-				panic(fmt.Errorf("cannot unmarshal %q: %s", s, err))
+			rows.Unmarshal(s)
+			if len(rows.Rows) != 4 {
+				panic(fmt.Errorf("unexpected number of parsed rows; got %d; want 4", len(rows.Rows)))
 			}
 		}
 	})
--- a/app/vminsert/opentsdb/request_handler.go
+++ b/app/vminsert/opentsdb/request_handler.go
@@ -85,15 +85,21 @@ func (ctx *pushCtx) Read(r io.Reader) bool {
 			return false
 		}
 	}
-	if err := ctx.Rows.Unmarshal(bytesutil.ToUnsafeString(ctx.reqBuf)); err != nil {
-		opentsdbUnmarshalErrors.Inc()
-		ctx.err = fmt.Errorf("cannot unmarshal OpenTSDB put protocol data with size %d: %s", len(ctx.reqBuf), err)
-		return false
+	ctx.Rows.Unmarshal(bytesutil.ToUnsafeString(ctx.reqBuf))
+
+	// Fill in missing timestamps
+	currentTimestamp := time.Now().Unix()
+	rows := ctx.Rows.Rows
+	for i := range rows {
+		r := &rows[i]
+		if r.Timestamp == 0 {
+			r.Timestamp = currentTimestamp
+		}
 	}

 	// Convert timestamps from seconds to milliseconds
-	for i := range ctx.Rows.Rows {
-		ctx.Rows.Rows[i].Timestamp *= 1e3
+	for i := range rows {
+		rows[i].Timestamp *= 1e3
 	}
 	return true
 }
@@ -125,9 +131,8 @@ func (ctx *pushCtx) reset() {
 }

 var (
-	opentsdbReadCalls       = metrics.NewCounter(`vm_read_calls_total{name="opentsdb"}`)
-	opentsdbReadErrors      = metrics.NewCounter(`vm_read_errors_total{name="opentsdb"}`)
-	opentsdbUnmarshalErrors = metrics.NewCounter(`vm_unmarshal_errors_total{name="opentsdb"}`)
+	opentsdbReadCalls  = metrics.NewCounter(`vm_read_calls_total{name="opentsdb"}`)
+	opentsdbReadErrors = metrics.NewCounter(`vm_read_errors_total{name="opentsdb"}`)
 )

 func getPushCtx() *pushCtx {
--- a/app/vminsert/opentsdbhttp/parser.go
+++ b/app/vminsert/opentsdbhttp/parser.go
@@ -0,0 +1,198 @@
+package opentsdbhttp
+
+import (
+	"fmt"
+
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
+	"github.com/VictoriaMetrics/metrics"
+	"github.com/valyala/fastjson"
+	"github.com/valyala/fastjson/fastfloat"
+)
+
+// Rows contains parsed OpenTSDB rows.
+type Rows struct {
+	Rows []Row
+
+	tagsPool []Tag
+}
+
+// Reset resets rs.
+func (rs *Rows) Reset() {
+	// Release references to objects, so they can be GC'ed.
+	for i := range rs.Rows {
+		rs.Rows[i].reset()
+	}
+	rs.Rows = rs.Rows[:0]
+
+	for i := range rs.tagsPool {
+		rs.tagsPool[i].reset()
+	}
+	rs.tagsPool = rs.tagsPool[:0]
+}
+
+// Unmarshal unmarshals OpenTSDB rows from av.
+//
+// See http://opentsdb.net/docs/build/html/api_http/put.html
+//
+// s must be unchanged until rs is in use.
+func (rs *Rows) Unmarshal(av *fastjson.Value) {
+	rs.Rows, rs.tagsPool = unmarshalRows(rs.Rows[:0], av, rs.tagsPool[:0])
+}
+
+// Row is a single OpenTSDB row.
+type Row struct {
+	Metric    string
+	Tags      []Tag
+	Value     float64
+	Timestamp int64
+}
+
+func (r *Row) reset() {
+	r.Metric = ""
+	r.Tags = nil
+	r.Value = 0
+	r.Timestamp = 0
+}
+
+func (r *Row) unmarshal(o *fastjson.Value, tagsPool []Tag) ([]Tag, error) {
+	r.reset()
+	m := o.GetStringBytes("metric")
+	if len(m) == 0 {
+		return tagsPool, fmt.Errorf("missing `metric` in %s", o)
+	}
+	r.Metric = bytesutil.ToUnsafeString(m)
+
+	rawTs := o.Get("timestamp")
+	if rawTs != nil {
+		ts, err := getFloat64(rawTs)
+		if err != nil {
+			return tagsPool, fmt.Errorf("invalid `timestamp` in %s: %s", o, err)
+		}
+		r.Timestamp = int64(ts)
+	} else {
+		// Allow missing timestamp. It is automatically populated
+		// with the current time in this case.
+		r.Timestamp = 0
+	}
+
+	rawV := o.Get("value")
+	if rawV == nil {
+		return tagsPool, fmt.Errorf("missing `value` in %s", o)
+	}
+	v, err := getFloat64(rawV)
+	if err != nil {
+		return tagsPool, fmt.Errorf("invalid `value` in %s: %s", o, err)
+	}
+	r.Value = v
+
+	vt := o.Get("tags")
+	if vt == nil {
+		// Allow empty tags.
+		return tagsPool, nil
+	}
+	rawTags, err := vt.Object()
+	if err != nil {
+		return tagsPool, fmt.Errorf("invalid `tags` in %s: %s", o, err)
+	}
+
+	tagsStart := len(tagsPool)
+	tagsPool, err = unmarshalTags(tagsPool, rawTags)
+	if err != nil {
+		return tagsPool, fmt.Errorf("cannot parse tags %s: %s", rawTags, err)
+	}
+	tags := tagsPool[tagsStart:]
+	r.Tags = tags[:len(tags):len(tags)]
+	return tagsPool, nil
+}
+
+func getFloat64(v *fastjson.Value) (float64, error) {
+	switch v.Type() {
+	case fastjson.TypeNumber:
+		return v.Float64()
+	case fastjson.TypeString:
+		vStr, _ := v.StringBytes()
+		vFloat := fastfloat.ParseBestEffort(bytesutil.ToUnsafeString(vStr))
+		if vFloat == 0 && string(vStr) != "0" && string(vStr) != "0.0" {
+			return 0, fmt.Errorf("invalid float64 value: %q", vStr)
+		}
+		return vFloat, nil
+	default:
+		return 0, fmt.Errorf("value doesn't contain float64; it contains %s", v.Type())
+	}
+}
+
+func unmarshalRows(dst []Row, av *fastjson.Value, tagsPool []Tag) ([]Row, []Tag) {
+	switch av.Type() {
+	case fastjson.TypeObject:
+		return unmarshalRow(dst, av, tagsPool)
+	case fastjson.TypeArray:
+		a, _ := av.Array()
+		for _, o := range a {
+			dst, tagsPool = unmarshalRow(dst, o, tagsPool)
+		}
+		return dst, tagsPool
+	default:
+		logger.Errorf("OpenTSDB JSON must be either object or array; got %s; body=%s", av.Type(), av)
+		invalidLines.Inc()
+		return dst, tagsPool
+	}
+}
+
+func unmarshalRow(dst []Row, o *fastjson.Value, tagsPool []Tag) ([]Row, []Tag) {
+	if cap(dst) > len(dst) {
+		dst = dst[:len(dst)+1]
+	} else {
+		dst = append(dst, Row{})
+	}
+	r := &dst[len(dst)-1]
+	var err error
+	tagsPool, err = r.unmarshal(o, tagsPool)
+	if err != nil {
+		dst = dst[:len(dst)-1]
+		logger.Errorf("cannot unmarshal OpenTSDB object %s: %s", o, err)
+		invalidLines.Inc()
+	}
+	return dst, tagsPool
+}
+
+var invalidLines = metrics.NewCounter(`vm_rows_invalid_total{type="opentsdb-http"}`)
+
+func unmarshalTags(dst []Tag, o *fastjson.Object) ([]Tag, error) {
+	var err error
+	o.Visit(func(k []byte, v *fastjson.Value) {
+		if v.Type() != fastjson.TypeString {
+			err = fmt.Errorf("tag value must be string; got %s; value=%s", v.Type(), v)
+			return
+		}
+		if len(k) == 0 {
+			// Skip empty tags
+			return
+		}
+		vStr, _ := v.StringBytes()
+		if len(vStr) == 0 {
+			// Skip empty tags
+			return
+		}
+		if cap(dst) > len(dst) {
+			dst = dst[:len(dst)+1]
+		} else {
+			dst = append(dst, Tag{})
+		}
+		tag := &dst[len(dst)-1]
+		tag.Key = bytesutil.ToUnsafeString(k)
+		tag.Value = bytesutil.ToUnsafeString(vStr)
+	})
+	return dst, err
+}
+
+// Tag is an OpenTSDB tag.
+type Tag struct {
+	Key   string
+	Value string
+}
+
+func (t *Tag) reset() {
+	t.Key = ""
+	t.Value = ""
+}
--- a/app/vminsert/opentsdbhttp/parser_test.go
+++ b/app/vminsert/opentsdbhttp/parser_test.go
@@ -0,0 +1,246 @@
+package opentsdbhttp
+
+import (
+	"reflect"
+	"testing"
+)
+
+func TestRowsUnmarshalFailure(t *testing.T) {
+	f := func(s string) {
+		t.Helper()
+		var rows Rows
+		p := parserPool.Get()
+		defer parserPool.Put(p)
+		v, err := p.Parse(s)
+		if err != nil {
+			// Expected JSON parser error
+			return
+		}
+		// Verify OpenTSDB body parsing error
+		rows.Unmarshal(v)
+		if len(rows.Rows) != 0 {
+			t.Fatalf("unexpected number of rows parsed; got %d; want 0", len(rows.Rows))
+		}
+		// Try again
+		rows.Unmarshal(v)
+		if len(rows.Rows) != 0 {
+			t.Fatalf("unexpected number of rows parsed; got %d; want 0", len(rows.Rows))
+		}
+	}
+
+	// invalid json
+	f("{g")
+
+	// Invalid json type
+	f(`1`)
+	f(`"foo"`)
+	f(`[1,2]`)
+	f(`null`)
+
+	// Incomplete object
+	f(`{}`)
+	f(`{"metric": "aaa"}`)
+	f(`{"metric": "aaa", "timestamp": 1122}`)
+	f(`{"metric": "aaa", "timestamp": "tststs"}`)
+	f(`{"timestamp": 1122, "value": 33}`)
+	f(`{"value": 33}`)
+	f(`{"value": 33, "tags": {"fooo":"bar"}}`)
+
+	// Invalid value
+	f(`{"metric": "aaa", "timestamp": 1122, "value": "0.0.0"}`)
+
+	// Invalid metric type
+	f(`{"metric": "", "timestamp": 1122, "value": 0.45, "tags": {"foo": "bar"}}`)
+	f(`{"metric": ["aaa"], "timestamp": 1122, "value": 0.45, "tags": {"foo": "bar"}}`)
+	f(`{"metric": {"aaa":1}, "timestamp": 1122, "value": 0.45, "tags": {"foo": "bar"}}`)
+	f(`{"metric": 1, "timestamp": 1122, "value": 0.45, "tags": {"foo": "bar"}}`)
+
+	// Invalid timestamp type
+	f(`{"metric": "aaa", "timestamp": "foobar", "value": 0.45, "tags": {"foo": "bar"}}`)
+	f(`{"metric": "aaa", "timestamp": [1,2], "value": 0.45, "tags": {"foo": "bar"}}`)
+	f(`{"metric": "aaa", "timestamp": {"a":1}, "value": 0.45, "tags": {"foo": "bar"}}`)
+
+	// Invalid value type
+	f(`{"metric": "aaa", "timestamp": 1122, "value": [0,1], "tags": {"foo":"bar"}}`)
+	f(`{"metric": "aaa", "timestamp": 1122, "value": {"a":1}, "tags": {"foo":"bar"}}`)
+	f(`{"metric": "aaa", "timestamp": 1122, "value": "foobar", "tags": {"foo":"bar"}}`)
+
+	// Invalid tags type
+	f(`{"metric": "aaa", "timestamp": 1122, "value": 0.45, "tags": 1}`)
+	f(`{"metric": "aaa", "timestamp": 1122, "value": 0.45, "tags": [1,2]}`)
+	f(`{"metric": "aaa", "timestamp": 1122, "value": 0.45, "tags": "foo"}`)
+
+	// Invalid tag value type
+	f(`{"metric": "aaa", "timestamp": 1122, "value": 0.45, "tags": {"foo": ["bar"]}}`)
+	f(`{"metric": "aaa", "timestamp": 1122, "value": 0.45, "tags": {"foo": {"bar":"baz"}}}`)
+	f(`{"metric": "aaa", "timestamp": 1122, "value": 0.45, "tags": {"foo": 1}}`)
+
+	// Invalid multiline
+	f(`[{"metric": "aaa", "timestamp": 1122, "value": "trt", "tags":{"foo":"bar"}}, {"metric": "aaa", "timestamp": [1122], "value": 111}]`)
+}
+
+func TestRowsUnmarshalSuccess(t *testing.T) {
+	f := func(s string, rowsExpected *Rows) {
+		t.Helper()
+		var rows Rows
+
+		p := parserPool.Get()
+		defer parserPool.Put(p)
+		v, err := p.Parse(s)
+		if err != nil {
+			t.Fatalf("cannot parse json %s: %s", s, err)
+		}
+		rows.Unmarshal(v)
+		if !reflect.DeepEqual(rows.Rows, rowsExpected.Rows) {
+			t.Fatalf("unexpected rows;\ngot\n%+v;\nwant\n%+v", rows.Rows, rowsExpected.Rows)
+		}
+
+		// Try unmarshaling again
+		rows.Unmarshal(v)
+		if !reflect.DeepEqual(rows.Rows, rowsExpected.Rows) {
+			t.Fatalf("unexpected rows;\ngot\n%+v;\nwant\n%+v", rows.Rows, rowsExpected.Rows)
+		}
+
+		rows.Reset()
+		if len(rows.Rows) != 0 {
+			t.Fatalf("non-empty rows after reset: %+v", rows.Rows)
+		}
+	}
+
+	// Normal line
+	f(`{"metric": "foobar", "timestamp": 789, "value": -123.456, "tags": {"a":"b"}}`, &Rows{
+		Rows: []Row{{
+			Metric:    "foobar",
+			Value:     -123.456,
+			Timestamp: 789,
+			Tags: []Tag{{
+				Key:   "a",
+				Value: "b",
+			}},
+		}},
+	})
+	// Timestamp as string
+	f(`{"metric": "foobar", "timestamp": "1789", "value": -123.456, "tags": {"a":"b"}}`, &Rows{
+		Rows: []Row{{
+			Metric:    "foobar",
+			Value:     -123.456,
+			Timestamp: 1789,
+			Tags: []Tag{{
+				Key:   "a",
+				Value: "b",
+			}},
+		}},
+	})
+	// Timestamp as float64 (it is truncated to integer)
+	f(`{"metric": "foobar", "timestamp": 17.89, "value": -123.456, "tags": {"a":"b"}}`, &Rows{
+		Rows: []Row{{
+			Metric:    "foobar",
+			Value:     -123.456,
+			Timestamp: 17,
+			Tags: []Tag{{
+				Key:   "a",
+				Value: "b",
+			}},
+		}},
+	})
+	// Empty tags
+	f(`{"metric": "foobar", "timestamp": 789, "value": -123.456, "tags": {}}`, &Rows{
+		Rows: []Row{{
+			Metric:    "foobar",
+			Value:     -123.456,
+			Timestamp: 789,
+			Tags:      nil,
+		}},
+	})
+	// Missing tags
+	f(`{"metric": "foobar", "timestamp": 789, "value": -123.456}`, &Rows{
+		Rows: []Row{{
+			Metric:    "foobar",
+			Value:     -123.456,
+			Timestamp: 789,
+			Tags:      nil,
+		}},
+	})
+	// Empty tag value
+	f(`{"metric": "foobar", "timestamp": 123, "value": -123.456, "tags": {"a":"", "b":"c", "": "d"}}`, &Rows{
+		Rows: []Row{{
+			Metric:    "foobar",
+			Value:     -123.456,
+			Timestamp: 123,
+			Tags: []Tag{
+				{
+					Key:   "b",
+					Value: "c",
+				},
+			},
+		}},
+	})
+	// Value as string
+	f(`{"metric": "foobar", "timestamp": 789, "value": "-12.456", "tags": {"a":"b"}}`, &Rows{
+		Rows: []Row{{
+			Metric:    "foobar",
+			Value:     -12.456,
+			Timestamp: 789,
+			Tags: []Tag{{
+				Key:   "a",
+				Value: "b",
+			}},
+		}},
+	})
+	// Missing timestamp
+	f(`{"metric": "foobar", "value": "-12.456", "tags": {"a":"b"}}`, &Rows{
+		Rows: []Row{{
+			Metric:    "foobar",
+			Value:     -12.456,
+			Timestamp: 0,
+			Tags: []Tag{{
+				Key:   "a",
+				Value: "b",
+			}},
+		}},
+	})
+
+	// Multiple tags
+	f(`{"metric": "foo", "value": 1, "timestamp": 2, "tags": {"bar":"baz", "x": "y"}}`, &Rows{
+		Rows: []Row{{
+			Metric: "foo",
+			Tags: []Tag{
+				{
+					Key:   "bar",
+					Value: "baz",
+				},
+				{
+					Key:   "x",
+					Value: "y",
+				},
+			},
+			Value:     1,
+			Timestamp: 2,
+		}},
+	})
+
+	// Multi lines
+	f(`[{"metric": "foo", "value": "0.3", "timestamp": 2, "tags": {"a":"b"}},
+{"metric": "bar.baz", "value": 0.34, "timestamp": 43, "tags": {"a":"b"}}]`, &Rows{
+		Rows: []Row{
+			{
+				Metric:    "foo",
+				Value:     0.3,
+				Timestamp: 2,
+				Tags: []Tag{{
+					Key:   "a",
+					Value: "b",
+				}},
+			},
+			{
+				Metric:    "bar.baz",
+				Value:     0.34,
+				Timestamp: 43,
+				Tags: []Tag{{
+					Key:   "a",
+					Value: "b",
+				}},
+			},
+		},
+	})
+}
--- a/app/vminsert/opentsdbhttp/parser_timing_test.go
+++ b/app/vminsert/opentsdbhttp/parser_timing_test.go
@@ -0,0 +1,33 @@
+package opentsdbhttp
+
+import (
+	"fmt"
+	"testing"
+
+	"github.com/valyala/fastjson"
+)
+
+func BenchmarkRowsUnmarshal(b *testing.B) {
+	s := `[{"metric": "cpu.usage_user", "timestamp": 1234556768, "value": 1.23, "tags": {"a":"b", "x": "y"}},
+{"metric": "cpu.usage_system", "timestamp": 1234556768, "value": 23.344, "tags": {"a":"b"}},
+{"metric": "cpu.usage_iowait", "timestamp": 1234556769, "value":3.3443, "tags": {"a":"b"}},
+{"metric": "cpu.usage_irq", "timestamp": 1234556768, "value": 0.34432, "tags": {"a":"b"}}
+]
+`
+	b.SetBytes(int64(len(s)))
+	b.ReportAllocs()
+	b.RunParallel(func(pb *testing.PB) {
+		var rows Rows
+		var p fastjson.Parser
+		for pb.Next() {
+			v, err := p.Parse(s)
+			if err != nil {
+				panic(fmt.Errorf("cannot parse %q: %s", s, err))
+			}
+			rows.Unmarshal(v)
+			if len(rows.Rows) != 4 {
+				panic(fmt.Errorf("unexpected number of rows unmarshaled; got %d; want 4", len(rows.Rows)))
+			}
+		}
+	})
+}
--- a/app/vminsert/opentsdbhttp/request_handler.go
+++ b/app/vminsert/opentsdbhttp/request_handler.go
@@ -0,0 +1,150 @@
+package opentsdbhttp
+
+import (
+	"fmt"
+	"io"
+	"net/http"
+	"runtime"
+	"sync"
+	"time"
+
+	"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/common"
+	"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/concurrencylimiter"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
+	"github.com/VictoriaMetrics/metrics"
+	"github.com/valyala/fastjson"
+)
+
+var (
+	rowsInserted  = metrics.NewCounter(`vm_rows_inserted_total{type="opentsdb-http"}`)
+	rowsPerInsert = metrics.NewSummary(`vm_rows_per_insert{type="opentsdb-http"}`)
+
+	opentsdbReadCalls       = metrics.NewCounter(`vm_read_calls_total{name="opentsdb-http"}`)
+	opentsdbReadErrors      = metrics.NewCounter(`vm_read_errors_total{name="opentsdb-http"}`)
+	opentsdbUnmarshalErrors = metrics.NewCounter(`vm_unmarshal_errors_total{name="opentsdb-http"}`)
+)
+
+// insertHandler processes HTTP OpenTSDB put requests.
+// See http://opentsdb.net/docs/build/html/api_http/put.html
+func insertHandler(req *http.Request, maxSize int64) error {
+	return concurrencylimiter.Do(func() error {
+		return insertHandlerInternal(req, maxSize)
+	})
+}
+
+func insertHandlerInternal(req *http.Request, maxSize int64) error {
+	opentsdbReadCalls.Inc()
+
+	r := req.Body
+	if req.Header.Get("Content-Encoding") == "gzip" {
+		zr, err := common.GetGzipReader(r)
+		if err != nil {
+			opentsdbReadErrors.Inc()
+			return fmt.Errorf("cannot read gzipped http protocol data: %s", err)
+		}
+		defer common.PutGzipReader(zr)
+		r = zr
+	}
+
+	ctx := getPushCtx()
+	defer putPushCtx(ctx)
+
+	// Read the request in ctx.reqBuf
+	lr := io.LimitReader(r, maxSize+1)
+	reqLen, err := ctx.reqBuf.ReadFrom(lr)
+	if err != nil {
+		opentsdbReadErrors.Inc()
+		return fmt.Errorf("cannot read HTTP OpenTSDB request: %s", err)
+	}
+	if reqLen > maxSize {
+		opentsdbReadErrors.Inc()
+		return fmt.Errorf("too big HTTP OpenTSDB request; mustn't exceed %d bytes", maxSize)
+	}
+
+	// Unmarshal the request to ctx.Rows
+	p := parserPool.Get()
+	defer parserPool.Put(p)
+	v, err := p.ParseBytes(ctx.reqBuf.B)
+	if err != nil {
+		opentsdbUnmarshalErrors.Inc()
+		return fmt.Errorf("cannot parse HTTP OpenTSDB json: %s", err)
+	}
+	ctx.Rows.Unmarshal(v)
+
+	// Fill in missing timestamps
+	currentTimestamp := time.Now().Unix()
+	rows := ctx.Rows.Rows
+	for i := range rows {
+		r := &rows[i]
+		if r.Timestamp == 0 {
+			r.Timestamp = currentTimestamp
+		}
+	}
+
+	// Convert timestamps in seconds to milliseconds if needed.
+	// See http://opentsdb.net/docs/javadoc/net/opentsdb/core/Const.html#SECOND_MASK
+	for i := range rows {
+		r := &rows[i]
+		if r.Timestamp&secondMask == 0 {
+			r.Timestamp *= 1e3
+		}
+	}
+
+	// Insert ctx.Rows to db.
+	ic := &ctx.Common
+	ic.Reset(len(rows))
+	for i := range rows {
+		r := &rows[i]
+		ic.Labels = ic.Labels[:0]
+		ic.AddLabel("", r.Metric)
+		for j := range r.Tags {
+			tag := &r.Tags[j]
+			ic.AddLabel(tag.Key, tag.Value)
+		}
+		ic.WriteDataPoint(nil, ic.Labels, r.Timestamp, r.Value)
+	}
+	rowsInserted.Add(len(rows))
+	rowsPerInsert.Update(float64(len(rows)))
+	return ic.FlushBufs()
+}
+
+const secondMask int64 = 0x7FFFFFFF00000000
+
+var parserPool fastjson.ParserPool
+
+type pushCtx struct {
+	Rows   Rows
+	Common common.InsertCtx
+
+	reqBuf bytesutil.ByteBuffer
+}
+
+func (ctx *pushCtx) reset() {
+	ctx.Rows.Reset()
+	ctx.Common.Reset(0)
+	ctx.reqBuf.Reset()
+}
+
+func getPushCtx() *pushCtx {
+	select {
+	case ctx := <-pushCtxPoolCh:
+		return ctx
+	default:
+		if v := pushCtxPool.Get(); v != nil {
+			return v.(*pushCtx)
+		}
+		return &pushCtx{}
+	}
+}
+
+func putPushCtx(ctx *pushCtx) {
+	ctx.reset()
+	select {
+	case pushCtxPoolCh <- ctx:
+	default:
+		pushCtxPool.Put(ctx)
+	}
+}
+
+var pushCtxPool sync.Pool
+var pushCtxPoolCh = make(chan *pushCtx, runtime.GOMAXPROCS(-1))
--- a/app/vminsert/opentsdbhttp/server.go
+++ b/app/vminsert/opentsdbhttp/server.go
@@ -0,0 +1,70 @@
+package opentsdbhttp
+
+import (
+	"context"
+	"net/http"
+	"time"
+
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
+	"github.com/VictoriaMetrics/metrics"
+)
+
+var (
+	writeRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/put", protocol="opentsdb-http"}`)
+	writeErrors   = metrics.NewCounter(`vm_http_request_errors_total{path="/api/put", protocol="opentsdb-http"}`)
+)
+
+var (
+	httpServer     *http.Server
+	httpAddr       string
+	maxRequestSize int64
+)
+
+// Serve starts HTTP OpenTSDB server on the given addr.
+func Serve(addr string, maxReqSize int64) {
+	logger.Infof("starting HTTP OpenTSDB server at %q", addr)
+	httpAddr = addr
+	maxRequestSize = maxReqSize
+	httpServer = &http.Server{
+		Addr:         addr,
+		Handler:      http.HandlerFunc(requestHandler),
+		ReadTimeout:  30 * time.Second,
+		WriteTimeout: 10 * time.Second,
+	}
+	go func() {
+		err := httpServer.ListenAndServe()
+		if err == http.ErrServerClosed {
+			return
+		}
+		if err != nil {
+			logger.Fatalf("FATAL: error serving HTTP OpenTSDB: %s", err)
+		}
+	}()
+}
+
+// requestHandler handles HTTP OpenTSDB insert request.
+func requestHandler(w http.ResponseWriter, r *http.Request) {
+	switch r.URL.Path {
+	case "/api/put":
+		writeRequests.Inc()
+		if err := insertHandler(r, maxRequestSize); err != nil {
+			writeErrors.Inc()
+			httpserver.Errorf(w, "error in %q: %s", r.URL.Path, err)
+			return
+		}
+		w.WriteHeader(http.StatusNoContent)
+	default:
+		httpserver.Errorf(w, "unexpected path requested on HTTP OpenTSDB server: %q", r.URL.Path)
+	}
+}
+
+// Stop stops HTTP OpenTSDB server.
+func Stop() {
+	logger.Infof("stopping HTTP OpenTSDB server at %q...", httpAddr)
+	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
+	defer cancel()
+	if err := httpServer.Shutdown(ctx); err != nil {
+		logger.Fatalf("FATAL: cannot close HTTP OpenTSDB server: %s", err)
+	}
+}
--- a/app/vmselect/main.go
+++ b/app/vmselect/main.go
@@ -2,6 +2,7 @@ package vmselect

 import (
 	"flag"
+	"fmt"
 	"net/http"
 	"runtime"
 	"strings"
@@ -30,29 +31,53 @@ func Init() {
 	fs.RemoveDirContents(tmpDirPath)
 	netstorage.InitTmpBlocksDir(tmpDirPath)
 	promql.InitRollupResultCache(*vmstorage.DataPath + "/cache/rollupResult")
+
 	concurrencyCh = make(chan struct{}, *maxConcurrentRequests)
 }

-var concurrencyCh chan struct{}
-
 // Stop stops vmselect
 func Stop() {
 	promql.StopRollupResultCache()
 }

+var concurrencyCh chan struct{}
+
+var (
+	concurrencyLimitReached = metrics.NewCounter(`vm_concurrent_select_limit_reached_total`)
+	concurrencyLimitTimeout = metrics.NewCounter(`vm_concurrent_select_limit_timeout_total`)
+
+	_ = metrics.NewGauge(`vm_concurrent_select_capacity`, func() float64 {
+		return float64(cap(concurrencyCh))
+	})
+	_ = metrics.NewGauge(`vm_concurrent_select_current`, func() float64 {
+		return float64(len(concurrencyCh))
+	})
+)
+
 // RequestHandler handles remote read API requests for Prometheus
 func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
 	// Limit the number of concurrent queries.
-	// Sleep for a while until giving up. This should resolve short bursts in requests.
-	t := timerpool.Get(*maxQueueDuration)
 	select {
 	case concurrencyCh <- struct{}{}:
-		timerpool.Put(t)
 		defer func() { <-concurrencyCh }()
-	case <-t.C:
-		timerpool.Put(t)
-		httpserver.Errorf(w, "cannot handle more than %d concurrent requests", cap(concurrencyCh))
-		return true
+	default:
+		// Sleep for a while until giving up. This should resolve short bursts in requests.
+		concurrencyLimitReached.Inc()
+		t := timerpool.Get(*maxQueueDuration)
+		select {
+		case concurrencyCh <- struct{}{}:
+			timerpool.Put(t)
+			defer func() { <-concurrencyCh }()
+		case <-t.C:
+			timerpool.Put(t)
+			concurrencyLimitTimeout.Inc()
+			err := &httpserver.ErrorWithStatusCode{
+				Err:        fmt.Errorf("cannot handle more than %d concurrent requests", cap(concurrencyCh)),
+				StatusCode: http.StatusServiceUnavailable,
+			}
+			httpserver.Errorf(w, "%s", err)
+			return true
+		}
 	}

 	path := strings.Replace(r.URL.Path, "//", "/", -1)
@@ -165,7 +190,10 @@ func sendPrometheusError(w http.ResponseWriter, r *http.Request, err error) {
 	logger.Errorf("error in %q: %s", r.URL.Path, err)

 	w.Header().Set("Content-Type", "application/json")
-	statusCode := 422
+	statusCode := http.StatusUnprocessableEntity
+	if esc, ok := err.(*httpserver.ErrorWithStatusCode); ok {
+		statusCode = esc.StatusCode
+	}
 	w.WriteHeader(statusCode)
 	prometheus.WriteErrorResponse(w, statusCode, err)
 }
--- a/app/vmselect/netstorage/fadvise_freebsd.go
+++ b/app/vmselect/netstorage/fadvise_freebsd.go
@@ -0,0 +1,15 @@
+package netstorage
+
+import (
+	"os"
+
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
+	"golang.org/x/sys/unix"
+)
+
+func mustFadviseRandomRead(f *os.File) {
+	fd := int(f.Fd())
+	if err := unix.Fadvise(int(fd), 0, 0, unix.FADV_RANDOM|unix.FADV_WILLNEED); err != nil {
+		logger.Panicf("FATAL: error returned from unix.Fadvise(RANDOM|WILLNEED): %s", err)
+	}
+}
--- a/app/vmselect/netstorage/netstorage.go
+++ b/app/vmselect/netstorage/netstorage.go
@@ -19,9 +19,9 @@ import (
 )

 var (
-	maxTagKeysPerSearch   = flag.Int("search.maxTagKeys", 10e3, "The maximum number of tag keys returned per search")
-	maxTagValuesPerSearch = flag.Int("search.maxTagValues", 10e3, "The maximum number of tag values returned per search")
-	maxMetricsPerSearch   = flag.Int("search.maxUniqueTimeseries", 100e3, "The maximum number of unique time series each search can scan")
+	maxTagKeysPerSearch   = flag.Int("search.maxTagKeys", 100e3, "The maximum number of tag keys returned per search")
+	maxTagValuesPerSearch = flag.Int("search.maxTagValues", 100e3, "The maximum number of tag values returned per search")
+	maxMetricsPerSearch   = flag.Int("search.maxUniqueTimeseries", 300e3, "The maximum number of unique time series each search can scan")
 )

 // Result is a single timeseries result.
--- a/app/vmselect/netstorage/tmp_blocks_file.go
+++ b/app/vmselect/netstorage/tmp_blocks_file.go
@@ -1,7 +1,6 @@
 package netstorage

 import (
-	"bufio"
 	"fmt"
 	"io/ioutil"
 	"os"
@@ -10,6 +9,7 @@ import (
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/memory"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
 	"github.com/VictoriaMetrics/metrics"
 )
@@ -30,13 +30,23 @@ func InitTmpBlocksDir(tmpDirPath string) {

 var tmpBlocksDir string

-const maxInmemoryTmpBlocksFile = 512 * 1024
+func maxInmemoryTmpBlocksFile() int {
+	mem := memory.Allowed()
+	maxLen := mem / 1024
+	if maxLen < 64*1024 {
+		return 64 * 1024
+	}
+	return maxLen
+}
+
+var _ = metrics.NewGauge(`vm_tmp_blocks_max_inmemory_file_size_bytes`, func() float64 {
+	return float64(maxInmemoryTmpBlocksFile())
+})

 type tmpBlocksFile struct {
 	buf []byte

-	f  *os.File
-	bw *bufio.Writer
+	f *os.File

 	offset uint64
 }
@@ -44,7 +54,9 @@ type tmpBlocksFile struct {
 func getTmpBlocksFile() *tmpBlocksFile {
 	v := tmpBlocksFilePool.Get()
 	if v == nil {
-		return &tmpBlocksFile{}
+		return &tmpBlocksFile{
+			buf: make([]byte, 0, maxInmemoryTmpBlocksFile()),
+		}
 	}
 	return v.(*tmpBlocksFile)
 }
@@ -53,7 +65,6 @@ func putTmpBlocksFile(tbf *tmpBlocksFile) {
 	tbf.MustClose()
 	tbf.buf = tbf.buf[:0]
 	tbf.f = nil
-	tbf.bw = nil
 	tbf.offset = 0
 	tmpBlocksFilePool.Put(tbf)
 }
@@ -69,22 +80,6 @@ func (addr tmpBlockAddr) String() string {
 	return fmt.Sprintf("offset %d, size %d", addr.offset, addr.size)
 }

-func getBufioWriter(f *os.File) *bufio.Writer {
-	v := bufioWriterPool.Get()
-	if v == nil {
-		return bufio.NewWriterSize(f, maxInmemoryTmpBlocksFile*2)
-	}
-	bw := v.(*bufio.Writer)
-	bw.Reset(f)
-	return bw
-}
-
-func putBufioWriter(bw *bufio.Writer) {
-	bufioWriterPool.Put(bw)
-}
-
-var bufioWriterPool sync.Pool
-
 var tmpBlocksFilesCreated = metrics.NewCounter(`vm_tmp_blocks_files_created_total`)

 // WriteBlock writes b to tbf.
@@ -92,28 +87,31 @@ var tmpBlocksFilesCreated = metrics.NewCounter(`vm_tmp_blocks_files_created_tota
 // It returns errors since the operation may fail on space shortage
 // and this must be handled.
 func (tbf *tmpBlocksFile) WriteBlock(b *storage.Block) (tmpBlockAddr, error) {
+	bb := tmpBufPool.Get()
+	defer tmpBufPool.Put(bb)
+	bb.B = storage.MarshalBlock(bb.B[:0], b)
+
 	var addr tmpBlockAddr
 	addr.offset = tbf.offset
-
-	tbfBufLen := len(tbf.buf)
-	tbf.buf = storage.MarshalBlock(tbf.buf, b)
-	addr.size = len(tbf.buf) - tbfBufLen
+	addr.size = len(bb.B)
 	tbf.offset += uint64(addr.size)
-	if tbf.offset <= maxInmemoryTmpBlocksFile {
+	if len(tbf.buf)+len(bb.B) <= cap(tbf.buf) {
+		// Fast path - the data fits tbf.buf
+		tbf.buf = append(tbf.buf, bb.B...)
 		return addr, nil
 	}

+	// Slow path: flush the data from tbf.buf to file.
 	if tbf.f == nil {
 		f, err := ioutil.TempFile(tmpBlocksDir, "")
 		if err != nil {
 			return addr, err
 		}
 		tbf.f = f
-		tbf.bw = getBufioWriter(f)
 		tmpBlocksFilesCreated.Inc()
 	}
-	_, err := tbf.bw.Write(tbf.buf)
-	tbf.buf = tbf.buf[:0]
+	_, err := tbf.f.Write(tbf.buf)
+	tbf.buf = append(tbf.buf[:0], bb.B...)
 	if err != nil {
 		return addr, fmt.Errorf("cannot write block to %q: %s", tbf.f.Name(), err)
 	}
@@ -124,15 +122,15 @@ func (tbf *tmpBlocksFile) Finalize() error {
 	if tbf.f == nil {
 		return nil
 	}
-
-	err := tbf.bw.Flush()
-	putBufioWriter(tbf.bw)
-	tbf.bw = nil
+	if _, err := tbf.f.Write(tbf.buf); err != nil {
+		return fmt.Errorf("cannot flush the remaining %d bytes to tmpBlocksFile: %s", len(tbf.buf), err)
+	}
+	tbf.buf = tbf.buf[:0]
 	if _, err := tbf.f.Seek(0, 0); err != nil {
 		logger.Panicf("FATAL: cannot seek to the start of file: %s", err)
 	}
 	mustFadviseRandomRead(tbf.f)
-	return err
+	return nil
 }

 func (tbf *tmpBlocksFile) MustReadBlockAt(dst *storage.Block, addr tmpBlockAddr) {
@@ -167,10 +165,6 @@ func (tbf *tmpBlocksFile) MustClose() {
 	if tbf.f == nil {
 		return
 	}
-	if tbf.bw != nil {
-		putBufioWriter(tbf.bw)
-		tbf.bw = nil
-	}
 	fname := tbf.f.Name()

 	// Remove the file at first, then close it.
--- a/app/vmselect/netstorage/tmp_blocks_file_test.go
+++ b/app/vmselect/netstorage/tmp_blocks_file_test.go
@@ -30,7 +30,7 @@ func TestTmpBlocksFileSerial(t *testing.T) {
 }

 func TestTmpBlocksFileConcurrent(t *testing.T) {
-	concurrency := 4
+	concurrency := 3
 	ch := make(chan error, concurrency)
 	for i := 0; i < concurrency; i++ {
 		go func() {
@@ -69,7 +69,7 @@ func testTmpBlocksFile() error {
 		_, _, _ = b.MarshalData(0, 0)
 		return &b
 	}
-	for _, size := range []int{1024, 16 * 1024, maxInmemoryTmpBlocksFile / 2, 2 * maxInmemoryTmpBlocksFile} {
+	for _, size := range []int{1024, 16 * 1024, maxInmemoryTmpBlocksFile() / 2, 2 * maxInmemoryTmpBlocksFile()} {
 		err := func() error {
 			tbf := getTmpBlocksFile()
 			defer putTmpBlocksFile(tbf)
@@ -94,7 +94,7 @@ func testTmpBlocksFile() error {
 			}

 			// Read blocks in parallel and verify them
-			concurrency := 3
+			concurrency := 2
 			workCh := make(chan int)
 			doneCh := make(chan error)
 			for i := 0; i < concurrency; i++ {
--- a/app/vmselect/prometheus/prometheus.go
+++ b/app/vmselect/prometheus/prometheus.go
@@ -557,7 +557,9 @@ func QueryRangeHandler(w http.ResponseWriter, r *http.Request) error {
 	if err := promql.ValidateMaxPointsPerTimeseries(start, end, step); err != nil {
 		return err
 	}
-	start, end = promql.AdjustStartEnd(start, end, step)
+	if mayCache {
+		start, end = promql.AdjustStartEnd(start, end, step)
+	}

 	ec := promql.EvalConfig{
 		Start:    start,
@@ -574,12 +576,47 @@ func QueryRangeHandler(w http.ResponseWriter, r *http.Request) error {
 		result = adjustLastPoints(result)
 	}

+	// Remove NaN values as Prometheus does.
+	// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/153
+	removeNaNValuesInplace(result)
+
 	w.Header().Set("Content-Type", "application/json")
 	WriteQueryRangeResponse(w, result)
 	queryRangeDuration.UpdateDuration(startTime)
 	return nil
 }

+func removeNaNValuesInplace(tss []netstorage.Result) {
+	for i := range tss {
+		ts := &tss[i]
+		hasNaNs := false
+		for _, v := range ts.Values {
+			if math.IsNaN(v) {
+				hasNaNs = true
+				break
+			}
+		}
+		if !hasNaNs {
+			// Fast path: nothing to remove.
+			continue
+		}
+
+		// Slow path: remove NaNs.
+		srcTimestamps := ts.Timestamps
+		dstValues := ts.Values[:0]
+		dstTimestamps := ts.Timestamps[:0]
+		for j, v := range ts.Values {
+			if math.IsNaN(v) {
+				continue
+			}
+			dstValues = append(dstValues, v)
+			dstTimestamps = append(dstTimestamps, srcTimestamps[j])
+		}
+		ts.Values = dstValues
+		ts.Timestamps = dstTimestamps
+	}
+}
+
 var queryRangeDuration = metrics.NewSummary(`vm_request_duration_seconds{path="/api/v1/query_range"}`)

 // adjustLastPoints substitutes the last point values with the previous
--- a/app/vmselect/prometheus/prometheus_test.go
+++ b/app/vmselect/prometheus/prometheus_test.go
@@ -2,11 +2,48 @@ package prometheus

 import (
 	"fmt"
+	"math"
 	"net/http"
 	"net/url"
+	"reflect"
 	"testing"
+
+	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/netstorage"
 )

+func TestRemoveNaNValuesInplace(t *testing.T) {
+	f := func(tss []netstorage.Result, tssExpected []netstorage.Result) {
+		t.Helper()
+		removeNaNValuesInplace(tss)
+		if !reflect.DeepEqual(tss, tssExpected) {
+			t.Fatalf("unexpected result; got %v; want %v", tss, tssExpected)
+		}
+	}
+
+	nan := math.NaN()
+
+	f(nil, nil)
+	f([]netstorage.Result{
+		{
+			Timestamps: []int64{100, 200, 300},
+			Values:     []float64{1, 2, 3},
+		},
+		{
+			Timestamps: []int64{100, 200, 300, 400},
+			Values:     []float64{nan, nan, 3, nan},
+		},
+	}, []netstorage.Result{
+		{
+			Timestamps: []int64{100, 200, 300},
+			Values:     []float64{1, 2, 3},
+		},
+		{
+			Timestamps: []int64{300},
+			Values:     []float64{3},
+		},
+	})
+}
+
 func TestGetTimeSuccess(t *testing.T) {
 	f := func(s string, timestampExpected int64) {
 		t.Helper()
--- a/app/vmselect/promql/aggr.go
+++ b/app/vmselect/promql/aggr.go
@@ -353,6 +353,25 @@ func aggrFuncCountValues(afa *aggrFuncArg) ([]*timeseries, error) {
 	if err != nil {
 		return nil, err
 	}
+
+	// Remove dstLabel from grouping like Prometheus does.
+	modifier := &afa.ae.Modifier
+	switch strings.ToLower(modifier.Op) {
+	case "without":
+		modifier.Args = append(modifier.Args, dstLabel)
+	case "by":
+		dstArgs := modifier.Args[:0]
+		for _, arg := range modifier.Args {
+			if arg == dstLabel {
+				continue
+			}
+			dstArgs = append(dstArgs, arg)
+		}
+		modifier.Args = dstArgs
+	default:
+		// Do nothing
+	}
+
 	afe := func(tss []*timeseries) []*timeseries {
 		m := make(map[float64]bool)
 		for _, ts := range tss {
--- a/app/vmselect/promql/aggr_incremental_test.go
+++ b/app/vmselect/promql/aggr_incremental_test.go
@@ -179,7 +179,8 @@ func compareValues(vs1, vs2 []float64) error {
 			}
 			continue
 		}
-		if v1 != v2 {
+		eps := math.Abs(v1 - v2)
+		if eps > 1e-14 {
 			return fmt.Errorf("unexpected value; got %v; want %v", v1, v2)
 		}
 	}
--- a/app/vmselect/promql/binary_op.go
+++ b/app/vmselect/promql/binary_op.go
@@ -322,6 +322,7 @@ func adjustBinaryOpTags(be *binaryOpExpr, left, right []*timeseries) ([]*timeser
 			}
 			src := tssRight[0]
 			for _, ts := range tssLeft {
+				resetMetricGroupIfRequired(be, ts)
 				ts.MetricName.AddMissingTags(joinTags, &src.MetricName)
 				rvsLeft = append(rvsLeft, ts)
 				rvsRight = append(rvsRight, src)
@@ -332,6 +333,7 @@ func adjustBinaryOpTags(be *binaryOpExpr, left, right []*timeseries) ([]*timeser
 			}
 			src := tssLeft[0]
 			for _, ts := range tssRight {
+				resetMetricGroupIfRequired(be, ts)
 				ts.MetricName.AddMissingTags(joinTags, &src.MetricName)
 				rvsLeft = append(rvsLeft, src)
 				rvsRight = append(rvsRight, ts)
@@ -416,10 +418,25 @@ func binaryOpIfnot(left, right float64) float64 {
 }

 func binaryOpEq(left, right float64) bool {
+	// Special handling for nan == nan.
+	// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/150 .
+	if math.IsNaN(left) {
+		return math.IsNaN(right)
+	}
+
 	return left == right
 }

 func binaryOpNeq(left, right float64) bool {
+	// Special handling for comparison with nan.
+	// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/150 .
+	if math.IsNaN(left) {
+		return !math.IsNaN(right)
+	}
+	if math.IsNaN(right) {
+		return true
+	}
+
 	return left != right
 }

--- a/app/vmselect/promql/exec.go
+++ b/app/vmselect/promql/exec.go
@@ -105,14 +105,14 @@ func maySortResults(e expr, tss []*timeseries) bool {
 func timeseriesToResult(tss []*timeseries, maySort bool) ([]netstorage.Result, error) {
 	tss = removeNaNs(tss)
 	result := make([]netstorage.Result, len(tss))
-	m := make(map[string]bool)
+	m := make(map[string]struct{}, len(tss))
 	bb := bbPool.Get()
 	for i, ts := range tss {
 		bb.B = marshalMetricNameSorted(bb.B[:0], &ts.MetricName)
-		if m[string(bb.B)] {
+		if _, ok := m[string(bb.B)]; ok {
 			return nil, fmt.Errorf(`duplicate output timeseries: %s%s`, ts.MetricName.MetricGroup, stringMetricName(&ts.MetricName))
 		}
-		m[string(bb.B)] = true
+		m[string(bb.B)] = struct{}{}

 		rs := &result[i]
 		rs.MetricNameMarshaled = append(rs.MetricNameMarshaled[:0], bb.B...)
--- a/app/vmselect/promql/exec_test.go
+++ b/app/vmselect/promql/exec_test.go
@@ -1302,6 +1302,44 @@ func TestExecSuccess(t *testing.T) {
 		resultExpected := []netstorage.Result{r}
 		f(q, resultExpected)
 	})
+	t.Run(`label_value()`, func(t *testing.T) {
+		t.Parallel()
+		q := `with (
+			x = (
+				label_set(time(), "foo", "123.456", "__name__", "aaa"),
+				label_set(-time(), "foo", "bar", "__name__", "bbb"),
+				label_set(-time(), "__name__", "bxs"),
+				label_set(-time(), "foo", "45", "bar", "xs"),
+			)
+		)
+		sort(x + label_value(x, "foo"))`
+		r1 := netstorage.Result{
+			MetricName: metricNameExpected,
+			Values:     []float64{-955, -1155, -1355, -1555, -1755, -1955},
+			Timestamps: timestampsExpected,
+		}
+		r1.MetricName.Tags = []storage.Tag{
+			{
+				Key:   []byte("bar"),
+				Value: []byte("xs"),
+			},
+			{
+				Key:   []byte("foo"),
+				Value: []byte("45"),
+			},
+		}
+		r2 := netstorage.Result{
+			MetricName: metricNameExpected,
+			Values:     []float64{1123.456, 1323.456, 1523.456, 1723.456, 1923.456, 2123.456},
+			Timestamps: timestampsExpected,
+		}
+		r2.MetricName.Tags = []storage.Tag{{
+			Key:   []byte("foo"),
+			Value: []byte("123.456"),
+		}}
+		resultExpected := []netstorage.Result{r1, r2}
+		f(q, resultExpected)
+	})
 	t.Run(`label_transform(mismatch)`, func(t *testing.T) {
 		t.Parallel()
 		q := `label_transform(time(), "__name__", "foobar", "xx")`
@@ -1821,9 +1859,9 @@ func TestExecSuccess(t *testing.T) {
 		resultExpected := []netstorage.Result{r}
 		f(q, resultExpected)
 	})
-	t.Run(`vector * on(foo) group_left() duplicate_timeseries`, func(t *testing.T) {
+	t.Run(`vector * on(foo) group_left() duplicate_nonoverlapping_timeseries`, func(t *testing.T) {
 		t.Parallel()
-		q := `label_set(time()/10, "foo", "bar") + on(foo) group_left() (
+		q := `label_set(time()/10, "foo", "bar", "xx", "yy", "__name__", "qwert") + on(foo) group_left() (
 			label_set(time() < 1400, "foo", "bar", "op", "le"),
 			label_set(time() >= 1400, "foo", "bar", "op", "ge"),
 		)`
@@ -1832,13 +1870,85 @@ func TestExecSuccess(t *testing.T) {
 			Values:     []float64{1100, 1320, 1540, 1760, 1980, 2200},
 			Timestamps: timestampsExpected,
 		}
-		r1.MetricName.Tags = []storage.Tag{{
-			Key:   []byte("foo"),
-			Value: []byte("bar"),
-		}}
+		r1.MetricName.Tags = []storage.Tag{
+			{
+				Key:   []byte("foo"),
+				Value: []byte("bar"),
+			},
+			{
+				Key:   []byte("xx"),
+				Value: []byte("yy"),
+			},
+		}
 		resultExpected := []netstorage.Result{r1}
 		f(q, resultExpected)
 	})
+	t.Run(`vector * on(foo) group_left(__name__)`, func(t *testing.T) {
+		t.Parallel()
+		q := `label_set(time()/10, "foo", "bar", "xx", "yy", "__name__", "qwert") + on(foo) group_left(__name__)
+			label_set(time(), "foo", "bar", "__name__", "aaa")`
+		r1 := netstorage.Result{
+			MetricName: metricNameExpected,
+			Values:     []float64{1100, 1320, 1540, 1760, 1980, 2200},
+			Timestamps: timestampsExpected,
+		}
+		r1.MetricName.MetricGroup = []byte("aaa")
+		r1.MetricName.Tags = []storage.Tag{
+			{
+				Key:   []byte("foo"),
+				Value: []byte("bar"),
+			},
+			{
+				Key:   []byte("xx"),
+				Value: []byte("yy"),
+			},
+		}
+		resultExpected := []netstorage.Result{r1}
+		f(q, resultExpected)
+	})
+	t.Run(`vector * on(foo) group_right()`, func(t *testing.T) {
+		t.Parallel()
+		q := `sort(label_set(time()/10, "foo", "bar", "xx", "yy", "__name__", "qwert") + on(foo) group_right(xx) (
+			label_set(time(), "foo", "bar", "__name__", "aaa"),
+			label_set(time()+3, "foo", "bar", "__name__", "yyy","ppp", "123"),
+		))`
+		r1 := netstorage.Result{
+			MetricName: metricNameExpected,
+			Values:     []float64{1100, 1320, 1540, 1760, 1980, 2200},
+			Timestamps: timestampsExpected,
+		}
+		r1.MetricName.Tags = []storage.Tag{
+			{
+				Key:   []byte("foo"),
+				Value: []byte("bar"),
+			},
+			{
+				Key:   []byte("xx"),
+				Value: []byte("yy"),
+			},
+		}
+		r2 := netstorage.Result{
+			MetricName: metricNameExpected,
+			Values:     []float64{1103, 1323, 1543, 1763, 1983, 2203},
+			Timestamps: timestampsExpected,
+		}
+		r2.MetricName.Tags = []storage.Tag{
+			{
+				Key:   []byte("foo"),
+				Value: []byte("bar"),
+			},
+			{
+				Key:   []byte("ppp"),
+				Value: []byte("123"),
+			},
+			{
+				Key:   []byte("xx"),
+				Value: []byte("yy"),
+			},
+		}
+		resultExpected := []netstorage.Result{r1, r2}
+		f(q, resultExpected)
+	})
 	t.Run(`vector * on() group_left scalar`, func(t *testing.T) {
 		t.Parallel()
 		q := `sort_desc((label_set(time(), "foo", "bar") or label_set(10, "foo", "qwert")) * on() group_left 2)`
@@ -2160,21 +2270,78 @@ func TestExecSuccess(t *testing.T) {
 	})
 	t.Run(`histogram_quantile(negative-bucket-count)`, func(t *testing.T) {
 		t.Parallel()
-		q := `sort(histogram_quantile(0.6,
+		q := `histogram_quantile(0.6,
 			label_set(90, "foo", "bar", "le", "10")
 			or label_set(-100, "foo", "bar", "le", "30")
 			or label_set(300, "foo", "bar", "le", "+Inf")
-		))`
+		)`
+		r := netstorage.Result{
+			MetricName: metricNameExpected,
+			Values:     []float64{30, 30, 30, 30, 30, 30},
+			Timestamps: timestampsExpected,
+		}
+		r.MetricName.Tags = []storage.Tag{{
+			Key:   []byte("foo"),
+			Value: []byte("bar"),
+		}}
+		resultExpected := []netstorage.Result{r}
+		f(q, resultExpected)
+	})
+	t.Run(`histogram_quantile(nan-bucket-count)`, func(t *testing.T) {
+		t.Parallel()
+		q := `histogram_quantile(0.6,
+			label_set(90, "foo", "bar", "le", "10")
+			or label_set(NaN, "foo", "bar", "le", "30")
+			or label_set(300, "foo", "bar", "le", "+Inf")
+		)`
+		r := netstorage.Result{
+			MetricName: metricNameExpected,
+			Values:     []float64{30, 30, 30, 30, 30, 30},
+			Timestamps: timestampsExpected,
+		}
+		r.MetricName.Tags = []storage.Tag{{
+			Key:   []byte("foo"),
+			Value: []byte("bar"),
+		}}
+		resultExpected := []netstorage.Result{r}
+		f(q, resultExpected)
+	})
+	t.Run(`histogram_quantile(nan-bucket-count)`, func(t *testing.T) {
+		t.Parallel()
+		q := `histogram_quantile(0.2,
+			label_set(0, "foo", "bar", "le", "10")
+			or label_set(100, "foo", "bar", "le", "30")
+			or label_set(300, "foo", "bar", "le", "+Inf")
+		)`
+		r := netstorage.Result{
+			MetricName: metricNameExpected,
+			Values:     []float64{22, 22, 22, 22, 22, 22},
+			Timestamps: timestampsExpected,
+		}
+		r.MetricName.Tags = []storage.Tag{{
+			Key:   []byte("foo"),
+			Value: []byte("bar"),
+		}}
+		resultExpected := []netstorage.Result{r}
+		f(q, resultExpected)
+	})
+	t.Run(`histogram_quantile(zero-bucket-count)`, func(t *testing.T) {
+		t.Parallel()
+		q := `histogram_quantile(0.6,
+			label_set(0, "foo", "bar", "le", "10")
+			or label_set(0, "foo", "bar", "le", "30")
+			or label_set(0, "foo", "bar", "le", "+Inf")
+		)`
 		resultExpected := []netstorage.Result{}
 		f(q, resultExpected)
 	})
 	t.Run(`histogram_quantile(nan-bucket-count)`, func(t *testing.T) {
 		t.Parallel()
-		q := `sort(histogram_quantile(0.6,
-			label_set(90, "foo", "bar", "le", "10")
-			or label_set(NaN, "foo", "bar", "le", "30")
-			or label_set(300, "foo", "bar", "le", "+Inf")
-		))`
+		q := `histogram_quantile(0.6,
+			label_set(nan, "foo", "bar", "le", "10")
+			or label_set(nan, "foo", "bar", "le", "30")
+			or label_set(nan, "foo", "bar", "le", "+Inf")
+		)`
 		resultExpected := []netstorage.Result{}
 		f(q, resultExpected)
 	})
@@ -3723,6 +3890,107 @@ func TestExecSuccess(t *testing.T) {
 		resultExpected := []netstorage.Result{r1, r2, r3, r4, r5, r6}
 		f(q, resultExpected)
 	})
+	t.Run(`count_values by (xxx)`, func(t *testing.T) {
+		t.Parallel()
+		q := `count_values("xxx", label_set(10, "foo", "bar", "xxx", "aaa") or label_set(floor(time()/600), "foo", "bar", "baz", "xx")) by (xxx)`
+		r1 := netstorage.Result{
+			MetricName: metricNameExpected,
+			Values:     []float64{1, nan, nan, nan, nan, nan},
+			Timestamps: timestampsExpected,
+		}
+		r1.MetricName.Tags = []storage.Tag{
+			{
+				Key:   []byte("xxx"),
+				Value: []byte("1"),
+			},
+		}
+		r2 := netstorage.Result{
+			MetricName: metricNameExpected,
+			Values:     []float64{nan, 1, 1, 1, nan, nan},
+			Timestamps: timestampsExpected,
+		}
+		r2.MetricName.Tags = []storage.Tag{
+			{
+				Key:   []byte("xxx"),
+				Value: []byte("2"),
+			},
+		}
+		r3 := netstorage.Result{
+			MetricName: metricNameExpected,
+			Values:     []float64{nan, nan, nan, nan, 1, 1},
+			Timestamps: timestampsExpected,
+		}
+		r3.MetricName.Tags = []storage.Tag{
+			{
+				Key:   []byte("xxx"),
+				Value: []byte("3"),
+			},
+		}
+		r4 := netstorage.Result{
+			MetricName: metricNameExpected,
+			Values:     []float64{1, 1, 1, 1, 1, 1},
+			Timestamps: timestampsExpected,
+		}
+		r4.MetricName.Tags = []storage.Tag{
+			{
+				Key:   []byte("xxx"),
+				Value: []byte("10"),
+			},
+		}
+		resultExpected := []netstorage.Result{r1, r2, r3, r4}
+		f(q, resultExpected)
+	})
+	t.Run(`count_values without (baz)`, func(t *testing.T) {
+		t.Parallel()
+		q := `count_values("xxx", label_set(floor(time()/600), "foo", "bar")) without (baz)`
+		r1 := netstorage.Result{
+			MetricName: metricNameExpected,
+			Values:     []float64{1, nan, nan, nan, nan, nan},
+			Timestamps: timestampsExpected,
+		}
+		r1.MetricName.Tags = []storage.Tag{
+			{
+				Key:   []byte("foo"),
+				Value: []byte("bar"),
+			},
+			{
+				Key:   []byte("xxx"),
+				Value: []byte("1"),
+			},
+		}
+		r2 := netstorage.Result{
+			MetricName: metricNameExpected,
+			Values:     []float64{nan, 1, 1, 1, nan, nan},
+			Timestamps: timestampsExpected,
+		}
+		r2.MetricName.Tags = []storage.Tag{
+			{
+				Key:   []byte("foo"),
+				Value: []byte("bar"),
+			},
+			{
+				Key:   []byte("xxx"),
+				Value: []byte("2"),
+			},
+		}
+		r3 := netstorage.Result{
+			MetricName: metricNameExpected,
+			Values:     []float64{nan, nan, nan, nan, 1, 1},
+			Timestamps: timestampsExpected,
+		}
+		r3.MetricName.Tags = []storage.Tag{
+			{
+				Key:   []byte("foo"),
+				Value: []byte("bar"),
+			},
+			{
+				Key:   []byte("xxx"),
+				Value: []byte("3"),
+			},
+		}
+		resultExpected := []netstorage.Result{r1, r2, r3}
+		f(q, resultExpected)
+	})
 }

 func TestExecError(t *testing.T) {
--- a/app/vmselect/promql/lexer.go
+++ b/app/vmselect/promql/lexer.go
@@ -149,12 +149,6 @@ func scanString(s string) (string, error) {
 }

 func scanPositiveNumber(s string) (string, error) {
-	if strings.HasPrefix(s, "Inf") {
-		return "Inf", nil
-	}
-	if strings.HasPrefix(s, "NaN") {
-		return "NaN", nil
-	}
 	// Scan integer part. It may be empty if fractional part exists.
 	i := 0
 	for i < len(s) && isDecimalChar(s[i]) {
@@ -333,6 +327,14 @@ func scanTagFilterOpPrefix(s string) int {
 	return -1
 }

+func isInfOrNaN(s string) bool {
+	if len(s) != 3 {
+		return false
+	}
+	s = strings.ToLower(s)
+	return s == "inf" || s == "nan"
+}
+
 func isOffset(s string) bool {
 	s = strings.ToLower(s)
 	return s == "offset"
@@ -361,7 +363,7 @@ func isPositiveNumberPrefix(s string) bool {

 	// Check for .234 numbers
 	if s[0] != '.' || len(s) < 2 {
-		return strings.HasPrefix(s, "Inf") || strings.HasPrefix(s, "NaN")
+		return false
 	}
 	return isDecimalChar(s[1])
 }
--- a/app/vmselect/promql/parser.go
+++ b/app/vmselect/promql/parser.go
@@ -373,7 +373,7 @@ func (p *parser) parseSingleExpr() (expr, error) {
 }

 func (p *parser) parseSingleExprWithoutRollupSuffix() (expr, error) {
-	if isPositiveNumberPrefix(p.lex.Token) {
+	if isPositiveNumberPrefix(p.lex.Token) || isInfOrNaN(p.lex.Token) {
 		return p.parsePositiveNumberExpr()
 	}
 	if isStringPrefix(p.lex.Token) {
@@ -417,7 +417,7 @@ func (p *parser) parseSingleExprWithoutRollupSuffix() (expr, error) {
 }

 func (p *parser) parsePositiveNumberExpr() (*numberExpr, error) {
-	if !isPositiveNumberPrefix(p.lex.Token) {
+	if !isPositiveNumberPrefix(p.lex.Token) && !isInfOrNaN(p.lex.Token) {
 		return nil, fmt.Errorf(`positiveNumberExpr: unexpected token %q; want "number"`, p.lex.Token)
 	}

--- a/app/vmselect/promql/parser_test.go
+++ b/app/vmselect/promql/parser_test.go
@@ -170,14 +170,34 @@ func TestParsePromQLSuccess(t *testing.T) {
 	another(`-.2`, `-0.2`)
 	another(`-.2E-2`, `-0.002`)
 	same(`NaN`)
+	another(`nan`, `NaN`)
+	another(`NAN`, `NaN`)
+	another(`nAN`, `NaN`)
 	another(`Inf`, `+Inf`)
+	another(`INF`, `+Inf`)
+	another(`inf`, `+Inf`)
 	another(`+Inf`, `+Inf`)
 	another(`-Inf`, `-Inf`)
+	another(`-inF`, `-Inf`)

 	// binaryOpExpr
-	another(`NaN + 2 *3 * Inf`, `NaN`)
-	another(`Inf - Inf`, `NaN`)
-	another(`Inf + Inf`, `+Inf`)
+	another(`nan == nan`, `NaN`)
+	another(`nan ==bool nan`, `1`)
+	another(`nan !=bool nan`, `0`)
+	another(`nan !=bool 2`, `1`)
+	another(`2 !=bool nan`, `1`)
+	another(`nan >bool nan`, `0`)
+	another(`nan <bool nan`, `0`)
+	another(`1 ==bool nan`, `0`)
+	another(`NaN !=bool 1`, `1`)
+	another(`inf >=bool 2`, `1`)
+	another(`-1 >bool -inf`, `1`)
+	another(`-1 <bool -inf`, `0`)
+	another(`nan + 2 *3 * inf`, `NaN`)
+	another(`INF - Inf`, `NaN`)
+	another(`Inf + inf`, `+Inf`)
+	another(`1/0`, `+Inf`)
+	another(`0/0`, `NaN`)
 	another(`-m`, `0 - m`)
 	same(`m + ignoring () n[5m]`)
 	another(`M + IGNORING () N[5m]`, `M + ignoring () N[5m]`)
--- a/app/vmselect/promql/rollup.go
+++ b/app/vmselect/promql/rollup.go
@@ -45,6 +45,8 @@ var rollupFuncs = map[string]newRollupFunc{
 	"distinct_over_time": newRollupFuncOneArg(rollupDistinct),
 	"integrate":          newRollupFuncOneArg(rollupIntegrate),
 	"ideriv":             newRollupFuncOneArg(rollupIderiv),
+	"lifetime":           newRollupFuncOneArg(rollupLifetime),
+	"scrape_interval":    newRollupFuncOneArg(rollupScrapeInterval),
 	"rollup":             newRollupFuncOneArg(rollupFake),
 	"rollup_rate":        newRollupFuncOneArg(rollupFake), // + rollupFuncsRemoveCounterResets
 	"rollup_deriv":       newRollupFuncOneArg(rollupFake),
@@ -61,6 +63,8 @@ var rollupFuncsMayAdjustWindow = map[string]bool{
 	"deriv_fast":      true,
 	"irate":           true,
 	"rate":            true,
+	"lifetime":        true,
+	"scrape_interval": true,
 }

 var rollupFuncsRemoveCounterResets = map[string]bool{
@@ -193,23 +197,21 @@ func (rc *rollupConfig) Do(dstValues []float64, values []float64, timestamps []i

 	i := 0
 	j := 0
+	ni := 0
+	nj := 0
 	for _, tEnd := range rc.Timestamps {
 		tStart := tEnd - window
-		n := sort.Search(len(timestamps)-i, func(n int) bool {
-			return timestamps[i+n] > tStart
-		})
-		i += n
+		ni = seekFirstTimestampIdxAfter(timestamps[i:], tStart, ni)
+		i += ni
 		if j < i {
 			j = i
 		}
-		n = sort.Search(len(timestamps)-j, func(n int) bool {
-			return timestamps[j+n] > tEnd
-		})
-		j += n
+		nj = seekFirstTimestampIdxAfter(timestamps[j:], tEnd, nj)
+		j += nj

 		rfa.prevValue = nan
 		rfa.prevTimestamp = tStart - maxPrevInterval
-		if i > 0 && timestamps[i-1] > rfa.prevTimestamp {
+		if i < len(timestamps) && i > 0 && timestamps[i-1] > rfa.prevTimestamp {
 			rfa.prevValue = values[i-1]
 			rfa.prevTimestamp = timestamps[i-1]
 		}
@@ -225,6 +227,46 @@ func (rc *rollupConfig) Do(dstValues []float64, values []float64, timestamps []i
 	return dstValues
 }

+func seekFirstTimestampIdxAfter(timestamps []int64, seekTimestamp int64, nHint int) int {
+	if len(timestamps) == 0 || timestamps[0] > seekTimestamp {
+		return 0
+	}
+	startIdx := nHint - 2
+	if startIdx < 0 {
+		startIdx = 0
+	}
+	if startIdx >= len(timestamps) {
+		startIdx = len(timestamps) - 1
+	}
+	endIdx := nHint + 2
+	if endIdx > len(timestamps) {
+		endIdx = len(timestamps)
+	}
+	if startIdx > 0 && timestamps[startIdx] <= seekTimestamp {
+		timestamps = timestamps[startIdx:]
+		endIdx -= startIdx
+	} else {
+		startIdx = 0
+	}
+	if endIdx < len(timestamps) && timestamps[endIdx] > seekTimestamp {
+		timestamps = timestamps[:endIdx]
+	}
+	if len(timestamps) < 16 {
+		// Fast path: the number of timestamps to search is small, so scan them all.
+		for i, timestamp := range timestamps {
+			if timestamp > seekTimestamp {
+				return startIdx + i
+			}
+		}
+		return startIdx + len(timestamps)
+	}
+	// Slow path: too big len(timestamps), so use binary search.
+	i := sort.Search(len(timestamps), func(n int) bool {
+		return n >= 0 && n < len(timestamps) && timestamps[n] > seekTimestamp
+	})
+	return startIdx + i
+}
+
 func getMaxPrevInterval(timestamps []int64) int64 {
 	if len(timestamps) < 2 {
 		return int64(maxSilenceInterval)
@@ -615,10 +657,15 @@ func rollupDelta(rfa *rollupFuncArg) float64 {
 		if len(values) == 0 {
 			return nan
 		}
+		if len(values) == 1 {
+			// Assume that the previous non-existing value was 0.
+			return values[0]
+		}
 		prevValue = values[0]
 		values = values[1:]
 	}
 	if len(values) == 0 {
+		// Assume that the value didn't change on the given interval.
 		return 0
 	}
 	return values[len(values)-1] - prevValue
@@ -632,6 +679,7 @@ func rollupIdelta(rfa *rollupFuncArg) float64 {
 		if math.IsNaN(rfa.prevValue) {
 			return nan
 		}
+		// Assume that the value didn't change on the given interval.
 		return 0
 	}
 	lastValue := values[len(values)-1]
@@ -639,7 +687,8 @@ func rollupIdelta(rfa *rollupFuncArg) float64 {
 	if len(values) == 0 {
 		prevValue := rfa.prevValue
 		if math.IsNaN(prevValue) {
-			return 0
+			// Assume that the previous non-existing value was 0.
+			return lastValue
 		}
 		return lastValue - prevValue
 	}
@@ -661,7 +710,8 @@ func rollupDerivFast(rfa *rollupFuncArg) float64 {
 	prevValue := rfa.prevValue
 	prevTimestamp := rfa.prevTimestamp
 	if math.IsNaN(prevValue) {
-		if len(values) == 0 {
+		if len(values) < 2 {
+			// It is impossible to calculate derivative on 0 or 1 values.
 			return nan
 		}
 		prevValue = values[0]
@@ -670,6 +720,7 @@ func rollupDerivFast(rfa *rollupFuncArg) float64 {
 		timestamps = timestamps[1:]
 	}
 	if len(values) == 0 {
+		// Assume that the value didn't change on the given interval.
 		return 0
 	}
 	vEnd := values[len(values)-1]
@@ -684,11 +735,12 @@ func rollupIderiv(rfa *rollupFuncArg) float64 {
 	// before calling rollup funcs.
 	values := rfa.values
 	timestamps := rfa.timestamps
-	if len(values) == 0 {
-		if math.IsNaN(rfa.prevValue) {
+	if len(values) < 2 {
+		if len(values) == 0 || math.IsNaN(rfa.prevValue) {
+			// It is impossible to calculate derivative on 0 or 1 values.
 			return nan
 		}
-		return 0
+		return (values[0] - rfa.prevValue) / (float64(timestamps[0]-rfa.prevTimestamp) * 1e-3)
 	}
 	vEnd := values[len(values)-1]
 	tEnd := timestamps[len(timestamps)-1]
@@ -712,7 +764,37 @@ func rollupIderiv(rfa *rollupFuncArg) float64 {
 	}
 	dv := vEnd - vStart
 	dt := tEnd - tStart
-	return dv / (float64(dt) / 1000)
+	return dv / (float64(dt) * 1e-3)
+}
+
+func rollupLifetime(rfa *rollupFuncArg) float64 {
+	// Calculate the duration between the first and the last data points.
+	timestamps := rfa.timestamps
+	if math.IsNaN(rfa.prevValue) {
+		if len(timestamps) < 2 {
+			return nan
+		}
+		return float64(timestamps[len(timestamps)-1]-timestamps[0]) * 1e-3
+	}
+	if len(timestamps) == 0 {
+		return nan
+	}
+	return float64(timestamps[len(timestamps)-1]-rfa.prevTimestamp) * 1e-3
+}
+
+func rollupScrapeInterval(rfa *rollupFuncArg) float64 {
+	// Calculate the average interval between data points.
+	timestamps := rfa.timestamps
+	if math.IsNaN(rfa.prevValue) {
+		if len(timestamps) < 2 {
+			return nan
+		}
+		return float64(timestamps[len(timestamps)-1]-timestamps[0]) * 1e-3 / float64(len(timestamps)-1)
+	}
+	if len(timestamps) == 0 {
+		return nan
+	}
+	return (float64(timestamps[len(timestamps)-1]-rfa.prevTimestamp) * 1e-3) / float64(len(timestamps))
 }

 func rollupChanges(rfa *rollupFuncArg) float64 {
--- a/app/vmselect/promql/rollup_result_cache.go
+++ b/app/vmselect/promql/rollup_result_cache.go
@@ -4,14 +4,15 @@ import (
 	"crypto/rand"
 	"flag"
 	"fmt"
-	"runtime"
 	"sync"
 	"sync/atomic"
 	"time"

+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/memory"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/workingsetcache"
 	"github.com/VictoriaMetrics/fastcache"
 	"github.com/VictoriaMetrics/metrics"
 )
@@ -19,7 +20,7 @@ import (
 var disableCache = flag.Bool("search.disableCache", false, "Whether to disable response caching. This may be useful during data backfilling")

 var rollupResultCacheV = &rollupResultCache{
-	fastcache.New(1024 * 1024), // This is a cache for testing.
+	c: workingsetcache.New(1024*1024, time.Hour), // This is a cache for testing.
 }
 var rollupResultCachePath string

@@ -43,12 +44,13 @@ var (
 func InitRollupResultCache(cachePath string) {
 	rollupResultCachePath = cachePath
 	startTime := time.Now()
-	var c *fastcache.Cache
+	cacheSize := getRollupResultCacheSize()
+	var c *workingsetcache.Cache
 	if len(rollupResultCachePath) > 0 {
 		logger.Infof("loading rollupResult cache from %q...", rollupResultCachePath)
-		c = fastcache.LoadFromFileOrNew(rollupResultCachePath, getRollupResultCacheSize())
+		c = workingsetcache.Load(rollupResultCachePath, cacheSize, time.Hour)
 	} else {
-		c = fastcache.New(getRollupResultCacheSize())
+		c = workingsetcache.New(cacheSize, time.Hour)
 	}
 	if *disableCache {
 		c.Reset()
@@ -96,25 +98,26 @@ func InitRollupResultCache(cachePath string) {
 // StopRollupResultCache closes the rollupResult cache.
 func StopRollupResultCache() {
 	if len(rollupResultCachePath) == 0 {
-		rollupResultCacheV.c.Reset()
+		rollupResultCacheV.c.Stop()
+		rollupResultCacheV.c = nil
 		return
 	}
-	gomaxprocs := runtime.GOMAXPROCS(-1)
 	logger.Infof("saving rollupResult cache to %q...", rollupResultCachePath)
 	startTime := time.Now()
-	if err := rollupResultCacheV.c.SaveToFileConcurrent(rollupResultCachePath, gomaxprocs); err != nil {
+	if err := rollupResultCacheV.c.Save(rollupResultCachePath); err != nil {
 		logger.Errorf("cannot close rollupResult cache at %q: %s", rollupResultCachePath, err)
-	} else {
-		var fcs fastcache.Stats
-		rollupResultCacheV.c.UpdateStats(&fcs)
-		rollupResultCacheV.c.Reset()
-		logger.Infof("saved rollupResult cache to %q in %s; entriesCount: %d, sizeBytes: %d",
-			rollupResultCachePath, time.Since(startTime), fcs.EntriesCount, fcs.BytesSize)
+		return
 	}
+	var fcs fastcache.Stats
+	rollupResultCacheV.c.UpdateStats(&fcs)
+	rollupResultCacheV.c.Stop()
+	rollupResultCacheV.c = nil
+	logger.Infof("saved rollupResult cache to %q in %s; entriesCount: %d, sizeBytes: %d",
+		rollupResultCachePath, time.Since(startTime), fcs.EntriesCount, fcs.BytesSize)
 }

 type rollupResultCache struct {
-	c *fastcache.Cache
+	c *workingsetcache.Cache
 }

 var rollupResultCacheResets = metrics.NewCounter(`vm_cache_resets_total{type="promql/rollupResult"}`)
@@ -148,15 +151,23 @@ func (rrc *rollupResultCache) Get(funcName string, ec *EvalConfig, me *metricExp
 		return nil, ec.Start
 	}
 	bb.B = key.Marshal(bb.B[:0])
-	resultBuf := rrc.c.GetBig(nil, bb.B)
-	if len(resultBuf) == 0 {
+	compressedResultBuf := resultBufPool.Get()
+	defer resultBufPool.Put(compressedResultBuf)
+	compressedResultBuf.B = rrc.c.GetBig(compressedResultBuf.B[:0], bb.B)
+	if len(compressedResultBuf.B) == 0 {
 		mi.RemoveKey(key)
 		metainfoBuf = mi.Marshal(metainfoBuf[:0])
 		bb.B = marshalRollupResultCacheKey(bb.B[:0], funcName, me, iafc, window, ec.Step)
 		rrc.c.Set(bb.B, metainfoBuf)
 		return nil, ec.Start
 	}
-	tss, err := unmarshalTimeseriesFast(resultBuf)
+	// Decompress into newly allocated byte slice, since tss returned from unmarshalTimeseriesFast
+	// refers to the byte slice, so it cannot be returned to the resultBufPool.
+	resultBuf, err := encoding.DecompressZSTD(nil, compressedResultBuf.B)
+	if err != nil {
+		logger.Panicf("BUG: cannot decompress resultBuf from rollupResultCache: %s; it looks like it was improperly saved", err)
+	}
+	tss, err = unmarshalTimeseriesFast(resultBuf)
 	if err != nil {
 		logger.Panicf("BUG: cannot unmarshal timeseries from rollupResultCache: %s; it looks like it was improperly saved", err)
 	}
@@ -196,6 +207,8 @@ func (rrc *rollupResultCache) Get(funcName string, ec *EvalConfig, me *metricExp
 	return tss, newStart
 }

+var resultBufPool bytesutil.ByteBufferPool
+
 func (rrc *rollupResultCache) Put(funcName string, ec *EvalConfig, me *metricExpr, iafc *incrementalAggrFuncContext, window int64, tss []*timeseries) {
 	if *disableCache || len(tss) == 0 || !ec.mayCache() {
 		return
@@ -227,11 +240,16 @@ func (rrc *rollupResultCache) Put(funcName string, ec *EvalConfig, me *metricExp

 	// Store tss in the cache.
 	maxMarshaledSize := getRollupResultCacheSize() / 4
-	tssMarshaled := marshalTimeseriesFast(tss, maxMarshaledSize, ec.Step)
-	if tssMarshaled == nil {
+	resultBuf := resultBufPool.Get()
+	defer resultBufPool.Put(resultBuf)
+	resultBuf.B = marshalTimeseriesFast(resultBuf.B[:0], tss, maxMarshaledSize, ec.Step)
+	if len(resultBuf.B) == 0 {
 		tooBigRollupResults.Inc()
 		return
 	}
+	compressedResultBuf := resultBufPool.Get()
+	defer resultBufPool.Put(compressedResultBuf)
+	compressedResultBuf.B = encoding.CompressZSTDLevel(compressedResultBuf.B[:0], resultBuf.B, 1)

 	bb := bbPool.Get()
 	defer bbPool.Put(bb)
@@ -240,7 +258,7 @@ func (rrc *rollupResultCache) Put(funcName string, ec *EvalConfig, me *metricExp
 	key.prefix = rollupResultCacheKeyPrefix
 	key.suffix = atomic.AddUint64(&rollupResultCacheKeySuffix, 1)
 	bb.B = key.Marshal(bb.B[:0])
-	rrc.c.SetBig(bb.B, tssMarshaled)
+	rrc.c.SetBig(bb.B, compressedResultBuf.B)

 	bb.B = marshalRollupResultCacheKey(bb.B[:0], funcName, me, iafc, window, ec.Step)
 	metainfoBuf := rrc.c.Get(nil, bb.B)
@@ -270,7 +288,7 @@ var (
 var tooBigRollupResults = metrics.NewCounter("vm_too_big_rollup_results_total")

 // Increment this value every time the format of the cache changes.
-const rollupResultCacheVersion = 5
+const rollupResultCacheVersion = 6

 func marshalRollupResultCacheKey(dst []byte, funcName string, me *metricExpr, iafc *incrementalAggrFuncContext, window, step int64) []byte {
 	dst = append(dst, rollupResultCacheVersion)
--- a/app/vmselect/promql/rollup_test.go
+++ b/app/vmselect/promql/rollup_test.go
@@ -45,8 +45,19 @@ func TestRollupIderivDuplicateTimestamps(t *testing.T) {
 		timestamps: []int64{100},
 	}
 	n = rollupIderiv(rfa)
-	if n != 0 {
-		t.Fatalf("unexpected value; got %v; want %v", n, 0)
+	if !math.IsNaN(n) {
+		t.Fatalf("unexpected value; got %v; want %v", n, nan)
+	}
+
+	rfa = &rollupFuncArg{
+		prevTimestamp: 90,
+		prevValue:     10,
+		values:        []float64{15},
+		timestamps:    []int64{100},
+	}
+	n = rollupIderiv(rfa)
+	if n != 500 {
+		t.Fatalf("unexpected value; got %v; want %v", n, 0.5)
 	}

 	rfa = &rollupFuncArg{
@@ -171,7 +182,8 @@ func testRollupFunc(t *testing.T, funcName string, args []interface{}, meExpecte
 				t.Fatalf("unexpected value; got %v; want %v", v, vExpected)
 			}
 		} else {
-			if v != vExpected {
+			eps := math.Abs(v - vExpected)
+			if eps > 1e-14 {
 				t.Fatalf("unexpected value; got %v; want %v", v, vExpected)
 			}
 		}
@@ -347,7 +359,7 @@ func TestRollupNoWindowNoPoints(t *testing.T) {
 		}
 		rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
 		values := rc.Do(nil, testValues, testTimestamps)
-		valuesExpected := []float64{2, 0, 0, 0, 0, 0, 0, 0}
+		valuesExpected := []float64{2, 0, 0, 0, nan, nan, nan, nan}
 		timestampsExpected := []int64{120, 124, 128, 132, 136, 140, 144, 148}
 		testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
 	})
@@ -378,7 +390,7 @@ func TestRollupWindowNoPoints(t *testing.T) {
 		}
 		rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
 		values := rc.Do(nil, testValues, testTimestamps)
-		valuesExpected := []float64{34, 34, 34, nan}
+		valuesExpected := []float64{nan, nan, nan, nan}
 		timestampsExpected := []int64{161, 171, 181, 191}
 		testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
 	})
@@ -409,7 +421,7 @@ func TestRollupNoWindowPartialPoints(t *testing.T) {
 		}
 		rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
 		values := rc.Do(nil, testValues, testTimestamps)
-		valuesExpected := []float64{12, 44, 34, 34}
+		valuesExpected := []float64{12, 44, 34, nan}
 		timestampsExpected := []int64{100, 120, 140, 160}
 		testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
 	})
@@ -454,7 +466,7 @@ func TestRollupWindowPartialPoints(t *testing.T) {
 		}
 		rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
 		values := rc.Do(nil, testValues, testTimestamps)
-		valuesExpected := []float64{44, 34, 34, 34}
+		valuesExpected := []float64{44, 34, 34, nan}
 		timestampsExpected := []int64{100, 120, 140, 160}
 		testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
 	})
@@ -468,7 +480,7 @@ func TestRollupWindowPartialPoints(t *testing.T) {
 		}
 		rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
 		values := rc.Do(nil, testValues, testTimestamps)
-		valuesExpected := []float64{nan, 54, 44, 34}
+		valuesExpected := []float64{nan, 54, 44, nan}
 		timestampsExpected := []int64{0, 50, 100, 150}
 		testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
 	})
@@ -569,10 +581,66 @@ func TestRollupFuncsNoWindow(t *testing.T) {
 		}
 		rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
 		values := rc.Do(nil, testValues, testTimestamps)
-		valuesExpected := []float64{0, 33, -87, 0}
+		valuesExpected := []float64{123, 33, -87, 0}
 		timestampsExpected := []int64{10, 50, 90, 130}
 		testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
 	})
+	t.Run("lifetime_1", func(t *testing.T) {
+		rc := rollupConfig{
+			Func:   rollupLifetime,
+			Start:  0,
+			End:    160,
+			Step:   40,
+			Window: 0,
+		}
+		rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
+		values := rc.Do(nil, testValues, testTimestamps)
+		valuesExpected := []float64{nan, 0.031, 0.044, 0.04, 0.01}
+		timestampsExpected := []int64{0, 40, 80, 120, 160}
+		testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
+	})
+	t.Run("lifetime_2", func(t *testing.T) {
+		rc := rollupConfig{
+			Func:   rollupLifetime,
+			Start:  0,
+			End:    160,
+			Step:   40,
+			Window: 200,
+		}
+		rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
+		values := rc.Do(nil, testValues, testTimestamps)
+		valuesExpected := []float64{nan, 0.031, 0.075, 0.115, 0.125}
+		timestampsExpected := []int64{0, 40, 80, 120, 160}
+		testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
+	})
+	t.Run("scrape_interval_1", func(t *testing.T) {
+		rc := rollupConfig{
+			Func:   rollupScrapeInterval,
+			Start:  0,
+			End:    160,
+			Step:   40,
+			Window: 0,
+		}
+		rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
+		values := rc.Do(nil, testValues, testTimestamps)
+		valuesExpected := []float64{nan, 0.010333333333333333, 0.011, 0.013333333333333334, 0.01}
+		timestampsExpected := []int64{0, 40, 80, 120, 160}
+		testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
+	})
+	t.Run("scrape_interval_2", func(t *testing.T) {
+		rc := rollupConfig{
+			Func:   rollupScrapeInterval,
+			Start:  0,
+			End:    160,
+			Step:   40,
+			Window: 80,
+		}
+		rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
+		values := rc.Do(nil, testValues, testTimestamps)
+		valuesExpected := []float64{nan, 0.010333333333333333, 0.010714285714285714, 0.012, 0.0125}
+		timestampsExpected := []int64{0, 40, 80, 120, 160}
+		testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
+	})
 	t.Run("changes", func(t *testing.T) {
 		rc := rollupConfig{
 			Func:   rollupChanges,
@@ -685,7 +753,7 @@ func TestRollupFuncsNoWindow(t *testing.T) {
 		timestampsExpected := []int64{0, 40, 80, 120, 160}
 		testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
 	})
-	t.Run("distinct", func(t *testing.T) {
+	t.Run("distinct_over_time_1", func(t *testing.T) {
 		rc := rollupConfig{
 			Func:   rollupDistinct,
 			Start:  0,
@@ -699,6 +767,20 @@ func TestRollupFuncsNoWindow(t *testing.T) {
 		timestampsExpected := []int64{0, 40, 80, 120, 160}
 		testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
 	})
+	t.Run("distinct_over_time_2", func(t *testing.T) {
+		rc := rollupConfig{
+			Func:   rollupDistinct,
+			Start:  0,
+			End:    160,
+			Step:   40,
+			Window: 80,
+		}
+		rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
+		values := rc.Do(nil, testValues, testTimestamps)
+		valuesExpected := []float64{nan, 4, 7, 6, 3}
+		timestampsExpected := []int64{0, 40, 80, 120, 160}
+		testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
+	})
 }

 func testRowsEqual(t *testing.T, values []float64, timestamps []int64, valuesExpected []float64, timestampsExpected []int64) {
--- a/app/vmselect/promql/timeseries.go
+++ b/app/vmselect/promql/timeseries.go
@@ -76,7 +76,7 @@ func putTimeseries(ts *timeseries) {

 var timeseriesPool sync.Pool

-func marshalTimeseriesFast(tss []*timeseries, maxSize int, step int64) []byte {
+func marshalTimeseriesFast(dst []byte, tss []*timeseries, maxSize int, step int64) []byte {
 	if len(tss) == 0 {
 		logger.Panicf("BUG: tss cannot be empty")
 	}
@@ -92,13 +92,13 @@ func marshalTimeseriesFast(tss []*timeseries, maxSize int, step int64) []byte {

 	if size > maxSize {
 		// Do not marshal tss, since it would occupy too much space
-		return nil
+		return dst
 	}

 	// Allocate the buffer for the marshaled tss before its' marshaling.
 	// This should reduce memory fragmentation and memory usage.
-	dst := make([]byte, 0, size)
-	dst = marshalFastTimestamps(dst, tss[0].Timestamps)
+	dst = bytesutil.Resize(dst, size)
+	dst = marshalFastTimestamps(dst[:0], tss[0].Timestamps)
 	for _, ts := range tss {
 		dst = ts.marshalFastNoTimestamps(dst)
 	}
--- a/app/vmselect/promql/timeseries_test.go
+++ b/app/vmselect/promql/timeseries_test.go
@@ -74,7 +74,7 @@ func TestTimeseriesMarshalUnmarshalFast(t *testing.T) {

 			tssOrig = append(tssOrig, &ts)
 		}
-		buf := marshalTimeseriesFast(tssOrig, 1e6, 123)
+		buf := marshalTimeseriesFast(nil, tssOrig, 1e6, 123)
 		tssGot, err := unmarshalTimeseriesFast(buf)
 		if err != nil {
 			t.Fatalf("error in unmarshalTimeseriesFast: %s", err)
--- a/app/vmselect/promql/transform.go
+++ b/app/vmselect/promql/transform.go
@@ -63,6 +63,7 @@ var transformFuncs = map[string]transformFunc{
 	"label_copy":         transformLabelCopy,
 	"label_move":         transformLabelMove,
 	"label_transform":    transformLabelTransform,
+	"label_value":        transformLabelValue,
 	"union":              transformUnion,
 	"":                   transformUnion, // empty func is a synonim to union
 	"keep_last_value":    transformKeepLastValue,
@@ -308,8 +309,16 @@ func transformHistogramQuantile(tfa *transformFuncArg) ([]*timeseries, error) {
 	bbPool.Put(bb)

 	// Calculate quantile for each group in m
-	lastNonInf := func(xss []x) float64 {
-		for len(xss) > 0 && math.IsInf(xss[len(xss)-1].le, 0) {
+
+	lastNonInf := func(i int, xss []x) float64 {
+		for len(xss) > 0 {
+			xsLast := xss[len(xss)-1]
+			if xsLast.ts.Values[i] == 0 {
+				return nan
+			}
+			if !math.IsInf(xsLast.le, 0) {
+				break
+			}
 			xss = xss[:len(xss)-1]
 		}
 		if len(xss) == 0 {
@@ -318,27 +327,38 @@ func transformHistogramQuantile(tfa *transformFuncArg) ([]*timeseries, error) {
 		return xss[len(xss)-1].le
 	}
 	quantile := func(i int, phis []float64, xss []x) float64 {
-		vPrev := float64(0)
-		lePrev := float64(0)
 		phi := phis[i]
 		if math.IsNaN(phi) {
 			return nan
 		}
-		// Verify for broken buckets with NaN or negative values.
+		// Fix broken buckets.
+		// They are already sorted by le, so their values must be in ascending order,
+		// since the next bucket value includes all the previous buckets.
+		vPrev := float64(0)
 		for _, xs := range xss {
 			v := xs.ts.Values[i]
-			if math.IsNaN(v) || v < 0 {
-				// Broken bucket.
-				return nan
+			if math.IsNaN(v) || v < vPrev {
+				xs.ts.Values[i] = vPrev
+			} else {
+				vPrev = v
 			}
 		}
+		if len(xss) == 0 {
+			return nan
+		}
 		if phi < 0 {
 			return -inf
 		}
 		if phi > 1 {
 			return inf
 		}
-		vReq := xss[len(xss)-1].ts.Values[i] * phi
+		vLast := xss[len(xss)-1].ts.Values[i]
+		if vLast == 0 {
+			return nan
+		}
+		vReq := vLast * phi
+		vPrev = 0
+		lePrev := float64(0)
 		for _, xs := range xss {
 			v := xs.ts.Values[i]
 			le := xs.le
@@ -348,16 +368,16 @@ func transformHistogramQuantile(tfa *transformFuncArg) ([]*timeseries, error) {
 				continue
 			}
 			if math.IsInf(le, 0) {
-				return lastNonInf(xss)
+				return lastNonInf(i, xss)
 			}
 			if v == vPrev {
 				return lePrev
 			}
 			return lePrev + (le-lePrev)*(vReq-vPrev)/(v-vPrev)
 		}
-		return lastNonInf(xss)
+		return lastNonInf(i, xss)
 	}
-	var rvs []*timeseries
+	rvs := make([]*timeseries, 0, len(m))
 	for _, xss := range m {
 		sort.Slice(xss, func(i, j int) bool {
 			return xss[i].le < xss[j].le
@@ -881,6 +901,33 @@ func labelReplace(tss []*timeseries, srcLabel string, r *regexp.Regexp, dstLabel
 	return tss, nil
 }

+func transformLabelValue(tfa *transformFuncArg) ([]*timeseries, error) {
+	args := tfa.args
+	if err := expectTransformArgsNum(args, 2); err != nil {
+		return nil, err
+	}
+	labelName, err := getString(args[1], 1)
+	if err != nil {
+		return nil, fmt.Errorf("cannot get label name: %s", err)
+	}
+	rvs := args[0]
+	for _, ts := range rvs {
+		ts.MetricName.ResetMetricGroup()
+		labelValue := ts.MetricName.GetTagValue(labelName)
+		v, err := strconv.ParseFloat(string(labelValue), 64)
+		if err != nil {
+			v = nan
+		}
+		values := ts.Values
+		for i := range values {
+			values[i] = v
+		}
+	}
+	// Do not remove timeseries with only NaN values, so `default` could be applied to them:
+	// label_value(q, "label") default 123
+	return rvs, nil
+}
+
 func transformLn(v float64) float64 {
 	return math.Log(v)
 }
--- a/app/vmstorage/main.go
+++ b/app/vmstorage/main.go
@@ -365,6 +365,22 @@ func registerStorageMetrics() {
 		return float64(m().TooSmallTimestampRows)
 	})

+	metrics.NewGauge(`vm_concurrent_addrows_limit_reached_total`, func() float64 {
+		return float64(m().AddRowsConcurrencyLimitReached)
+	})
+	metrics.NewGauge(`vm_concurrent_addrows_limit_timeout_total`, func() float64 {
+		return float64(m().AddRowsConcurrencyLimitTimeout)
+	})
+	metrics.NewGauge(`vm_concurrent_addrows_dropped_rows_total`, func() float64 {
+		return float64(m().AddRowsConcurrencyDroppedRows)
+	})
+	metrics.NewGauge(`vm_concurrent_addrows_capacity`, func() float64 {
+		return float64(m().AddRowsConcurrencyCapacity)
+	})
+	metrics.NewGauge(`vm_concurrent_addrows_current`, func() float64 {
+		return float64(m().AddRowsConcurrencyCurrent)
+	})
+
 	metrics.NewGauge(`vm_rows{type="storage/big"}`, func() float64 {
 		return float64(tm().BigRowsCount)
 	})
--- a/deployment/docker/Makefile
+++ b/deployment/docker/Makefile
@@ -1,5 +1,5 @@
 DOCKER_NAMESPACE := victoriametrics
-BUILDER_IMAGE := local/builder:go1.12.7
+BUILDER_IMAGE := local/builder:go1.13.0
 CERTS_IMAGE := local/certs:1.0.2

 package-certs:
--- a/deployment/docker/builder/Dockerfile
+++ b/deployment/docker/builder/Dockerfile
@@ -1,2 +1,2 @@
-FROM golang:1.12.7
+FROM golang:1.13.0
 STOPSIGNAL SIGINT
--- a/go.mod
+++ b/go.mod
@@ -2,17 +2,17 @@ module github.com/VictoriaMetrics/VictoriaMetrics

 require (
 	github.com/VictoriaMetrics/fastcache v1.5.1
-	github.com/VictoriaMetrics/metrics v1.7.0
+	github.com/VictoriaMetrics/metrics v1.7.1
 	github.com/cespare/xxhash/v2 v2.0.1-0.20190104013014-3767db7a7e18
 	github.com/golang/snappy v0.0.1
 	github.com/google/go-cmp v0.3.0 // indirect
-	github.com/klauspost/compress v1.7.4
+	github.com/klauspost/compress v1.7.6
 	github.com/spaolacci/murmur3 v1.1.0 // indirect
 	github.com/valyala/fastjson v1.4.1
-	github.com/valyala/gozstd v1.5.1
+	github.com/valyala/gozstd v1.6.1
 	github.com/valyala/histogram v1.0.1
-	github.com/valyala/quicktemplate v1.1.1
-	golang.org/x/sys v0.0.0-20190712062909-fae7ac547cb7
+	github.com/valyala/quicktemplate v1.2.0
+	golang.org/x/sys v0.0.0-20190813064441-fde4db37ae7a
 )

 go 1.12
--- a/go.sum
+++ b/go.sum
@@ -3,8 +3,8 @@ github.com/OneOfOne/xxhash v1.2.5 h1:zl/OfRA6nftbBK9qTohYBJ5xvw6C/oNKizR7cZGl3cI
 github.com/OneOfOne/xxhash v1.2.5/go.mod h1:eZbhyaAYD41SGSSsnmcpxVoRiQ/MPUTjUdIIOT9Um7Q=
 github.com/VictoriaMetrics/fastcache v1.5.1 h1:qHgHjyoNFV7jgucU8QZUuU4gcdhfs8QW1kw68OD2Lag=
 github.com/VictoriaMetrics/fastcache v1.5.1/go.mod h1:+jv9Ckb+za/P1ZRg/sulP5Ni1v49daAVERr0H3CuscE=
-github.com/VictoriaMetrics/metrics v1.7.0 h1:+bdBpPEMOSgOwoQFf4KHqgeAy6xiXn/uzlrKx2YSCT8=
-github.com/VictoriaMetrics/metrics v1.7.0/go.mod h1:LU2j9qq7xqZYXz8tF3/RQnB2z2MbZms5TDiIg9/NHiQ=
+github.com/VictoriaMetrics/metrics v1.7.1 h1:g2qrY6Upn8rvlvR40cGHFY0crwi4hpqF0n9vJMNsCSg=
+github.com/VictoriaMetrics/metrics v1.7.1/go.mod h1:LU2j9qq7xqZYXz8tF3/RQnB2z2MbZms5TDiIg9/NHiQ=
 github.com/allegro/bigcache v1.2.1-0.20190218064605-e24eb225f156 h1:eMwmnE/GDgah4HI848JfFxHt+iPb26b4zyfspmqY0/8=
 github.com/allegro/bigcache v1.2.1-0.20190218064605-e24eb225f156/go.mod h1:Cb/ax3seSYIx7SuZdm2G2xzfwmv3TPSk2ucNfQESPXM=
 github.com/cespare/xxhash v1.1.0 h1:a6HrQnmkObjyL+Gs60czilIUGqrzKutQD6XZog3p+ko=
@@ -20,8 +20,8 @@ github.com/google/go-cmp v0.3.0 h1:crn/baboCvb5fXaQ0IJ1SGTsTVrWpDsCWC8EGETZijY=
 github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
 github.com/klauspost/compress v1.4.0/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
 github.com/klauspost/compress v1.4.1/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
-github.com/klauspost/compress v1.7.4 h1:4UqAIzZ1Ns2epCTyJ1d2xMWvxtX+FNSCYWeOFogK9nc=
-github.com/klauspost/compress v1.7.4/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
+github.com/klauspost/compress v1.7.6 h1:GH2karLOcuZtA5a3+KuzSU33A2cvcHGbtEWM6K4t7oU=
+github.com/klauspost/compress v1.7.6/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
 github.com/klauspost/cpuid v0.0.0-20180405133222-e7e905edc00e/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
 github.com/klauspost/cpuid v1.2.0 h1:NMpwD2G9JSFOE1/TJjGSo5zG7Yb2bTe7eq1jH+irmeE=
 github.com/klauspost/cpuid v1.2.0/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
@@ -41,13 +41,13 @@ github.com/valyala/fastjson v1.4.1 h1:hrltpHpIpkaxll8QltMU8c3QZ5+qIiCL8yKqPFJI/y
 github.com/valyala/fastjson v1.4.1/go.mod h1:nV6MsjxL2IMJQUoHDIrjEI7oLyeqK6aBD7EFWPsvP8o=
 github.com/valyala/fastrand v1.0.0 h1:LUKT9aKer2dVQNUi3waewTbKV+7H17kvWFNKs2ObdkI=
 github.com/valyala/fastrand v1.0.0/go.mod h1:HWqCzkrkg6QXT8V2EXWvXCoow7vLwOFN002oeRzjapQ=
-github.com/valyala/gozstd v1.5.1 h1:ZLepItgu2g+B2CfVQy6KCV/as8lnJ7ef1KU6DPxQSS0=
-github.com/valyala/gozstd v1.5.1/go.mod h1:oYOS+oJovjw9ewtrwEYb9+ybolEXd6pHyLMuAWN5zts=
+github.com/valyala/gozstd v1.6.1 h1:oFN2mNW0kOr1fEKJuLpDwakNb6Y9fElVEBZmPEsFTUw=
+github.com/valyala/gozstd v1.6.1/go.mod h1:y5Ew47GLlP37EkTB+B4s7r6A5rdaeB7ftbl9zoYiIPQ=
 github.com/valyala/histogram v1.0.1 h1:FzA7n2Tz/wKRMejgu3PV1vw3htAklTjjuoI6z3d4KDg=
 github.com/valyala/histogram v1.0.1/go.mod h1:lQy0xA4wUz2+IUnf97SivorsJIp8FxsnRd6x25q7Mto=
-github.com/valyala/quicktemplate v1.1.1 h1:C58y/wN0FMTi2PR0n3onltemfFabany53j7M6SDDB8k=
-github.com/valyala/quicktemplate v1.1.1/go.mod h1:EH+4AkTd43SvgIbQHYu59/cJyxDoOVRUAfrukLPuGJ4=
+github.com/valyala/quicktemplate v1.2.0 h1:BaO1nHTkspYzmAjPXj0QiDJxai96tlcZyKcI9dyEGvM=
+github.com/valyala/quicktemplate v1.2.0/go.mod h1:EH+4AkTd43SvgIbQHYu59/cJyxDoOVRUAfrukLPuGJ4=
 github.com/valyala/tcplisten v0.0.0-20161114210144-ceec8f93295a/go.mod h1:v3UYOV9WzVtRmSR+PDvWpU/qWl4Wa5LApYYX4ZtKbio=
 golang.org/x/net v0.0.0-20180911220305-26e67e76b6c3/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
-golang.org/x/sys v0.0.0-20190712062909-fae7ac547cb7 h1:LepdCS8Gf/MVejFIt8lsiexZATdoGVyp5bcyS+rYoUI=
-golang.org/x/sys v0.0.0-20190712062909-fae7ac547cb7/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20190813064441-fde4db37ae7a h1:aYOabOQFp6Vj6W1F80affTUvO9UxmJRx8K0gsfABByQ=
+golang.org/x/sys v0.0.0-20190813064441-fde4db37ae7a/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
--- a/lib/filestream/filestream_freebsd.go
+++ b/lib/filestream/filestream_freebsd.go
@@ -0,0 +1,64 @@
+package filestream
+
+import (
+	"fmt"
+	"syscall"
+
+	"golang.org/x/sys/unix"
+)
+
+func (st *streamTracker) adviseDontNeed(n int, fdatasync bool) error {
+	st.length += uint64(n)
+	if st.fd == 0 {
+		return nil
+	}
+	if st.length < dontNeedBlockSize {
+		return nil
+	}
+	blockSize := st.length - (st.length % dontNeedBlockSize)
+	if fdatasync {
+		if err := unixFdatasync(int(st.fd)); err != nil {
+			return fmt.Errorf("unix.Fdatasync error: %s", err)
+		}
+	}
+	if err := unix.Fadvise(int(st.fd), int64(st.offset), int64(blockSize), unix.FADV_DONTNEED); err != nil {
+		return fmt.Errorf("unix.Fadvise(FADV_DONTNEEDED, %d, %d) error: %s", st.offset, blockSize, err)
+	}
+	st.offset += blockSize
+	st.length -= blockSize
+	return nil
+}
+
+func (st *streamTracker) close() error {
+	if st.fd == 0 {
+		return nil
+	}
+	// Advise the whole file as it shouldn't be cached.
+	if err := unix.Fadvise(int(st.fd), 0, 0, unix.FADV_DONTNEED); err != nil {
+		return fmt.Errorf("unix.Fadvise(FADV_DONTNEEDED, 0, 0) error: %s", err)
+	}
+	return nil
+}
+
+// unix.Fdatasync is missing, so put it here
+func unixFdatasync(fd int) (err error) {
+	_, _, e1 := unix.Syscall(unix.SYS_FDATASYNC, uintptr(fd), 0, 0)
+	if e1 != 0 {
+		err = errnoErr(e1)
+	}
+	return
+}
+
+func errnoErr(e syscall.Errno) error {
+	switch e {
+	case 0:
+		return nil
+	case unix.EAGAIN:
+		return syscall.EAGAIN
+	case unix.EINVAL:
+		return syscall.EINVAL
+	case unix.ENOENT:
+		return syscall.ENOENT
+	}
+	return e
+}
--- a/lib/fs/dir_remover.go
+++ b/lib/fs/dir_remover.go
@@ -0,0 +1,111 @@
+package fs
+
+import (
+	"os"
+	"strings"
+	"sync"
+	"sync/atomic"
+	"time"
+
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
+	"github.com/VictoriaMetrics/metrics"
+)
+
+func mustRemoveAll(path string) bool {
+	err := os.RemoveAll(path)
+	if err == nil {
+		// Make sure the parent directory doesn't contain references
+		// to the current directory.
+		mustSyncParentDirIfExists(path)
+		return true
+	}
+	if !isTemporaryNFSError(err) {
+		logger.Panicf("FATAL: cannot remove %q: %s", path, err)
+	}
+	// NFS prevents from removing directories with open files.
+	// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/61 .
+	// Schedule for later directory removal.
+	nfsDirRemoveFailedAttempts.Inc()
+	select {
+	case removeDirCh <- path:
+	default:
+		logger.Panicf("FATAL: cannot schedule %s for removal, since the removal queue is full (%d entries)", path, cap(removeDirCh))
+	}
+	return false
+}
+
+var nfsDirRemoveFailedAttempts = metrics.NewCounter(`vm_nfs_dir_remove_failed_attempts_total`)
+
+var removeDirCh = make(chan string, 1024)
+
+func dirRemover() {
+	const minSleepTime = 100 * time.Millisecond
+	const maxSleepTime = time.Second
+	sleepTime := minSleepTime
+	for {
+		var path string
+		select {
+		case path = <-removeDirCh:
+		default:
+			if atomic.LoadUint64(&stopDirRemover) != 0 {
+				return
+			}
+			time.Sleep(minSleepTime)
+			continue
+		}
+		if mustRemoveAll(path) {
+			sleepTime = minSleepTime
+			continue
+		}
+
+		// Couldn't remove the directory at the path because of NFS lock.
+		// Sleep for a while and try again.
+		// Do not limit the amount of time required for deleting the directory,
+		// since this may break on laggy NFS.
+		// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/162 .
+		time.Sleep(sleepTime)
+		if sleepTime < maxSleepTime {
+			sleepTime *= 2
+		} else {
+			logger.Errorf("failed to remove directory %q due to NFS lock; retrying later", path)
+		}
+	}
+}
+
+func isTemporaryNFSError(err error) bool {
+	// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/61 for details.
+	errStr := err.Error()
+	return strings.Contains(errStr, "directory not empty") || strings.Contains(errStr, "device or resource busy")
+}
+
+var dirRemoverWG sync.WaitGroup
+var stopDirRemover uint64
+
+func init() {
+	dirRemoverWG.Add(1)
+	go func() {
+		defer dirRemoverWG.Done()
+		dirRemover()
+	}()
+}
+
+// MustStopDirRemover must be called in the end of graceful shutdown
+// in order to wait for removing the remaining directories from removeDirCh.
+//
+// It is expected that nobody calls MustRemoveAll when MustStopDirRemover
+// is called.
+func MustStopDirRemover() {
+	atomic.StoreUint64(&stopDirRemover, 1)
+	doneCh := make(chan struct{})
+	go func() {
+		dirRemoverWG.Wait()
+		close(doneCh)
+	}()
+	const maxWaitTime = 5 * time.Second
+	select {
+	case <-doneCh:
+		return
+	case <-time.After(maxWaitTime):
+		logger.Panicf("FATAL: cannot stop dirRemover in %s", maxWaitTime)
+	}
+}
--- a/lib/fs/fs.go
+++ b/lib/fs/fs.go
@@ -5,12 +5,13 @@ import (
 	"io"
 	"os"
 	"path/filepath"
-	"strings"
-	"time"
+	"regexp"
+	"sync/atomic"

 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/filestream"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
 	"github.com/VictoriaMetrics/metrics"
+	"golang.org/x/sys/unix"
 )

 // ReadAtCloser is rand-access read interface.
@@ -87,26 +88,42 @@ func MustSyncPath(path string) {
 	}
 }

-// WriteFile writes data to the given file path.
+var tmpFileNum uint64
+
+// WriteFileAtomically atomically writes data to the given file path.
 //
-// WriteFile returns only after the file is fully written
+// WriteFile returns only after the file is fully written and synced
 // to the underlying storage.
-func WriteFile(path string, data []byte) error {
+func WriteFileAtomically(path string, data []byte) error {
+	// Check for the existing file. It is expected that
+	// the WriteFileAtomically function cannot be called concurrently
+	// with the same `path`.
 	if IsPathExist(path) {
 		return fmt.Errorf("cannot create file %q, since it already exists", path)
 	}
-	f, err := filestream.Create(path, false)
+
+	n := atomic.AddUint64(&tmpFileNum, 1)
+	tmpPath := fmt.Sprintf("%s.tmp.%d", path, n)
+	f, err := filestream.Create(tmpPath, false)
 	if err != nil {
-		return fmt.Errorf("cannot create file %q: %s", path, err)
+		return fmt.Errorf("cannot create file %q: %s", tmpPath, err)
 	}
 	if _, err := f.Write(data); err != nil {
 		f.MustClose()
-		return fmt.Errorf("cannot write %d bytes to file %q: %s", len(data), path, err)
+		MustRemoveAll(tmpPath)
+		return fmt.Errorf("cannot write %d bytes to file %q: %s", len(data), tmpPath, err)
 	}

 	// Sync and close the file.
 	f.MustClose()

+	// Atomically move the file from tmpPath to path.
+	if err := os.Rename(tmpPath, path); err != nil {
+		// do not call MustRemoveAll(tmpPath) here, so the user could inspect
+		// the file contents during investigating the issue.
+		return fmt.Errorf("cannot move %q to %q: %s", tmpPath, path, err)
+	}
+
 	// Sync the containing directory, so the file is guaranteed to appear in the directory.
 	// See https://www.quora.com/When-should-you-fsync-the-containing-directory-in-addition-to-the-file-itself
 	absPath, err := filepath.Abs(path)
@@ -119,6 +136,15 @@ func WriteFile(path string, data []byte) error {
 	return nil
 }

+// IsTemporaryFileName returns true if fn matches temporary file name pattern
+// from WriteFileAtomically.
+func IsTemporaryFileName(fn string) bool {
+	return tmpFileNameRe.MatchString(fn)
+}
+
+// tmpFileNameRe is regexp for temporary file name - see WriteFileAtomically for details.
+var tmpFileNameRe = regexp.MustCompile(`\.tmp\.\d+$`)
+
 // MkdirAllIfNotExist creates the given path dir if it isn't exist.
 func MkdirAllIfNotExist(path string) error {
 	if IsPathExist(path) {
@@ -220,62 +246,7 @@ func mustSyncParentDirIfExists(path string) {
 //
 // It properly handles NFS issue https://github.com/VictoriaMetrics/VictoriaMetrics/issues/61 .
 func MustRemoveAll(path string) {
-	err := os.RemoveAll(path)
-	if err == nil {
-		// Make sure the parent directory doesn't contain references
-		// to the current directory.
-		mustSyncParentDirIfExists(path)
-		return
-	}
-	if !isTemporaryNFSError(err) {
-		logger.Panicf("FATAL: cannot remove %q: %s", path, err)
-	}
-	// NFS prevents from removing directories with open files.
-	// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/61 .
-	// Schedule for later directory removal.
-	select {
-	case removeDirCh <- path:
-	default:
-		logger.Panicf("FATAL: cannot schedule %s for removal, since the removal queue is full (%d entries)", path, cap(removeDirCh))
-	}
-}
-
-var removeDirCh = make(chan string, 1024)
-
-func dirRemover() {
-	for path := range removeDirCh {
-		attempts := 0
-		for {
-			err := os.RemoveAll(path)
-			if err == nil {
-				break
-			}
-			if !isTemporaryNFSError(err) {
-				logger.Panicf("FATAL: cannot remove %q: %s", path, err)
-			}
-			// NFS prevents from removing directories with open files.
-			// Sleep for a while and try again in the hope open files will be closed.
-			// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/61 .
-			attempts++
-			if attempts > 10 {
-				logger.Panicf("FATAL: cannot remove %q in %d attempts: %s", path, attempts, err)
-			}
-			time.Sleep(100 * time.Millisecond)
-		}
-		// Make sure the parent directory doesn't contain references
-		// to the current directory.
-		mustSyncParentDirIfExists(path)
-	}
-}
-
-func isTemporaryNFSError(err error) bool {
-	// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/61 for details.
-	errStr := err.Error()
-	return strings.Contains(errStr, "directory not empty") || strings.Contains(errStr, "device or resource busy")
-}
-
-func init() {
-	go dirRemover()
+	_ = mustRemoveAll(path)
 }

 // HardLinkFiles makes hard links for all the files from srcDir in dstDir.
@@ -358,3 +329,34 @@ func MustWriteData(w io.Writer, data []byte) {
 		logger.Panicf("BUG: writer wrote %d bytes instead of %d bytes", n, len(data))
 	}
 }
+
+// CreateFlockFile creates flock.lock file in the directory dir
+// and returns the handler to the file.
+func CreateFlockFile(dir string) (*os.File, error) {
+	flockFile := dir + "/flock.lock"
+	flockF, err := os.Create(flockFile)
+	if err != nil {
+		return nil, fmt.Errorf("cannot create lock file %q: %s", flockFile, err)
+	}
+	if err := unix.Flock(int(flockF.Fd()), unix.LOCK_EX|unix.LOCK_NB); err != nil {
+		return nil, fmt.Errorf("cannot acquire lock on file %q: %s", flockFile, err)
+	}
+	return flockF, nil
+}
+
+// MustGetFreeSpace returns free space for the given directory path.
+func MustGetFreeSpace(path string) uint64 {
+	d, err := os.Open(path)
+	if err != nil {
+		logger.Panicf("FATAL: cannot determine free disk space on %q: %s", path, err)
+	}
+	defer MustClose(d)
+
+	fd := d.Fd()
+	var stat unix.Statfs_t
+	if err := unix.Fstatfs(int(fd), &stat); err != nil {
+		logger.Panicf("FATAL: cannot determine free disk space on %q: %s", path, err)
+	}
+	freeSpace := uint64(stat.Bavail) * uint64(stat.Bsize)
+	return freeSpace
+}
--- a/lib/fs/fs_test.go
+++ b/lib/fs/fs_test.go
@@ -0,0 +1,24 @@
+package fs
+
+import (
+	"testing"
+)
+
+func TestIsTemporaryFileName(t *testing.T) {
+	f := func(s string, resultExpected bool) {
+		t.Helper()
+		result := IsTemporaryFileName(s)
+		if result != resultExpected {
+			t.Fatalf("unexpected IsTemporaryFileName(%q); got %v; want %v", s, result, resultExpected)
+		}
+	}
+	f("", false)
+	f(".", false)
+	f(".tmp", false)
+	f("tmp.123", false)
+	f(".tmp.123.xx", false)
+	f(".tmp.1", true)
+	f("asdf.dff.tmp.123", true)
+	f("asdf.sdfds.tmp.dfd", false)
+	f("dfd.sdfds.dfds.1232", false)
+}
--- a/lib/httpserver/httpserver.go
+++ b/lib/httpserver/httpserver.go
@@ -423,7 +423,29 @@ var (
 func Errorf(w http.ResponseWriter, format string, args ...interface{}) {
 	errStr := fmt.Sprintf(format, args...)
 	logger.Errorf("%s", errStr)
-	http.Error(w, errStr, http.StatusBadRequest)
+
+	// Extract statusCode from args
+	statusCode := http.StatusBadRequest
+	for _, arg := range args {
+		if esc, ok := arg.(*ErrorWithStatusCode); ok {
+			statusCode = esc.StatusCode
+			break
+		}
+	}
+	http.Error(w, errStr, statusCode)
+}
+
+// ErrorWithStatusCode is error with HTTP status code.
+//
+// The given StatusCode is sent to client when the error is passed to Errorf.
+type ErrorWithStatusCode struct {
+	Err        error
+	StatusCode int
+}
+
+// Error implements error interface.
+func (e *ErrorWithStatusCode) Error() string {
+	return e.Err.Error()
 }

 func isTrivialNetworkError(err error) bool {
--- a/lib/memory/memory.go
+++ b/lib/memory/memory.go
@@ -10,27 +10,41 @@ import (

 var allowedMemPercent = flag.Float64("memory.allowedPercent", 60, "Allowed percent of system memory VictoriaMetrics caches may occupy")

-var allowedMemory int
+var (
+	allowedMemory   int
+	remainingMemory int
+)

 var once sync.Once

+func initOnce() {
+	if !flag.Parsed() {
+		// Do not use logger.Panicf here, since logger may be uninitialized yet.
+		panic(fmt.Errorf("BUG: memory.Allowed must be called only after flag.Parse call"))
+	}
+	if *allowedMemPercent < 10 || *allowedMemPercent > 200 {
+		logger.Panicf("FATAL: -memory.allowedPercent must be in the range [10...200]; got %f", *allowedMemPercent)
+	}
+	percent := *allowedMemPercent / 100
+
+	mem := sysTotalMemory()
+	allowedMemory = int(float64(mem) * percent)
+	remainingMemory = mem - allowedMemory
+	logger.Infof("limiting caches to %d bytes, leaving %d bytes to the OS according to -memory.allowedPercent=%g", allowedMemory, remainingMemory, *allowedMemPercent)
+}
+
 // Allowed returns the amount of system memory allowed to use by the app.
 //
 // The function must be called only after flag.Parse is called.
 func Allowed() int {
-	once.Do(func() {
-		if !flag.Parsed() {
-			// Do not use logger.Panicf here, since logger may be uninitialized yet.
-			panic(fmt.Errorf("BUG: memory.Allowed must be called only after flag.Parse call"))
-		}
-		if *allowedMemPercent < 10 || *allowedMemPercent > 200 {
-			logger.Panicf("FATAL: -memory.allowedPercent must be in the range [10...200]; got %f", *allowedMemPercent)
-		}
-		percent := *allowedMemPercent / 100
-
-		mem := sysTotalMemory()
-		allowedMemory = int(float64(mem) * percent)
-		logger.Infof("limiting caches to %d bytes of RAM according to -memory.allowedPercent=%g", allowedMemory, *allowedMemPercent)
-	})
+	once.Do(initOnce)
 	return allowedMemory
 }
+
+// Remaining returns the amount of memory remaining to the OS.
+//
+// This function must be called only after flag.Parse is called.
+func Remaining() int {
+	once.Do(initOnce)
+	return remainingMemory
+}
--- a/lib/memory/memory_bsd.go
+++ b/lib/memory/memory_bsd.go
@@ -0,0 +1,17 @@
+// +build freebsd openbsd dragonfly netbsd
+
+package memory
+
+import (
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
+)
+
+// This code has been adopted from https://github.com/pbnjay/memory
+
+func sysTotalMemory() int {
+	s, err := sysctlUint64("hw.physmem")
+	if err != nil {
+		logger.Panicf("FATAL: cannot determine system memory: %s", err)
+	}
+	return int(s)
+}
--- a/lib/memory/memory_darwin.go
+++ b/lib/memory/memory_darwin.go
@@ -1,9 +1,6 @@
 package memory

 import (
-	"syscall"
-	"unsafe"
-
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
 )

@@ -15,16 +12,3 @@ func sysTotalMemory() int {
 	}
 	return int(s)
 }
-
-func sysctlUint64(name string) (uint64, error) {
-	s, err := syscall.Sysctl(name)
-	if err != nil {
-		return 0, err
-	}
-	// hack because the string conversion above drops a \0
-	b := []byte(s)
-	if len(b) < 8 {
-		b = append(b, 0)
-	}
-	return *(*uint64)(unsafe.Pointer(&b[0])), nil
-}
--- a/lib/memory/sysctl.go
+++ b/lib/memory/sysctl.go
@@ -0,0 +1,22 @@
+// +build darwin freebsd openbsd dragonfly netbsd
+
+package memory
+
+import (
+	"syscall"
+	"unsafe"
+)
+
+// This has been adapted from github.com/pbnjay/memory.
+func sysctlUint64(name string) (uint64, error) {
+	s, err := syscall.Sysctl(name)
+	if err != nil {
+		return 0, err
+	}
+	// hack because the string conversion above drops a \0
+	b := []byte(s)
+	if len(b) < 8 {
+		b = append(b, 0)
+	}
+	return *(*uint64)(unsafe.Pointer(&b[0])), nil
+}
--- a/lib/mergeset/part_header.go
+++ b/lib/mergeset/part_header.go
@@ -164,7 +164,7 @@ func (ph *partHeader) WriteMetadata(partPath string) error {
 		return fmt.Errorf("cannot marshal metadata: %s", err)
 	}
 	metadataPath := partPath + "/metadata.json"
-	if err := fs.WriteFile(metadataPath, metadata); err != nil {
+	if err := fs.WriteFileAtomically(metadataPath, metadata); err != nil {
 		return fmt.Errorf("cannot create %q: %s", metadataPath, err)
 	}
 	return nil
--- a/lib/mergeset/table.go
+++ b/lib/mergeset/table.go
@@ -16,7 +16,6 @@ import (
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/syncwg"
-	"golang.org/x/sys/unix"
 )

 // maxParts is the maximum number of parts in the table.
@@ -60,6 +59,8 @@ const rawItemsFlushInterval = time.Second
 type Table struct {
 	path string

+	flushCallback func()
+
 	partsLock sync.Mutex
 	parts     []*partWrapper

@@ -122,8 +123,11 @@ func (pw *partWrapper) decRef() {

 // OpenTable opens a table on the given path.
 //
+// Optional flushCallback is called every time new data batch is flushed
+// to the underlying storage and becomes visible to search.
+//
 // The table is created if it doesn't exist yet.
-func OpenTable(path string) (*Table, error) {
+func OpenTable(path string, flushCallback func()) (*Table, error) {
 	path = filepath.Clean(path)
 	logger.Infof("opening table %q...", path)
 	startTime := time.Now()
@@ -134,13 +138,9 @@ func OpenTable(path string) (*Table, error) {
 	}

 	// Protect from concurrent opens.
-	flockFile := path + "/flock.lock"
-	flockF, err := os.Create(flockFile)
+	flockF, err := fs.CreateFlockFile(path)
 	if err != nil {
-		return nil, fmt.Errorf("cannot create lock file %q: %s", flockFile, err)
-	}
-	if err := unix.Flock(int(flockF.Fd()), unix.LOCK_EX|unix.LOCK_NB); err != nil {
-		return nil, fmt.Errorf("cannot acquire lock on file %q: %s", flockFile, err)
+		return nil, err
 	}

 	// Open table parts.
@@ -150,11 +150,12 @@ func OpenTable(path string) (*Table, error) {
 	}

 	tb := &Table{
-		path:     path,
-		parts:    pws,
-		mergeIdx: uint64(time.Now().UnixNano()),
-		flockF:   flockF,
-		stopCh:   make(chan struct{}),
+		path:          path,
+		flushCallback: flushCallback,
+		parts:         pws,
+		mergeIdx:      uint64(time.Now().UnixNano()),
+		flockF:        flockF,
+		stopCh:        make(chan struct{}),
 	}
 	tb.startPartMergers()
 	tb.startRawItemsFlusher()
@@ -449,6 +450,9 @@ func (tb *Table) mergeRawItemsBlocks(blocksToMerge []*inmemoryBlock) {
 		if err := tb.mergeParts(pws, nil, true); err != nil {
 			logger.Panicf("FATAL: cannot merge raw parts: %s", err)
 		}
+		if tb.flushCallback != nil {
+			tb.flushCallback()
+		}
 	}

 	for {
@@ -715,7 +719,7 @@ func (tb *Table) mergeParts(pws []*partWrapper, stopCh <-chan struct{}, isOuterP
 	dstPartPath := ph.Path(tb.path, mergeIdx)
 	fmt.Fprintf(&bb, "%s -> %s\n", tmpPartPath, dstPartPath)
 	txnPath := fmt.Sprintf("%s/txn/%016X", tb.path, mergeIdx)
-	if err := fs.WriteFile(txnPath, bb.B); err != nil {
+	if err := fs.WriteFileAtomically(txnPath, bb.B); err != nil {
 		return fmt.Errorf("cannot create transaction file %q: %s", txnPath, err)
 	}

@@ -808,19 +812,7 @@ func (tb *Table) maxOutPartItems() uint64 {
 }

 func (tb *Table) maxOutPartItemsSlow() uint64 {
-	// Determine the amount of free space on tb.path.
-	d, err := os.Open(tb.path)
-	if err != nil {
-		logger.Panicf("FATAL: cannot determine free disk space on %q: %s", tb.path, err)
-	}
-	defer fs.MustClose(d)
-
-	fd := d.Fd()
-	var stat unix.Statfs_t
-	if err := unix.Fstatfs(int(fd), &stat); err != nil {
-		logger.Panicf("FATAL: cannot determine free disk space on %q: %s", tb.path, err)
-	}
-	freeSpace := stat.Bavail * uint64(stat.Bsize)
+	freeSpace := fs.MustGetFreeSpace(tb.path)

 	// Calculate the maximum number of items in the output merge part
 	// by dividing the freeSpace by 4 and by the number of concurrent
@@ -994,7 +986,12 @@ func runTransactions(txnLock *sync.RWMutex, path string) error {
 	})

 	for _, fi := range fis {
-		txnPath := txnDir + "/" + fi.Name()
+		fn := fi.Name()
+		if fs.IsTemporaryFileName(fn) {
+			// Skip temporary files, which could be left after unclean shutdown.
+			continue
+		}
+		txnPath := txnDir + "/" + fn
 		if err := runTransaction(txnLock, path, txnPath); err != nil {
 			return fmt.Errorf("cannot run transaction from %q: %s", txnPath, err)
 		}
--- a/lib/mergeset/table_search_test.go
+++ b/lib/mergeset/table_search_test.go
@@ -5,6 +5,7 @@ import (
 	"math/rand"
 	"os"
 	"sort"
+	"sync/atomic"
 	"testing"
 	"time"
 )
@@ -39,7 +40,7 @@ func TestTableSearchSerial(t *testing.T) {

 	func() {
 		// Re-open the table and verify the search works.
-		tb, err := OpenTable(path)
+		tb, err := OpenTable(path, nil)
 		if err != nil {
 			t.Fatalf("cannot open table: %s", err)
 		}
@@ -74,7 +75,7 @@ func TestTableSearchConcurrent(t *testing.T) {

 	// Re-open the table and verify the search works.
 	func() {
-		tb, err := OpenTable(path)
+		tb, err := OpenTable(path, nil)
 		if err != nil {
 			t.Fatalf("cannot open table: %s", err)
 		}
@@ -146,7 +147,11 @@ func testTableSearchSerial(tb *Table, items []string) error {
 }

 func newTestTable(path string, itemsCount int) (*Table, []string, error) {
-	tb, err := OpenTable(path)
+	var flushes uint64
+	flushCallback := func() {
+		atomic.AddUint64(&flushes, 1)
+	}
+	tb, err := OpenTable(path, flushCallback)
 	if err != nil {
 		return nil, nil, fmt.Errorf("cannot open table: %s", err)
 	}
@@ -159,6 +164,9 @@ func newTestTable(path string, itemsCount int) (*Table, []string, error) {
 		items[i] = item
 	}
 	tb.DebugFlush()
+	if itemsCount > 0 && atomic.LoadUint64(&flushes) == 0 {
+		return nil, nil, fmt.Errorf("unexpeted zero flushes for itemsCount=%d", itemsCount)
+	}

 	sort.Strings(items)
 	return tb, items, nil
--- a/lib/mergeset/table_search_timing_test.go
+++ b/lib/mergeset/table_search_timing_test.go
@@ -32,7 +32,7 @@ func benchmarkTableSearch(b *testing.B, itemsCount int) {

 	// Force finishing pending merges
 	tb.MustClose()
-	tb, err = OpenTable(path)
+	tb, err = OpenTable(path, nil)
 	if err != nil {
 		b.Fatalf("unexpected error when re-opening table %q: %s", path, err)
 	}
--- a/lib/mergeset/table_test.go
+++ b/lib/mergeset/table_test.go
@@ -5,6 +5,7 @@ import (
 	"fmt"
 	"os"
 	"sync"
+	"sync/atomic"
 	"testing"

 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
@@ -20,7 +21,7 @@ func TestTableOpenClose(t *testing.T) {
 	}()

 	// Create a new table
-	tb, err := OpenTable(path)
+	tb, err := OpenTable(path, nil)
 	if err != nil {
 		t.Fatalf("cannot create new table: %s", err)
 	}
@@ -30,7 +31,7 @@ func TestTableOpenClose(t *testing.T) {

 	// Re-open created table multiple times.
 	for i := 0; i < 10; i++ {
-		tb, err := OpenTable(path)
+		tb, err := OpenTable(path, nil)
 		if err != nil {
 			t.Fatalf("cannot open created table: %s", err)
 		}
@@ -44,14 +45,14 @@ func TestTableOpenMultipleTimes(t *testing.T) {
 		_ = os.RemoveAll(path)
 	}()

-	tb1, err := OpenTable(path)
+	tb1, err := OpenTable(path, nil)
 	if err != nil {
 		t.Fatalf("cannot open table: %s", err)
 	}
 	defer tb1.MustClose()

 	for i := 0; i < 10; i++ {
-		tb2, err := OpenTable(path)
+		tb2, err := OpenTable(path, nil)
 		if err == nil {
 			tb2.MustClose()
 			t.Fatalf("expecting non-nil error when opening already opened table")
@@ -68,7 +69,11 @@ func TestTableAddItemSerial(t *testing.T) {
 		_ = os.RemoveAll(path)
 	}()

-	tb, err := OpenTable(path)
+	var flushes uint64
+	flushCallback := func() {
+		atomic.AddUint64(&flushes, 1)
+	}
+	tb, err := OpenTable(path, flushCallback)
 	if err != nil {
 		t.Fatalf("cannot open %q: %s", path, err)
 	}
@@ -78,6 +83,9 @@ func TestTableAddItemSerial(t *testing.T) {

 	// Verify items count after pending items flush.
 	tb.DebugFlush()
+	if atomic.LoadUint64(&flushes) == 0 {
+		t.Fatalf("unexpected zero flushes")
+	}

 	var m TableMetrics
 	tb.UpdateMetrics(&m)
@@ -91,7 +99,7 @@ func TestTableAddItemSerial(t *testing.T) {
 	testReopenTable(t, path, itemsCount)

 	// Add more items in order to verify merge between inmemory parts and file-based parts.
-	tb, err = OpenTable(path)
+	tb, err = OpenTable(path, nil)
 	if err != nil {
 		t.Fatalf("cannot open %q: %s", path, err)
 	}
@@ -124,7 +132,7 @@ func TestTableCreateSnapshotAt(t *testing.T) {
 		_ = os.RemoveAll(path)
 	}()

-	tb, err := OpenTable(path)
+	tb, err := OpenTable(path, nil)
 	if err != nil {
 		t.Fatalf("cannot open %q: %s", path, err)
 	}
@@ -155,13 +163,13 @@ func TestTableCreateSnapshotAt(t *testing.T) {
 	}()

 	// Verify snapshots contain all the data.
-	tb1, err := OpenTable(snapshot1)
+	tb1, err := OpenTable(snapshot1, nil)
 	if err != nil {
 		t.Fatalf("cannot open %q: %s", path, err)
 	}
 	defer tb1.MustClose()

-	tb2, err := OpenTable(snapshot2)
+	tb2, err := OpenTable(snapshot2, nil)
 	if err != nil {
 		t.Fatalf("cannot open %q: %s", path, err)
 	}
@@ -205,7 +213,11 @@ func TestTableAddItemsConcurrent(t *testing.T) {
 		_ = os.RemoveAll(path)
 	}()

-	tb, err := OpenTable(path)
+	var flushes uint64
+	flushCallback := func() {
+		atomic.AddUint64(&flushes, 1)
+	}
+	tb, err := OpenTable(path, flushCallback)
 	if err != nil {
 		t.Fatalf("cannot open %q: %s", path, err)
 	}
@@ -215,6 +227,10 @@ func TestTableAddItemsConcurrent(t *testing.T) {

 	// Verify items count after pending items flush.
 	tb.DebugFlush()
+	if atomic.LoadUint64(&flushes) == 0 {
+		t.Fatalf("unexpected zero flushes")
+	}
+
 	var m TableMetrics
 	tb.UpdateMetrics(&m)
 	if m.ItemsCount != itemsCount {
@@ -227,7 +243,7 @@ func TestTableAddItemsConcurrent(t *testing.T) {
 	testReopenTable(t, path, itemsCount)

 	// Add more items in order to verify merge between inmemory parts and file-based parts.
-	tb, err = OpenTable(path)
+	tb, err = OpenTable(path, nil)
 	if err != nil {
 		t.Fatalf("cannot open %q: %s", path, err)
 	}
@@ -269,7 +285,7 @@ func testReopenTable(t *testing.T, path string, itemsCount int) {
 	t.Helper()

 	for i := 0; i < 10; i++ {
-		tb, err := OpenTable(path)
+		tb, err := OpenTable(path, nil)
 		if err != nil {
 			t.Fatalf("cannot re-open %q: %s", path, err)
 		}
--- a/lib/prompb/remote.pb.go
+++ b/lib/prompb/remote.pb.go
@@ -31,7 +31,7 @@ func (m *WriteRequest) Unmarshal(dAtA []byte) error {
 			}
 			b := dAtA[iNdEx]
 			iNdEx++
-			wire |= (uint64(b) & 0x7F) << shift
+			wire |= uint64(b&0x7F) << shift
 			if b < 0x80 {
 				break
 			}
@@ -118,7 +118,7 @@ func skipRemote(dAtA []byte) (n int, err error) {
 			}
 			b := dAtA[iNdEx]
 			iNdEx++
-			wire |= (uint64(b) & 0x7F) << shift
+			wire |= uint64(b&0x7F) << shift
 			if b < 0x80 {
 				break
 			}
@@ -176,7 +176,7 @@ func skipRemote(dAtA []byte) (n int, err error) {
 					}
 					b := dAtA[iNdEx]
 					iNdEx++
-					innerWire |= (uint64(b) & 0x7F) << shift
+					innerWire |= uint64(b&0x7F) << shift
 					if b < 0x80 {
 						break
 					}
--- a/lib/prompb/types.pb.go
+++ b/lib/prompb/types.pb.go
@@ -43,7 +43,7 @@ func (m *Sample) Unmarshal(dAtA []byte) error {
 			}
 			b := dAtA[iNdEx]
 			iNdEx++
-			wire |= (uint64(b) & 0x7F) << shift
+			wire |= uint64(b&0x7F) << shift
 			if b < 0x80 {
 				break
 			}
@@ -82,7 +82,7 @@ func (m *Sample) Unmarshal(dAtA []byte) error {
 				}
 				b := dAtA[iNdEx]
 				iNdEx++
-				m.Timestamp |= (int64(b) & 0x7F) << shift
+				m.Timestamp |= int64(b&0x7F) << shift
 				if b < 0x80 {
 					break
 				}
@@ -128,7 +128,7 @@ func (m *TimeSeries) Unmarshal(dAtA []byte, dstLabels []Label, dstSamples []Samp
 			}
 			b := dAtA[iNdEx]
 			iNdEx++
-			wire |= (uint64(b) & 0x7F) << shift
+			wire |= uint64(b&0x7F) << shift
 			if b < 0x80 {
 				break
 			}
@@ -255,7 +255,7 @@ func (m *Label) Unmarshal(dAtA []byte) error {
 			}
 			b := dAtA[iNdEx]
 			iNdEx++
-			wire |= (uint64(b) & 0x7F) << shift
+			wire |= uint64(b&0x7F) << shift
 			if b < 0x80 {
 				break
 			}
@@ -283,7 +283,7 @@ func (m *Label) Unmarshal(dAtA []byte) error {
 				}
 				b := dAtA[iNdEx]
 				iNdEx++
-				stringLen |= (uint64(b) & 0x7F) << shift
+				stringLen |= uint64(b&0x7F) << shift
 				if b < 0x80 {
 					break
 				}
@@ -312,7 +312,7 @@ func (m *Label) Unmarshal(dAtA []byte) error {
 				}
 				b := dAtA[iNdEx]
 				iNdEx++
-				stringLen |= (uint64(b) & 0x7F) << shift
+				stringLen |= uint64(b&0x7F) << shift
 				if b < 0x80 {
 					break
 				}
@@ -363,7 +363,7 @@ func skipTypes(dAtA []byte) (n int, err error) {
 			}
 			b := dAtA[iNdEx]
 			iNdEx++
-			wire |= (uint64(b) & 0x7F) << shift
+			wire |= uint64(b&0x7F) << shift
 			if b < 0x80 {
 				break
 			}
@@ -421,7 +421,7 @@ func skipTypes(dAtA []byte) (n int, err error) {
 					}
 					b := dAtA[iNdEx]
 					iNdEx++
-					innerWire |= (uint64(b) & 0x7F) << shift
+					innerWire |= uint64(b&0x7F) << shift
 					if b < 0x80 {
 						break
 					}
--- a/lib/storage/block.go
+++ b/lib/storage/block.go
@@ -205,19 +205,6 @@ func (b *Block) MarshalData(timestampsBlockOffset, valuesBlockOffset uint64) ([]
 	b.bh.ValuesBlockSize = uint32(len(b.valuesData))
 	b.values = b.values[:0]

-	if len(timestamps) > 1 && (b.bh.ValuesMarshalType == encoding.MarshalTypeConst || b.bh.ValuesMarshalType == encoding.MarshalTypeDeltaConst) {
-		// Special case - values are constant or are changed with constant rate.
-		// In this case we may 'cheat' by assuming timestamps are changed
-		// at ideal constant rate. This improves timestamps' compression rate.
-		minTimestamp := timestamps[0]
-		maxTimestamp := timestamps[len(timestamps)-1]
-		delta := (maxTimestamp - minTimestamp) / int64(len(timestamps)-1)
-		ts := minTimestamp
-		for i := 1; i < len(timestamps); i++ {
-			ts += delta
-			timestamps[i] = ts
-		}
-	}
 	b.timestampsData, b.bh.TimestampsMarshalType, b.bh.MinTimestamp = encoding.MarshalTimestamps(b.timestampsData[:0], timestamps, b.bh.PrecisionBits)
 	b.bh.TimestampsBlockOffset = timestampsBlockOffset
 	b.bh.TimestampsBlockSize = uint32(len(b.timestampsData))
--- a/lib/storage/block_header.go
+++ b/lib/storage/block_header.go
@@ -181,6 +181,10 @@ func unmarshalBlockHeaders(dst []blockHeader, src []byte, blockHeadersCount int)
 		logger.Panicf("BUG: blockHeadersCount must be greater than zero; got %d", blockHeadersCount)
 	}
 	dstLen := len(dst)
+	if n := dstLen + blockHeadersCount - cap(dst); n > 0 {
+		dst = append(dst[:cap(dst)], make([]blockHeader, n)...)
+		dst = dst[:dstLen]
+	}
 	var bh blockHeader
 	for len(src) > 0 {
 		tmp, err := bh.Unmarshal(src)
--- a/lib/storage/index_db.go
+++ b/lib/storage/index_db.go
@@ -18,6 +18,7 @@ import (
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/memory"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/mergeset"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/workingsetcache"
 	"github.com/VictoriaMetrics/fastcache"
 	xxhash "github.com/cespare/xxhash/v2"
 )
@@ -52,17 +53,17 @@ type indexDB struct {
 	extDBLock sync.Mutex

 	// Cache for fast TagFilters -> TSIDs lookup.
-	tagCache *fastcache.Cache
+	tagCache *workingsetcache.Cache

 	// Cache for fast MetricID -> TSID lookup.
-	metricIDCache *fastcache.Cache
+	metricIDCache *workingsetcache.Cache

 	// Cache for fast MetricID -> MetricName lookup.
-	metricNameCache *fastcache.Cache
+	metricNameCache *workingsetcache.Cache

 	// Cache holding useless TagFilters entries, which have no tag filters
 	// matching low number of metrics.
-	uselessTagFiltersCache *fastcache.Cache
+	uselessTagFiltersCache *workingsetcache.Cache

 	indexSearchPool sync.Pool

@@ -101,7 +102,7 @@ type indexDB struct {
 }

 // openIndexDB opens index db from the given path with the given caches.
-func openIndexDB(path string, metricIDCache, metricNameCache *fastcache.Cache, currHourMetricIDs, prevHourMetricIDs *atomic.Value) (*indexDB, error) {
+func openIndexDB(path string, metricIDCache, metricNameCache *workingsetcache.Cache, currHourMetricIDs, prevHourMetricIDs *atomic.Value) (*indexDB, error) {
 	if metricIDCache == nil {
 		logger.Panicf("BUG: metricIDCache must be non-nil")
 	}
@@ -115,7 +116,7 @@ func openIndexDB(path string, metricIDCache, metricNameCache *fastcache.Cache, c
 		logger.Panicf("BUG: prevHourMetricIDs must be non-nil")
 	}

-	tb, err := mergeset.OpenTable(path)
+	tb, err := mergeset.OpenTable(path, invalidateTagCache)
 	if err != nil {
 		return nil, fmt.Errorf("cannot open indexDB %q: %s", path, err)
 	}
@@ -130,10 +131,10 @@ func openIndexDB(path string, metricIDCache, metricNameCache *fastcache.Cache, c
 		tb:       tb,
 		name:     name,

-		tagCache:               fastcache.New(mem / 32),
+		tagCache:               workingsetcache.New(mem/32, time.Hour),
 		metricIDCache:          metricIDCache,
 		metricNameCache:        metricNameCache,
-		uselessTagFiltersCache: fastcache.New(mem / 128),
+		uselessTagFiltersCache: workingsetcache.New(mem/128, time.Hour),

 		currHourMetricIDs: currHourMetricIDs,
 		prevHourMetricIDs: prevHourMetricIDs,
@@ -273,8 +274,8 @@ func (db *indexDB) decRef() {
 	db.SetExtDB(nil)

 	// Free space occupied by caches owned by db.
-	db.tagCache.Reset()
-	db.uselessTagFiltersCache.Reset()
+	db.tagCache.Stop()
+	db.uselessTagFiltersCache.Stop()

 	db.tagCache = nil
 	db.metricIDCache = nil
@@ -291,20 +292,36 @@ func (db *indexDB) decRef() {
 }

 func (db *indexDB) getFromTagCache(key []byte) ([]TSID, bool) {
-	value := db.tagCache.GetBig(nil, key)
-	if len(value) == 0 {
+	compressedBuf := tagBufPool.Get()
+	defer tagBufPool.Put(compressedBuf)
+	compressedBuf.B = db.tagCache.GetBig(compressedBuf.B[:0], key)
+	if len(compressedBuf.B) == 0 {
 		return nil, false
 	}
-	tsids, err := unmarshalTSIDs(nil, value)
+	buf := tagBufPool.Get()
+	defer tagBufPool.Put(buf)
+	var err error
+	buf.B, err = encoding.DecompressZSTD(buf.B[:0], compressedBuf.B)
+	if err != nil {
+		logger.Panicf("FATAL: cannot decompress tsids from tagCache: %s", err)
+	}
+	tsids, err := unmarshalTSIDs(nil, buf.B)
 	if err != nil {
 		logger.Panicf("FATAL: cannot unmarshal tsids from tagCache: %s", err)
 	}
 	return tsids, true
 }

+var tagBufPool bytesutil.ByteBufferPool
+
 func (db *indexDB) putToTagCache(tsids []TSID, key []byte) {
-	value := marshalTSIDs(nil, tsids)
-	db.tagCache.SetBig(key, value)
+	buf := tagBufPool.Get()
+	buf.B = marshalTSIDs(buf.B[:0], tsids)
+	compressedBuf := tagBufPool.Get()
+	compressedBuf.B = encoding.CompressZSTDLevel(compressedBuf.B[:0], buf.B, 1)
+	tagBufPool.Put(buf)
+	db.tagCache.SetBig(key, compressedBuf.B)
+	tagBufPool.Put(compressedBuf)
 }

 func (db *indexDB) getFromMetricIDCache(dst *TSID, metricID uint64) error {
@@ -388,7 +405,7 @@ func unmarshalTSIDs(dst []TSID, src []byte) ([]TSID, error) {
 	return dst, nil
 }

-func (db *indexDB) invalidateTagCache() {
+func invalidateTagCache() {
 	// This function must be fast, since it is called each
 	// time new timeseries is added.
 	atomic.AddUint64(&tagFiltersKeyGen, 1)
@@ -496,8 +513,8 @@ func (db *indexDB) createTSIDByName(dst *TSID, metricName []byte) error {
 		return fmt.Errorf("cannot create indexes: %s", err)
 	}

-	// Invalidate tag cache, since it doesn't contain tags for the created mn -> TSID mapping.
-	db.invalidateTagCache()
+	// There is no need in invalidating tag cache, since it is invalidated
+	// on db.tb flush via invalidateTagCache flushCallback passed to OpenTable.

 	return nil
 }
@@ -873,7 +890,10 @@ func (db *indexDB) DeleteTSIDs(tfss []*TagFilters) (int, error) {
 	db.updateDeletedMetricIDs(metricIDs)

 	// Reset TagFilters -> TSIDS cache, since it may contain deleted TSIDs.
-	db.invalidateTagCache()
+	invalidateTagCache()
+
+	// Do not reset uselessTagFiltersCache, since the found metricIDs
+	// on cache miss are filtered out later with deletedMetricIDs.

 	// Delete TSIDs in the extDB.
 	if db.doExtDB(func(extDB *indexDB) {
@@ -974,7 +994,8 @@ func (db *indexDB) searchTSIDs(tfss []*TagFilters, tr TimeRange, maxMetrics int)
 		extTSIDs, err = is.searchTSIDs(tfss, tr, maxMetrics)
 		extDB.putIndexSearch(is)

-		db.putToTagCache(tsids, tfKeyExtBuf.B)
+		sort.Slice(extTSIDs, func(i, j int) bool { return extTSIDs[i].Less(&extTSIDs[j]) })
+		extDB.putToTagCache(extTSIDs, tfKeyExtBuf.B)
 	}) {
 		if err != nil {
 			return nil, err
@@ -1218,6 +1239,82 @@ func (is *indexSearch) updateMetricIDsByMetricNameMatch(metricIDs, srcMetricIDs
 	return nil
 }

+func (is *indexSearch) getTagFilterWithMinMetricIDsCountOptimized(tfs *TagFilters, tr TimeRange, maxMetrics int) (*tagFilter, map[uint64]struct{}, error) {
+	// Try fast path with the minimized number of maxMetrics.
+	maxMetricsAdjusted := is.adjustMaxMetricsAdaptive(tr, maxMetrics)
+	minTf, minMetricIDs, err := is.getTagFilterWithMinMetricIDsCountAdaptive(tfs, maxMetricsAdjusted)
+	if err == nil {
+		return minTf, minMetricIDs, nil
+	}
+	if err != errTooManyMetrics {
+		return nil, nil, err
+	}
+
+	// All the tag filters match too many metrics.
+
+	// Slow path: try filtering the matching metrics by time range.
+	// This should work well for cases when old metrics are constantly substituted
+	// by big number of new metrics. For example, prometheus-operator creates many new
+	// metrics for each new deployment.
+	//
+	// Allow fetching up to 20*maxMetrics metrics for the given time range
+	// in the hope these metricIDs will be filtered out by other filters later.
+	maxTimeRangeMetrics := 20 * maxMetrics
+	metricIDsForTimeRange, err := is.getMetricIDsForTimeRange(tr, maxTimeRangeMetrics+1)
+	if err == errMissingMetricIDsForDate {
+		// Slow path: try to select find the tag filter without maxMetrics adjustement.
+		minTf, minMetricIDs, err = is.getTagFilterWithMinMetricIDsCountAdaptive(tfs, maxMetrics)
+		if err == nil {
+			return minTf, minMetricIDs, nil
+		}
+		if err != errTooManyMetrics {
+			return nil, nil, err
+		}
+		return nil, nil, fmt.Errorf("cannot find tag filter matching less than %d time series; "+
+			"either increase -search.maxUniqueTimeseries or use more specific tag filters", maxMetrics)
+	}
+	if err != nil {
+		return nil, nil, err
+	}
+	if len(metricIDsForTimeRange) <= maxTimeRangeMetrics {
+		return nil, metricIDsForTimeRange, nil
+	}
+
+	// Slow path: try to select the tag filter without maxMetrics adjustement.
+	minTf, minMetricIDs, err = is.getTagFilterWithMinMetricIDsCountAdaptive(tfs, maxMetrics)
+	if err == nil {
+		return minTf, minMetricIDs, nil
+	}
+	if err != errTooManyMetrics {
+		return nil, nil, err
+	}
+	return nil, nil, fmt.Errorf("more than %d time series found on the time range %s; either increase -search.maxUniqueTimeseries or shrink the time range",
+		maxMetrics, tr.String())
+}
+
+const maxDaysForDateMetricIDs = 40
+
+func (is *indexSearch) adjustMaxMetricsAdaptive(tr TimeRange, maxMetrics int) int {
+	minDate := uint64(tr.MinTimestamp) / msecPerDay
+	maxDate := uint64(tr.MaxTimestamp) / msecPerDay
+	if maxDate-minDate > maxDaysForDateMetricIDs {
+		// Cannot reduce maxMetrics for the given time range,
+		// since it is expensive extracting metricIDs for the given tr.
+		return maxMetrics
+	}
+	hmPrev := is.db.prevHourMetricIDs.Load().(*hourMetricIDs)
+	if !hmPrev.isFull {
+		return maxMetrics
+	}
+	hourMetrics := len(hmPrev.m)
+	if hourMetrics >= 256 && maxMetrics > hourMetrics/4 {
+		// It is cheaper to filter on the hour or day metrics if the minimum
+		// number of matching metrics across tfs exceeds hourMetrics / 4.
+		return hourMetrics / 4
+	}
+	return maxMetrics
+}
+
 func (is *indexSearch) getTagFilterWithMinMetricIDsCountAdaptive(tfs *TagFilters, maxMetrics int) (*tagFilter, map[uint64]struct{}, error) {
 	kb := &is.kb
 	kb.B = append(kb.B[:0], uselessMultiTagFiltersKeyPrefix)
@@ -1266,29 +1363,6 @@ func (is *indexSearch) getTagFilterWithMinMetricIDsCountAdaptive(tfs *TagFilters

 var errTooManyMetrics = errors.New("all the tag filters match too many metrics")

-const maxDaysForDateMetricIDs = 40
-
-func (is *indexSearch) adjustMaxMetricsAdaptive(tr TimeRange, maxMetrics int) int {
-	minDate := uint64(tr.MinTimestamp) / msecPerDay
-	maxDate := uint64(tr.MaxTimestamp) / msecPerDay
-	if maxDate-minDate > maxDaysForDateMetricIDs {
-		// Cannot reduce maxMetrics for the given time range,
-		// since the it is expensive extracting metricIDs for the given tr.
-		return maxMetrics
-	}
-	hmPrev := is.db.prevHourMetricIDs.Load().(*hourMetricIDs)
-	if !hmPrev.isFull {
-		return maxMetrics
-	}
-	hourMetrics := len(hmPrev.m)
-	if hourMetrics >= 256 && maxMetrics > hourMetrics/4 {
-		// It is cheaper to filter on the hour or day metrics if the minimum
-		// number of matching metrics across tfs exceeds hourMetrics / 4.
-		return hourMetrics / 4
-	}
-	return maxMetrics
-}
-
 func (is *indexSearch) getTagFilterWithMinMetricIDsCount(tfs *TagFilters, maxMetrics int) (*tagFilter, map[uint64]struct{}, error) {
 	var minMetricIDs map[uint64]struct{}
 	var minTf *tagFilter
@@ -1463,37 +1537,9 @@ func (is *indexSearch) updateMetricIDsForTagFilters(metricIDs map[uint64]struct{
 	// Sort tag filters for faster ts.Seek below.
 	sort.Slice(tfs.tfs, func(i, j int) bool { return bytes.Compare(tfs.tfs[i].prefix, tfs.tfs[j].prefix) < 0 })

-	maxMetricsAdjusted := is.adjustMaxMetricsAdaptive(tr, maxMetrics)
-	minTf, minMetricIDs, err := is.getTagFilterWithMinMetricIDsCountAdaptive(tfs, maxMetricsAdjusted)
+	minTf, minMetricIDs, err := is.getTagFilterWithMinMetricIDsCountOptimized(tfs, tr, maxMetrics)
 	if err != nil {
-		if err != errTooManyMetrics {
-			return err
-		}
-
-		// All the tag filters match too many metrics.
-
-		// Slow path: try filtering the matching metrics by time range.
-		// This should work well for cases when old metrics are constantly substituted
-		// by big number of new metrics. For example, prometheus-operator creates many new
-		// metrics for each new deployment.
-		//
-		// Allow fetching up to 20*maxMetrics metrics for the given time range
-		// in the hope these metricIDs will be filtered out by other filters below.
-		maxTimeRangeMetrics := 20 * maxMetrics
-		metricIDsForTimeRange, err := is.getMetricIDsForTimeRange(tr, maxTimeRangeMetrics+1)
-		if err == errMissingMetricIDsForDate {
-			return fmt.Errorf("cannot find tag filter matching less than %d time series; either increase -search.maxUniqueTimeseries or use more specific tag filters",
-				maxMetrics)
-		}
-		if err != nil {
-			return err
-		}
-		if len(metricIDsForTimeRange) > maxTimeRangeMetrics {
-			return fmt.Errorf("more than %d time series found on the time range %s; either increase -search.maxUniqueTimeseries or shrink the time range",
-				maxTimeRangeMetrics, tr.String())
-		}
-		minMetricIDs = metricIDsForTimeRange
-		minTf = nil
+		return err
 	}

 	// Find intersection of minTf with other tfs.
--- a/lib/storage/index_db_test.go
+++ b/lib/storage/index_db_test.go
@@ -12,7 +12,7 @@ import (
 	"time"

 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
-	"github.com/VictoriaMetrics/fastcache"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/workingsetcache"
 )

 func TestMarshalUnmarshalTSIDs(t *testing.T) {
@@ -57,10 +57,10 @@ func TestMarshalUnmarshalTSIDs(t *testing.T) {
 }

 func TestIndexDBOpenClose(t *testing.T) {
-	metricIDCache := fastcache.New(1234)
-	metricNameCache := fastcache.New(1234)
-	defer metricIDCache.Reset()
-	defer metricNameCache.Reset()
+	metricIDCache := workingsetcache.New(1234, time.Hour)
+	metricNameCache := workingsetcache.New(1234, time.Hour)
+	defer metricIDCache.Stop()
+	defer metricNameCache.Stop()

 	var hmCurr atomic.Value
 	hmCurr.Store(&hourMetricIDs{})
@@ -85,10 +85,10 @@ func TestIndexDB(t *testing.T) {
 	const metricGroups = 10

 	t.Run("serial", func(t *testing.T) {
-		metricIDCache := fastcache.New(1234)
-		metricNameCache := fastcache.New(1234)
-		defer metricIDCache.Reset()
-		defer metricNameCache.Reset()
+		metricIDCache := workingsetcache.New(1234, time.Hour)
+		metricNameCache := workingsetcache.New(1234, time.Hour)
+		defer metricIDCache.Stop()
+		defer metricNameCache.Stop()

 		var hmCurr atomic.Value
 		hmCurr.Store(&hourMetricIDs{})
@@ -142,10 +142,10 @@ func TestIndexDB(t *testing.T) {
 	})

 	t.Run("concurrent", func(t *testing.T) {
-		metricIDCache := fastcache.New(1234)
-		metricNameCache := fastcache.New(1234)
-		defer metricIDCache.Reset()
-		defer metricNameCache.Reset()
+		metricIDCache := workingsetcache.New(1234, time.Hour)
+		metricNameCache := workingsetcache.New(1234, time.Hour)
+		defer metricIDCache.Stop()
+		defer metricNameCache.Stop()

 		var hmCurr atomic.Value
 		hmCurr.Store(&hourMetricIDs{})
--- a/lib/storage/index_db_timing_test.go
+++ b/lib/storage/index_db_timing_test.go
@@ -3,20 +3,50 @@ package storage
 import (
 	"fmt"
 	"os"
+	"regexp"
 	"strconv"
 	"sync/atomic"
 	"testing"
+	"time"

-	"github.com/VictoriaMetrics/fastcache"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/workingsetcache"
 )

+func BenchmarkRegexpFilterMatch(b *testing.B) {
+	b.ReportAllocs()
+	b.RunParallel(func(pb *testing.PB) {
+		re := regexp.MustCompile(`.*foo-bar-baz.*`)
+		b := []byte("fdsffd foo-bar-baz assd fdsfad dasf dsa")
+		for pb.Next() {
+			if !re.Match(b) {
+				panic("BUG: regexp must match!")
+			}
+			b[0]++
+		}
+	})
+}
+
+func BenchmarkRegexpFilterMismatch(b *testing.B) {
+	b.ReportAllocs()
+	b.RunParallel(func(pb *testing.PB) {
+		re := regexp.MustCompile(`.*foo-bar-baz.*`)
+		b := []byte("fdsffd foo-bar sfddsf assd nmn,mfdsdsakj")
+		for pb.Next() {
+			if re.Match(b) {
+				panic("BUG: regexp mustn't match!")
+			}
+			b[0]++
+		}
+	})
+}
+
 func BenchmarkIndexDBAddTSIDs(b *testing.B) {
 	const recordsPerLoop = 1e3

-	metricIDCache := fastcache.New(1234)
-	metricNameCache := fastcache.New(1234)
-	defer metricIDCache.Reset()
-	defer metricNameCache.Reset()
+	metricIDCache := workingsetcache.New(1234, time.Hour)
+	metricNameCache := workingsetcache.New(1234, time.Hour)
+	defer metricIDCache.Stop()
+	defer metricNameCache.Stop()

 	var hmCurr atomic.Value
 	hmCurr.Store(&hourMetricIDs{})
@@ -78,86 +108,11 @@ func benchmarkIndexDBAddTSIDs(db *indexDB, tsid *TSID, mn *MetricName, startOffs
 	}
 }

-func BenchmarkIndexDBSearchTSIDs(b *testing.B) {
-	metricIDCache := fastcache.New(1234)
-	metricNameCache := fastcache.New(1234)
-	defer metricIDCache.Reset()
-	defer metricNameCache.Reset()
-
-	var hmCurr atomic.Value
-	hmCurr.Store(&hourMetricIDs{})
-	var hmPrev atomic.Value
-	hmPrev.Store(&hourMetricIDs{})
-
-	const dbName = "bench-index-db-search-tsids"
-	db, err := openIndexDB(dbName, metricIDCache, metricNameCache, &hmCurr, &hmPrev)
-	if err != nil {
-		b.Fatalf("cannot open indexDB: %s", err)
-	}
-	defer func() {
-		db.MustClose()
-		if err := os.RemoveAll(dbName); err != nil {
-			b.Fatalf("cannot remove indexDB: %s", err)
-		}
-	}()
-
-	const recordsCount = 1e5
-
-	// Fill the db with recordsCount records.
-	var mn MetricName
-	mn.MetricGroup = []byte("rps")
-	for i := 0; i < 2; i++ {
-		key := fmt.Sprintf("key_%d", i)
-		value := fmt.Sprintf("value_%d", i)
-		mn.AddTag(key, value)
-	}
-	var tsid TSID
-	var metricName []byte
-	is := db.getIndexSearch()
-	defer db.putIndexSearch(is)
-	for i := 0; i < recordsCount; i++ {
-		mn.sortTags()
-		metricName = mn.Marshal(metricName[:0])
-		if err := is.GetOrCreateTSIDByName(&tsid, metricName); err != nil {
-			b.Fatalf("cannot insert record: %s", err)
-		}
-	}
-
-	b.SetBytes(1)
-	b.ReportAllocs()
-	b.ResetTimer()
-	b.RunParallel(func(pb *testing.PB) {
-		tags := []Tag{
-			{[]byte("key_0"), []byte("value_0")},
-			{[]byte("key_1"), []byte("value_1")},
-		}
-		var tfs TagFilters
-		tfss := []*TagFilters{&tfs}
-		i := 0
-		for pb.Next() {
-			tfs.Reset()
-			for j := range tags {
-				if err := tfs.Add(tags[j].Key, tags[j].Value, false, false); err != nil {
-					panic(fmt.Errorf("BUG: unexpected error: %s", err))
-				}
-			}
-			tsids, err := db.searchTSIDs(tfss, TimeRange{}, 1e5)
-			if err != nil {
-				panic(fmt.Errorf("unexpected error in search for tfs=%s: %s", &tfs, err))
-			}
-			if len(tsids) == 0 && i < recordsCount {
-				panic(fmt.Errorf("zero tsids found for tfs=%s", &tfs))
-			}
-			i++
-		}
-	})
-}
-
 func BenchmarkIndexDBGetTSIDs(b *testing.B) {
-	metricIDCache := fastcache.New(1234)
-	metricNameCache := fastcache.New(1234)
-	defer metricIDCache.Reset()
-	defer metricNameCache.Reset()
+	metricIDCache := workingsetcache.New(1234, time.Hour)
+	metricNameCache := workingsetcache.New(1234, time.Hour)
+	defer metricIDCache.Stop()
+	defer metricNameCache.Stop()

 	var hmCurr atomic.Value
 	hmCurr.Store(&hourMetricIDs{})
--- a/lib/storage/metric_name.go
+++ b/lib/storage/metric_name.go
@@ -9,6 +9,7 @@ import (

 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
 )

@@ -366,17 +367,8 @@ func (mn *MetricName) Unmarshal(src []byte) error {
 		}
 	}

-	// Verify no identical tag keys.
-	if len(mn.Tags) > 0 {
-		prevKey := mn.Tags[0].Key
-		for i := range mn.Tags[1:] {
-			t := &mn.Tags[1+i]
-			if bytes.Equal(t.Key, prevKey) {
-				return fmt.Errorf("found duplicate key %q", prevKey)
-			}
-			prevKey = t.Key
-		}
-	}
+	// There is no need in verifying for identical tag keys,
+	// since they must be handled in MetricName.Marshal inside marshalTags.

 	return nil
 }
@@ -392,9 +384,18 @@ const maxLabelNameLen = 256
 const maxLabelValueLen = 16 * 1024

 // The maximum number of labels per each timeseries.
+var maxLabelsPerTimeseries = 30
+
+// SetMaxLabelsPerTimeseries sets the limit on the number of labels
+// per each time series.
 //
-// Superflouos lables are dropped.
-const maxLabelsPerTimeseries = 30
+// Superfouos labels are dropped.
+func SetMaxLabelsPerTimeseries(maxLabels int) {
+	if maxLabels <= 0 {
+		logger.Panicf("BUG: maxLabels must be positive; got %d", maxLabels)
+	}
+	maxLabelsPerTimeseries = maxLabels
+}

 // MarshalMetricNameRaw marshals labels to dst and returns the result.
 //
@@ -574,8 +575,15 @@ func (ts *canonicalTagsSort) Swap(i, j int) {
 }

 func marshalTags(dst []byte, tags []Tag) []byte {
+	var prevKey []byte
 	for i := range tags {
-		dst = tags[i].Marshal(dst)
+		t := &tags[i]
+		if string(prevKey) == string(t.Key) {
+			// Skip duplicate keys, since they aren't allowed in Prometheus data model.
+			continue
+		}
+		prevKey = t.Key
+		dst = t.Marshal(dst)
 	}
 	return dst
 }
--- a/lib/storage/metric_name_test.go
+++ b/lib/storage/metric_name_test.go
@@ -34,6 +34,32 @@ func testMetricNameSortTags(t *testing.T, tags, expectedTags []string) {
 	}
 }

+func TestMetricNameMarshalDuplicateKeys(t *testing.T) {
+	var mn MetricName
+	mn.MetricGroup = []byte("xxx")
+	mn.AddTag("foo", "bar")
+	mn.AddTag("duplicate", "tag")
+	mn.AddTag("duplicate", "tag")
+	mn.AddTag("tt", "xx")
+	mn.AddTag("duplicate", "tag2")
+
+	var mnExpected MetricName
+	mnExpected.MetricGroup = []byte("xxx")
+	mnExpected.AddTag("duplicate", "tag")
+	mnExpected.AddTag("foo", "bar")
+	mnExpected.AddTag("tt", "xx")
+
+	mn.sortTags()
+	data := mn.Marshal(nil)
+	var mn1 MetricName
+	if err := mn1.Unmarshal(data); err != nil {
+		t.Fatalf("cannot unmarshal mn %s: %s", &mn, err)
+	}
+	if !reflect.DeepEqual(&mnExpected, &mn1) {
+		t.Fatalf("unexpected mn unmarshaled;\ngot\n%+v\nwant\n%+v", &mn1, &mnExpected)
+	}
+}
+
 func TestMetricNameMarshalUnmarshal(t *testing.T) {
 	for i := 0; i < 10; i++ {
 		for tagsCount := 0; tagsCount < 10; tagsCount++ {
--- a/lib/storage/part.go
+++ b/lib/storage/part.go
@@ -144,7 +144,7 @@ func (p *part) MustClose() {
 	p.valuesFile.MustClose()
 	p.indexFile.MustClose()

-	isBig := p.ph.RowsCount > maxRowsPerSmallPart
+	isBig := p.ph.RowsCount > maxRowsPerSmallPart()
 	p.ibCache.Reset(isBig)
 }

--- a/lib/storage/part_search.go
+++ b/lib/storage/part_search.go
@@ -51,6 +51,7 @@ func (ps *partSearch) reset() {
 	ps.p = nil
 	ps.tsids = ps.tsids[:0]
 	ps.tsidIdx = 0
+	ps.fetchData = true
 	ps.metaindex = nil
 	ps.ibCache = nil
 	ps.bhs = nil
--- a/lib/storage/partition.go
+++ b/lib/storage/partition.go
@@ -19,23 +19,18 @@ import (
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/memory"
-	"golang.org/x/sys/unix"
 )

-// The maximum number of rows in a small part.
-//
-// Small part merges cannot be interrupted during server stop, so this value
-// must be small enough to complete a merge
-// of `maxRowsPerSmallPart * defaultPartsToMerge` rows in a reasonable amount
-// of time (up to a a minute).
-//
-// Additionally, this number limits the maximum size of small parts storage.
-// Production simultation shows that the required size of the storage
-// may be estimated as:
-//
-//     maxRowsPerSmallPart * 2 * defaultPartsToMerge * mergeWorkers
-//
-const maxRowsPerSmallPart = 300e6
+func maxRowsPerSmallPart() uint64 {
+	// Small parts are cached in the OS page cache,
+	// so limit the number of rows for small part
+	// by the remaining free RAM.
+	mem := memory.Remaining()
+	if mem <= 0 {
+		return 100e6
+	}
+	return uint64(mem) / defaultPartsToMerge
+}

 // The maximum number of rows per big part.
 //
@@ -813,8 +808,8 @@ func (pt *partition) partsMerger(mergerFunc func(isFinal bool) error) error {
 	}
 }

-func (pt *partition) maxOutPartRows() uint64 {
-	freeSpace := mustGetFreeDiskSpace(pt.bigPartsPath)
+func maxRowsByPath(path string) uint64 {
+	freeSpace := mustGetFreeDiskSpace(path)

 	// Calculate the maximum number of rows in the output merge part
 	// by dividing the freeSpace by the number of concurrent
@@ -822,7 +817,11 @@ func (pt *partition) maxOutPartRows() uint64 {
 	// This assumes each row is compressed into 1 byte. Production
 	// simulation shows that each row usually occupies up to 0.5 bytes,
 	// so this is quite safe assumption.
-	return freeSpace / uint64(mergeWorkers)
+	maxRows := freeSpace / uint64(mergeWorkers)
+	if maxRows > maxRowsPerBigPart {
+		maxRows = maxRowsPerBigPart
+	}
+	return maxRows
 }

 func mustGetFreeDiskSpace(path string) uint64 {
@@ -838,18 +837,7 @@ func mustGetFreeDiskSpace(path string) uint64 {

 	// Slow path.
 	// Determine the amount of free space on bigPartsPath.
-	d, err := os.Open(path)
-	if err != nil {
-		logger.Panicf("FATAL: cannot determine free disk space on %q: %s", path, err)
-	}
-	defer fs.MustClose(d)
-
-	fd := d.Fd()
-	var stat unix.Statfs_t
-	if err := unix.Fstatfs(int(fd), &stat); err != nil {
-		logger.Panicf("FATAL: cannot determine free disk space on %q: %s", path, err)
-	}
-	e.freeSpace = stat.Bavail * uint64(stat.Bsize)
+	e.freeSpace = fs.MustGetFreeSpace(path)
 	e.updateTime = time.Now()
 	freeSpaceMap[path] = e
 	return e.freeSpace
@@ -866,10 +854,7 @@ type freeSpaceEntry struct {
 }

 func (pt *partition) mergeBigParts(isFinal bool) error {
-	maxRows := pt.maxOutPartRows()
-	if maxRows > maxRowsPerBigPart {
-		maxRows = maxRowsPerBigPart
-	}
+	maxRows := maxRowsByPath(pt.bigPartsPath)

 	pt.partsLock.Lock()
 	pws := getPartsToMerge(pt.bigParts, maxRows, isFinal)
@@ -888,7 +873,15 @@ func (pt *partition) mergeBigParts(isFinal bool) error {
 }

 func (pt *partition) mergeSmallParts(isFinal bool) error {
-	maxRows := uint64(maxRowsPerSmallPart * defaultPartsToMerge)
+	maxRows := maxRowsByPath(pt.smallPartsPath)
+	if maxRows > maxRowsPerSmallPart() {
+		// The output part may go to big part,
+		// so make sure it as enough space.
+		maxBigPartRows := maxRowsByPath(pt.bigPartsPath)
+		if maxRows > maxBigPartRows {
+			maxRows = maxBigPartRows
+		}
+	}

 	pt.partsLock.Lock()
 	pws := getPartsToMerge(pt.smallParts, maxRows, isFinal)
@@ -951,7 +944,7 @@ func (pt *partition) mergeParts(pws []*partWrapper, stopCh <-chan struct{}) erro
 	for _, pw := range pws {
 		outRowsCount += pw.p.ph.RowsCount
 	}
-	isBigPart := outRowsCount > maxRowsPerSmallPart
+	isBigPart := outRowsCount > maxRowsPerSmallPart()
 	nocache := isBigPart

 	// Prepare BlockStreamWriter for destination part.
@@ -1008,7 +1001,7 @@ func (pt *partition) mergeParts(pws []*partWrapper, stopCh <-chan struct{}) erro
 	}
 	fmt.Fprintf(&bb, "%s -> %s\n", tmpPartPath, dstPartPath)
 	txnPath := fmt.Sprintf("%s/txn/%016X", ptPath, mergeIdx)
-	if err := fs.WriteFile(txnPath, bb.B); err != nil {
+	if err := fs.WriteFileAtomically(txnPath, bb.B); err != nil {
 		return fmt.Errorf("cannot create transaction file %q: %s", txnPath, err)
 	}

@@ -1367,7 +1360,12 @@ func runTransactions(txnLock *sync.RWMutex, pathPrefix1, pathPrefix2, path strin
 	})

 	for _, fi := range fis {
-		txnPath := txnDir + "/" + fi.Name()
+		fn := fi.Name()
+		if fs.IsTemporaryFileName(fn) {
+			// Skip temporary files, which could be left after unclean shutdown.
+			continue
+		}
+		txnPath := txnDir + "/" + fn
 		if err := runTransaction(txnLock, pathPrefix1, pathPrefix2, txnPath); err != nil {
 			return fmt.Errorf("cannot run transaction from %q: %s", txnPath, err)
 		}
--- a/lib/storage/partition_test.go
+++ b/lib/storage/partition_test.go
@@ -6,11 +6,8 @@ import (
 	"testing"
 )

-func TestPartitionMaxOutPartRows(t *testing.T) {
-	pt := &partition{
-		bigPartsPath: ".",
-	}
-	n := pt.maxOutPartRows()
+func TestPartitionMaxRowsByPath(t *testing.T) {
+	n := maxRowsByPath(".")
 	if n < 1e3 {
 		t.Fatalf("too small number of rows can be created in the current directory: %d", n)
 	}
--- a/lib/storage/storage.go
+++ b/lib/storage/storage.go
@@ -20,8 +20,8 @@ import (
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/memory"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/timerpool"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/workingsetcache"
 	"github.com/VictoriaMetrics/fastcache"
-	"golang.org/x/sys/unix"
 )

 const maxRetentionMonths = 12 * 100
@@ -40,16 +40,16 @@ type Storage struct {
 	tb *table

 	// tsidCache is MetricName -> TSID cache.
-	tsidCache *fastcache.Cache
+	tsidCache *workingsetcache.Cache

 	// metricIDCache is MetricID -> TSID cache.
-	metricIDCache *fastcache.Cache
+	metricIDCache *workingsetcache.Cache

 	// metricNameCache is MetricID -> MetricName cache.
-	metricNameCache *fastcache.Cache
+	metricNameCache *workingsetcache.Cache

 	// dateMetricIDCache is (Date, MetricID) cache.
-	dateMetricIDCache *fastcache.Cache
+	dateMetricIDCache *workingsetcache.Cache

 	// Fast cache for MetricID values occured during the current hour.
 	currHourMetricIDs atomic.Value
@@ -68,6 +68,10 @@ type Storage struct {

 	tooSmallTimestampRows uint64
 	tooBigTimestampRows   uint64
+
+	addRowsConcurrencyLimitReached uint64
+	addRowsConcurrencyLimitTimeout uint64
+	addRowsConcurrencyDroppedRows  uint64
 }

 // OpenStorage opens storage on the given path with the given number of retention months.
@@ -99,13 +103,10 @@ func OpenStorage(path string, retentionMonths int) (*Storage, error) {
 		return nil, fmt.Errorf("cannot create %q: %s", snapshotsPath, err)
 	}

-	flockFile := path + "/flock.lock"
-	flockF, err := os.Create(flockFile)
+	// Protect from concurrent opens.
+	flockF, err := fs.CreateFlockFile(path)
 	if err != nil {
-		return nil, fmt.Errorf("cannot create lock file %q: %s", flockFile, err)
-	}
-	if err := unix.Flock(int(flockF.Fd()), unix.LOCK_EX|unix.LOCK_NB); err != nil {
-		return nil, fmt.Errorf("cannot acquire lock on file %q: %s", flockFile, err)
+		return nil, err
 	}
 	s.flockF = flockF

@@ -277,6 +278,12 @@ type Metrics struct {
 	TooSmallTimestampRows uint64
 	TooBigTimestampRows   uint64

+	AddRowsConcurrencyLimitReached uint64
+	AddRowsConcurrencyLimitTimeout uint64
+	AddRowsConcurrencyDroppedRows  uint64
+	AddRowsConcurrencyCapacity     uint64
+	AddRowsConcurrencyCurrent      uint64
+
 	TSIDCacheSize       uint64
 	TSIDCacheSizeBytes  uint64
 	TSIDCacheRequests   uint64
@@ -317,6 +324,12 @@ func (s *Storage) UpdateMetrics(m *Metrics) {
 	m.TooSmallTimestampRows += atomic.LoadUint64(&s.tooSmallTimestampRows)
 	m.TooBigTimestampRows += atomic.LoadUint64(&s.tooBigTimestampRows)

+	m.AddRowsConcurrencyLimitReached += atomic.LoadUint64(&s.addRowsConcurrencyLimitReached)
+	m.AddRowsConcurrencyLimitTimeout += atomic.LoadUint64(&s.addRowsConcurrencyLimitTimeout)
+	m.AddRowsConcurrencyDroppedRows += atomic.LoadUint64(&s.addRowsConcurrencyDroppedRows)
+	m.AddRowsConcurrencyCapacity = uint64(cap(addRowsConcurrencyCh))
+	m.AddRowsConcurrencyCurrent = uint64(len(addRowsConcurrencyCh))
+
 	var cs fastcache.Stats
 	s.tsidCache.UpdateStats(&cs)
 	m.TSIDCacheSize += cs.EntriesCount
@@ -448,10 +461,10 @@ func (s *Storage) MustClose() {
 	s.idb().MustClose()

 	// Save caches.
-	s.mustSaveCache(s.tsidCache, "MetricName->TSID", "metricName_tsid")
-	s.mustSaveCache(s.metricIDCache, "MetricID->TSID", "metricID_tsid")
-	s.mustSaveCache(s.metricNameCache, "MetricID->MetricName", "metricID_metricName")
-	s.mustSaveCache(s.dateMetricIDCache, "Date->MetricID", "date_metricID")
+	s.mustSaveAndStopCache(s.tsidCache, "MetricName->TSID", "metricName_tsid")
+	s.mustSaveAndStopCache(s.metricIDCache, "MetricID->TSID", "metricID_tsid")
+	s.mustSaveAndStopCache(s.metricNameCache, "MetricID->MetricName", "metricID_metricName")
+	s.mustSaveAndStopCache(s.dateMetricIDCache, "Date->MetricID", "date_metricID")

 	hmCurr := s.currHourMetricIDs.Load().(*hourMetricIDs)
 	s.mustSaveHourMetricIDs(hmCurr, "curr_hour_metric_ids")
@@ -530,11 +543,11 @@ func (s *Storage) mustSaveHourMetricIDs(hm *hourMetricIDs, name string) {
 	logger.Infof("saved %s to %q in %s; entriesCount: %d; sizeBytes: %d", name, path, time.Since(startTime), len(hm.m), len(dst))
 }

-func (s *Storage) mustLoadCache(info, name string, sizeBytes int) *fastcache.Cache {
+func (s *Storage) mustLoadCache(info, name string, sizeBytes int) *workingsetcache.Cache {
 	path := s.cachePath + "/" + name
 	logger.Infof("loading %s cache from %q...", info, path)
 	startTime := time.Now()
-	c := fastcache.LoadFromFileOrNew(path, sizeBytes)
+	c := workingsetcache.Load(path, sizeBytes, time.Hour)
 	var cs fastcache.Stats
 	c.UpdateStats(&cs)
 	logger.Infof("loaded %s cache from %q in %s; entriesCount: %d; sizeBytes: %d",
@@ -542,17 +555,16 @@ func (s *Storage) mustLoadCache(info, name string, sizeBytes int) *fastcache.Cac
 	return c
 }

-func (s *Storage) mustSaveCache(c *fastcache.Cache, info, name string) {
-	gomaxprocs := runtime.GOMAXPROCS(-1)
+func (s *Storage) mustSaveAndStopCache(c *workingsetcache.Cache, info, name string) {
 	path := s.cachePath + "/" + name
 	logger.Infof("saving %s cache to %q...", info, path)
 	startTime := time.Now()
-	if err := c.SaveToFileConcurrent(path, gomaxprocs); err != nil {
+	if err := c.Save(path); err != nil {
 		logger.Panicf("FATAL: cannot save %s cache to %q: %s", info, path, err)
 	}
 	var cs fastcache.Stats
 	c.UpdateStats(&cs)
-	c.Reset()
+	c.Stop()
 	logger.Infof("saved %s cache to %q in %s; entriesCount: %d; sizeBytes: %d",
 		info, path, time.Since(startTime), cs.EntriesCount, cs.BytesSize)
 }
@@ -722,15 +734,24 @@ func (s *Storage) AddRows(mrs []MetricRow, precisionBits uint8) error {
 	// Limit the number of concurrent goroutines that may add rows to the storage.
 	// This should prevent from out of memory errors and CPU trashing when too many
 	// goroutines call AddRows.
-	t := timerpool.Get(addRowsTimeout)
 	select {
 	case addRowsConcurrencyCh <- struct{}{}:
-		timerpool.Put(t)
 		defer func() { <-addRowsConcurrencyCh }()
-	case <-t.C:
-		timerpool.Put(t)
-		return fmt.Errorf("Cannot add %d rows to storage in %s, since it is overloaded with %d concurrent writers. Add more CPUs or reduce load",
-			len(mrs), addRowsTimeout, cap(addRowsConcurrencyCh))
+	default:
+		// Sleep for a while until giving up
+		atomic.AddUint64(&s.addRowsConcurrencyLimitReached, 1)
+		t := timerpool.Get(addRowsTimeout)
+		select {
+		case addRowsConcurrencyCh <- struct{}{}:
+			timerpool.Put(t)
+			defer func() { <-addRowsConcurrencyCh }()
+		case <-t.C:
+			timerpool.Put(t)
+			atomic.AddUint64(&s.addRowsConcurrencyLimitTimeout, 1)
+			atomic.AddUint64(&s.addRowsConcurrencyDroppedRows, uint64(len(mrs)))
+			return fmt.Errorf("Cannot add %d rows to storage in %s, since it is overloaded with %d concurrent writers. Add more CPUs or reduce load",
+				len(mrs), addRowsTimeout, cap(addRowsConcurrencyCh))
+		}
 	}

 	// Add rows to the storage.
@@ -748,7 +769,9 @@ var (
 )

 func (s *Storage) add(rows []rawRow, mrs []MetricRow, precisionBits uint8) ([]rawRow, error) {
-	var errors []error
+	// Return only the last error, since it has no sense in returning all errors.
+	var lastError error
+
 	var is *indexSearch
 	var mn *MetricName
 	var kb *bytesutil.ByteBuffer
@@ -771,11 +794,13 @@ func (s *Storage) add(rows []rawRow, mrs []MetricRow, precisionBits uint8) ([]ra
 		}
 		if mr.Timestamp < minTimestamp {
 			// Skip rows with too small timestamps outside the retention.
+			lastError = fmt.Errorf("cannot insert row with too small timestamp %d outside the retention; minimum allowed timestamp is %d", mr.Timestamp, minTimestamp)
 			atomic.AddUint64(&s.tooSmallTimestampRows, 1)
 			continue
 		}
 		if mr.Timestamp > maxTimestamp {
 			// Skip rows with too big timestamps significantly exceeding the current time.
+			lastError = fmt.Errorf("cannot insert row with too big timestamp %d exceeding the current time; maximum allowd timestamp is %d", mr.Timestamp, maxTimestamp)
 			atomic.AddUint64(&s.tooBigTimestampRows, 1)
 			continue
 		}
@@ -805,8 +830,7 @@ func (s *Storage) add(rows []rawRow, mrs []MetricRow, precisionBits uint8) ([]ra
 			// Do not stop adding rows on error - just skip invalid row.
 			// This guarantees that invalid rows don't prevent
 			// from adding valid rows into the storage.
-			err = fmt.Errorf("cannot unmarshal MetricNameRaw %q: %s", mr.MetricNameRaw, err)
-			errors = append(errors, err)
+			lastError = fmt.Errorf("cannot unmarshal MetricNameRaw %q: %s", mr.MetricNameRaw, err)
 			j--
 			continue
 		}
@@ -816,8 +840,7 @@ func (s *Storage) add(rows []rawRow, mrs []MetricRow, precisionBits uint8) ([]ra
 			// Do not stop adding rows on error - just skip invalid row.
 			// This guarantees that invalid rows don't prevent
 			// from adding valid rows into the storage.
-			err = fmt.Errorf("cannot obtain TSID for MetricName %q: %s", kb.B, err)
-			errors = append(errors, err)
+			lastError = fmt.Errorf("cannot obtain TSID for MetricName %q: %s", kb.B, err)
 			j--
 			continue
 		}
@@ -831,18 +854,16 @@ func (s *Storage) add(rows []rawRow, mrs []MetricRow, precisionBits uint8) ([]ra
 	rows = rows[:rowsLen+j]

 	if err := s.tb.AddRows(rows); err != nil {
-		err = fmt.Errorf("cannot add rows to table: %s", err)
-		errors = append(errors, err)
+		lastError = fmt.Errorf("cannot add rows to table: %s", err)
 	}
-	errors = s.updateDateMetricIDCache(rows, errors)
-	if len(errors) > 0 {
-		// Return only the first error, since it has no sense in returning all errors.
-		return rows, fmt.Errorf("errors occurred during rows addition: %s", errors[0])
+	lastError = s.updateDateMetricIDCache(rows, lastError)
+	if lastError != nil {
+		return rows, fmt.Errorf("errors occurred during rows addition: %s", lastError)
 	}
 	return rows, nil
 }

-func (s *Storage) updateDateMetricIDCache(rows []rawRow, errors []error) []error {
+func (s *Storage) updateDateMetricIDCache(rows []rawRow, lastError error) error {
 	var date uint64
 	var hour uint64
 	var prevTimestamp int64
@@ -884,11 +905,11 @@ func (s *Storage) updateDateMetricIDCache(rows []rawRow, errors []error) []error
 		// by concurrent goroutines.
 		s.dateMetricIDCache.Set(keyBuf, nil)
 		if err := idb.storeDateMetricID(date, metricID); err != nil {
-			errors = append(errors, err)
+			lastError = err
 			continue
 		}
 	}
-	return errors
+	return lastError
 }

 func (s *Storage) updateCurrHourMetricIDs() {
@@ -950,7 +971,7 @@ func (s *Storage) putTSIDToCache(tsid *TSID, metricName []byte) {
 	s.tsidCache.Set(metricName, buf)
 }

-func openIndexDBTables(path string, metricIDCache, metricNameCache *fastcache.Cache, currHourMetricIDs, prevHourMetricIDs *atomic.Value) (curr, prev *indexDB, err error) {
+func openIndexDBTables(path string, metricIDCache, metricNameCache *workingsetcache.Cache, currHourMetricIDs, prevHourMetricIDs *atomic.Value) (curr, prev *indexDB, err error) {
 	if err := fs.MkdirAllIfNotExist(path); err != nil {
 		return nil, nil, fmt.Errorf("cannot create directory %q: %s", path, err)
 	}
--- a/lib/storage/storage_test.go
+++ b/lib/storage/storage_test.go
@@ -349,7 +349,8 @@ func testStorageRandTimestamps(s *Storage) error {
 			mrs = append(mrs, mr)
 		}
 		if err := s.AddRows(mrs, defaultPrecisionBits); err != nil {
-			if !strings.Contains(err.Error(), "too big timestamp") {
+			errStr := err.Error()
+			if !strings.Contains(errStr, "too big timestamp") && !strings.Contains(errStr, "too small timestamp") {
 				return fmt.Errorf("unexpected error when adding mrs: %s", err)
 			}
 		}
--- a/lib/storage/table.go
+++ b/lib/storage/table.go
@@ -10,7 +10,6 @@ import (

 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
-	"golang.org/x/sys/unix"
 )

 // table represents a single table with time series data.
@@ -84,13 +83,10 @@ func openTable(path string, retentionMonths int, getDeletedMetricIDs func() map[
 		return nil, fmt.Errorf("cannot create directory for table %q: %s", path, err)
 	}

-	flockFile := path + "/flock.lock"
-	flockF, err := os.Create(flockFile)
+	// Protect from concurrent opens.
+	flockF, err := fs.CreateFlockFile(path)
 	if err != nil {
-		return nil, fmt.Errorf("cannot create lock file %q: %s", flockFile, err)
-	}
-	if err := unix.Flock(int(flockF.Fd()), unix.LOCK_EX|unix.LOCK_NB); err != nil {
-		return nil, fmt.Errorf("cannot acquire lock on file %q: %s", flockFile, err)
+		return nil, err
 	}

 	// Create directories for small and big partitions if they don't exist yet.
--- a/lib/storage/tag_filters.go
+++ b/lib/storage/tag_filters.go
@@ -592,15 +592,18 @@ func extractRegexpPrefix(b []byte) ([]byte, []byte) {
 	if re == emptyRegexp {
 		return nil, nil
 	}
-	if re.Op == syntax.OpLiteral {
+	if re.Op == syntax.OpLiteral && re.Flags&syntax.FoldCase == 0 {
 		return []byte(string(re.Rune)), nil
 	}
 	var prefix []byte
-	if re.Op == syntax.OpConcat && re.Sub[0].Op == syntax.OpLiteral {
-		prefix = []byte(string(re.Sub[0].Rune))
-		re.Sub = re.Sub[1:]
-		if len(re.Sub) == 0 {
-			return nil, nil
+	if re.Op == syntax.OpConcat {
+		sub0 := re.Sub[0]
+		if sub0.Op == syntax.OpLiteral && sub0.Flags&syntax.FoldCase == 0 {
+			prefix = []byte(string(sub0.Rune))
+			re.Sub = re.Sub[1:]
+			if len(re.Sub) == 0 {
+				return nil, nil
+			}
 		}
 	}
 	if _, err := syntax.Compile(re); err != nil {
--- a/lib/storage/tag_filters_test.go
+++ b/lib/storage/tag_filters_test.go
@@ -5,6 +5,21 @@ import (
 	"testing"
 )

+func TestExtractRegexpPrefix(t *testing.T) {
+	f := func(s string, expectedPrefix, expectedSuffix string) {
+		t.Helper()
+		prefix, suffix := extractRegexpPrefix([]byte(s))
+		if string(prefix) != expectedPrefix {
+			t.Fatalf("unexpected prefix for %q; got %q; want %q", s, prefix, expectedPrefix)
+		}
+		if string(suffix) != expectedSuffix {
+			t.Fatalf("unexpected suffix for %q; got %q; want %q", s, suffix, expectedSuffix)
+		}
+	}
+	f("", "", "")
+	f("foobar", "foobar", "")
+}
+
 func TestGetRegexpFromCache(t *testing.T) {
 	f := func(s string, orValuesExpected, expectedMatches, expectedMismatches []string) {
 		t.Helper()
@@ -397,67 +412,73 @@ func TestGetOrValues(t *testing.T) {
 }

 func TestGetRegexpPrefix(t *testing.T) {
-	testGetRegexpPrefix(t, "", "", "")
-	testGetRegexpPrefix(t, "^", "", "")
-	testGetRegexpPrefix(t, "$", "", "")
-	testGetRegexpPrefix(t, "^()$", "", "")
-	testGetRegexpPrefix(t, "^(?:)$", "", "")
-	testGetRegexpPrefix(t, "foobar", "foobar", "")
-	testGetRegexpPrefix(t, "foo$|^foobar", "foo", "(?:(?:)|bar)")
-	testGetRegexpPrefix(t, "^(foo$|^foobar)$", "foo", "(?:(?:)|bar)")
-	testGetRegexpPrefix(t, "foobar|foobaz", "fooba", "[rz]")
-	testGetRegexpPrefix(t, "(fo|(zar|bazz)|x)", "", "fo|zar|bazz|x")
-	testGetRegexpPrefix(t, "(тестЧЧ|тест)", "тест", "(?:ЧЧ|(?:))")
-	testGetRegexpPrefix(t, "foo(bar|baz|bana)", "fooba", "(?:[rz]|na)")
-	testGetRegexpPrefix(t, "^foobar|foobaz", "fooba", "[rz]")
-	testGetRegexpPrefix(t, "^foobar|^foobaz$", "fooba", "[rz]")
-	testGetRegexpPrefix(t, "foobar|foobaz", "fooba", "[rz]")
-	testGetRegexpPrefix(t, "(?:^foobar|^foobaz)aa.*", "fooba", "[rz]aa(?-s:.)*")
-	testGetRegexpPrefix(t, "foo[bar]+", "foo", "[a-br]+")
-	testGetRegexpPrefix(t, "foo[a-z]+", "foo", "[a-z]+")
-	testGetRegexpPrefix(t, "foo[bar]*", "foo", "[a-br]*")
-	testGetRegexpPrefix(t, "foo[a-z]*", "foo", "[a-z]*")
-	testGetRegexpPrefix(t, "foo[x]+", "foo", "x+")
-	testGetRegexpPrefix(t, "foo[^x]+", "foo", "[^x]+")
-	testGetRegexpPrefix(t, "foo[x]*", "foo", "x*")
-	testGetRegexpPrefix(t, "foo[^x]*", "foo", "[^x]*")
-	testGetRegexpPrefix(t, "foo[x]*bar", "foo", "x*bar")
-	testGetRegexpPrefix(t, "fo\\Bo[x]*bar?", "fo", "\\Box*bar?")
+	f := func(t *testing.T, s, expectedPrefix, expectedSuffix string) {
+		t.Helper()
+
+		prefix, suffix := getRegexpPrefix([]byte(s))
+		if string(prefix) != expectedPrefix {
+			t.Fatalf("unexpected prefix for s=%q; got %q; want %q", s, prefix, expectedPrefix)
+		}
+		if string(suffix) != expectedSuffix {
+			t.Fatalf("unexpected suffix for s=%q; got %q; want %q", s, suffix, expectedSuffix)
+		}
+
+		// Get the prefix from cache.
+		prefix, suffix = getRegexpPrefix([]byte(s))
+		if string(prefix) != expectedPrefix {
+			t.Fatalf("unexpected prefix for s=%q; got %q; want %q", s, prefix, expectedPrefix)
+		}
+		if string(suffix) != expectedSuffix {
+			t.Fatalf("unexpected suffix for s=%q; got %q; want %q", s, suffix, expectedSuffix)
+		}
+	}
+
+	f(t, "", "", "")
+	f(t, "^", "", "")
+	f(t, "$", "", "")
+	f(t, "^()$", "", "")
+	f(t, "^(?:)$", "", "")
+	f(t, "foobar", "foobar", "")
+	f(t, "foo$|^foobar", "foo", "(?:(?:)|bar)")
+	f(t, "^(foo$|^foobar)$", "foo", "(?:(?:)|bar)")
+	f(t, "foobar|foobaz", "fooba", "[rz]")
+	f(t, "(fo|(zar|bazz)|x)", "", "fo|zar|bazz|x")
+	f(t, "(тестЧЧ|тест)", "тест", "(?:ЧЧ|(?:))")
+	f(t, "foo(bar|baz|bana)", "fooba", "(?:[rz]|na)")
+	f(t, "^foobar|foobaz", "fooba", "[rz]")
+	f(t, "^foobar|^foobaz$", "fooba", "[rz]")
+	f(t, "foobar|foobaz", "fooba", "[rz]")
+	f(t, "(?:^foobar|^foobaz)aa.*", "fooba", "[rz]aa(?-s:.)*")
+	f(t, "foo[bar]+", "foo", "[a-br]+")
+	f(t, "foo[a-z]+", "foo", "[a-z]+")
+	f(t, "foo[bar]*", "foo", "[a-br]*")
+	f(t, "foo[a-z]*", "foo", "[a-z]*")
+	f(t, "foo[x]+", "foo", "x+")
+	f(t, "foo[^x]+", "foo", "[^x]+")
+	f(t, "foo[x]*", "foo", "x*")
+	f(t, "foo[^x]*", "foo", "[^x]*")
+	f(t, "foo[x]*bar", "foo", "x*bar")
+	f(t, "fo\\Bo[x]*bar?", "fo", "\\Box*bar?")
+	f(t, "foo.+bar", "foo", "(?-s:.)+bar")
+	f(t, "a(b|c.*).+", "a", "(?:b|c(?-s:.)*)(?-s:.)+")
+	f(t, "ab|ac", "a", "[b-c]")
+	f(t, "(?i)xyz", "", "(?i:XYZ)")
+	f(t, "(?i)up.+x", "", "(?i:UP)(?-s:.)+(?i:X)")
+	f(t, "(?smi)xy.*z$", "", "(?i:XY)(?s:.)*(?i:Z)(?m:$)")

 	// test invalid regexps
-	testGetRegexpPrefix(t, "a(", "a(", "")
-	testGetRegexpPrefix(t, "a[", "a[", "")
-	testGetRegexpPrefix(t, "a[]", "a[]", "")
-	testGetRegexpPrefix(t, "a{", "a{", "")
-	testGetRegexpPrefix(t, "a{}", "a{}", "")
-	testGetRegexpPrefix(t, "invalid(regexp", "invalid(regexp", "")
+	f(t, "a(", "a(", "")
+	f(t, "a[", "a[", "")
+	f(t, "a[]", "a[]", "")
+	f(t, "a{", "a{", "")
+	f(t, "a{}", "a{}", "")
+	f(t, "invalid(regexp", "invalid(regexp", "")

 	// The transformed regexp mustn't match aba
-	testGetRegexpPrefix(t, "a?(^ba|c)", "", "a?(?:\\Aba|c)")
+	f(t, "a?(^ba|c)", "", "a?(?:\\Aba|c)")

 	// The transformed regexp mustn't match barx
-	testGetRegexpPrefix(t, "(foo|bar$)x*", "", "(?:foo|bar(?-m:$))x*")
-}
-
-func testGetRegexpPrefix(t *testing.T, s, expectedPrefix, expectedSuffix string) {
-	t.Helper()
-
-	prefix, suffix := getRegexpPrefix([]byte(s))
-	if string(prefix) != expectedPrefix {
-		t.Fatalf("unexpected prefix for s=%q; got %q; want %q", s, prefix, expectedPrefix)
-	}
-	if string(suffix) != expectedSuffix {
-		t.Fatalf("unexpected suffix for s=%q; got %q; want %q", s, suffix, expectedSuffix)
-	}
-
-	// Get the prefix from cache.
-	prefix, suffix = getRegexpPrefix([]byte(s))
-	if string(prefix) != expectedPrefix {
-		t.Fatalf("unexpected prefix for s=%q; got %q; want %q", s, prefix, expectedPrefix)
-	}
-	if string(suffix) != expectedSuffix {
-		t.Fatalf("unexpected suffix for s=%q; got %q; want %q", s, suffix, expectedSuffix)
-	}
+	f(t, "(foo|bar$)x*", "", "(?:foo|bar(?-m:$))x*")
 }

 func TestTagFiltersAddEmpty(t *testing.T) {
--- a/lib/workingsetcache/cache.go
+++ b/lib/workingsetcache/cache.go
@@ -0,0 +1,255 @@
+package workingsetcache
+
+import (
+	"runtime"
+	"sync"
+	"sync/atomic"
+	"time"
+
+	"github.com/VictoriaMetrics/fastcache"
+)
+
+// Cache is a cache for working set entries.
+//
+// The cache evicts inactive entries after the given expireDuration.
+// Recently accessed entries survive expireDuration.
+//
+// Comparing to fastcache, this cache minimizes the required RAM size
+// to values smaller than maxBytes.
+type Cache struct {
+	curr atomic.Value
+	prev atomic.Value
+
+	// skipPrev indicates whether to use only curr and skip prev.
+	//
+	// This flag is set if curr is filled for more than 50% space.
+	// In this case using prev would result in RAM waste,
+	// it is better to use only curr cache with doubled size.
+	skipPrev uint64
+
+	// mu serializes access to curr, prev and skipPrev
+	// in expirationWorker and cacheSizeWatcher.
+	mu sync.Mutex
+
+	wg     sync.WaitGroup
+	stopCh chan struct{}
+
+	misses uint64
+}
+
+// Load loads the cache from filePath and limits its size to maxBytes
+// and evicts inactive entires after expireDuration.
+//
+// Stop must be called on the returned cache when it is no longer needed.
+func Load(filePath string, maxBytes int, expireDuration time.Duration) *Cache {
+	// Split maxBytes between curr and prev caches.
+	maxBytes /= 2
+	curr := fastcache.LoadFromFileOrNew(filePath, maxBytes)
+	return newWorkingSetCache(curr, maxBytes, expireDuration)
+}
+
+// New creates new cache with the given maxBytes size and the given expireDuration
+// for inactive entries.
+//
+// Stop must be called on the returned cache when it is no longer needed.
+func New(maxBytes int, expireDuration time.Duration) *Cache {
+	// Split maxBytes between curr and prev caches.
+	maxBytes /= 2
+	curr := fastcache.New(maxBytes)
+	return newWorkingSetCache(curr, maxBytes, expireDuration)
+}
+
+func newWorkingSetCache(curr *fastcache.Cache, maxBytes int, expireDuration time.Duration) *Cache {
+	prev := fastcache.New(1024)
+	var c Cache
+	c.curr.Store(curr)
+	c.prev.Store(prev)
+	c.stopCh = make(chan struct{})
+
+	c.wg.Add(1)
+	go func() {
+		defer c.wg.Done()
+		c.expirationWorker(maxBytes, expireDuration)
+	}()
+	c.wg.Add(1)
+	go func() {
+		defer c.wg.Done()
+		c.cacheSizeWatcher(maxBytes)
+	}()
+	return &c
+}
+
+func (c *Cache) expirationWorker(maxBytes int, expireDuration time.Duration) {
+	t := time.NewTicker(expireDuration / 2)
+	for {
+		select {
+		case <-c.stopCh:
+			t.Stop()
+			return
+		case <-t.C:
+		}
+
+		c.mu.Lock()
+		if atomic.LoadUint64(&c.skipPrev) != 0 {
+			// Expire prev cache and create fresh curr cache.
+			// Do not reuse prev cache, since it can have too big capacity.
+			prev := c.prev.Load().(*fastcache.Cache)
+			prev.Reset()
+			curr := c.curr.Load().(*fastcache.Cache)
+			c.prev.Store(curr)
+			curr = fastcache.New(maxBytes)
+			c.curr.Store(curr)
+		}
+		c.mu.Unlock()
+	}
+}
+
+func (c *Cache) cacheSizeWatcher(maxBytes int) {
+	t := time.NewTicker(time.Minute)
+	for {
+		select {
+		case <-c.stopCh:
+			t.Stop()
+			return
+		case <-t.C:
+		}
+		var cs fastcache.Stats
+		curr := c.curr.Load().(*fastcache.Cache)
+		curr.UpdateStats(&cs)
+		if cs.BytesSize < uint64(maxBytes)/2 {
+			continue
+		}
+
+		// curr cache size exceeds 50% of its capacity. It is better
+		// to double the size of curr cache and stop using prev cache,
+		// since this will result in higher summary cache capacity.
+		c.mu.Lock()
+		curr.Reset()
+		prev := c.prev.Load().(*fastcache.Cache)
+		prev.Reset()
+		curr = fastcache.New(maxBytes * 2)
+		c.curr.Store(curr)
+		atomic.StoreUint64(&c.skipPrev, 1)
+		c.mu.Unlock()
+		return
+	}
+}
+
+// Save safes the cache to filePath.
+func (c *Cache) Save(filePath string) error {
+	curr := c.curr.Load().(*fastcache.Cache)
+	concurrency := runtime.GOMAXPROCS(-1)
+	return curr.SaveToFileConcurrent(filePath, concurrency)
+}
+
+// Stop stops the cache.
+//
+// The cache cannot be used after the Stop call.
+func (c *Cache) Stop() {
+	close(c.stopCh)
+	c.wg.Wait()
+
+	c.Reset()
+}
+
+// Reset resets the cache.
+func (c *Cache) Reset() {
+	prev := c.prev.Load().(*fastcache.Cache)
+	prev.Reset()
+	curr := c.curr.Load().(*fastcache.Cache)
+	curr.Reset()
+
+	c.misses = 0
+}
+
+// UpdateStats updates fcs with cache stats.
+func (c *Cache) UpdateStats(fcs *fastcache.Stats) {
+	curr := c.curr.Load().(*fastcache.Cache)
+	fcsOrig := *fcs
+	curr.UpdateStats(fcs)
+	if atomic.LoadUint64(&c.skipPrev) != 0 {
+		return
+	}
+
+	fcs.Misses = fcsOrig.Misses + atomic.LoadUint64(&c.misses)
+	fcsOrig.Reset()
+	prev := c.prev.Load().(*fastcache.Cache)
+	prev.UpdateStats(&fcsOrig)
+	fcs.EntriesCount += fcsOrig.EntriesCount
+	fcs.BytesSize += fcsOrig.BytesSize
+}
+
+// Get appends the found value for the given key to dst and returns the result.
+func (c *Cache) Get(dst, key []byte) []byte {
+	curr := c.curr.Load().(*fastcache.Cache)
+	result := curr.Get(dst, key)
+	if len(result) > len(dst) {
+		// Fast path - the entry is found in the current cache.
+		return result
+	}
+	if atomic.LoadUint64(&c.skipPrev) != 0 {
+		return result
+	}
+
+	// Search for the entry in the previous cache.
+	prev := c.prev.Load().(*fastcache.Cache)
+	result = prev.Get(dst, key)
+	if len(result) <= len(dst) {
+		// Nothing found.
+		atomic.AddUint64(&c.misses, 1)
+		return result
+	}
+	// Cache the found entry in the current cache.
+	curr.Set(key, result[len(dst):])
+	return result
+}
+
+// Has verifies whether the cahce contains the given key.
+func (c *Cache) Has(key []byte) bool {
+	curr := c.curr.Load().(*fastcache.Cache)
+	if curr.Has(key) {
+		return true
+	}
+	if atomic.LoadUint64(&c.skipPrev) != 0 {
+		return false
+	}
+	prev := c.prev.Load().(*fastcache.Cache)
+	return prev.Has(key)
+}
+
+// Set sets the given value for the given key.
+func (c *Cache) Set(key, value []byte) {
+	curr := c.curr.Load().(*fastcache.Cache)
+	curr.Set(key, value)
+}
+
+// GetBig appends the found value for the given key to dst and returns the result.
+func (c *Cache) GetBig(dst, key []byte) []byte {
+	curr := c.curr.Load().(*fastcache.Cache)
+	result := curr.GetBig(dst, key)
+	if len(result) > len(dst) {
+		// Fast path - the entry is found in the current cache.
+		return result
+	}
+	if atomic.LoadUint64(&c.skipPrev) != 0 {
+		return result
+	}
+
+	// Search for the entry in the previous cache.
+	prev := c.prev.Load().(*fastcache.Cache)
+	result = prev.GetBig(dst, key)
+	if len(result) <= len(dst) {
+		// Nothing found.
+		atomic.AddUint64(&c.misses, 1)
+		return result
+	}
+	// Cache the found entry in the current cache.
+	curr.SetBig(key, result[len(dst):])
+	return result
+}
+
+// SetBig sets the given value for the given key.
+func (c *Cache) SetBig(key, value []byte) {
+	curr := c.curr.Load().(*fastcache.Cache)
+	curr.SetBig(key, value)
+}
--- a/vendor/github.com/VictoriaMetrics/metrics/process_metrics_linux.go
+++ b/vendor/github.com/VictoriaMetrics/metrics/process_metrics_linux.go
@@ -67,7 +67,11 @@ func writeProcessMetrics(w io.Writer) {
 	// It is expensive obtaining `process_open_fds` when big number of file descriptors is opened,
 	// don't do it here.

-	fmt.Fprintf(w, "process_cpu_seconds_total %g\n", float64(p.Utime+p.Stime)/userHZ)
+	utime := float64(p.Utime) / userHZ
+	stime := float64(p.Stime) / userHZ
+	fmt.Fprintf(w, "process_cpu_seconds_system_total %g\n", stime)
+	fmt.Fprintf(w, "process_cpu_seconds_total %g\n", utime+stime)
+	fmt.Fprintf(w, "process_cpu_seconds_user_total %g\n", utime)
 	fmt.Fprintf(w, "process_major_pagefaults_total %d\n", p.Majflt)
 	fmt.Fprintf(w, "process_minor_pagefaults_total %d\n", p.Minflt)
 	fmt.Fprintf(w, "process_num_threads %d\n", p.NumThreads)
--- a/vendor/github.com/klauspost/compress/LICENSE
+++ b/vendor/github.com/klauspost/compress/LICENSE
@@ -1,4 +1,5 @@
 Copyright (c) 2012 The Go Authors. All rights reserved.
+Copyright (c) 2019 Klaus Post. All rights reserved.

 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are
--- a/vendor/github.com/klauspost/compress/fse/decompress.go
+++ b/vendor/github.com/klauspost/compress/fse/decompress.go
@@ -243,7 +243,7 @@ func (s *Scratch) buildDtable() error {
 			nBits := s.actualTableLog - byte(highBits(uint32(nextState)))
 			s.decTable[u].nbBits = nBits
 			newState := (nextState << nBits) - tableSize
-			if newState > tableSize {
+			if newState >= tableSize {
 				return fmt.Errorf("newState (%d) outside table size (%d)", newState, tableSize)
 			}
 			if newState == uint16(u) && nBits == 0 {
@@ -281,8 +281,12 @@ func (s *Scratch) decompress() error {
 			tmp[off+2] = s1.nextFast()
 			tmp[off+3] = s2.nextFast()
 			off += 4
+			// When off is 0, we have overflowed and should write.
 			if off == 0 {
 				s.Out = append(s.Out, tmp...)
+				if len(s.Out) >= s.DecompressLimit {
+					return fmt.Errorf("output size (%d) > DecompressLimit (%d)", len(s.Out), s.DecompressLimit)
+				}
 			}
 		}
 	} else {
@@ -296,7 +300,7 @@ func (s *Scratch) decompress() error {
 			off += 4
 			if off == 0 {
 				s.Out = append(s.Out, tmp...)
-				off = 0
+				// When off is 0, we have overflowed and should write.
 				if len(s.Out) >= s.DecompressLimit {
 					return fmt.Errorf("output size (%d) > DecompressLimit (%d)", len(s.Out), s.DecompressLimit)
 				}
--- a/vendor/github.com/klauspost/compress/huff0/decompress.go
+++ b/vendor/github.com/klauspost/compress/huff0/decompress.go
@@ -247,9 +247,13 @@ func (s *Scratch) Decompress4X(in []byte, dstSize int) (out []byte, err error) {
 	dstOut := s.Out
 	dstEvery := (dstSize + 3) / 4

+	const tlSize = 1 << tableLogMax
+	const tlMask = tlSize - 1
+	single := s.dt.single[:tlSize]
+
 	decode := func(br *bitReader) byte {
 		val := br.peekBitsFast(s.actualTableLog) /* note : actualTableLog >= 1 */
-		v := s.dt.single[val]
+		v := single[val&tlMask]
 		br.bitsRead += v.nBits
 		return v.byte
 	}
@@ -279,7 +283,7 @@ bigloop:
 		off += 2
 		if off == bufoff {
 			if bufoff > dstEvery {
-				return nil, errors.New("corruption detected: stream overrun")
+				return nil, errors.New("corruption detected: stream overrun 1")
 			}
 			copy(dstOut, tmp[:bufoff])
 			copy(dstOut[dstEvery:], tmp[bufoff:bufoff*2])
@@ -288,15 +292,15 @@ bigloop:
 			off = 0
 			dstOut = dstOut[bufoff:]
 			// There must at least be 3 buffers left.
-			if len(dstOut) < dstEvery*3+3 {
-				return nil, errors.New("corruption detected: stream overrun")
+			if len(dstOut) < dstEvery*3 {
+				return nil, errors.New("corruption detected: stream overrun 2")
 			}
 		}
 	}
 	if off > 0 {
 		ioff := int(off)
 		if len(dstOut) < dstEvery*3+ioff {
-			return nil, errors.New("corruption detected: stream overrun")
+			return nil, errors.New("corruption detected: stream overrun 3")
 		}
 		copy(dstOut, tmp[:off])
 		copy(dstOut[dstEvery:dstEvery+ioff], tmp[bufoff:bufoff*2])
@@ -311,7 +315,7 @@ bigloop:
 		for !br.finished() {
 			br.fill()
 			if offset >= len(dstOut) {
-				return nil, errors.New("corruption detected: stream overrun")
+				return nil, errors.New("corruption detected: stream overrun 4")
 			}
 			dstOut[offset] = decode(br)
 			offset++
--- a/vendor/github.com/klauspost/compress/zstd/README.md
+++ b/vendor/github.com/klauspost/compress/zstd/README.md
@@ -34,7 +34,8 @@ For now, a high speed (fastest) and medium-fast (default) compressor has been im
 The "Fastest" compression ratio is roughly equivalent to zstd level 1. 
 The "Default" compression ration is roughly equivalent to zstd level 3 (default).

-In terms of speed, it is typically 2x as fast as the stdlib deflate/gzip in its fastest mode. The compression ratio compared to stdlib is around level 3, but usually 3x as fast.
+In terms of speed, it is typically 2x as fast as the stdlib deflate/gzip in its fastest mode. 
+The compression ratio compared to stdlib is around level 3, but usually 3x as fast.

 Compared to cgo zstd, the speed is around level 3 (default), but compression slightly worse, between level 1&2.

@@ -217,7 +218,8 @@ silesia.tar zstd    3   211947520   66793301    1377    146.79

 As part of the development process a *Snappy* -> *Zstandard* converter was also built.

-This can convert a *framed* [Snappy Stream](https://godoc.org/github.com/golang/snappy#Writer) to a zstd stream. Note that a single block is not framed.
+This can convert a *framed* [Snappy Stream](https://godoc.org/github.com/golang/snappy#Writer) to a zstd stream. 
+Note that a single block is not framed.

 Conversion is done by converting the stream directly from Snappy without intermediate full decoding.
 Therefore the compression ratio is much less than what can be done by a full decompression
--- a/vendor/github.com/klauspost/compress/zstd/blockenc.go
+++ b/vendor/github.com/klauspost/compress/zstd/blockenc.go
@@ -155,14 +155,17 @@ func (h *literalsHeader) setSize(regenLen int) {
 }

 // setSizes will set the size of a compressed literals section and the input length.
-func (h *literalsHeader) setSizes(compLen, inLen int) {
+func (h *literalsHeader) setSizes(compLen, inLen int, single bool) {
 	compBits, inBits := bits.Len32(uint32(compLen)), bits.Len32(uint32(inLen))
 	// Only retain 2 bits
 	const mask = 3
 	lh := uint64(*h & mask)
 	switch {
 	case compBits <= 10 && inBits <= 10:
-		lh |= (1 << 2) | (uint64(inLen) << 4) | (uint64(compLen) << (10 + 4)) | (3 << 60)
+		if !single {
+			lh |= 1 << 2
+		}
+		lh |= (uint64(inLen) << 4) | (uint64(compLen) << (10 + 4)) | (3 << 60)
 		if debug {
 			const mmask = (1 << 24) - 1
 			n := (lh >> 4) & mmask
@@ -175,8 +178,14 @@ func (h *literalsHeader) setSizes(compLen, inLen int) {
 		}
 	case compBits <= 14 && inBits <= 14:
 		lh |= (2 << 2) | (uint64(inLen) << 4) | (uint64(compLen) << (14 + 4)) | (4 << 60)
+		if single {
+			panic("single stream used with more than 10 bits length.")
+		}
 	case compBits <= 18 && inBits <= 18:
 		lh |= (3 << 2) | (uint64(inLen) << 4) | (uint64(compLen) << (18 + 4)) | (5 << 60)
+		if single {
+			panic("single stream used with more than 10 bits length.")
+		}
 	default:
 		panic("internal error: block too big")
 	}
@@ -307,12 +316,30 @@ func (b *blockEnc) encodeLits() error {
 		return nil
 	}

-	// TODO: Switch to 1X when less than x bytes.
-	out, reUsed, err := huff0.Compress4X(b.literals, b.litEnc)
-	// Bail out of compression is too little.
-	if len(out) > (len(b.literals) - len(b.literals)>>4) {
+	var (
+		out            []byte
+		reUsed, single bool
+		err            error
+	)
+	if len(b.literals) >= 1024 {
+		// Use 4 Streams.
+		out, reUsed, err = huff0.Compress4X(b.literals, b.litEnc)
+		if len(out) > len(b.literals)-len(b.literals)>>4 {
+			// Bail out of compression is too little.
+			err = huff0.ErrIncompressible
+		}
+	} else if len(b.literals) > 32 {
+		// Use 1 stream
+		single = true
+		out, reUsed, err = huff0.Compress1X(b.literals, b.litEnc)
+		if len(out) > len(b.literals)-len(b.literals)>>4 {
+			// Bail out of compression is too little.
+			err = huff0.ErrIncompressible
+		}
+	} else {
 		err = huff0.ErrIncompressible
 	}
+
 	switch err {
 	case huff0.ErrIncompressible:
 		if debug {
@@ -351,7 +378,7 @@ func (b *blockEnc) encodeLits() error {
 		lh.setType(literalsBlockCompressed)
 	}
 	// Set sizes
-	lh.setSizes(len(out), len(b.literals))
+	lh.setSizes(len(out), len(b.literals), single)
 	bh.setSize(uint32(len(out) + lh.size() + 1))

 	// Write block headers.
@@ -381,16 +408,23 @@ func (b *blockEnc) encode() error {
 	b.output = bh.appendTo(b.output)

 	var (
-		out    []byte
-		reUsed bool
-		err    error
+		out            []byte
+		reUsed, single bool
+		err            error
 	)
-	if len(b.literals) > 32 {
-		// TODO: Switch to 1X on small blocks.
+	if len(b.literals) >= 1024 {
+		// Use 4 Streams.
 		out, reUsed, err = huff0.Compress4X(b.literals, b.litEnc)
 		if len(out) > len(b.literals)-len(b.literals)>>4 {
 			err = huff0.ErrIncompressible
 		}
+	} else if len(b.literals) > 32 {
+		// Use 1 stream
+		single = true
+		out, reUsed, err = huff0.Compress1X(b.literals, b.litEnc)
+		if len(out) > len(b.literals)-len(b.literals)>>4 {
+			err = huff0.ErrIncompressible
+		}
 	} else {
 		err = huff0.ErrIncompressible
 	}
@@ -435,7 +469,7 @@ func (b *blockEnc) encode() error {
 				}
 			}
 		}
-		lh.setSizes(len(out), len(b.literals))
+		lh.setSizes(len(out), len(b.literals), single)
 		if debug {
 			printf("Compressed %d literals to %d bytes", len(b.literals), len(out))
 			println("Adding literal header:", lh)
--- a/vendor/github.com/klauspost/compress/zstd/bytebuf.go
+++ b/vendor/github.com/klauspost/compress/zstd/bytebuf.go
@@ -116,6 +116,9 @@ func (r *readerWrapper) readByte() (byte, error) {
 }

 func (r *readerWrapper) skipN(n int) error {
-	_, err := io.CopyN(ioutil.Discard, r.r, int64(n))
+	n2, err := io.CopyN(ioutil.Discard, r.r, int64(n))
+	if n2 != int64(n) {
+		err = io.ErrUnexpectedEOF
+	}
 	return err
 }
--- a/vendor/github.com/klauspost/compress/zstd/decoder.go
+++ b/vendor/github.com/klauspost/compress/zstd/decoder.go
@@ -281,17 +281,17 @@ func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) {
 		}
 		d.decoders <- block
 		frame.rawInput = nil
+		frame.bBuf = nil
 		d.frames <- frame
 	}()
+	frame.bBuf = input
 	if cap(dst) == 0 {
 		// Allocate 1MB by default if nothing is provided.
 		dst = make([]byte, 0, 1<<20)
 	}

-	// Allocation here:
-	br := byteBuf(input)
 	for {
-		err := frame.reset(&br)
+		err := frame.reset(&frame.bBuf)
 		if err == io.EOF {
 			return dst, nil
 		}
@@ -313,7 +313,7 @@ func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) {
 		if err != nil {
 			return dst, err
 		}
-		if len(br) == 0 {
+		if len(frame.bBuf) == 0 {
 			break
 		}
 	}
--- a/vendor/github.com/klauspost/compress/zstd/enc_dfast.go
+++ b/vendor/github.com/klauspost/compress/zstd/enc_dfast.go
@@ -82,16 +82,11 @@ func (e *doubleFastEncoder) Encode(blk *blockEnc, src []byte) {
 		stepSize++
 	}

-	// TEMPLATE
-
 	const kSearchStrength = 8

 	// nextEmit is where in src the next emitLiteral should start from.
 	nextEmit := s
 	cv := load6432(src, s)
-	// nextHash is the hash at s
-	nextHashS := hash5(cv, dFastShortTableBits)
-	nextHashL := hash8(cv, dFastLongTableBits)

 	// Relative offsets
 	offset1 := int32(blk.recentOffsets[0])
@@ -119,8 +114,8 @@ encodeLoop:
 				panic("offset0 was 0")
 			}

-			nextHashS = nextHashS & dFastShortTableMask
-			nextHashL = nextHashL & dFastLongTableMask
+			nextHashS := hash5(cv, dFastShortTableBits)
+			nextHashL := hash8(cv, dFastLongTableBits)
 			candidateL := e.longTable[nextHashL]
 			candidateS := e.table[nextHashS]

@@ -172,8 +167,6 @@ encodeLoop:
 						break encodeLoop
 					}
 					cv = load6432(src, s)
-					nextHashS = hash5(cv, dFastShortTableBits)
-					nextHashL = hash8(cv, dFastLongTableBits)
 					continue
 				}
 				const repOff2 = 1
@@ -221,8 +214,6 @@ encodeLoop:
 						break encodeLoop
 					}
 					cv = load6432(src, s)
-					nextHashS = hash5(cv, dFastShortTableBits)
-					nextHashL = hash8(cv, dFastLongTableBits)
 					// Swap offsets
 					offset1, offset2 = offset2, offset1
 					continue
@@ -296,8 +287,6 @@ encodeLoop:
 				break encodeLoop
 			}
 			cv = load6432(src, s)
-			nextHashS = hash5(cv, dFastShortTableBits)
-			nextHashL = hash8(cv, dFastLongTableBits)
 		}

 		// A 4-byte match has been found. Update recent offsets.
@@ -354,20 +343,18 @@ encodeLoop:
 		cv1 := load6432(src, index1)
 		te0 := tableEntry{offset: index0 + e.cur, val: uint32(cv0)}
 		te1 := tableEntry{offset: index1 + e.cur, val: uint32(cv1)}
-		e.longTable[hash8(cv0, dFastLongTableBits)&dFastLongTableMask] = te0
-		e.longTable[hash8(cv1, dFastLongTableBits)&dFastLongTableMask] = te1
+		e.longTable[hash8(cv0, dFastLongTableBits)] = te0
+		e.longTable[hash8(cv1, dFastLongTableBits)] = te1
 		cv0 >>= 8
 		cv1 >>= 8
 		te0.offset++
 		te1.offset++
 		te0.val = uint32(cv0)
 		te1.val = uint32(cv1)
-		e.table[hash5(cv0, dFastShortTableBits)&dFastShortTableMask] = te0
-		e.table[hash5(cv1, dFastShortTableBits)&dFastShortTableMask] = te1
+		e.table[hash5(cv0, dFastShortTableBits)] = te0
+		e.table[hash5(cv1, dFastShortTableBits)] = te1

 		cv = load6432(src, s)
-		nextHashS = hash5(cv1>>8, dFastShortTableBits)
-		nextHashL = hash8(cv, dFastLongTableBits)

 		if !canRepeat {
 			continue
@@ -381,14 +368,17 @@ encodeLoop:
 				break
 			}

+			// Store this, since we have it.
+			nextHashS := hash5(cv1>>8, dFastShortTableBits)
+			nextHashL := hash8(cv, dFastLongTableBits)
+
 			// We have at least 4 byte match.
 			// No need to check backwards. We come straight from a match
 			l := 4 + e.matchlen(s+4, o2+4, src)

-			// Store this, since we have it.
 			entry := tableEntry{offset: s + e.cur, val: uint32(cv)}
-			e.longTable[nextHashL&dFastLongTableMask] = entry
-			e.table[nextHashS&dFastShortTableMask] = entry
+			e.longTable[nextHashL] = entry
+			e.table[nextHashS] = entry
 			seq.matchLen = uint32(l) - zstdMinMatch
 			seq.litLen = 0

@@ -408,8 +398,6 @@ encodeLoop:
 				break encodeLoop
 			}
 			cv = load6432(src, s)
-			nextHashS = hash5(cv, dFastShortTableBits)
-			nextHashL = hash8(cv, dFastLongTableBits)
 		}
 	}

--- a/vendor/github.com/klauspost/compress/zstd/enc_fast.go
+++ b/vendor/github.com/klauspost/compress/zstd/enc_fast.go
@@ -124,8 +124,6 @@ func (e *fastEncoder) Encode(blk *blockEnc, src []byte) {
 	// nextEmit is where in src the next emitLiteral should start from.
 	nextEmit := s
 	cv := load6432(src, s)
-	// nextHash is the hash at s
-	nextHash := hash6(cv, hashLog)

 	// Relative offsets
 	offset1 := int32(blk.recentOffsets[0])
@@ -157,8 +155,8 @@ encodeLoop:
 				panic("offset0 was 0")
 			}

-			nextHash2 := hash6(cv>>8, hashLog) & tableMask
-			nextHash = nextHash & tableMask
+			nextHash := hash6(cv, hashLog)
+			nextHash2 := hash6(cv>>8, hashLog)
 			candidate := e.table[nextHash]
 			candidate2 := e.table[nextHash2]
 			repIndex := s - offset1 + 2
@@ -207,8 +205,6 @@ encodeLoop:
 					break encodeLoop
 				}
 				cv = load6432(src, s)
-				//nextHash = hashLen(cv, hashLog, mls)
-				nextHash = hash6(cv, hashLog)
 				continue
 			}
 			coffset0 := s - (candidate.offset - e.cur)
@@ -245,7 +241,6 @@ encodeLoop:
 				break encodeLoop
 			}
 			cv = load6432(src, s)
-			nextHash = hash6(cv, hashLog)
 		}
 		// A 4-byte match has been found. We'll later see if more than 4 bytes.
 		offset2 = offset1
@@ -292,15 +287,16 @@ encodeLoop:
 			break encodeLoop
 		}
 		cv = load6432(src, s)
-		nextHash = hash6(cv, hashLog)

 		// Check offset 2
-		if o2 := s - offset2; canRepeat && o2 > 0 && load3232(src, o2) == uint32(cv) {
+		if o2 := s - offset2; canRepeat && load3232(src, o2) == uint32(cv) {
 			// We have at least 4 byte match.
 			// No need to check backwards. We come straight from a match
 			l := 4 + e.matchlen(s+4, o2+4, src)
+
 			// Store this, since we have it.
-			e.table[nextHash&tableMask] = tableEntry{offset: s + e.cur, val: uint32(cv)}
+			nextHash := hash6(cv, hashLog)
+			e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
 			seq.matchLen = uint32(l) - zstdMinMatch
 			seq.litLen = 0
 			// Since litlen is always 0, this is offset 1.
@@ -319,7 +315,6 @@ encodeLoop:
 			}
 			// Prepare next loop.
 			cv = load6432(src, s)
-			nextHash = hash6(cv, hashLog)
 		}
 	}

--- a/vendor/github.com/klauspost/compress/zstd/encoder.go
+++ b/vendor/github.com/klauspost/compress/zstd/encoder.go
@@ -257,7 +257,12 @@ func (e *Encoder) nextBlock(final bool) error {
 				}
 				s.wWg.Done()
 			}()
-			err := blk.encode()
+			err := errIncompressible
+			// If we got the exact same number of literals as input,
+			// assume the literals cannot be compressed.
+			if len(src) != len(blk.literals) || len(src) != e.o.blockSize {
+				err = blk.encode()
+			}
 			switch err {
 			case errIncompressible:
 				if debug {
@@ -444,7 +449,13 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte {
 		if len(src) == 0 {
 			blk.last = true
 		}
-		err := blk.encode()
+		err := errIncompressible
+		// If we got the exact same number of literals as input,
+		// assume the literals cannot be compressed.
+		if len(blk.literals) != len(todo) || len(todo) != e.o.blockSize {
+			err = blk.encode()
+		}
+
 		switch err {
 		case errIncompressible:
 			if debug {
--- a/vendor/github.com/klauspost/compress/zstd/framedec.go
+++ b/vendor/github.com/klauspost/compress/zstd/framedec.go
@@ -39,6 +39,9 @@ type frameDec struct {

 	rawInput byteBuffer

+	// Byte buffer that can be reused for small input blocks.
+	bBuf byteBuf
+
 	// asyncRunning indicates whether the async routine processes input on 'decoding'.
 	asyncRunning   bool
 	asyncRunningMu sync.Mutex
--- a/vendor/github.com/klauspost/compress/zstd/fse_decoder.go
+++ b/vendor/github.com/klauspost/compress/zstd/fse_decoder.go
@@ -184,29 +184,75 @@ func (s *fseDecoder) readNCount(b *byteReader, maxSymbol uint16) error {
 // decSymbol contains information about a state entry,
 // Including the state offset base, the output symbol and
 // the number of bits to read for the low part of the destination state.
-type decSymbol struct {
-	newState uint16
-	addBits  uint8 // Used for symbols until transformed.
-	nbBits   uint8
-	baseline uint32
+// Using a composite uint64 is faster than a struct with separate members.
+type decSymbol uint64
+
+func newDecSymbol(nbits, addBits uint8, newState uint16, baseline uint32) decSymbol {
+	return decSymbol(nbits) | (decSymbol(addBits) << 8) | (decSymbol(newState) << 16) | (decSymbol(baseline) << 32)
+}
+
+func (d decSymbol) nbBits() uint8 {
+	return uint8(d)
+}
+
+func (d decSymbol) addBits() uint8 {
+	return uint8(d >> 8)
+}
+
+func (d decSymbol) newState() uint16 {
+	return uint16(d >> 16)
+}
+
+func (d decSymbol) baseline() uint32 {
+	return uint32(d >> 32)
+}
+
+func (d decSymbol) baselineInt() int {
+	return int(d >> 32)
+}
+
+func (d *decSymbol) set(nbits, addBits uint8, newState uint16, baseline uint32) {
+	*d = decSymbol(nbits) | (decSymbol(addBits) << 8) | (decSymbol(newState) << 16) | (decSymbol(baseline) << 32)
+}
+
+func (d *decSymbol) setNBits(nBits uint8) {
+	const mask = 0xffffffffffffff00
+	*d = (*d & mask) | decSymbol(nBits)
+}
+
+func (d *decSymbol) setAddBits(addBits uint8) {
+	const mask = 0xffffffffffff00ff
+	*d = (*d & mask) | (decSymbol(addBits) << 8)
+}
+
+func (d *decSymbol) setNewState(state uint16) {
+	const mask = 0xffffffff0000ffff
+	*d = (*d & mask) | decSymbol(state)<<16
+}
+
+func (d *decSymbol) setBaseline(baseline uint32) {
+	const mask = 0xffffffff
+	*d = (*d & mask) | decSymbol(baseline)<<32
+}
+
+func (d *decSymbol) setExt(addBits uint8, baseline uint32) {
+	const mask = 0xffff00ff
+	*d = (*d & mask) | (decSymbol(addBits) << 8) | (decSymbol(baseline) << 32)
 }

 // decSymbolValue returns the transformed decSymbol for the given symbol.
 func decSymbolValue(symb uint8, t []baseOffset) (decSymbol, error) {
 	if int(symb) >= len(t) {
-		return decSymbol{}, fmt.Errorf("rle symbol %d >= max %d", symb, len(t))
+		return 0, fmt.Errorf("rle symbol %d >= max %d", symb, len(t))
 	}
 	lu := t[symb]
-	return decSymbol{
-		addBits:  lu.addBits,
-		baseline: lu.baseLine,
-	}, nil
+	return newDecSymbol(0, lu.addBits, 0, lu.baseLine), nil
 }

 // setRLE will set the decoder til RLE mode.
 func (s *fseDecoder) setRLE(symbol decSymbol) {
 	s.actualTableLog = 0
-	s.maxBits = symbol.addBits
+	s.maxBits = symbol.addBits()
 	s.dt[0] = symbol
 }

@@ -220,7 +266,7 @@ func (s *fseDecoder) buildDtable() error {
 	{
 		for i, v := range s.norm[:s.symbolLen] {
 			if v == -1 {
-				s.dt[highThreshold].addBits = uint8(i)
+				s.dt[highThreshold].setAddBits(uint8(i))
 				highThreshold--
 				symbolNext[i] = 1
 			} else {
@@ -235,7 +281,7 @@ func (s *fseDecoder) buildDtable() error {
 		position := uint32(0)
 		for ss, v := range s.norm[:s.symbolLen] {
 			for i := 0; i < int(v); i++ {
-				s.dt[position].addBits = uint8(ss)
+				s.dt[position].setAddBits(uint8(ss))
 				position = (position + step) & tableMask
 				for position > highThreshold {
 					// lowprob area
@@ -253,11 +299,11 @@ func (s *fseDecoder) buildDtable() error {
 	{
 		tableSize := uint16(1 << s.actualTableLog)
 		for u, v := range s.dt[:tableSize] {
-			symbol := v.addBits
+			symbol := v.addBits()
 			nextState := symbolNext[symbol]
 			symbolNext[symbol] = nextState + 1
 			nBits := s.actualTableLog - byte(highBits(uint32(nextState)))
-			s.dt[u&maxTableMask].nbBits = nBits
+			s.dt[u&maxTableMask].setNBits(nBits)
 			newState := (nextState << nBits) - tableSize
 			if newState > tableSize {
 				return fmt.Errorf("newState (%d) outside table size (%d)", newState, tableSize)
@@ -266,7 +312,7 @@ func (s *fseDecoder) buildDtable() error {
 				// Seems weird that this is possible with nbits > 0.
 				return fmt.Errorf("newState (%d) == oldState (%d) and no bits", newState, u)
 			}
-			s.dt[u&maxTableMask].newState = newState
+			s.dt[u&maxTableMask].setNewState(newState)
 		}
 	}
 	return nil
@@ -279,25 +325,21 @@ func (s *fseDecoder) transform(t []baseOffset) error {
 	tableSize := uint16(1 << s.actualTableLog)
 	s.maxBits = 0
 	for i, v := range s.dt[:tableSize] {
-		if int(v.addBits) >= len(t) {
-			return fmt.Errorf("invalid decoding table entry %d, symbol %d >= max (%d)", i, v.addBits, len(t))
+		add := v.addBits()
+		if int(add) >= len(t) {
+			return fmt.Errorf("invalid decoding table entry %d, symbol %d >= max (%d)", i, v.addBits(), len(t))
 		}
-		lu := t[v.addBits]
+		lu := t[add]
 		if lu.addBits > s.maxBits {
 			s.maxBits = lu.addBits
 		}
-		s.dt[i&maxTableMask] = decSymbol{
-			newState: v.newState,
-			nbBits:   v.nbBits,
-			addBits:  lu.addBits,
-			baseline: lu.baseLine,
-		}
+		v.setExt(lu.addBits, lu.baseLine)
+		s.dt[i] = v
 	}
 	return nil
 }

 type fseState struct {
-	// TODO: Check if *[1 << maxTablelog]decSymbol is faster.
 	dt    []decSymbol
 	state decSymbol
 }
@@ -312,26 +354,31 @@ func (s *fseState) init(br *bitReader, tableLog uint8, dt []decSymbol) {
 // next returns the current symbol and sets the next state.
 // At least tablelog bits must be available in the bit reader.
 func (s *fseState) next(br *bitReader) {
-	lowBits := uint16(br.getBits(s.state.nbBits))
-	s.state = s.dt[s.state.newState+lowBits]
+	lowBits := uint16(br.getBits(s.state.nbBits()))
+	s.state = s.dt[s.state.newState()+lowBits]
 }

 // finished returns true if all bits have been read from the bitstream
 // and the next state would require reading bits from the input.
 func (s *fseState) finished(br *bitReader) bool {
-	return br.finished() && s.state.nbBits > 0
+	return br.finished() && s.state.nbBits() > 0
 }

 // final returns the current state symbol without decoding the next.
 func (s *fseState) final() (int, uint8) {
-	return int(s.state.baseline), s.state.addBits
+	return s.state.baselineInt(), s.state.addBits()
+}
+
+// final returns the current state symbol without decoding the next.
+func (s decSymbol) final() (int, uint8) {
+	return s.baselineInt(), s.addBits()
 }

 // nextFast returns the next symbol and sets the next state.
 // This can only be used if no symbols are 0 bits.
 // At least tablelog bits must be available in the bit reader.
 func (s *fseState) nextFast(br *bitReader) (uint32, uint8) {
-	lowBits := uint16(br.getBitsFast(s.state.nbBits))
-	s.state = s.dt[s.state.newState+lowBits]
-	return s.state.baseline, s.state.addBits
+	lowBits := uint16(br.getBitsFast(s.state.nbBits()))
+	s.state = s.dt[s.state.newState()+lowBits]
+	return s.state.baseline(), s.state.addBits()
 }
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Aliaksandr Valialkin	88f8670ede	lib/fs: add MustStopDirRemover for waiting until pending directories are removed on graceful shutdown This patch is mainly required for laggy NFS. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/162	2019-09-05 11:13:17 +03:00
Aliaksandr Valialkin	9eb5de334f	lib/storage: typo fix	2019-09-04 19:58:01 +03:00
Aliaksandr Valialkin	6954e126fc	app/vmselect/promql: ignore grouping by destination label in `count_values`, since such a grouping is performed automatically	2019-09-04 19:58:01 +03:00
Aliaksandr Valialkin	bce35b8dd9	README.md: mention that Prometheus doesn't drop data when VictoriaMetrics restarts	2019-09-04 18:40:39 +03:00
Aliaksandr Valialkin	16dd145586	lib/storage: remove duplicate tag keys on `MetricName.Marshal` call Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/172	2019-09-04 18:13:45 +03:00
Aliaksandr Valialkin	cd2c9e39da	deployment/docker: switch Go builder from Go 1.12.9 to Go 1.13.0	2019-09-04 17:17:23 +03:00
Aliaksandr Valialkin	305e7bc981	app/vmselect/promql: do not return artificial points beyond the last point in time series	2019-09-04 16:35:34 +03:00
Aliaksandr Valialkin	9721d06c6a	app/vmselect/prometheus: do not adjust `start` and `end` args in `/api/v1/query_range` if `nocache=1` arg is set Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/171	2019-09-04 13:10:09 +03:00
Aliaksandr Valialkin	4862e93024	lib/fs: try harder with directory removal on NFS in the event of temporary lock Do not give up after 11 attempts of directory removal on laggy NFS. Add `vm_nfs_dir_remove_failed_attempts_total` metric for counting the number of failed attempts on directory removal. Log failed attempts on directory removal after long sleep times. Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/162	2019-09-04 12:24:50 +03:00
Aliaksandr Valialkin	db4560ca31	app/vmselect/promql: reset timeseries name on group_left and group_right as Prometheus does	2019-09-03 20:42:54 +03:00
Aliaksandr Valialkin	1575a560f0	app/vmselect/netstorage: adaptively adjust the maximum inmemory file size for storing temporary blocks The maximum inmemory file size now depends on `-memory.allowedPercent`. This should improve performance and reduce the number of filesystem calls on machines with big amounts of RAM when performing heavy queries over big number of samples and time series.	2019-09-03 13:32:09 +03:00
Aliaksandr Valialkin	e1d76ec1f3	lib/storage: invalidate `tagFilters -> TSIDS` cache when newly added index data becomes visible to search Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/163	2019-08-29 15:08:35 +03:00
Aliaksandr Valialkin	aeaa5de5fe	lib/prombp: apply `ba06b47c16` The following commands used: gofmt -r '(uint64(x)&0x7F)<<shift -> uint64(x&0x7F)<<shift' -w ./lib/prompb/ gofmt -r '(int64(x)&0x7F)<<shift -> int64(x&0x7F)<<shift' -w ./lib/prompb/	2019-08-29 13:35:27 +03:00
Aliaksandr Valialkin	4c0a262a2e	.github/workflows: verify builds on freebsd and darwin	2019-08-28 23:05:15 +03:00
Aliaksandr Valialkin	3685fc18d5	Makefile: extract `app-local` and `app-local-pure` build rules	2019-08-28 01:34:58 +03:00
Aliaksandr Valialkin	ede7ad3703	app/victoria-metrics: add missing `victoria-metrics` prefix to `--version` output when building with `make victoria-metrics`	2019-08-28 01:28:08 +03:00
Aliaksandr Valialkin	9196c085a7	all: port to FreeBSD on GOARCH=amd64	2019-08-28 01:19:23 +03:00
Aliaksandr Valialkin	3802ae9269	README.md: recommend checking which metrics will be deleted before deleting them	2019-08-27 15:01:16 +03:00
Artem Navoiev	b0090dbd86	add github actions (#160 )	2019-08-27 14:42:46 +03:00
Aliaksandr Valialkin	603a79b357	app/vmstorage: increase default values for search.maxTagKeys, search.maxTagValues and search.maxUniqueTimeseries	2019-08-27 14:29:53 +03:00
Aliaksandr Valialkin	2655220c58	lib/storage: go fmt	2019-08-27 14:29:51 +03:00
Aliaksandr Valialkin	bf915fc0db	lib/storage: report proper maxMetrics limit when more than -search.maxUniqueTimeseries series match the given filters	2019-08-27 14:21:42 +03:00
Aliaksandr Valialkin	2fc157ff7a	lib/storage: properly handle `(?i)` in the tag filter regexp Fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/161	2019-08-26 00:44:45 +03:00
Aliaksandr Valialkin	0dc0006f34	lib/storage: calculate the maximum number of rows per small part from `-memory.allowedPercent` Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/159 This simplifies error detection additionally to the `vm_rows_ignored_total` counters.	2019-08-25 15:31:47 +03:00
Aliaksandr Valialkin	4b688fffee	lib/storage: calculate the maximum number of rows per small part from `-memory.allowedPercent` This should improve query speed over recent data on machines with big amounts of RAM	2019-08-25 14:41:12 +03:00
Aliaksandr Valialkin	1402a6b981	lib/storage: properly limit the number of output rows in small and big parts storage Previously small parts storage didn't take into account the available disk space for big parts.	2019-08-25 14:41:12 +03:00
Aliaksandr Valialkin	3308279c4e	lib/storage: remove outdated comment on maxRowsPerSmallPart The commend became outdated after the commit ed6ac1a5df027f0dfc22448e3b27c26b6f77c67a, which stops merging of small parts on graceful shutdown instead of waiting for their completion.	2019-08-25 13:47:32 +03:00
Aliaksandr Valialkin	fb909cf710	app/vminsert/influx: set `db` label only if Influx line doesnt have `db` tag	2019-08-24 13:52:48 +03:00
Aliaksandr Valialkin	c4e75f09dc	README.md: mention that `-retentionPeriod` must cover the backfilled data	2019-08-24 13:52:48 +03:00
Aliaksandr Valialkin	fb8840ac38	vendor: update github.com/valyala/quicktemplate from v1.1.1 to v1.2.0	2019-08-24 13:41:15 +03:00
Aliaksandr Valialkin	9c9221d1b2	app/vminsert: skip empty tags	2019-08-24 13:36:29 +03:00
Aliaksandr Valialkin	70ca018a57	app/vminsert/opentsdbhttp: skip invalid rows and continue parsing the remaining rows Invalid rows are logged and counted in `vm_rows_invalid_total{type="opentsdb-http"}` metric	2019-08-24 13:36:29 +03:00
Aliaksandr Valialkin	4266091e4f	app/vminsert/opentsdb: skip invalid rows and continue parsing the remaining rows Invalid rows are logged and counted in `vm_rows_invalid_total{type="opentsdb"}` metric	2019-08-24 13:36:29 +03:00
Aliaksandr Valialkin	8001d29b6e	app/vminsert/graphite: skip invalid rows and continue parsing the remaining rows Invalid rows are logged and counted in `vm_rows_invalid_total{type="graphite"}` metric	2019-08-24 13:36:29 +03:00
Aliaksandr Valialkin	9d3f1fcbb9	app/vminsert/influx: skip invalid rows and continue parsing the remaining rows Invalid influx lines are logged and counted in `vm_rows_invalid_total{type="influx"}` metric.	2019-08-24 13:36:29 +03:00
Aliaksandr Valialkin	ba7b3806be	app/vminsert/influx: do not allow escaping newline char, since they dont occur in real life The prefious report with escaped newline chars in influx line protocol was false alarm.	2019-08-23 18:42:05 +03:00
Aliaksandr Valialkin	7fa88c6efc	app/vminsert/opentsdbhttp: allow timestamp as float64 and as string, since it occurs in real life	2019-08-23 18:35:41 +03:00
Aliaksandr Valialkin	4da34b11f8	app/vminsert/influx: handle `\r\n` aka `crlf` influx line endings from windows world Such lines exist in real life.	2019-08-23 18:28:49 +03:00
Aliaksandr Valialkin	a18317adbc	app/vminsert/influx: allow escaping newline char Though newline char isn't mentioned in escape rules at https://docs.influxdata.com/influxdb/v1.7/write_protocols/line_protocol_tutorial/ , there are reports that such chars occur in real life	2019-08-23 15:14:46 +03:00
Aliaksandr Valialkin	44d7fc599d	app/vminsert/influx: skip comments starting with `#` in influx line protocol	2019-08-23 14:43:09 +03:00
Aliaksandr Valialkin	dce6079379	README.md: add a section about Go profiling	2019-08-23 13:37:09 +03:00
Aliaksandr Valialkin	98419c00ef	vendor: `make vendor-update`	2019-08-23 10:02:10 +03:00
Aliaksandr Valialkin	ac004665b5	all: return 503 http error if service is temporarily unavailable Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/156	2019-08-23 09:55:07 +03:00
Aliaksandr Valialkin	8c03a8c4b4	app/vminsert: allow setting the maximum number of labels per time series via `-maxLabelsPerTimeseries`	2019-08-23 08:45:26 +03:00
Aliaksandr Valialkin	8a126c2865	README.md: mention that VictoriaMetrics supports enterprise workloads	2019-08-22 18:00:47 +03:00
Aliaksandr Valialkin	380cae23a0	lib/storage: add benchmarks for regexp filter match / mismatch These benchmarks allow estimate the performance of regexp filters in promql	2019-08-22 16:36:42 +03:00
Aliaksandr Valialkin	1272e407b2	app/vmselect/promql: attempt to repair invalid bucket counts passed to `histogram_quantile` Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/136 Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/154	2019-08-22 14:39:46 +03:00
Aliaksandr Valialkin	5f33fc8e46	app/vminsert: add ability to ingest data via HTTP OpenTSDB `/api/put` requests This is manual merge of the https://github.com/VictoriaMetrics/VictoriaMetrics/pull/152 Thanks to nustinov@gmail.com for the initial pull request.	2019-08-22 12:28:32 +03:00
Aliaksandr Valialkin	ec8125606d	app/vminsert/opentsdb: fix BenchmarkRowsUnmarshal by adding missing `put` prefixes to each line	2019-08-21 19:14:47 +03:00
Aliaksandr Valialkin	f4a38f7fb1	app/vmselect/promql: fix panic on `-search.disableCache` Reset the cache if it is disabled instead of stopping, since it is stopped on graceful shutdown.	2019-08-21 17:11:52 +03:00
Aliaksandr Valialkin	ab740afd0d	app/vmselect/promql: explain why empty timeseries arent removed in transformLabelValue	2019-08-21 11:29:24 +03:00
Aliaksandr Valialkin	7b5168adfb	app/vmselect/promql: remove NaNs from `/api/v1/query_range` output like Prometheus does Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/153	2019-08-20 23:01:41 +03:00
Aliaksandr Valialkin	a0d480fbf3	app/vmselect/promql: pre-allocate memory for map for checking for duplicate timeseries This should reduce memory allocations for big number of timeseries	2019-08-20 23:01:39 +03:00
Aliaksandr Valialkin	0dfc1ace53	README.md: add a section about backfilling	2019-08-20 00:34:51 +03:00
Aliaksandr Valialkin	d3fd113a80	app/vmselect/promql: add `label_value(q, label_name)` func, which returns numeric value labels with name `label_name` in `q`	2019-08-20 00:28:34 +03:00
Aliaksandr Valialkin	4f738c8a15	lib/storage: try slower path for searching the tag filter with the minimum number of matching time series before giving up with `increase -search.maxUniqueTimeseries` error	2019-08-19 16:04:21 +03:00
Aliaksandr Valialkin	dd86e6130c	app/vmselect/promql: independently track offset hints for tStart and tEnd This should improve performance if timeseries starts or ends on the selected time range	2019-08-19 13:40:14 +03:00
Aliaksandr Valialkin	6a27657d73	app/vmselect/promql: optimize search for timestamp boundaries in rollupConfig.Do This should improve the performance of queries over big number of time series with big number of output points.	2019-08-19 13:03:29 +03:00
Aliaksandr Valialkin	c23b66a1ad	lib/storage: pre-allocate memory for blockHeader slice in unmarshalBlockHeaders This reduces memory usage and memory fragmentation when working with big number of time series	2019-08-19 12:46:33 +03:00
Aliaksandr Valialkin	be39414f9c	deployment/docker: switch Go builder from go1.12.8 to go1.12.9	2019-08-18 22:07:58 +03:00
Aliaksandr Valialkin	e74fb23189	app/vmselect/promql: add `scrape_interval(q[d])` function, which would return scrape interval for `q` over `d`	2019-08-18 21:08:26 +03:00
Aliaksandr Valialkin	582fdc059a	app/vmselect/promql: hande comparisons with `NaN` similar to Prometheus Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/150	2019-08-18 00:25:50 +03:00
Aliaksandr Valialkin	1c108fc494	app/vmselect/promql: add `lifetime(q[d])` function, which returns the lifetime of `q` over `d` in seconds. This function is useful for determining time series lifetime. `d` must exceed the expected lifetime of the time series, otherwise the function would return values close to `d`.	2019-08-16 11:59:32 +03:00
Aliaksandr Valialkin	d6b5ed6d39	app/vmselect/promql: fix corner-case calculation for `ideriv`	2019-08-16 11:59:28 +03:00
Aliaksandr Valialkin	639b14e8ab	app/vmselect/promql: properly handle corner cases for rollup functions	2019-08-15 23:29:59 +03:00
Aliaksandr Valialkin	483de1cc06	lib/workingsetcache: automatically detect when it is better to double cache capacity	2019-08-15 22:57:55 +03:00
Aliaksandr Valialkin	9e0896055d	deployment/docker: switch Go builder from go1.12.7 to go1.12.8	2019-08-15 20:43:36 +03:00
Aliaksandr Valialkin	5bb61b8b38	vendor: update github.com/valyala/gozstd from v1.5.1 to v1.6.0	2019-08-15 12:56:42 +03:00
Aliaksandr Valialkin	75a58dee02	README.md: typo fix	2019-08-14 03:28:07 +03:00
Aliaksandr Valialkin	5b41122292	lib/storage: properly cache tagFilters -> TSIDs entries from historical index	2019-08-14 02:29:58 +03:00
Aliaksandr Valialkin	964c296f96	lib/storage: compress contents of cache for tagFilters -> TSIDs This should increase cache capacity	2019-08-14 02:29:52 +03:00
Aliaksandr Valialkin	9ecb994671	app/vmselect/promql: store compressed results in the cache This should increase rollup results cache capacity.	2019-08-14 02:29:45 +03:00
Aliaksandr Valialkin	9d41e0dcae	README.md: reduce the recommended `max_shards` value according to test results See https://github.com/prometheus/prometheus/issues/5803#issuecomment-520973662	2019-08-13 22:33:10 +03:00
Aliaksandr Valialkin	09fc6e22e5	all: use workingsetcache instead of fastcache This should reduce the amount of RAM required for processing time series with non-zero churn rate. The previous cache behavior can be restored with `-cache.oldBehavior` command-line flag.	2019-08-13 21:39:34 +03:00
Aliaksandr Valialkin	99c37c2c96	lib/fs: add test for IsTemporaryFileName	2019-08-13 21:33:45 +03:00
Aliaksandr Valialkin	06c2c25544	Makefile: consistency renaming: check_all -> check-all	2019-08-13 21:31:19 +03:00
Aliaksandr Valialkin	ec1b185991	lib/storage: remove broken BenchmarkIndexDBSearchTSIDs	2019-08-13 20:22:08 +03:00
Aliaksandr Valialkin	0967683ae9	lib: move common code for creating flock.lock file into fs.CreateFlockFile	2019-08-13 01:45:46 +03:00
Aliaksandr Valialkin	ad8a43b4e1	README.md: fix metric names in influx line protocol example Default separator between `measurement` and `field_name` is `_`.	2019-08-12 15:58:34 +03:00
Aliaksandr Valialkin	7346982763	README.md: mention that Influx line protocol accepts timestamps in nanoseconds by default	2019-08-12 15:31:52 +03:00
Aliaksandr Valialkin	5d8d110010	lib/fs: atomically create file with the given contents on WriteFileAtomically This should prevent from `transaction` and `metadata.json` files corruption on unclean shutdown such as OOM, `kill -9`, power loss, etc. Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/148	2019-08-12 15:02:55 +03:00
Aliaksandr Valialkin	0b488f1e37	lib/storage: do not change timestamps to constant rate if values are constant or have constant delta This breaks the original timestamps, which results in issues like https://github.com/VictoriaMetrics/VictoriaMetrics/issues/120 and https://github.com/VictoriaMetrics/VictoriaMetrics/issues/141 .	2019-08-06 15:40:07 +03:00
Aliaksandr Valialkin	b8bb74ffc6	app/vmstorage: add `vm_concurrent_addrows_*` metrics for tracking concurrency for Storage.AddRows calls Track also the number of dropped rows due to the exceeded timeout on concurrency limit for Storage.AddRows. This number is tracked in `vm_concurrent_addrows_dropped_rows_total`	2019-08-06 15:08:33 +03:00
Aliaksandr Valialkin	5c9e48417a	vendor: update github.com/VictoriaMetrics/metrics to v1.7.1	2019-08-05 19:21:36 +03:00
Aliaksandr Valialkin	5c83f8e203	app: add `vm_concurrent_` metrics for visibility in concurrency limiters for vminsert and vmselect	2019-08-05 18:30:57 +03:00
Aliaksandr Valialkin	05713469c3	vendor: `make vendor-update`	2019-08-05 10:33:21 +03:00
Aliaksandr Valialkin	8822079b77	lib/storage: properly reset `partSearch.fetchData` in `partSearch.reset`	2019-08-05 09:56:06 +03:00