vendor: make vendor-update

vendor: update github.com/valyala/fastjson from v1.5.1 to v1.5.2
lib/promrelabel: properly apply ^ and $ anchors to regex value in Prometheus relabeling rules
2026-06-07 19:06:17 +03:00 · 2020-06-25 23:45:14 +03:00 · 2020-06-25 23:35:03 +03:00 · 2020-06-25 17:19:19 +03:00 · 2020-06-24 19:38:39 +03:00 · 2020-06-24 18:09:33 +03:00
741 changed files with 39904 additions and 61606 deletions
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -24,7 +24,7 @@ jobs:
        run: |
          go get -u golang.org/x/lint/golint
          go get -u github.com/kisielk/errcheck
-          go get -u github.com/golangci/golangci-lint/cmd/golangci-lint
+          curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(go env GOPATH)/bin v1.27.0
      - name: Code checkout
        uses: actions/checkout@master
      - name: Build
--- a/21
+++ b/21
@@ -13,7 +13,8 @@ GO_BUILDINFO = -X '$(PKG_PREFIX)/lib/buildinfo.Version=$(APP_NAME)-$(shell date
 all: \
 	victoria-metrics-prod \
 	vmagent-prod \
-	vmalert-prot \
+	vmalert-prod \
+	vmauth-prod \
 	vmbackup-prod \
 	vmrestore-prod

@@ -27,6 +28,7 @@ publish: \
 	publish-victoria-metrics \
 	publish-vmagent \
 	publish-vmalert \
+	publish-vmauth \
 	publish-vmbackup \
 	publish-vmrestore

@@ -34,12 +36,14 @@ package: \
 	package-victoria-metrics \
 	package-vmagent \
 	package-vmalert \
+	package-vmauth \
 	package-vmbackup \
 	package-vmrestore

 vmutils: \
 	vmagent \
 	vmalert \
+	vmauth \
 	vmbackup \
 	vmrestore

@@ -54,9 +58,10 @@ release-victoria-metrics: victoria-metrics-prod
 release-vmutils: \
 	vmagent-prod \
 	vmalert-prod \
+	vmauth-prod \
 	vmbackup-prod \
 	vmrestore-prod
-	cd bin && tar czf vmutils-$(PKG_TAG).tar.gz vmagent-prod vmalert-prod vmbackup-prod vmrestore-prod && \
+	cd bin && tar czf vmutils-$(PKG_TAG).tar.gz vmagent-prod vmalert-prod vmauth-prod vmbackup-prod vmrestore-prod && \
 		sha256sum vmutils-$(PKG_TAG).tar.gz > vmutils-$(PKG_TAG)_checksums.txt

 pprof-cpu:
@@ -84,9 +89,9 @@ errcheck: install-errcheck
 	errcheck -exclude=errcheck_excludes.txt ./app/vmstorage/...
 	errcheck -exclude=errcheck_excludes.txt ./app/vmagent/...
 	errcheck -exclude=errcheck_excludes.txt ./app/vmalert/...
+	errcheck -exclude=errcheck_excludes.txt ./app/vmauth/...
 	errcheck -exclude=errcheck_excludes.txt ./app/vmbackup/...
 	errcheck -exclude=errcheck_excludes.txt ./app/vmrestore/...
-	errcheck -exclude=errcheck_excludes.txt ./app/vmalert/...

 install-errcheck:
 	which errcheck || GO111MODULE=off go get -u github.com/kisielk/errcheck
@@ -136,7 +141,15 @@ install-qtc:


 golangci-lint: install-golangci-lint
-	golangci-lint run --exclude '(SA4003|SA1019):' -D errcheck -D structcheck --timeout 2m
+	golangci-lint run --exclude '(SA4003|SA1019|SA5011):' -D errcheck -D structcheck --timeout 2m

 install-golangci-lint:
 	which golangci-lint || GO111MODULE=off go get -u github.com/golangci/golangci-lint/cmd/golangci-lint
+
+docs-sync:
+	cp app/vmagent/README.md docs/vmagent.md
+	cp app/vmalert/README.md docs/vmalert.md
+	cp app/vmauth/README.md docs/vmauth.md
+	cp app/vmbackup/README.md docs/vmbackup.md
+	cp app/vmrestore/README.md docs/vmrestore.md
+	cp README.md docs/Single-server-VictoriaMetrics.md
--- a/README.md
+++ b/README.md
@@ -10,17 +10,26 @@

 ## VictoriaMetrics

-VictoriaMetrics is fast, cost-effective and scalable time-series database. It can be used as long-term remote storage for Prometheus.
+VictoriaMetrics is fast, cost-effective and scalable time-series database.
+
 It is available in [binary releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases),
 [docker images](https://hub.docker.com/r/victoriametrics/victoria-metrics/) and
 in [source code](https://github.com/VictoriaMetrics/VictoriaMetrics). Just download VictoriaMetrics and see [how to start it](#how-to-start-victoriametrics).

 Cluster version is available [here](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/cluster).

+See our [Wiki](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki) for additional documentation.
+
+[Contact us](mailto:info@victoriametrics.com) if you need paid enterprise support for VictoriaMetrics.
+See [features available for enterprise customers](https://github.com/VictoriaMetrics/VictoriaMetrics/issues?q=is%3Aissue+label%3Aenterprise).
+
+
 ## Case studies and talks

 * [Adidas](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/CaseStudies#adidas)
+* [CERN](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/CaseStudies#cern)
 * [COLOPL](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/CaseStudies#colopl)
+* [Zerodha](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/CaseStudies#zerodha)
 * [Wix.com](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/CaseStudies#wixcom)
 * [Wedos.com](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/CaseStudies#wedoscom)
 * [Synthesio](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/CaseStudies#synthesio)
@@ -33,6 +42,8 @@ Cluster version is available [here](https://github.com/VictoriaMetrics/VictoriaM

 ## Prominent features

+* VictoriaMetrics can be used as long-term storage for Prometheus or for [vmagent](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmagent/README.md).
+  See [these docs](#prometheus-setup) for details.
 * Supports [Prometheus querying API](https://prometheus.io/docs/prometheus/latest/querying/api/), so it can be used as Prometheus drop-in replacement in Grafana.
  VictoriaMetrics implements [MetricsQL](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/MetricsQL) query language, which is inspired by PromQL.
 * Supports global query view. Multiple Prometheus instances may write data into VictoriaMetrics. Later this data may be used in a single query.
@@ -115,6 +126,8 @@ Cluster version is available [here](https://github.com/VictoriaMetrics/VictoriaM
 * [Monitoring](#monitoring)
 * [Troubleshooting](#troubleshooting)
 * [Backfilling](#backfilling)
+* [Replication](#replication)
+* [Backups](#backups)
 * [Profiling](#profiling)
 * [Integrations](#integrations)
 * [Third-party contributions](#third-party-contributions)
@@ -137,7 +150,9 @@ The following command-line flags are used the most:

 * `-storageDataPath` - path to data directory. VictoriaMetrics stores all the data in this directory. Default path is `victoria-metrics-data` in current working directory.
 * `-retentionPeriod` - retention period in months for the data. Older data is automatically deleted. Default period is 1 month.
-* `-httpListenAddr` - TCP address to listen to for http requests. By default, it listens port `8428` on all the network interfaces.
+
+Other flags have good enough default values, so set them only if you really need to.
+VictoriaMetrics accepts [Prometheus querying API requests](#prometheus-querying-api-usage) on port `8428` by default.

 Pass `-help` to see all the available flags with description and default values.

@@ -262,6 +277,8 @@ Currently the following [scrape_config](https://prometheus.io/docs/prometheus/la
 * [kubernetes_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#kubernetes_sd_config)
 * [ec2_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#ec2_sd_config)
 * [gce_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#gce_sd_config)
+* [consul_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#consul_sd_config)
+* [dns_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#dns_sd_config)

 In the future other `*_sd_config` types will be supported.

@@ -312,7 +329,7 @@ to local VictoriaMetrics using `curl`:
 curl -d 'measurement,tag1=value1,tag2=value2 field1=123,field2=1.23' -X POST 'http://localhost:8428/write'
 ```

-An arbitrary number of lines delimited by '\n' may be sent in a single request.
+An arbitrary number of lines delimited by '\n' (aka newline char) may be sent in a single request.
 After that the data may be read via [/api/v1/export](#how-to-export-time-series) endpoint:

 ```bash
@@ -348,7 +365,7 @@ echo "foo.bar.baz;tag1=value1;tag2=value2 123 `date +%s`" | nc -N localhost 2003
 ```

 VictoriaMetrics sets the current time if the timestamp is omitted.
-An arbitrary number of lines delimited by `\n` may be sent in one go.
+An arbitrary number of lines delimited by `\n` (aka newline char) may be sent in one go.
 After that the data may be read via [/api/v1/export](#how-to-export-time-series) endpoint:

 ```bash
@@ -389,7 +406,7 @@ Example for writing data with OpenTSDB protocol to local VictoriaMetrics using `
 echo "put foo.bar.baz `date +%s` 123 tag1=value1 tag2=value2" | nc -N localhost 4242
 ```

-An arbitrary number of lines delimited by `\n` may be sent in one go.
+An arbitrary number of lines delimited by `\n` (aka newline char) may be sent in one go.
 After that the data may be read via [/api/v1/export](#how-to-export-time-series) endpoint:

 ```bash
@@ -566,11 +583,11 @@ Run `make package-victoria-metrics`. It builds `victoriametrics/victoria-metrics
 `<PKG_TAG>` is auto-generated image tag, which depends on source code in the repository.
 The `<PKG_TAG>` may be manually set via `PKG_TAG=foobar make package-victoria-metrics`.

-By default the image is built on top of `scratch` image. It is possible to build the package on top of any other base image
-by setting it via `<ROOT_IMAGE>` environment variable. For example, the following command builds the image on top of `alpine:3.11` image:
+By default the image is built on top of `alpine` image for improved debuggability. It is possible to build the package on top of any other base image
+by setting it via `<ROOT_IMAGE>` environment variable. For example, the following command builds the image on top of `scratch` image:

 ```bash
-ROOT_IMAGE=alpine:3.11 make package-victoria-metrics
+ROOT_IMAGE=scratch make package-victoria-metrics
 ```

 ### Start with docker-compose
@@ -758,7 +775,13 @@ The required resources for query path:
 ### High availability

 1) Install multiple VictoriaMetrics instances in distinct datacenters (availability zones).
-2) Add addresses of these instances to `remote_write` section in Prometheus config:
+2) Pass addresses of these instances to [vmagent](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmagent/README.md) via `-remoteWrite.url` command-line flag:
+
+```bash
+/path/to/vmagent -remoteWrite.url=http://<victoriametrics-addr-1>:8428/api/v1/write -remoteWrite.url=http://<victoriametrics-addr-2>:8428/api/v1/write
+```
+
+Alternatively these addresses may be passed to `remote_write` section in Prometheus config:

 ```yml
 remote_write:
@@ -777,6 +800,8 @@ remote_write:
 kill -HUP `pidof prometheus`
 ```

+It is recommended to use [vmagent](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmagent/README.md) instead of Prometheus for highly loaded setups.
+
 4) Now Prometheus should write data into all the configured `remote_write` urls in parallel.
 5) Set up [Promxy](https://github.com/jacksontj/promxy) in front of all the VictoriaMetrics replicas.
 6) Set up Prometheus datasource in Grafana that points to Promxy.
@@ -787,6 +812,7 @@ to write data to `victoriametrics-addr-1`, while each `r2` should write data to
 Another option is to write data simultaneously from Prometheus HA pair to a pair of VictoriaMetrics instances
 with the enabled de-duplication. See [this section](#deduplication) for details.

+
 ### Deduplication

 VictoriaMetrics de-duplicates data points if `-dedup.minScrapeInterval` command-line flag
@@ -804,6 +830,8 @@ Data is split in per-month subdirectories inside `<-storageDataPath>/data/small`
 Directories for months outside the configured retention are deleted on the first day of new month.
 In order to keep data according to `-retentionPeriod` max disk space usage is going to be `-retentionPeriod` + 1 month.
 For example if `-retentionPeriod` is set to 1, data for January is deleted on March 1st.
+It is safe to extend `-retentionPeriod` on existing data. If `-retentionPeriod` is set to a lower
+value than before, then data outside the configured period will be eventually deleted.

 ### Multiple retentions

@@ -813,6 +841,11 @@ Just start multiple VictoriaMetrics instances with distinct values for the follo
 * `-storageDataPath`, so the data for each retention period is saved in a separate directory
 * `-httpListenAddr`, so clients may reach VictoriaMetrics instance with proper retention

+Then set up [vmauth](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmauth/README.md) in front of VictoriaMetrics instances,
+so it could route requests from particular user to VictoriaMetrics with the desired retention.
+The same scheme could be implemented for multiple tenants in [VictoriaMetrics cluster](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/cluster/README.md).
+
+
 ### Downsampling

 There is no downsampling support at the moment, but:
@@ -825,6 +858,10 @@ There is no downsampling support at the moment, but:
 These properties reduce the need of downsampling. We plan to implement downsampling in the future.
 See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/36) for details.

+It is possible to (ab)use [-dedup.minScrapeInterval](#deduplication) for basic downsampling.
+For instance, if interval between the ingested data points is 15s, then `-dedup.minScrapeInterval=5m` will leave
+only a single data point out of 20 initial data points per each 5m interval.
+
 ### Multi-tenancy

 Single-node VictoriaMetrics doesn't support multi-tenancy. Use [cluster version](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/cluster) instead.
@@ -841,11 +878,14 @@ horizontally scalable long-term remote storage for really large Prometheus deplo

 ### Alerting

-VictoriaMetrics doesn't support rule evaluation and alerting yet, so these actions can be performed at the following places:
+It is recommended to use [vmalert](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmalert/README.md) for alerting.
+
+Additionally, alerting can be set up with the following tools:
+
+* With Prometheus - see [the corresponding docs](https://prometheus.io/docs/alerting/overview/).
+* With Promxy - see [the corresponding docs](https://github.com/jacksontj/promxy/blob/master/README.md#how-do-i-use-alertingrecording-rules-in-promxy).
+* With Grafana - see [the corresponding docs](https://grafana.com/docs/alerting/rules/).

-* At Prometheus - see [the corresponding docs](https://prometheus.io/docs/alerting/overview/).
-* At Promxy - see [the corresponding docs](https://github.com/jacksontj/promxy/blob/master/README.md#how-do-i-use-alertingrecording-rules-in-promxy).
-* At Grafana - see [the corresponding docs](https://grafana.com/docs/alerting/rules/).

 ### Security

@@ -862,6 +902,10 @@ Consider setting the following command-line flags:
 Explicitly set internal network interface for TCP and UDP ports for data ingestion with Graphite and OpenTSDB formats.
 For example, substitute `-graphiteListenAddr=:2003` with `-graphiteListenAddr=<internal_iface_ip>:2003`.

+Prefer authorizing all the incoming requests from untrusted networks with [vmauth](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmauth/README.md)
+or similar auth proxy.
+
+
 ### Tuning

 * There is no need for VictoriaMetrics tuning since it uses reasonable defaults for command-line flags,
@@ -881,7 +925,8 @@ mkfs.ext4 ... -O 64bit,huge_file,extent -T huge
 ### Monitoring

 VictoriaMetrics exports internal metrics in Prometheus format at `/metrics` page.
-These metrics may be collected via Prometheus by adding the corresponding scrape config to it.
+These metrics may be collected by [vmagent](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmagent/README.md)
+or Prometheus by adding the corresponding scrape config to it.
 Alternatively they can be self-scraped by setting `-selfScrapeInterval` command-line flag to duration greater than 0.
 For example, `-selfScrapeInterval=10s` would enable self-scraping of `/metrics` page with 10 seconds interval.

@@ -892,23 +937,32 @@ The most interesting metrics are:

 * `vm_cache_entries{type="storage/hour_metric_ids"}` - the number of time series with new data points during the last hour
  aka active time series.
-* `rate(vm_new_timeseries_created_total[5m])` - time series churn rate.
-* `vm_rows{type="indexdb"}` - the number of rows in inverted index. High value for this number usually mean high churn rate for time series.
-* Sum of `vm_rows{type="storage/big"}` and `vm_rows{type="storage/small"}` - total number of `(timestamp, value)` data points
-  in the database.
-* `vm_rows_inserted_total` - the total number of inserted rows since VictoriaMetrics start.
+* `increase(vm_new_timeseries_created_total[1h])` - time series churn rate during the previous hour.
+* `sum(vm_rows{type=~"storage/.*"})` - total number of `(timestamp, value)` data points in the database.
+* `sum(rate(vm_rows_inserted_total[5m]))` - ingestion rate, i.e. how many samples are inserted into the database per second.
 * `vm_free_disk_space_bytes` - free space left at `-storageDataPath`.
-* `sum(vm_data_size_bytes)` - the total data size on disk.
+* `sum(vm_data_size_bytes)` - the total size of data on disk.
+* `increase(vm_slow_row_inserts_total[5m])` - the number of slow inserts during the last 5 minutes.
+  If this number remains high during extended periods of time, then it is likely more RAM is needed for optimal handling
+  of the current number of active time series.
+* `increase(vm_slow_metric_name_loads_total[5m])` - the number of slow loads of metric names during the last 5 minutes.
+  If this number remains high during extended periods of time, then it is likely more RAM is needed for optimal handling
+  of the current number of active time series.
+

 ### Troubleshooting

 * It is recommended to use default command-line flag values (i.e. don't set them explicitly) until the need
  of tweaking these flag values arises.

+* It is recommended to upgrade to the latest available release from [this page](https://github.com/VictoriaMetrics/VictoriaMetrics/releases),
+  since the issue may already be fixed there.
+
 * If VictoriaMetrics works slowly and eats more than a CPU core per 100K ingested data points per second,
  then it is likely you have too many active time series for the current amount of RAM.
+  VictoriaMetrics [exposes](#monitoring) `vm_slow_*` metrics, which could be used as an indicator of low amounts of RAM.
  It is recommended increasing the amount of RAM on the node with VictoriaMetrics in order to improve
-  ingestion performance.
+  ingestion and query performance in this case.
  Another option is to increase `-memory.allowedPercent` command-line flag value. Be careful with this
  option, since too big value for `-memory.allowedPercent` may result in high I/O usage.

@@ -939,6 +993,10 @@ The most interesting metrics are:
  VictoriaMetrics accepts optional `date=YYYY-MM-DD` and `topN=42` args on this page. By default `date` equals to the current date,
  while `topN` equals to 10.

+* VictoriaMetrics limits the number of labels per each metric with `-maxLabelsPerTimeseries` command-line flag.
+  This prevents ingestion of metrics with too many labels. It is recommended to [monitor](#monitoring) the `vm_metrics_with_dropped_labels_total`
+  metric in order to determine whether `-maxLabelsPerTimeseries` must be adjusted for your workload.
+

 ### Backfilling

@@ -955,6 +1013,24 @@ the query cache, which could contain incomplete data cached during the backfilli
 Yet another solution is to increase `-search.cacheTimestampOffset` flag value in order to disable caching
 for data with timestamps close to the current time.

+
+### Replication
+
+Single-node VictoriaMetrics doesn't support application-level replication. Use cluster version instead.
+See [these docs](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/cluster/README.md#replication-and-data-safety) for details.
+
+Storage-level replication may be offloaded to durable persistent storage such as [Google Cloud disks](https://cloud.google.com/compute/docs/disks#pdspecs).
+
+See also [high availability docs](#high-availability) and [backup docs](#backups).
+
+
+### Backups
+
+VictoriaMetrics supports backups via [vmbackup](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmbackup/README.md)
+and [vmrestore](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmrestore/README.md) tools.
+We also provide `vmbackuper` tool for paid enterprise subscribers - see [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/466) for details.
+
+
 ### Profiling

 VictoriaMetrics provides handlers for collecting the following [Go profiles](https://blog.golang.org/profiling-go-programs):
--- a/app/victoria-metrics/main.go
+++ b/app/victoria-metrics/main.go
@@ -3,6 +3,7 @@ package main
 import (
 	"flag"
 	"net/http"
+	"os"
 	"time"

 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert"
@@ -25,6 +26,8 @@ var (
 )

 func main() {
+	// Write flags and help message to stdout, since it is easier to grep or pipe.
+	flag.CommandLine.SetOutput(os.Stdout)
 	envflag.Parse()
 	buildinfo.Init()
 	logger.Init()
--- a/app/vmagent/Makefile
+++ b/app/vmagent/Makefile
@@ -52,8 +52,9 @@ publish-vmagent:
 	APP_NAME=vmagent $(MAKE) publish-via-docker

 run-vmagent:
-	mkdir -p vmagent-data
-	DOCKER_OPTS='-v $(shell pwd)/vmagent-data:/vmagent-data' \
+	mkdir -p vmagent-remotewrite-data
+	DOCKER_OPTS='-v $(shell pwd)/vmagent-remotewrite-data:/vmagent-remotewrite-data' \
+	ARGS='-remoteWrite.url=http://localhost:8428/api/v1/write' \
 	APP_NAME=vmagent \
 	$(MAKE) run-via-docker

--- a/app/vmagent/README.md
+++ b/app/vmagent/README.md
@@ -1,8 +1,8 @@
 ## vmagent

-`vmagent` is a tiny but brave agent, which helps you collecting metrics from various sources
-and storing them to [VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics)
-or any other Prometheus-compatible storage system that supports `remote_write` protocol.
+`vmagent` is a tiny but brave agent, which helps you collect metrics from various sources
+and stores them in [VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics)
+or any other Prometheus-compatible storage system that supports the `remote_write` protocol.

 <img alt="vmagent" src="vmagent.png">

@@ -11,7 +11,7 @@ or any other Prometheus-compatible storage system that supports `remote_write` p

 While VictoriaMetrics provides an efficient solution to store and observe metrics, our users needed something fast
 and RAM friendly to scrape metrics from Prometheus-compatible exporters to VictoriaMetrics.
-Also, we found that users’ infrastructure is like snowflakes - never alike, and we decided to add more flexibility
+Also, we found that our users’ infrastructures are like snowflakes - no two are alike - so we decided to add more flexibility
 to `vmagent` (like the ability to push metrics instead of pulling them). We did our best and plan to do even more.


@@ -31,7 +31,7 @@ to `vmagent` (like the ability to push metrics instead of pulling them). We did
 * Works in environments with unstable connections to remote storage. If the remote storage is unavailable, the collected metrics
  are buffered at `-remoteWrite.tmpDataPath`. The buffered metrics are sent to remote storage as soon as connection
  to remote storage is recovered. The maximum disk usage for the buffer can be limited with `-remoteWrite.maxDiskUsagePerURL`.
-* Uses lower amounts of RAM, CPU, disk IO and network bandwidth comparing to Prometheus.
+* Uses lower amounts of RAM, CPU, disk IO and network bandwidth compared to Prometheus.


 ### Quick Start
@@ -40,8 +40,7 @@ Just download `vmutils-*` archive from [releases page](https://github.com/Victor
 and pass the following flags to `vmagent` binary in order to start scraping Prometheus targets:

 * `-promscrape.config` with the path to Prometheus config file (it is usually located at `/etc/prometheus/prometheus.yml`)
-* `-remoteWrite.url` with the remote storage endpoint such as VictoriaMetrics. Multiple `-remoteWrite.url` args can be set in parallel
-  in order to replicate data concurrently to multiple remote storage systems.
+* `-remoteWrite.url` with the remote storage endpoint such as VictoriaMetrics. The `-remoteWrite.url` argument can be specified multiple times in order to replicate data concurrently to an arbitrary number of remote storage systems.

 Example command line:

@@ -49,7 +48,7 @@ Example command line:
 /path/to/vmagent -promscrape.config=/path/to/prometheus.yml -remoteWrite.url=https://victoria-metrics-host:8428/api/v1/write
 ```

-If you need collecting only Influx data, then the following command line would be enough:
+If you only need to collect Influx data, then the following is sufficient:

 ```
 /path/to/vmagent -remoteWrite.url=https://victoria-metrics-host:8428/api/v1/write
@@ -57,7 +56,7 @@ If you need collecting only Influx data, then the following command line would b

 Then send Influx data to `http://vmagent-host:8429`. See [these docs](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/README.md#how-to-send-data-from-influxdb-compatible-agents-such-as-telegraf) for more details.

-`vmagent` is also available in [docker images](https://hub.docker.com/r/victoriametrics/vmagent/).
+`vmagent` is also available in [docker images](https://hub.docker.com/r/victoriametrics/vmagent/tags).

 Pass `-help` to `vmagent` in order to see the full list of supported command-line flags with their descriptions.

@@ -79,14 +78,14 @@ See [the corresponding Makefile rules](https://github.com/VictoriaMetrics/Victor
 #### Drop-in replacement for Prometheus

 If you use Prometheus only for scraping metrics from various targets and forwarding these metrics to remote storage,
-then `vmagent` can replace such Prometheus setup. Usually `vmagent` requires lower amounts of RAM, CPU and network bandwidth comparing to Prometheus for such setup.
+then `vmagent` can replace such Prometheus setup. Usually `vmagent` requires lower amounts of RAM, CPU and network bandwidth compared to Prometheus for such a setup.
 See [these docs](#how-to-collect-metrics-in-prometheus-format) for details.


 #### Replication and high availability

 `vmagent` replicates the collected metrics among multiple remote storage instances configured via `-remoteWrite.url` args.
-If a single remote storage instance temporarily goes out of service, then the collected data remains available in another remote storage instances.
+If a single remote storage instance is temporarily out of service, then the collected data remains available in the other remote storage instances.
 `vmagent` buffers the collected data in files at `-remoteWrite.tmpDataPath` until the remote storage becomes available again.
 Then it sends the buffered data to the remote storage in order to prevent data gaps in the remote storage.

@@ -94,13 +93,13 @@ Then it sends the buffered data to the remote storage in order to prevent data g
 #### Relabeling and filtering

 `vmagent` can add, remove or update labels on the collected data before sending it to remote storage. Additionally,
-it can remove unneeded samples via Prometheus-like relabeling before sending the collected data to remote storage.
+it can remove unwanted samples via Prometheus-like relabeling before sending the collected data to remote storage.
 See [these docs](#relabeling) for details.


 #### Splitting data streams among multiple systems

-`vmagent` supports splitting of the collected data among muliple destinations with the help of `-remoteWrite.urlRelabelConfig`,
+`vmagent` supports splitting the collected data between multiple destinations with the help of `-remoteWrite.urlRelabelConfig`,
 which is applied independently for each configured `-remoteWrite.url` destination. For instance, it is possible to replicate or split
 data among long-term remote storage, short-term remote storage and real-time analytical system [built on top of Kafka](https://github.com/Telefonica/prometheus-kafka-adapter).
 Note that each destination can receive its own subset of the collected data thanks to per-destination relabeling via `-remoteWrite.urlRelabelConfig`.
@@ -133,21 +132,24 @@ The following scrape types in [scrape_config](https://prometheus.io/docs/prometh
  See [these docs](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#file_sd_config) for details.
 * `kubernetes_sd_configs` - for scraping targets in Kubernetes (k8s).
  See [kubernetes_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#kubernetes_sd_config) for details.
-* `ec2_sd_configs` - for scraping targets in Amazone EC2.
+* `ec2_sd_configs` - for scraping targets in Amazon EC2.
  See [ec2_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#ec2_sd_config) for details.
  `vmagent` doesn't support `role_arn` config param yet.
 * `gce_sd_configs` - for scraping targets in Google Compute Engine (GCE).
  See [gce_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#gce_sd_config) for details.
-  `vmagent` provides the following additional functionality `gce_sd_config`:
+  `vmagent` provides the following additional functionality for `gce_sd_config`:
  * if `project` arg is missing, then `vmagent` uses the project for the instance where it runs;
  * if `zone` arg is missing, then `vmagent` uses the zone for the instance where it runs;
  * if `zone` arg equals to `"*"`, then `vmagent` discovers all the zones for the given project;
  * `zone` may contain arbitrary number of zones, i.e. `zone: [us-east1-a, us-east1-b]`.
+* `consul_sd_configs` - for scraping targets registered in Consul.
+  See [consul_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#consul_sd_config) for details.
+* `dns_sd_configs` - for scraping targets discovered from DNS records (SRV, A and AAAA).
+  See [dns_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#dns_sd_config) for details.

-The following service discovery mechanisms will be added to `vmagent` soon:
-
-* [consul_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#consul_sd_config)
-* [dns_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#dns_sd_config)
+Note that `vmagent` doesn't support the `refresh_interval` option for these scrape configs. Use the corresponding `-promscrape.*CheckInterval`
+command-line flag instead. For example, `-promscrape.consulSDCheckInterval=60s` sets `refresh_interval` for all the `consul_sd_configs`
+entries to 60s. Run `vmagent -help` in order to see default values for `-promscrape.*CheckInterval` flags.


 File feature requests at [our issue tracker](https://github.com/VictoriaMetrics/VictoriaMetrics/issues) if you need other service discovery mechanisms to be supported by `vmagent`.
@@ -168,12 +170,13 @@ Additionally it provides the following extra actions:

 * `replace_all`: replaces all the occurences of `regex` in the values of `source_labels` with the `replacement` and stores the result in the `target_label`.
 * `labelmap_all`: replaces all the occurences of `regex` in all the label names with the `replacement`.
+* `keep_if_equal`: keeps the entry if all label values from `source_labels` are equal.
+* `drop_if_equal`: drops the entry if all the label values from `source_labels` are equal.

 The relabeling can be defined in the following places:

-* At `scrape_config -> relabel_configs` section in `-promscrape.config` file. This relabeling is applied to targets when parsing the file during `vmagent` startup
-  or during config reload after sending `SIGHUP` signal to `vmagent`  via `kill -HUP`.
-* At `scrape_config -> metric_relabel_configs` section in `-promscrape.config` file. This relabeling is applied to metrics after each scrape for the configured targets.
+* At `scrape_config -> relabel_configs` section in `-promscrape.config` file. This relabeling is applied to target labels.
+* At `scrape_config -> metric_relabel_configs` section in `-promscrape.config` file. This relabeling is applied to all the scraped metrics in the given `scrape_config`.
 * At `-remoteWrite.relabelConfig` file. This relabeling is aplied to all the collected metrics before sending them to remote storage.
 * At `-remoteWrite.urlRelabelConfig` files. This relabeling is applied to metrics before sending them to the corresponding `-remoteWrite.url`.

@@ -191,7 +194,7 @@ Read more about relabeling in the following articles:
 `vmagent` exports various metrics in Prometheus exposition format at `http://vmagent-host:8429/metrics` page. It is recommended setting up regular scraping of this page
 either via `vmagent` itself or via Prometheus, so the exported metrics could be analyzed later.

-`vmagent` also exports target statuses at `http://vmagent-host:8429/targets` page in plaintext format. This page also exports information on improperly configured scrape configs.
+`vmagent` also exports target statuses at `http://vmagent-host:8429/targets` page in plaintext format.


 ### Troubleshooting
@@ -202,12 +205,20 @@ either via `vmagent` itself or via Prometheus, so the exported metrics could be
 * When `vmagent` scrapes many unreliable targets, it can flood error log with scrape errors. These errors can be suppressed
  by passing `-promscrape.suppressScrapeErrors` command-line flag to `vmagent`. The most recent scrape error per each target can be observed at `http://vmagent-host:8429/targets`.

-* It is recommended increasing `-remoteWrite.queues` if `vmagent` collects more than 100K samples per second
-  and `vmagent_remotewrite_pending_data_bytes` metric exported by `vmagent` at `/metrics` page constantly grows.
+* It is recommended to increase `-remoteWrite.queues` if `vmagent` collects more than 100K samples per second
+  and `vmagent_remotewrite_pending_data_bytes` metric exported at `http://vmagent-host:8429/metrics` page constantly grows.

 * `vmagent` buffers scraped data at `-remoteWrite.tmpDataPath` directory until it is sent to `-remoteWrite.url`.
-  The directory can grow big when remote storage is unavailable during extended periods of time and if `-remoteWrite.maxDiskUsagePerURL` isn't set.
-  If you don't want sending all the data from the directory to remote storage, just stop `vmagent` and delete the directory.
+  The directory can grow large when remote storage is unavailable for extended periods of time and if `-remoteWrite.maxDiskUsagePerURL` isn't set.
+  If you don't want to send all the data from the directory to remote storage, simply stop `vmagent` and delete the directory.
+
+* If you see `skipping duplicate scrape target with identical labels` errors when scraping Kubernetes pods, then it is likely these pods listen on multiple ports.
+  Just add the following relabeling rule to `relabel_configs` section in order to filter out targets with unneeded ports:
+
+```yml
+- action: keep_if_equal
+  source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_port, __meta_kubernetes_pod_container_port_number]
+```


 ### How to build from sources
@@ -239,3 +250,24 @@ by setting it via `<ROOT_IMAGE>` environment variable. For example, the followin
 ```bash
 ROOT_IMAGE=alpine:3.11 make package-vmagent
 ```
+
+
+### Profiling
+
+`vmagent` provides handlers for collecting the following [Go profiles](https://blog.golang.org/profiling-go-programs):
+
+* Memory profile. It can be collected with the following command:
+
+```bash
+curl -s http://<vmagent-host>:8429/debug/pprof/heap > mem.pprof
+```
+
+* CPU profile. It can be collected with the following command:
+
+```bash
+curl -s http://<vmagent-host>:8429/debug/pprof/profile > cpu.pprof
+```
+
+The command for collecting CPU profile waits for 30 seconds before returning.
+
+The collected profiles may be analyzed with [go tool pprof](https://github.com/google/pprof).
--- a/app/vmagent/main.go
+++ b/app/vmagent/main.go
@@ -4,6 +4,7 @@ import (
 	"flag"
 	"fmt"
 	"net/http"
+	"os"
 	"strings"
 	"time"

@@ -39,6 +40,8 @@ var (
 		"Telnet put messages and HTTP /api/put messages are simultaneously served on TCP port. "+
 		"Usually :4242 must be set. Doesn't work if empty")
 	opentsdbHTTPListenAddr = flag.String("opentsdbHTTPListenAddr", "", "TCP address to listen for OpentTSDB HTTP put requests. Usually :4242 must be set. Doesn't work if empty")
+	dryRun                 = flag.Bool("dryRun", false, "Whether to check only config files without running vmagent. The following files are checked: "+
+		"-promscrape.config, -remoteWrite.relabelConfig, -remoteWrite.urlRelabelConfig . See also -promscrape.config.dryRun")
 )

 var (
@@ -49,9 +52,27 @@ var (
 )

 func main() {
+	// Write flags and help message to stdout, since it is easier to grep or pipe.
+	flag.CommandLine.SetOutput(os.Stdout)
+	flag.Usage = usage
 	envflag.Parse()
 	buildinfo.Init()
 	logger.Init()
+
+	if *dryRun {
+		if err := flag.Set("promscrape.config.strictParse", "true"); err != nil {
+			logger.Panicf("BUG: cannot set promscrape.config.strictParse=true: %s", err)
+		}
+		if err := remotewrite.CheckRelabelConfigs(); err != nil {
+			logger.Fatalf("error when checking relabel configs: %s", err)
+		}
+		if err := promscrape.CheckConfig(); err != nil {
+			logger.Fatalf("error when checking Prometheus config: %s", err)
+		}
+		logger.Infof("all the configs are ok; exitting with 0 status code")
+		return
+	}
+
 	logger.Infof("starting vmagent at %q...", *httpListenAddr)
 	startTime := time.Now()
 	remotewrite.Init()
@@ -157,6 +178,11 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
 		w.Header().Set("Content-Type", "text/plain")
 		promscrape.WriteHumanReadableTargetsStatus(w)
 		return true
+	case "/-/reload":
+		promscrapeConfigReloadRequests.Inc()
+		procutil.SelfSIGHUP()
+		w.WriteHeader(http.StatusOK)
+		return true
 	}
 	return false
 }
@@ -177,4 +203,18 @@ var (
 	influxQueryRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/query", protocol="influx"}`)

 	promscrapeTargetsRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/targets"}`)
+
+	promscrapeConfigReloadRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/-/reload"}`)
 )
+
+func usage() {
+	const s = `
+vmagent collects metrics data via popular data ingestion protocols and routes it to VictoriaMetrics.
+
+See the docs at https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmagent/README.md .
+`
+
+	f := flag.CommandLine.Output()
+	fmt.Fprintf(f, "%s\n", s)
+	flag.PrintDefaults()
+}
--- a/app/vmagent/remotewrite/client.go
+++ b/app/vmagent/remotewrite/client.go
@@ -2,34 +2,40 @@ package remotewrite

 import (
 	"crypto/tls"
-	"crypto/x509"
 	"encoding/base64"
 	"flag"
 	"fmt"
-	"io/ioutil"
 	"strings"
 	"sync"
 	"time"

+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
-	"github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/persistentqueue"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
+	"github.com/VictoriaMetrics/fasthttp"
 	"github.com/VictoriaMetrics/metrics"
-	"github.com/valyala/fasthttp"
 )

 var (
 	sendTimeout = flag.Duration("remoteWrite.sendTimeout", time.Minute, "Timeout for sending a single block of data to -remoteWrite.url")

 	tlsInsecureSkipVerify = flag.Bool("remoteWrite.tlsInsecureSkipVerify", false, "Whether to skip tls verification when connecting to -remoteWrite.url")
-	tlsCertFile           = flag.String("remoteWrite.tlsCertFile", "", "Optional path to client-side TLS certificate file to use when connecting to -remoteWrite.url")
-	tlsKeyFile            = flag.String("remoteWrite.tlsKeyFile", "", "Optional path to client-side TLS certificate key to use when connecting to -remoteWrite.url")
-	tlsCAFile             = flag.String("remoteWrite.tlsCAFile", "", "Optional path to TLS CA file to use for verifying connections to -remoteWrite.url. "+
-		"By default system CA is used")
+	tlsCertFile           = flagutil.NewArray("remoteWrite.tlsCertFile", "Optional path to client-side TLS certificate file to use when connecting to -remoteWrite.url. "+
+		"If multiple args are set, then they are applied independently for the corresponding -remoteWrite.url")
+	tlsKeyFile = flagutil.NewArray("remoteWrite.tlsKeyFile", "Optional path to client-side TLS certificate key to use when connecting to -remoteWrite.url. "+
+		"If multiple args are set, then they are applied independently for the corresponding -remoteWrite.url")
+	tlsCAFile = flagutil.NewArray("remoteWrite.tlsCAFile", "Optional path to TLS CA file to use for verifying connections to -remoteWrite.url. "+
+		"By default system CA is used. If multiple args are set, then they are applied independently for the corresponding -remoteWrite.url")
+	tlsServerName = flagutil.NewArray("remoteWrite.tlsServerName", "Optional TLS server name to use for connections to -remoteWrite.url. "+
+		"By default the server name from -remoteWrite.url is used. If multiple args are set, then they are applied independently for the corresponding -remoteWrite.url")

-	basicAuthUsername = flag.String("remoteWrite.basicAuth.username", "", "Optional basic auth username to use for -remoteWrite.url")
-	basicAuthPassword = flag.String("remoteWrite.basicAuth.password", "", "Optional basic auth password to use for -remoteWrite.url")
-	bearerToken       = flag.String("remoteWrite.bearerToken", "", "Optional bearer auth token to use for -remoteWrite.url")
+	basicAuthUsername = flagutil.NewArray("remoteWrite.basicAuth.username", "Optional basic auth username to use for -remoteWrite.url. "+
+		"If multiple args are set, then they are applied independently for the corresponding -remoteWrite.url")
+	basicAuthPassword = flagutil.NewArray("remoteWrite.basicAuth.password", "Optional basic auth password to use for -remoteWrite.url. "+
+		"If multiple args are set, then they are applied independently for the corresponding -remoteWrite.url")
+	bearerToken = flagutil.NewArray("remoteWrite.bearerToken", "Optional bearer auth token to use for -remoteWrite.url. "+
+		"If multiple args are set, then they are applied independently for the corresponding -remoteWrite.url")
 )

 type client struct {
@@ -50,19 +56,22 @@ type client struct {
 	stopCh chan struct{}
 }

-func newClient(remoteWriteURL, urlLabelValue string, fq *persistentqueue.FastQueue, concurrency int) *client {
+func newClient(argIdx int, remoteWriteURL, urlLabelValue string, fq *persistentqueue.FastQueue, concurrency int) *client {
 	authHeader := ""
-	if len(*basicAuthUsername) > 0 || len(*basicAuthPassword) > 0 {
+	username := basicAuthUsername.GetOptionalArg(argIdx)
+	password := basicAuthPassword.GetOptionalArg(argIdx)
+	if len(username) > 0 || len(password) > 0 {
 		// See https://en.wikipedia.org/wiki/Basic_access_authentication
-		token := *basicAuthUsername + ":" + *basicAuthPassword
+		token := username + ":" + password
 		token64 := base64.StdEncoding.EncodeToString([]byte(token))
 		authHeader = "Basic " + token64
 	}
-	if len(*bearerToken) > 0 {
+	token := bearerToken.GetOptionalArg(argIdx)
+	if len(token) > 0 {
 		if authHeader != "" {
-			logger.Panicf("FATAL: `-remoteWrite.bearerToken`=%q cannot be set when `-remoteWrite.basicAuth.*` flags are set", *bearerToken)
+			logger.Fatalf("`-remoteWrite.bearerToken`=%q cannot be set when `-remoteWrite.basicAuth.*` flags are set", token)
 		}
-		authHeader = "Bearer " + *bearerToken
+		authHeader = "Bearer " + token
 	}

 	readTimeout := *sendTimeout
@@ -76,18 +85,18 @@ func newClient(remoteWriteURL, urlLabelValue string, fq *persistentqueue.FastQue
 	switch scheme {
 	case "http", "https":
 	default:
-		logger.Panicf("FATAL: unsupported scheme in -remoteWrite.url=%q: %q. It must be http or https", remoteWriteURL, scheme)
+		logger.Fatalf("unsupported scheme in -remoteWrite.url=%q: %q. It must be http or https", remoteWriteURL, scheme)
 	}
 	host := string(u.Host())
 	if len(host) == 0 {
-		logger.Panicf("FATAL: invalid -remoteWrite.url=%q: host cannot be empty. Make sure the url looks like `http://host:port/path`", remoteWriteURL)
+		logger.Fatalf("invalid -remoteWrite.url=%q: host cannot be empty. Make sure the url looks like `http://host:port/path`", remoteWriteURL)
 	}
 	requestURI := string(u.RequestURI())
 	isTLS := scheme == "https"
 	var tlsCfg *tls.Config
 	if isTLS {
 		var err error
-		tlsCfg, err = getTLSConfig()
+		tlsCfg, err = getTLSConfig(argIdx)
 		if err != nil {
 			logger.Panicf("FATAL: cannot initialize TLS config: %s", err)
 		}
@@ -104,7 +113,6 @@ func newClient(remoteWriteURL, urlLabelValue string, fq *persistentqueue.FastQue
 		Addr:                host,
 		Name:                "vmagent",
 		Dial:                statDial,
-		DialDualStack:       netutil.TCP6Enabled(),
 		IsTLS:               isTLS,
 		TLSConfig:           tlsCfg,
 		MaxConns:            maxConns,
@@ -144,34 +152,19 @@ func (c *client) MustStop() {
 	logger.Infof("stopped client for -remoteWrite.url=%q", c.remoteWriteURL)
 }

-func getTLSConfig() (*tls.Config, error) {
-	var tlsRootCA *x509.CertPool
-	var tlsCertificate *tls.Certificate
-	if *tlsCertFile != "" || *tlsKeyFile != "" {
-		cert, err := tls.LoadX509KeyPair(*tlsCertFile, *tlsKeyFile)
-		if err != nil {
-			return nil, fmt.Errorf("cannot load TLS certificate for -remoteWrite.tlsCertFile=%q and -remoteWrite.tlsKeyFile=%q: %s", *tlsCertFile, *tlsKeyFile, err)
-		}
-		tlsCertificate = &cert
+func getTLSConfig(argIdx int) (*tls.Config, error) {
+	tlsConfig := &promauth.TLSConfig{
+		CAFile:             tlsCAFile.GetOptionalArg(argIdx),
+		CertFile:           tlsCertFile.GetOptionalArg(argIdx),
+		KeyFile:            tlsKeyFile.GetOptionalArg(argIdx),
+		ServerName:         tlsServerName.GetOptionalArg(argIdx),
+		InsecureSkipVerify: *tlsInsecureSkipVerify,
 	}
-	if *tlsCAFile != "" {
-		data, err := ioutil.ReadFile(*tlsCAFile)
-		if err != nil {
-			return nil, fmt.Errorf("cannot read -remoteWrite.tlsCAFile=%q: %s", *tlsCAFile, err)
-		}
-		tlsRootCA = x509.NewCertPool()
-		if !tlsRootCA.AppendCertsFromPEM(data) {
-			return nil, fmt.Errorf("cannot parse data -remoteWrite.tlsCAFile=%q", *tlsCAFile)
-		}
+	cfg, err := promauth.NewConfig(".", nil, "", "", tlsConfig)
+	if err != nil {
+		return nil, fmt.Errorf("cannot populate TLS config: %s", err)
 	}
-	tlsCfg := &tls.Config{
-		RootCAs:            tlsRootCA,
-		ClientSessionCache: tls.NewLRUClientSessionCache(0),
-	}
-	if tlsCertificate != nil {
-		tlsCfg.Certificates = []tls.Certificate{*tlsCertificate}
-	}
-	tlsCfg.InsecureSkipVerify = *tlsInsecureSkipVerify
+	tlsCfg := cfg.NewTLSConfig()
 	return tlsCfg, nil
 }

--- a/app/vmagent/remotewrite/pendingseries.go
+++ b/app/vmagent/remotewrite/pendingseries.go
@@ -6,7 +6,7 @@ import (
 	"time"

 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
-	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/persistentqueue"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
 	"github.com/VictoriaMetrics/metrics"
@@ -15,7 +15,8 @@ import (

 var (
 	flushInterval = flag.Duration("remoteWrite.flushInterval", time.Second, "Interval for flushing the data to remote storage. "+
-		"Higher value reduces network bandwidth usage at the cost of delayed push of scraped data to remote storage")
+		"Higher value reduces network bandwidth usage at the cost of delayed push of scraped data to remote storage. "+
+		"Minimum supported interval is 1 second")
 	maxUnpackedBlockSize = flag.Int("remoteWrite.maxBlockSize", 32*1024*1024, "The maximum size in bytes of unpacked request to send to remote storage. "+
 		"It shouldn't exceed -maxInsertRequestSize from VictoriaMetrics")
 )
@@ -55,6 +56,10 @@ func (ps *pendingSeries) Push(tss []prompbmarshal.TimeSeries) {
 }

 func (ps *pendingSeries) periodicFlusher() {
+	flushSeconds := int64(flushInterval.Seconds())
+	if flushSeconds <= 0 {
+		flushSeconds = 1
+	}
 	ticker := time.NewTicker(*flushInterval)
 	defer ticker.Stop()
 	mustStop := false
@@ -63,7 +68,7 @@ func (ps *pendingSeries) periodicFlusher() {
 		case <-ps.stopCh:
 			mustStop = true
 		case <-ticker.C:
-			if time.Since(ps.wr.lastFlushTime) < *flushInterval/2 {
+			if fasttime.UnixTimestamp()-ps.wr.lastFlushTime < uint64(flushSeconds) {
 				continue
 			}
 		}
@@ -76,7 +81,7 @@ func (ps *pendingSeries) periodicFlusher() {
 type writeRequest struct {
 	wr            prompbmarshal.WriteRequest
 	pushBlock     func(block []byte)
-	lastFlushTime time.Time
+	lastFlushTime uint64

 	tss []prompbmarshal.TimeSeries

@@ -108,7 +113,7 @@ func (wr *writeRequest) reset() {

 func (wr *writeRequest) flush() {
 	wr.wr.Timeseries = wr.tss
-	wr.lastFlushTime = time.Now()
+	wr.lastFlushTime = fasttime.UnixTimestamp()
 	pushWriteRequest(&wr.wr, wr.pushBlock)
 	wr.reset()
 }
@@ -144,13 +149,8 @@ func (wr *writeRequest) copyTimeSeries(dst, src *prompbmarshal.TimeSeries) {
 	}
 	dst.Labels = labelsDst[labelsLen:]

-	samplesDst = append(samplesDst, prompbmarshal.Sample{})
-	dstSample := &samplesDst[len(samplesDst)-1]
-	if len(src.Samples) != 1 {
-		logger.Panicf("BUG: unexpected number of samples in time series; got %d; want 1", len(src.Samples))
-	}
-	*dstSample = src.Samples[0]
-	dst.Samples = samplesDst[len(samplesDst)-1:]
+	samplesDst = append(samplesDst, src.Samples...)
+	dst.Samples = samplesDst[len(samplesDst)-len(src.Samples):]

 	wr.samples = samplesDst
 	wr.labels = labelsDst
--- a/app/vmagent/remotewrite/relabel.go
+++ b/app/vmagent/remotewrite/relabel.go
@@ -2,6 +2,7 @@ package remotewrite

 import (
 	"flag"
+	"fmt"
 	"strings"
 	"sync"

@@ -16,35 +17,60 @@ var (
 		"Pass multiple -remoteWrite.label flags in order to add multiple flags to metrics before sending them to remote storage")
 	relabelConfigPathGlobal = flag.String("remoteWrite.relabelConfig", "", "Optional path to file with relabel_config entries. These entries are applied to all the metrics "+
 		"before sending them to -remoteWrite.url. See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config for details")
+	relabelConfigPaths = flagutil.NewArray("remoteWrite.urlRelabelConfig", "Optional path to relabel config for the corresponding -remoteWrite.url")
 )

 var labelsGlobal []prompbmarshal.Label
-var prcsGlobal []promrelabel.ParsedRelabelConfig

-// initRelabelGlobal must be called after parsing command-line flags.
-func initRelabelGlobal() {
+// CheckRelabelConfigs checks -remoteWrite.relabelConfig and -remoteWrite.urlRelabelConfig.
+func CheckRelabelConfigs() error {
+	_, err := loadRelabelConfigs()
+	return err
+}
+
+func loadRelabelConfigs() (*relabelConfigs, error) {
+	var rcs relabelConfigs
+	if *relabelConfigPathGlobal != "" {
+		global, err := promrelabel.LoadRelabelConfigs(*relabelConfigPathGlobal)
+		if err != nil {
+			return nil, fmt.Errorf("cannot load -remoteWrite.relabelConfig=%q: %s", *relabelConfigPathGlobal, err)
+		}
+		rcs.global = global
+	}
+	if len(*relabelConfigPaths) > len(*remoteWriteURLs) {
+		return nil, fmt.Errorf("too many -remoteWrite.urlRelabelConfig args: %d; it mustn't exceed the number of -remoteWrite.url args: %d",
+			len(*relabelConfigPaths), len(*remoteWriteURLs))
+	}
+	rcs.perURL = make([][]promrelabel.ParsedRelabelConfig, len(*remoteWriteURLs))
+	for i, path := range *relabelConfigPaths {
+		prc, err := promrelabel.LoadRelabelConfigs(path)
+		if err != nil {
+			return nil, fmt.Errorf("cannot load relabel configs from -remoteWrite.urlRelabelConfig=%q: %s", path, err)
+		}
+		rcs.perURL[i] = prc
+	}
+	return &rcs, nil
+}
+
+type relabelConfigs struct {
+	global []promrelabel.ParsedRelabelConfig
+	perURL [][]promrelabel.ParsedRelabelConfig
+}
+
+// initLabelsGlobal must be called after parsing command-line flags.
+func initLabelsGlobal() {
 	// Init labelsGlobal
 	labelsGlobal = nil
 	for _, s := range *unparsedLabelsGlobal {
 		n := strings.IndexByte(s, '=')
 		if n < 0 {
-			logger.Panicf("FATAL: missing '=' in `-remoteWrite.label`. It must contain label in the form `name=value`; got %q", s)
+			logger.Fatalf("missing '=' in `-remoteWrite.label`. It must contain label in the form `name=value`; got %q", s)
 		}
 		labelsGlobal = append(labelsGlobal, prompbmarshal.Label{
 			Name:  s[:n],
 			Value: s[n+1:],
 		})
 	}
-
-	// Init prcsGlobal
-	prcsGlobal = nil
-	if len(*relabelConfigPathGlobal) > 0 {
-		var err error
-		prcsGlobal, err = promrelabel.LoadRelabelConfigs(*relabelConfigPathGlobal)
-		if err != nil {
-			logger.Panicf("FATAL: cannot load relabel configs from -remoteWrite.relabelConfig=%q: %s", *relabelConfigPathGlobal, err)
-		}
-	}
 }

 func (rctx *relabelCtx) applyRelabeling(tss []prompbmarshal.TimeSeries, extraLabels []prompbmarshal.Label, prcs []promrelabel.ParsedRelabelConfig) []prompbmarshal.TimeSeries {
--- a/app/vmagent/remotewrite/remotewrite.go
+++ b/app/vmagent/remotewrite/remotewrite.go
@@ -3,6 +3,7 @@ package remotewrite
 import (
 	"flag"
 	"fmt"
+	"sync"
 	"sync/atomic"

 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
@@ -10,8 +11,8 @@ import (
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/memory"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/persistentqueue"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
-	"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
 	"github.com/VictoriaMetrics/metrics"
 	xxhash "github.com/cespare/xxhash/v2"
 )
@@ -20,9 +21,8 @@ var (
 	remoteWriteURLs = flagutil.NewArray("remoteWrite.url", "Remote storage URL to write data to. It must support Prometheus remote_write API. "+
 		"It is recommended using VictoriaMetrics as remote storage. Example url: http://<victoriametrics-host>:8428/api/v1/write . "+
 		"Pass multiple -remoteWrite.url flags in order to write data concurrently to multiple remote storage systems")
-	relabelConfigPaths = flagutil.NewArray("remoteWrite.urlRelabelConfig", "Optional path to relabel config for the corresponding -remoteWrite.url")
-	tmpDataPath        = flag.String("remoteWrite.tmpDataPath", "vmagent-remotewrite-data", "Path to directory where temporary data for remote write component is stored")
-	queues             = flag.Int("remoteWrite.queues", 1, "The number of concurrent queues to each -remoteWrite.url. Set more queues if a single queue "+
+	tmpDataPath = flag.String("remoteWrite.tmpDataPath", "vmagent-remotewrite-data", "Path to directory where temporary data for remote write component is stored")
+	queues      = flag.Int("remoteWrite.queues", 1, "The number of concurrent queues to each -remoteWrite.url. Set more queues if a single queue "+
 		"isn't enough for sending high volume of collected data to remote storage")
 	showRemoteWriteURL = flag.Bool("remoteWrite.showURL", false, "Whether to show -remoteWrite.url in the exported metrics. "+
 		"It is hidden by default, since it can contain sensistive auth info")
@@ -34,6 +34,9 @@ var (

 var rwctxs []*remoteWriteCtx

+// Contains the current relabelConfigs.
+var allRelabelConfigs atomic.Value
+
 // Init initializes remotewrite.
 //
 // It must be called after flag.Parse().
@@ -41,14 +44,19 @@ var rwctxs []*remoteWriteCtx
 // Stop must be called for graceful shutdown.
 func Init() {
 	if len(*remoteWriteURLs) == 0 {
-		logger.Panicf("FATAL: at least one `-remoteWrite.url` must be set")
+		logger.Fatalf("at least one `-remoteWrite.url` must be set")
 	}

 	if !*showRemoteWriteURL {
 		// remoteWrite.url can contain authentication codes, so hide it at `/metrics` output.
 		httpserver.RegisterSecretFlag("remoteWrite.url")
 	}
-	initRelabelGlobal()
+	initLabelsGlobal()
+	rcs, err := loadRelabelConfigs()
+	if err != nil {
+		logger.Fatalf("cannot load relabel configs: %s", err)
+	}
+	allRelabelConfigs.Store(rcs)

 	maxInmemoryBlocks := memory.Allowed() / len(*remoteWriteURLs) / maxRowsPerBlock / 100
 	if maxInmemoryBlocks > 200 {
@@ -61,23 +69,47 @@ func Init() {
 		maxInmemoryBlocks = 2
 	}
 	for i, remoteWriteURL := range *remoteWriteURLs {
-		relabelConfigPath := ""
-		if i < len(*relabelConfigPaths) {
-			relabelConfigPath = (*relabelConfigPaths)[i]
-		}
 		urlLabelValue := fmt.Sprintf("secret-url-%d", i+1)
 		if *showRemoteWriteURL {
 			urlLabelValue = remoteWriteURL
 		}
-		rwctx := newRemoteWriteCtx(remoteWriteURL, relabelConfigPath, maxInmemoryBlocks, urlLabelValue)
+		rwctx := newRemoteWriteCtx(i, remoteWriteURL, maxInmemoryBlocks, urlLabelValue)
 		rwctxs = append(rwctxs, rwctx)
 	}
+
+	// Start config reloader.
+	sighupCh := procutil.NewSighupChan()
+	configReloaderWG.Add(1)
+	go func() {
+		defer configReloaderWG.Done()
+		for {
+			select {
+			case <-sighupCh:
+			case <-stopCh:
+				return
+			}
+			logger.Infof("SIGHUP received; reloading relabel configs pointed by -remoteWrite.relabelConfig and -remoteWrite.urlRelabelConfig")
+			rcs, err := loadRelabelConfigs()
+			if err != nil {
+				logger.Errorf("cannot reload relabel configs; preserving the previous configs; error: %s", err)
+				continue
+			}
+			allRelabelConfigs.Store(rcs)
+			logger.Infof("Successfully reloaded relabel configs")
+		}
+	}()
 }

+var stopCh = make(chan struct{})
+var configReloaderWG sync.WaitGroup
+
 // Stop stops remotewrite.
 //
 // It is expected that nobody calls Push during and after the call to this func.
 func Stop() {
+	close(stopCh)
+	configReloaderWG.Wait()
+
 	for _, rwctx := range rwctxs {
 		rwctx.MustStop()
 	}
@@ -86,9 +118,11 @@ func Stop() {

 // Push sends wr to remote storage systems set via `-remoteWrite.url`.
 //
-// Each timeseries in wr.Timeseries must contain one sample.
+// Note that wr may be modified by Push due to relabeling.
 func Push(wr *prompbmarshal.WriteRequest) {
 	var rctx *relabelCtx
+	rcs := allRelabelConfigs.Load().(*relabelConfigs)
+	prcsGlobal := rcs.global
 	if len(prcsGlobal) > 0 || len(labelsGlobal) > 0 {
 		rctx = getRelabelCtx()
 	}
@@ -122,16 +156,18 @@ func Push(wr *prompbmarshal.WriteRequest) {
 var globalRelabelMetricsDropped = metrics.NewCounter("vmagent_remotewrite_global_relabel_metrics_dropped_total")

 type remoteWriteCtx struct {
+	idx        int
 	fq         *persistentqueue.FastQueue
 	c          *client
-	prcs       []promrelabel.ParsedRelabelConfig
 	pss        []*pendingSeries
 	pssNextIdx uint64

+	tss []prompbmarshal.TimeSeries
+
 	relabelMetricsDropped *metrics.Counter
 }

-func newRemoteWriteCtx(remoteWriteURL, relabelConfigPath string, maxInmemoryBlocks int, urlLabelValue string) *remoteWriteCtx {
+func newRemoteWriteCtx(argIdx int, remoteWriteURL string, maxInmemoryBlocks int, urlLabelValue string) *remoteWriteCtx {
 	h := xxhash.Sum64([]byte(remoteWriteURL))
 	path := fmt.Sprintf("%s/persistent-queue/%016X", *tmpDataPath, h)
 	fq := persistentqueue.MustOpenFastQueue(path, remoteWriteURL, maxInmemoryBlocks, *maxPendingBytesPerURL)
@@ -141,24 +177,16 @@ func newRemoteWriteCtx(remoteWriteURL, relabelConfigPath string, maxInmemoryBloc
 	_ = metrics.GetOrCreateGauge(fmt.Sprintf(`vmagent_remotewrite_pending_inmemory_blocks{path=%q, url=%q}`, path, urlLabelValue), func() float64 {
 		return float64(fq.GetInmemoryQueueLen())
 	})
-	c := newClient(remoteWriteURL, urlLabelValue, fq, *queues)
-	var prcs []promrelabel.ParsedRelabelConfig
-	if len(relabelConfigPath) > 0 {
-		var err error
-		prcs, err = promrelabel.LoadRelabelConfigs(relabelConfigPath)
-		if err != nil {
-			logger.Panicf("FATAL: cannot load relabel configs from -remoteWrite.urlRelabelConfig=%q: %s", relabelConfigPath, err)
-		}
-	}
+	c := newClient(argIdx, remoteWriteURL, urlLabelValue, fq, *queues)
 	pss := make([]*pendingSeries, *queues)
 	for i := range pss {
 		pss[i] = newPendingSeries(fq.MustWriteBlock)
 	}
 	return &remoteWriteCtx{
-		fq:   fq,
-		c:    c,
-		prcs: prcs,
-		pss:  pss,
+		idx: argIdx,
+		fq:  fq,
+		c:   c,
+		pss: pss,

 		relabelMetricsDropped: metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_relabel_metrics_dropped_total{path=%q, url=%q}`, path, urlLabelValue)),
 	}
@@ -168,10 +196,10 @@ func (rwctx *remoteWriteCtx) MustStop() {
 	for _, ps := range rwctx.pss {
 		ps.MustStop()
 	}
+	rwctx.idx = 0
 	rwctx.pss = nil
 	rwctx.fq.MustClose()
 	rwctx.fq = nil
-	rwctx.prcs = nil
 	rwctx.c.MustStop()
 	rwctx.c = nil

@@ -180,10 +208,17 @@ func (rwctx *remoteWriteCtx) MustStop() {

 func (rwctx *remoteWriteCtx) Push(tss []prompbmarshal.TimeSeries) {
 	var rctx *relabelCtx
-	if len(rwctx.prcs) > 0 {
+	rcs := allRelabelConfigs.Load().(*relabelConfigs)
+	prcs := rcs.perURL[rwctx.idx]
+	if len(prcs) > 0 {
+		// Make a copy of tss before applying relabeling in order to prevent
+		// from affecting time series for other remoteWrite.url configs.
+		// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/467 for details.
+		rwctx.tss = append(rwctx.tss[:0], tss...)
+		tss = rwctx.tss
 		rctx = getRelabelCtx()
 		tssLen := len(tss)
-		tss = rctx.applyRelabeling(tss, nil, rwctx.prcs)
+		tss = rctx.applyRelabeling(tss, nil, prcs)
 		rwctx.relabelMetricsDropped.Add(tssLen - len(tss))
 	}
 	pss := rwctx.pss
@@ -191,5 +226,7 @@ func (rwctx *remoteWriteCtx) Push(tss []prompbmarshal.TimeSeries) {
 	pss[idx].Push(tss)
 	if rctx != nil {
 		putRelabelCtx(rctx)
+		// Zero rwctx.tss in order to free up GC references.
+		rwctx.tss = prompbmarshal.ResetTimeSeries(rwctx.tss)
 	}
 }
--- a/app/vmagent/remotewrite/statconn.go
+++ b/app/vmagent/remotewrite/statconn.go
@@ -4,12 +4,17 @@ import (
 	"net"
 	"sync/atomic"

+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil"
+	"github.com/VictoriaMetrics/fasthttp"
 	"github.com/VictoriaMetrics/metrics"
-	"github.com/valyala/fasthttp"
 )

-func statDial(addr string) (net.Conn, error) {
-	conn, err := fasthttp.Dial(addr)
+func statDial(addr string) (conn net.Conn, err error) {
+	if netutil.TCP6Enabled() {
+		conn, err = fasthttp.DialDualStack(addr)
+	} else {
+		conn, err = fasthttp.Dial(addr)
+	}
 	dialsTotal.Inc()
 	if err != nil {
 		dialErrors.Inc()
--- a/app/vmalert/Makefile
+++ b/app/vmalert/Makefile
@@ -52,11 +52,17 @@ publish-vmalert:
 	APP_NAME=vmalert $(MAKE) publish-via-docker

 test-vmalert:
-	go test -race -cover ./app/vmalert
+	go test -v -race -cover ./app/vmalert -loggerLevel=ERROR
+	go test -v -race -cover ./app/vmalert/datasource
+	go test -v -race -cover ./app/vmalert/notifier
+	go test -v -race -cover ./app/vmalert/config

 run-vmalert: vmalert
-	./bin/vmalert -rule=app/vmalert/testdata/rules0-good.rules \
-		-datasource.url=http://localhost:8428 -notifier.url=http://localhost:9093 \
+	./bin/vmalert -rule=app/vmalert/config/testdata/rules2-good.rules \
+		-datasource.url=http://localhost:8428 \
+		-notifier.url=http://localhost:9093 \
+		-remoteWrite.url=http://localhost:8428 \
+		-remoteRead.url=http://localhost:8428 \
 		-evaluationInterval=3s

 vmalert-amd64:
--- a/app/vmalert/README.md
+++ b/app/vmalert/README.md
@@ -1,23 +1,27 @@
-## VM Alert
+## vmalert

-`vmalert` executes a list of given MetricsQL expressions (rules) and
-sends alerts to [Alert Manager](https://github.com/prometheus/alertmanager).   
-
-NOTE: `vmalert` is in early alpha and wasn't tested in production systems yet.
+`vmalert` executes a list of given [alerting](https://prometheus.io/docs/prometheus/latest/configuration/alerting_rules/)
+or [recording](https://prometheus.io/docs/prometheus/latest/configuration/recording_rules/)
+rules against configured address.

 ### Features:
 * Integration with [VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics) TSDB;
 * VictoriaMetrics [MetricsQL](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/MetricsQL)
- expressions validation;
+ support and expressions validation;
 * Prometheus [alerting rules definition format](https://prometheus.io/docs/prometheus/latest/configuration/alerting_rules/#defining-alerting-rules)
 support;
 * Integration with [Alertmanager](https://github.com/prometheus/alertmanager);
 * Lightweight without extra dependencies.

-### TODO:
-* Persist alerts state as timeseries in TSDB. Currently, alerts state is stored
-in process memory only and will be lost on restart;
-* Configuration hot reload.
+### Limitations:
+* `vmalert` executes queries against a remote datasource, which has reliability risks because of the network.
+It is recommended to configure alerts thresholds and rules expressions with understanding that network request
+may fail;
+* by default, rules execution is sequential within one group, but persisting of execution results to remote
+storage is asynchronous. Hence, user shouldn't rely on recording rules chaining when result of previous
+recording rule is reused in next one;
+* there is no `query` function support in templates yet;
+* `vmalert` has no UI, just an API for getting groups and rules statuses.

 ### QuickStart

@@ -30,10 +34,12 @@ make vmalert
 The build binary will be placed to `VictoriaMetrics/bin` folder.

 To start using `vmalert` you will need the following things:
-* list of alert rules - PromQL/MetricsQL expressions to execute;
+* list of rules - PromQL/MetricsQL expressions to execute;
 * datasource address - reachable VictoriaMetrics instance for rules execution;
-* notifier address - reachable Alertmanager instance for processing, 
+* notifier address - reachable [Alert Manager](https://github.com/prometheus/alertmanager) instance for processing, 
 aggregating alerts and sending notifications.
+* remote write address - [remote write](https://prometheus.io/docs/prometheus/latest/storage/#remote-storage-integrations)
+compatible storage address for storing recording rules results and alerts state in the form of timeseries. This is optional.

 Then configure `vmalert` accordingly:
 ```
@@ -42,38 +48,188 @@ Then configure `vmalert` accordingly:
        -notifier.url=http://localhost:9093
 ```

-Example for `.rules` file may be found [here](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmalert/testdata/rules0-good.rules)
+Configuration for [recording](https://prometheus.io/docs/prometheus/latest/configuration/recording_rules/) 
+and [alerting](https://prometheus.io/docs/prometheus/latest/configuration/alerting_rules/) rules is very 
+similar to Prometheus rules and configured using YAML. Configuration examples may be found 
+in [testdata](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmalert/config/testdata) folder.
+Every `rule` belongs to a `group`, and every configuration file may contain an arbitrary number of groups:
+```yaml
+groups:
+  [ - <rule_group> ]
+```

-`vmalert` runs evaluation for every group in a separate goroutine.
-Rules in group evaluated one-by-one sequentially. 
+#### Groups

-`vmalert` also runs a web-server (`-httpListenAddr`) for serving metrics and alerts endpoints:
+Each group has following attributes:
+```yaml
+# The name of the group. Must be unique within a file.
+name: <string>
+
+# How often rules in the group are evaluated.
+[ interval: <duration> | default = global.evaluation_interval ]
+
+# How many rules execute at once. Increasing concurrency may speed
+# up round execution.
+[ concurrency: <integer> | default = 1 ]
+
+rules:
+  [ - <rule> ... ]
+```
+
+#### Rules
+
+There are two types of Rules:
+* [alerting](https://prometheus.io/docs/prometheus/latest/configuration/alerting_rules/) - 
+Alerting rules allow defining alert conditions via [MetricsQL](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/MetricsQL)
+and to send notifications about firing alerts to [Alertmanager](https://github.com/prometheus/alertmanager).
+* [recording](https://prometheus.io/docs/prometheus/latest/configuration/recording_rules/) - 
+Recording rules allow you to precompute frequently needed or computationally expensive expressions 
+and save their result as a new set of time series.
+
+`vmalert` forbids defining duplicates - rules with the same combination of name, expression and labels
+within one group. 
+
+##### Alerting rules
+
+The syntax for alerting rule is following:
+```yaml
+# The name of the alert. Must be a valid metric name.
+alert: <string>
+
+# The MetricsQL expression to evaluate.
+expr: <string>
+
+# Alerts are considered firing once they have been returned for this long.
+# Alerts which have not yet fired for long enough are considered pending.
+[ for: <duration> | default = 0s ]
+
+# Labels to add or overwrite for each alert.
+labels:
+  [ <labelname>: <tmpl_string> ]
+
+# Annotations to add to each alert.
+annotations:
+  [ <labelname>: <tmpl_string> ]
+``` 
+
+`vmalert` has no local storage and alerts state is stored in process memory. Hence, after reloading of `vmalert` process
+alerts state will be lost. To avoid this situation, `vmalert` may be configured via following flags:
+* `-remoteWrite.url` - URL to Victoria Metrics or VMInsert. `vmalert` will persist alerts state into the configured
+address in form of timeseries with name `ALERTS` via remote-write protocol.
+* `-remoteRead.url` - URL to Victoria Metrics or VMSelect. `vmalert` will try to restore alerts state from configured
+address by querying `ALERTS` timeseries.
+
+
+##### Recording rules
+
+The syntax for recording rules is following:
+```yaml
+# The name of the time series to output to. Must be a valid metric name.
+record: <string>
+
+# The MetricsQL expression to evaluate.
+expr: <string>
+
+# Labels to add or overwrite before storing the result.
+labels:
+  [ <labelname>: <labelvalue> ]
+```
+
+For recording rules to work `-remoteWrite.url` must be specified.
+
+
+#### WEB
+
+`vmalert` runs a web-server (`-httpListenAddr`) for serving metrics and alerts endpoints:
+* `http://<vmalert-addr>/api/v1/groups` - list of all loaded groups and rules;
 * `http://<vmalert-addr>/api/v1/alerts` - list of all active alerts;
 * `http://<vmalert-addr>/api/v1/<groupName>/<alertID>/status" ` - get alert status by ID.
 Used as alert source in AlertManager.
 * `http://<vmalert-addr>/metrics` - application metrics.
+* `http://<vmalert-addr>/-/reload` - hot configuration reload.
+

 ### Configuration

 The shortlist of configuration flags is the following:
 ```
 Usage of vmalert:
-  -datasource.url string
-        Victoria Metrics or VMSelect url. Required parameter. e.g. http://127.0.0.1:8428
  -datasource.basicAuth.password string
-        Optional basic auth password to use for -datasource.url
+        Optional basic auth password for -datasource.url
  -datasource.basicAuth.username string
-        Optional basic auth username to use for -datasource.url
+        Optional basic auth username for -datasource.url
+  -datasource.tlsCAFile value
+        Optional path to TLS CA file to use for verifying connections to -datasource.url. By default system CA is used.
+  -datasource.tlsCertFile value
+        Optional path to client-side TLS certificate file to use when connecting to -datasource.url.
+  -datasource.tlsInsecureSkipVerify
+        Whether to skip tls verification when connecting to -datasource.url
+  -datasource.tlsKeyFile value
+        Optional path to client-side TLS certificate key to use when connecting to -datasource.url.
+  -datasource.tlsServerName value
+        Optional TLS server name to use for connections to -datasource.url. By default the server name from -datasource.url is used.
+  -datasource.url string
+        Victoria Metrics or VMSelect url. Required parameter. E.g. http://127.0.0.1:8428
  -evaluationInterval duration
-        How often to evaluate the rules. Default 1m (default 1m0s)
+        How often to evaluate the rules (default 1m0s)
  -external.url string
        External URL is used as alert's source for sent alerts to the notifier
  -httpListenAddr string
        Address to listen for http connections (default ":8880")
+  -metricsAuthKey string
+        Auth key for /metrics. It overrides httpAuth settings
+  -notifier.tlsCAFile value
+        Optional path to TLS CA file to use for verifying connections to -notifier.url. By default system CA is used.
+  -notifier.tlsCertFile value
+        Optional path to client-side TLS certificate file to use when connecting to -notifier.url.
+  -notifier.tlsInsecureSkipVerify
+        Whether to skip tls verification when connecting to -notifier.url
+  -notifier.tlsKeyFile value
+        Optional path to client-side TLS certificate key to use when connecting to -notifier.url.
+  -notifier.tlsServerName value
+        Optional TLS server name to use for connections to -notifier.url. By default the server name from -notifier.url is used.
  -notifier.url string
        Prometheus alertmanager URL. Required parameter. e.g. http://127.0.0.1:9093
-  -remotewrite.url string
-        Optional URL to remote-write compatible storage where to write timeseriesbased on active alerts. E.g. http://127.0.0.1:8428
+  -remoteRead.basicAuth.password string
+        Optional basic auth password for -remoteRead.url
+  -remoteRead.basicAuth.username string
+        Optional basic auth username for -remoteRead.url
+  -remoteRead.lookback duration
+        Lookback defines how far to look into past for alerts timeseries. For example, if lookback=1h then range from now() to now()-1h will be scanned. (default 1h0m0s)
+  -remoteRead.tlsCAFile value
+        Optional path to TLS CA file to use for verifying connections to -remoteRead.url. By default system CA is used.
+  -remoteRead.tlsCertFile value
+        Optional path to client-side TLS certificate file to use when connecting to -remoteRead.url.
+  -remoteRead.tlsInsecureSkipVerify
+        Whether to skip tls verification when connecting to -remoteRead.url
+  -remoteRead.tlsKeyFile value
+        Optional path to client-side TLS certificate key to use when connecting to -remoteRead.url.
+  -remoteRead.tlsServerName value
+        Optional TLS server name to use for connections to -remoteRead.url. By default the server name from -remoteRead.url is used.
+  -remoteRead.url vmalert
+        Optional URL to Victoria Metrics or VMSelect that will be used to restore alerts state. This configuration makes sense only if vmalert was configured with `remoteWrite.url` before and has successfully persisted its state. E.g. http://127.0.0.1:8428
+  -remoteWrite.basicAuth.password string
+        Optional basic auth password for -remoteWrite.url
+  -remoteWrite.basicAuth.username string
+        Optional basic auth username for -remoteWrite.url
+  -remoteWrite.concurrency int
+        Defines number of readers that concurrently write into remote storage (default 1)
+  -remoteWrite.maxBatchSize int
+        Defines max number of timeseries to be flushed at once (default 1000)
+  -remoteWrite.maxQueueSize int
+        Defines the max number of pending datapoints to remote write endpoint (default 100000)
+  -remoteWrite.tlsCAFile value
+        Optional path to TLS CA file to use for verifying connections to -remoteWrite.url. By default system CA is used.
+  -remoteWrite.tlsCertFile value
+        Optional path to client-side TLS certificate file to use when connecting to -remoteWrite.url.
+  -remoteWrite.tlsInsecureSkipVerify
+        Whether to skip tls verification when connecting to -remoteWrite.url
+  -remoteWrite.tlsKeyFile value
+        Optional path to client-side TLS certificate key to use when connecting to -remoteWrite.url.
+  -remoteWrite.tlsServerName value
+        Optional TLS server name to use for connections to -remoteWrite.url. By default the server name from -remoteWrite.url is used.
+  -remoteWrite.url string
+        Optional URL to Victoria Metrics or VMInsert where to persist alerts state in form of timeseries. E.g. http://127.0.0.1:8428
  -rule value
        Path to the file with alert rules. 
        Supports patterns. Flag can be specified multiple times. 
@@ -81,15 +237,39 @@ Usage of vmalert:
         -rule /path/to/file. Path to a single file with alerting rules
         -rule dir/*.yaml -rule /*.yaml. Relative path to all .yaml files in "dir" folder, 
        absolute path to all .yaml files in root.
+  -rule.validateExpressions
+        Whether to validate rules expressions via MetricsQL engine (default true)
  -rule.validateTemplates
-        Indicates to validate annotation and label templates (default true)
+        Whether to validate annotation and label templates (default true)
 ```

 Pass `-help` to `vmalert` in order to see the full list of supported 
 command-line flags with their descriptions.

+To reload configuration without `vmalert` restart send SIGHUP signal
+or send GET request to `/-/reload` endpoint.
+
 ### Contributing

 `vmalert` is mostly designed and built by VictoriaMetrics community.
 Feel free to share your experience and ideas for improving this 
-software. Please keep simplicity as the main priority.
+software. Please keep simplicity as the main priority.
+
+### How to build from sources
+
+It is recommended to use
+[binary releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases) 
+- `vmalert` is located in `vmutils-*` archives there.
+
+
+#### Development build
+
+1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.13.
+2. Run `make vmalert` from the root folder of the repository.
+   It builds `vmalert` binary and puts it into the `bin` folder.
+
+#### Production build
+
+1. [Install docker](https://docs.docker.com/install/).
+2. Run `make vmalert-prod` from the root folder of the repository.
+   It builds `vmalert-prod` binary and puts it into the `bin` folder.
--- a/app/vmalert/alerting.go
+++ b/app/vmalert/alerting.go
@@ -0,0 +1,374 @@
+package main
+
+import (
+	"context"
+	"fmt"
+	"hash/fnv"
+	"sort"
+	"strconv"
+	"sync"
+	"time"
+
+	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config"
+	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
+	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
+)
+
+// AlertingRule is basic alert entity.
+// Exported fields describe the rule itself, while unexported
+// fields hold the runtime state maintained by Exec and Restore.
+type AlertingRule struct {
+	// RuleID is the value returned by ID()
+	RuleID      uint64
+	// Name is used as the name of produced alerts
+	// and as the `alertname` label of produced timeseries
+	Name        string
+	// Expr is the expression passed to the Querier on every Exec
+	Expr        string
+	// For is the time an alert must stay active
+	// before Exec switches it from Pending to Firing
+	For         time.Duration
+	// Labels are templated and merged into labels of every alert,
+	// overriding metric labels with the same name
+	Labels      map[string]string
+	// Annotations are templated for every alert
+	Annotations map[string]string
+	GroupID     uint64
+
+	// guard status fields
+	mu sync.RWMutex
+	// stores alerts keyed by alert ID (hash of metric labels);
+	// alerts resolved during the last Exec are kept here
+	// in Inactive state until the next Exec removes them
+	alerts map[uint64]*notifier.Alert
+	// stores last moment of time Exec was called
+	lastExecTime time.Time
+	// stores last error that happened in Exec func
+	// resets on every successful Exec
+	// may be used as Health state
+	lastExecError error
+}
+
+// newAlertingRule builds an AlertingRule that belongs to the group
+// with the given gID from the rule configuration cfg.
+// The returned rule has an empty, ready-to-use set of alerts.
+func newAlertingRule(gID uint64, cfg config.Rule) *AlertingRule {
+	ar := new(AlertingRule)
+	ar.GroupID = gID
+	ar.RuleID = cfg.ID
+	ar.Name = cfg.Alert
+	ar.Expr = cfg.Expr
+	ar.For = cfg.For
+	ar.Labels = cfg.Labels
+	ar.Annotations = cfg.Annotations
+	ar.alerts = make(map[uint64]*notifier.Alert)
+	return ar
+}
+
+// String implements Stringer interface.
+// It returns the rule Name.
+func (ar *AlertingRule) String() string {
+	return ar.Name
+}
+
+// ID returns unique Rule ID
+// within the parent Group.
+// The value is taken from the RuleID field as is.
+func (ar *AlertingRule) ID() uint64 {
+	return ar.RuleID
+}
+
+// Exec executes AlertingRule expression via the given Querier.
+// Based on the Querier results AlertingRule maintains notifier.Alerts:
+//   - alerts marked Inactive by the previous Exec are removed;
+//   - every returned metric refreshes the matching alert or creates
+//     a new one in Pending state;
+//   - alerts absent from the results are removed if they were Pending
+//     and are marked Inactive otherwise;
+//   - Pending alerts that are active for at least For become Firing.
+//
+// If series is true, all alerts except Inactive ones are returned
+// in form of timeseries; otherwise the returned slice is nil.
+// Query, alert creation and templating errors are stored in lastExecError.
+func (ar *AlertingRule) Exec(ctx context.Context, q datasource.Querier, series bool) ([]prompbmarshal.TimeSeries, error) {
+	// the query is executed before the lock is taken
+	qMetrics, err := q.Query(ctx, ar.Expr)
+	ar.mu.Lock()
+	defer ar.mu.Unlock()
+
+	ar.lastExecError = err
+	ar.lastExecTime = time.Now()
+	if err != nil {
+		return nil, fmt.Errorf("failed to execute query %q: %s", ar.Expr, err)
+	}
+
+	for h, a := range ar.alerts {
+		// cleanup inactive alerts from previous Exec
+		if a.State == notifier.StateInactive {
+			delete(ar.alerts, h)
+		}
+	}
+
+	updated := make(map[uint64]struct{})
+	// update list of active alerts
+	for _, m := range qMetrics {
+		h := hash(m)
+		updated[h] = struct{}{}
+		if a, ok := ar.alerts[h]; ok {
+			if a.Value != m.Value {
+				// update Value field with latest value
+				a.Value = m.Value
+				// and re-exec template since Value can be used
+				// in templates
+				if err := ar.template(a); err != nil {
+					// record the failure in the same way as
+					// a failed alert creation below, so lastExecError
+					// doesn't report success for a failed Exec
+					ar.lastExecError = err
+					return nil, err
+				}
+			}
+			continue
+		}
+		a, err := ar.newAlert(m, ar.lastExecTime)
+		if err != nil {
+			ar.lastExecError = err
+			return nil, fmt.Errorf("failed to create alert: %s", err)
+		}
+		a.ID = h
+		a.State = notifier.StatePending
+		ar.alerts[h] = a
+	}
+
+	for h, a := range ar.alerts {
+		// if alert wasn't updated in this iteration
+		// means it is resolved already
+		if _, ok := updated[h]; !ok {
+			if a.State == notifier.StatePending {
+				// alert was in Pending state - it is not
+				// active anymore
+				delete(ar.alerts, h)
+				continue
+			}
+			a.State = notifier.StateInactive
+			continue
+		}
+		if a.State == notifier.StatePending && time.Since(a.Start) >= ar.For {
+			a.State = notifier.StateFiring
+			alertsFired.Inc()
+		}
+	}
+	if series {
+		return ar.toTimeSeries(ar.lastExecTime), nil
+	}
+	return nil, nil
+}
+
+// toTimeSeries converts every alert that isn't in Inactive state
+// into timeseries with the given timestamp.
+func (ar *AlertingRule) toTimeSeries(timestamp time.Time) []prompbmarshal.TimeSeries {
+	var result []prompbmarshal.TimeSeries
+	for _, alert := range ar.alerts {
+		if alert.State != notifier.StateInactive {
+			result = append(result, ar.alertToTimeSeries(alert, timestamp)...)
+		}
+	}
+	return result
+}
+
+// UpdateWith copies Expr, For, Labels and Annotations
+// from the given rule, which must be an *AlertingRule.
+// alerts state isn't copied since
+// it should be updated in next 2 Execs
+func (ar *AlertingRule) UpdateWith(r Rule) error {
+	switch src := r.(type) {
+	case *AlertingRule:
+		ar.Expr, ar.For = src.Expr, src.For
+		ar.Labels, ar.Annotations = src.Labels, src.Annotations
+		return nil
+	default:
+		return fmt.Errorf("BUG: attempt to update alerting rule with wrong type %#v", r)
+	}
+}
+
+// hash returns FNV-1a 64-bit hash of the metric labels.
+// Labels are hashed in the order of their names, so the result
+// doesn't depend on the order of labels in m. The `__name__` label
+// is ignored.
+//
+// NOTE(review): label name and value are written without a separator
+// between them, so pairs like ("ab","c") and ("a","bc") produce
+// the same hash. This is left as is in order to keep alert IDs stable.
+//
+// TODO: consider hashing algorithm in VM
+func hash(m datasource.Metric) uint64 {
+	h := fnv.New64a()
+	// sort a copy: m.Labels shares its backing array with the caller,
+	// so sorting it in place would reorder the caller's labels
+	labels := make([]datasource.Label, len(m.Labels))
+	copy(labels, m.Labels)
+	sort.Slice(labels, func(i, j int) bool {
+		return labels[i].Name < labels[j].Name
+	})
+	for _, l := range labels {
+		// drop __name__ to be consistent with Prometheus alerting
+		if l.Name == "__name__" {
+			continue
+		}
+		h.Write([]byte(l.Name))
+		h.Write([]byte(l.Value))
+		h.Write([]byte("\xff"))
+	}
+	return h.Sum64()
+}
+
+// newAlert creates an alert for the metric m with the given start time.
+// Alert labels are the metric labels without `__name__`; after that
+// rule labels and annotations are applied via template.
+// The alert is returned together with the templating error, if any.
+func (ar *AlertingRule) newAlert(m datasource.Metric, start time.Time) (*notifier.Alert, error) {
+	labels := make(map[string]string)
+	for _, l := range m.Labels {
+		// drop __name__ to be consistent with Prometheus alerting
+		if l.Name != "__name__" {
+			labels[l.Name] = l.Value
+		}
+	}
+	alert := &notifier.Alert{
+		GroupID: ar.GroupID,
+		Name:    ar.Name,
+		Labels:  labels,
+		Value:   m.Value,
+		Start:   start,
+		Expr:    ar.Expr,
+	}
+	err := ar.template(alert)
+	return alert, err
+}
+
+// template fills Labels and Annotations of the given alert
+// by executing templates of the rule against it.
+func (ar *AlertingRule) template(a *notifier.Alert) error {
+	// step 1: execute templates of rule labels using the alert data
+	ruleLabels, err := a.ExecTemplate(ar.Labels)
+	if err != nil {
+		return err
+	}
+
+	// step 2: put rule labels on top of the alert labels,
+	// so a rule label wins over a metric label with the same name
+	for name, value := range ruleLabels {
+		a.Labels[name] = value
+	}
+
+	// step 3: execute templates of the merged label set;
+	// the result is assigned even if templating has failed
+	mergedLabels, err := a.ExecTemplate(a.Labels)
+	a.Labels = mergedLabels
+	if err != nil {
+		return err
+	}
+
+	// step 4: execute templates of rule annotations
+	annotations, err := a.ExecTemplate(ar.Annotations)
+	a.Annotations = annotations
+	return err
+}
+
+// AlertAPI looks up the alert by its id(hash) and returns
+// its APIAlert representation, or nil if there is no such alert.
+func (ar *AlertingRule) AlertAPI(id uint64) *APIAlert {
+	ar.mu.RLock()
+	defer ar.mu.RUnlock()
+	if alert, found := ar.alerts[id]; found {
+		return ar.newAlertAPI(*alert)
+	}
+	return nil
+}
+
+// RuleAPI returns Rule representation in form
+// of APIAlertingRule.
+// LastError is empty if the last Exec didn't store an error.
+func (ar *AlertingRule) RuleAPI() APIAlertingRule {
+	// lastExecError and lastExecTime are status fields guarded by mu
+	// and are updated by Exec under the lock, so read them under the lock too
+	ar.mu.RLock()
+	defer ar.mu.RUnlock()
+
+	var lastErr string
+	if ar.lastExecError != nil {
+		lastErr = ar.lastExecError.Error()
+	}
+	return APIAlertingRule{
+		// encode as strings to avoid rounding
+		ID:          fmt.Sprintf("%d", ar.ID()),
+		GroupID:     fmt.Sprintf("%d", ar.GroupID),
+		Name:        ar.Name,
+		Expression:  ar.Expr,
+		For:         ar.For.String(),
+		LastError:   lastErr,
+		LastExec:    ar.lastExecTime,
+		Labels:      ar.Labels,
+		Annotations: ar.Annotations,
+	}
+}
+
+// AlertsAPI returns APIAlert representations of all alerts
+// currently stored in the rule. The result is nil if there are none.
+func (ar *AlertingRule) AlertsAPI() []*APIAlert {
+	ar.mu.RLock()
+	defer ar.mu.RUnlock()
+
+	var result []*APIAlert
+	for _, alert := range ar.alerts {
+		result = append(result, ar.newAlertAPI(*alert))
+	}
+	return result
+}
+
+// newAlertAPI converts the given alert into its APIAlert representation.
+// Expression is taken from the rule rather than from the alert.
+func (ar *AlertingRule) newAlertAPI(a notifier.Alert) *APIAlert {
+	// IDs are encoded as strings to avoid rounding
+	id := strconv.FormatUint(a.ID, 10)
+	groupID := strconv.FormatUint(a.GroupID, 10)
+	value := strconv.FormatFloat(a.Value, 'e', -1, 64)
+
+	return &APIAlert{
+		ID:          id,
+		GroupID:     groupID,
+		Name:        a.Name,
+		Expression:  ar.Expr,
+		Labels:      a.Labels,
+		Annotations: a.Annotations,
+		State:       a.State.String(),
+		ActiveAt:    a.Start,
+		Value:       value,
+	}
+}
+
+const (
+	// alertMetricName is the metric name for synthetic alert timeseries.
+	alertMetricName = "ALERTS"
+	// alertForStateMetricName is the metric name for 'for' state of alert.
+	// The value of such timeseries is the unix timestamp of the alert start.
+	alertForStateMetricName = "ALERTS_FOR_STATE"
+
+	// alertNameLabel is the label name indicating the name of an alert.
+	alertNameLabel = "alertname"
+	// alertStateLabel is the label name indicating the state of an alert.
+	alertStateLabel = "alertstate"
+)
+
+// alertToTimeSeries converts the given alert with the given timestamp to timeseries.
+// The result always contains the ALERTS series; the ALERTS_FOR_STATE series
+// is added only for rules with For > 0.
+func (ar *AlertingRule) alertToTimeSeries(a *notifier.Alert, timestamp time.Time) []prompbmarshal.TimeSeries {
+	result := []prompbmarshal.TimeSeries{alertToTimeSeries(ar.Name, a, timestamp)}
+	if ar.For <= 0 {
+		return result
+	}
+	return append(result, alertForToTimeSeries(ar.Name, a, timestamp))
+}
+
+// alertToTimeSeries returns the ALERTS timeseries with value 1 for the given alert.
+// It carries all the alert labels plus the alert name and the alert state.
+func alertToTimeSeries(name string, a *notifier.Alert, timestamp time.Time) prompbmarshal.TimeSeries {
+	tsLabels := make(map[string]string, len(a.Labels)+3)
+	for key, value := range a.Labels {
+		tsLabels[key] = value
+	}
+	// reserved labels are set after copying,
+	// so they override alert labels with the same names
+	tsLabels[alertStateLabel] = a.State.String()
+	tsLabels[alertNameLabel] = name
+	tsLabels["__name__"] = alertMetricName
+	return newTimeSeries(1, tsLabels, timestamp)
+}
+
+// alertForToTimeSeries returns the ALERTS_FOR_STATE timeseries for the given alert,
+// where value is the unix time when the alert became active.
+// It carries all the alert labels plus the alert name.
+func alertForToTimeSeries(name string, a *notifier.Alert, timestamp time.Time) prompbmarshal.TimeSeries {
+	tsLabels := make(map[string]string, len(a.Labels)+2)
+	for key, value := range a.Labels {
+		tsLabels[key] = value
+	}
+	// reserved labels are set after copying,
+	// so they override alert labels with the same names
+	tsLabels[alertNameLabel] = name
+	tsLabels["__name__"] = alertForStateMetricName
+	startedAt := float64(a.Start.Unix())
+	return newTimeSeries(startedAt, tsLabels, timestamp)
+}
+
+// Restore restores the state of active alerts basing on previously written timeseries.
+// Restore restores only Start field. Field State will be always Pending and supposed
+// to be updated on next Exec, as well as Value field.
+// Only rules with For > 0 will be restored, since ALERTS_FOR_STATE
+// timeseries are written only for such rules.
+//
+// The lookback defines the time range to search for the last written datapoint.
+// An error is returned if q is nil, if the query fails or if an alert
+// cannot be created from the query result.
+func (ar *AlertingRule) Restore(ctx context.Context, q datasource.Querier, lookback time.Duration) error {
+	if q == nil {
+		return fmt.Errorf("querier is nil")
+	}
+	// Get the last datapoint in range via MetricsQL `last_over_time`.
+	// We don't use plain PromQL since Prometheus doesn't support
+	// remote write protocol which is used for state persistence in vmalert.
+	expr := fmt.Sprintf("last_over_time(%s{alertname=%q}[%ds])",
+		alertForStateMetricName, ar.Name, int(lookback.Seconds()))
+	qMetrics, err := q.Query(ctx, expr)
+	if err != nil {
+		return err
+	}
+
+	// alerts is a status field guarded by mu and is read by API methods
+	// under the lock, so it must not be modified without the lock.
+	// Like in Exec, the lock is taken only after the query has finished.
+	ar.mu.Lock()
+	defer ar.mu.Unlock()
+
+	for _, m := range qMetrics {
+		labels := m.Labels
+		m.Labels = make([]datasource.Label, 0)
+		// drop all extra labels, so hash key will
+		// be identical to timeseries received in Exec
+		for _, l := range labels {
+			if l.Name == alertNameLabel {
+				continue
+			}
+			// drop all overridden labels
+			if _, ok := ar.Labels[l.Name]; ok {
+				continue
+			}
+			m.Labels = append(m.Labels, l)
+		}
+
+		// the value of ALERTS_FOR_STATE is the unix time of the alert start
+		a, err := ar.newAlert(m, time.Unix(int64(m.Value), 0))
+		if err != nil {
+			return fmt.Errorf("failed to create alert: %s", err)
+		}
+		a.ID = hash(m)
+		a.State = notifier.StatePending
+		ar.alerts[a.ID] = a
+		logger.Infof("alert %q(%d) restored to state at %v", a.Name, a.ID, a.Start)
+	}
+	return nil
+}
--- a/app/vmalert/alerting_test.go
+++ b/app/vmalert/alerting_test.go
@@ -0,0 +1,455 @@
+package main
+
+import (
+	"context"
+	"testing"
+	"time"
+
+	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
+	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
+)
+
+// TestAlertingRule_ToTimeSeries verifies timeseries generated by toTimeSeries
+// for a rule that holds a single alert. Each test case consists of the rule,
+// the alert to put into the rule and the expected timeseries.
+// NOTE(review): the second argument of newTestAlertingRule is presumably
+// the For duration of the rule - confirm against the helper definition.
+func TestAlertingRule_ToTimeSeries(t *testing.T) {
+	timestamp := time.Now()
+	testCases := []struct {
+		rule  *AlertingRule
+		alert *notifier.Alert
+		expTS []prompbmarshal.TimeSeries
+	}{
+		{
+			newTestAlertingRule("instant", 0),
+			&notifier.Alert{State: notifier.StateFiring},
+			[]prompbmarshal.TimeSeries{
+				newTimeSeries(1, map[string]string{
+					"__name__":      alertMetricName,
+					alertStateLabel: notifier.StateFiring.String(),
+					alertNameLabel:  "instant",
+				}, timestamp),
+			},
+		},
+		{
+			// alert labels must be propagated to the timeseries
+			newTestAlertingRule("instant extra labels", 0),
+			&notifier.Alert{State: notifier.StateFiring, Labels: map[string]string{
+				"job":      "foo",
+				"instance": "bar",
+			}},
+			[]prompbmarshal.TimeSeries{
+				newTimeSeries(1, map[string]string{
+					"__name__":      alertMetricName,
+					alertStateLabel: notifier.StateFiring.String(),
+					alertNameLabel:  "instant extra labels",
+					"job":           "foo",
+					"instance":      "bar",
+				}, timestamp),
+			},
+		},
+		{
+			// alert labels that collide with reserved labels must be overridden
+			newTestAlertingRule("instant labels override", 0),
+			&notifier.Alert{State: notifier.StateFiring, Labels: map[string]string{
+				alertStateLabel: "foo",
+				"__name__":      "bar",
+			}},
+			[]prompbmarshal.TimeSeries{
+				newTimeSeries(1, map[string]string{
+					"__name__":      alertMetricName,
+					alertStateLabel: notifier.StateFiring.String(),
+					alertNameLabel:  "instant labels override",
+				}, timestamp),
+			},
+		},
+		{
+			// the additional ALERTS_FOR_STATE series is expected here,
+			// with the alert start time as its value
+			newTestAlertingRule("for", time.Second),
+			&notifier.Alert{State: notifier.StateFiring, Start: timestamp.Add(time.Second)},
+			[]prompbmarshal.TimeSeries{
+				newTimeSeries(1, map[string]string{
+					"__name__":      alertMetricName,
+					alertStateLabel: notifier.StateFiring.String(),
+					alertNameLabel:  "for",
+				}, timestamp),
+				newTimeSeries(float64(timestamp.Add(time.Second).Unix()), map[string]string{
+					"__name__":     alertForStateMetricName,
+					alertNameLabel: "for",
+				}, timestamp),
+			},
+		},
+		{
+			// pending alerts are converted to timeseries as well
+			newTestAlertingRule("for pending", 10*time.Second),
+			&notifier.Alert{State: notifier.StatePending, Start: timestamp.Add(time.Second)},
+			[]prompbmarshal.TimeSeries{
+				newTimeSeries(1, map[string]string{
+					"__name__":      alertMetricName,
+					alertStateLabel: notifier.StatePending.String(),
+					alertNameLabel:  "for pending",
+				}, timestamp),
+				newTimeSeries(float64(timestamp.Add(time.Second).Unix()), map[string]string{
+					"__name__":     alertForStateMetricName,
+					alertNameLabel: "for pending",
+				}, timestamp),
+			},
+		},
+	}
+	for _, tc := range testCases {
+		t.Run(tc.rule.Name, func(t *testing.T) {
+			// register the alert in the rule under its ID
+			tc.rule.alerts[tc.alert.ID] = tc.alert
+			tss := tc.rule.toTimeSeries(timestamp)
+			if err := compareTimeSeries(t, tc.expTS, tss); err != nil {
+				t.Fatalf("timeseries missmatch: %s", err)
+			}
+		})
+	}
+}
+
+// TestAlertingRule_Exec verifies alerts state transitions performed by Exec.
+// Each test case consists of the rule, the list of steps and the alerts
+// expected after the last step. A step is the list of metrics returned
+// by the fake querier for a single Exec call; an empty step means
+// that the query returned no results.
+func TestAlertingRule_Exec(t *testing.T) {
+	// defaultStep is the delay between Exec calls; it is also used as
+	// the For value in cases where a pending alert must fire on the next step
+	const defaultStep = 5 * time.Millisecond
+	testCases := []struct {
+		rule      *AlertingRule
+		steps     [][]datasource.Metric
+		expAlerts map[uint64]*notifier.Alert
+	}{
+		{
+			newTestAlertingRule("empty", 0),
+			[][]datasource.Metric{},
+			map[uint64]*notifier.Alert{},
+		},
+		{
+			newTestAlertingRule("empty labels", 0),
+			[][]datasource.Metric{
+				{datasource.Metric{}},
+			},
+			map[uint64]*notifier.Alert{
+				hash(datasource.Metric{}): {State: notifier.StateFiring},
+			},
+		},
+		{
+			newTestAlertingRule("single-firing", 0),
+			[][]datasource.Metric{
+				{metricWithLabels(t, "name", "foo")},
+			},
+			map[uint64]*notifier.Alert{
+				hash(metricWithLabels(t, "name", "foo")): {State: notifier.StateFiring},
+			},
+		},
+		{
+			newTestAlertingRule("single-firing=>inactive", 0),
+			[][]datasource.Metric{
+				{metricWithLabels(t, "name", "foo")},
+				{},
+			},
+			map[uint64]*notifier.Alert{
+				hash(metricWithLabels(t, "name", "foo")): {State: notifier.StateInactive},
+			},
+		},
+		{
+			newTestAlertingRule("single-firing=>inactive=>firing", 0),
+			[][]datasource.Metric{
+				{metricWithLabels(t, "name", "foo")},
+				{},
+				{metricWithLabels(t, "name", "foo")},
+			},
+			map[uint64]*notifier.Alert{
+				hash(metricWithLabels(t, "name", "foo")): {State: notifier.StateFiring},
+			},
+		},
+		{
+			newTestAlertingRule("single-firing=>inactive=>firing=>inactive", 0),
+			[][]datasource.Metric{
+				{metricWithLabels(t, "name", "foo")},
+				{},
+				{metricWithLabels(t, "name", "foo")},
+				{},
+			},
+			map[uint64]*notifier.Alert{
+				hash(metricWithLabels(t, "name", "foo")): {State: notifier.StateInactive},
+			},
+		},
+		{
+			// the second empty step in a row removes the inactive alert
+			newTestAlertingRule("single-firing=>inactive=>firing=>inactive=>empty", 0),
+			[][]datasource.Metric{
+				{metricWithLabels(t, "name", "foo")},
+				{},
+				{metricWithLabels(t, "name", "foo")},
+				{},
+				{},
+			},
+			map[uint64]*notifier.Alert{},
+		},
+		{
+			newTestAlertingRule("single-firing=>inactive=>firing=>inactive=>empty=>firing", 0),
+			[][]datasource.Metric{
+				{metricWithLabels(t, "name", "foo")},
+				{},
+				{metricWithLabels(t, "name", "foo")},
+				{},
+				{},
+				{metricWithLabels(t, "name", "foo")},
+			},
+			map[uint64]*notifier.Alert{
+				hash(metricWithLabels(t, "name", "foo")): {State: notifier.StateFiring},
+			},
+		},
+		{
+			newTestAlertingRule("multiple-firing", 0),
+			[][]datasource.Metric{
+				{
+					metricWithLabels(t, "name", "foo"),
+					metricWithLabels(t, "name", "foo1"),
+					metricWithLabels(t, "name", "foo2"),
+				},
+			},
+			map[uint64]*notifier.Alert{
+				hash(metricWithLabels(t, "name", "foo")):  {State: notifier.StateFiring},
+				hash(metricWithLabels(t, "name", "foo1")): {State: notifier.StateFiring},
+				hash(metricWithLabels(t, "name", "foo2")): {State: notifier.StateFiring},
+			},
+		},
+		{
+			newTestAlertingRule("multiple-steps-firing", 0),
+			[][]datasource.Metric{
+				{metricWithLabels(t, "name", "foo")},
+				{metricWithLabels(t, "name", "foo1")},
+				{metricWithLabels(t, "name", "foo2")},
+			},
+			// 1: fire first alert
+			// 2: fire second alert, set first inactive
+			// 3: fire third alert, set second inactive, delete first one
+			map[uint64]*notifier.Alert{
+				hash(metricWithLabels(t, "name", "foo1")): {State: notifier.StateInactive},
+				hash(metricWithLabels(t, "name", "foo2")): {State: notifier.StateFiring},
+			},
+		},
+		{
+			newTestAlertingRule("duplicate", 0),
+			[][]datasource.Metric{
+				{
+					// metrics with the same labelset should result in one alert
+					metricWithLabels(t, "name", "foo", "type", "bar"),
+					metricWithLabels(t, "type", "bar", "name", "foo"),
+				},
+			},
+			map[uint64]*notifier.Alert{
+				hash(metricWithLabels(t, "name", "foo", "type", "bar")): {State: notifier.StateFiring},
+			},
+		},
+		{
+			// For is much longer than the test duration,
+			// so the alert must stay pending
+			newTestAlertingRule("for-pending", time.Minute),
+			[][]datasource.Metric{
+				{metricWithLabels(t, "name", "foo")},
+			},
+			map[uint64]*notifier.Alert{
+				hash(metricWithLabels(t, "name", "foo")): {State: notifier.StatePending},
+			},
+		},
+		{
+			// For equals to the delay between steps,
+			// so the alert must fire on the second step
+			newTestAlertingRule("for-fired", defaultStep),
+			[][]datasource.Metric{
+				{metricWithLabels(t, "name", "foo")},
+				{metricWithLabels(t, "name", "foo")},
+			},
+			map[uint64]*notifier.Alert{
+				hash(metricWithLabels(t, "name", "foo")): {State: notifier.StateFiring},
+			},
+		},
+		{
+			newTestAlertingRule("for-pending=>empty", time.Second),
+			[][]datasource.Metric{
+				{metricWithLabels(t, "name", "foo")},
+				{metricWithLabels(t, "name", "foo")},
+				// empty step to reset and delete pending alerts
+				{},
+			},
+			map[uint64]*notifier.Alert{},
+		},
+		{
+			newTestAlertingRule("for-pending=>firing=>inactive", defaultStep),
+			[][]datasource.Metric{
+				{metricWithLabels(t, "name", "foo")},
+				{metricWithLabels(t, "name", "foo")},
+				// empty step to reset pending alerts
+				{},
+			},
+			map[uint64]*notifier.Alert{
+				hash(metricWithLabels(t, "name", "foo")): {State: notifier.StateInactive},
+			},
+		},
+		{
+			newTestAlertingRule("for-pending=>firing=>inactive=>pending", defaultStep),
+			[][]datasource.Metric{
+				{metricWithLabels(t, "name", "foo")},
+				{metricWithLabels(t, "name", "foo")},
+				// empty step to reset pending alerts
+				{},
+				{metricWithLabels(t, "name", "foo")},
+			},
+			map[uint64]*notifier.Alert{
+				hash(metricWithLabels(t, "name", "foo")): {State: notifier.StatePending},
+			},
+		},
+		{
+			newTestAlertingRule("for-pending=>firing=>inactive=>pending=>firing", defaultStep),
+			[][]datasource.Metric{
+				{metricWithLabels(t, "name", "foo")},
+				{metricWithLabels(t, "name", "foo")},
+				// empty step to reset pending alerts
+				{},
+				{metricWithLabels(t, "name", "foo")},
+				{metricWithLabels(t, "name", "foo")},
+			},
+			map[uint64]*notifier.Alert{
+				hash(metricWithLabels(t, "name", "foo")): {State: notifier.StateFiring},
+			},
+		},
+	}
+	fakeGroup := Group{Name: "TestRule_Exec"}
+	for _, tc := range testCases {
+		t.Run(tc.rule.Name, func(t *testing.T) {
+			fq := &fakeQuerier{}
+			tc.rule.GroupID = fakeGroup.ID()
+			for _, step := range tc.steps {
+				// replace the querier results with metrics of the current step
+				fq.reset()
+				fq.add(step...)
+				if _, err := tc.rule.Exec(context.TODO(), fq, false); err != nil {
+					t.Fatalf("unexpected err: %s", err)
+				}
+				// artificial delay between applying steps
+				time.Sleep(defaultStep)
+			}
+			if len(tc.rule.alerts) != len(tc.expAlerts) {
+				t.Fatalf("expected %d alerts; got %d", len(tc.expAlerts), len(tc.rule.alerts))
+			}
+			// only the State of alerts is compared
+			for key, exp := range tc.expAlerts {
+				got, ok := tc.rule.alerts[key]
+				if !ok {
+					t.Fatalf("expected to have key %d", key)
+				}
+				if got.State != exp.State {
+					t.Fatalf("expected state %d; got %d", exp.State, got.State)
+				}
+			}
+		})
+	}
+}
+
+// TestAlertingRule_Restore checks that AlertingRule.Restore turns the series
+// returned by the querier into pending alerts. The expectations encode that
+// an alert is keyed by hash() of the series labels without `__name__`, the
+// alert name label and the rule's own labels, and that the sample value
+// (a unix timestamp in seconds) becomes the alert Start time.
+func TestAlertingRule_Restore(t *testing.T) {
+	// Read the clock once: the same instants feed both the querier samples and
+	// the expected alerts, so the test cannot flake when an hour boundary is
+	// crossed between two separate time.Now() calls.
+	now := time.Now()
+	start1h := now.Truncate(time.Hour)
+	start2h := now.Truncate(2 * time.Hour)
+	start3h := now.Truncate(3 * time.Hour)
+
+	testCases := []struct {
+		rule      *AlertingRule
+		metrics   []datasource.Metric
+		expAlerts map[uint64]*notifier.Alert
+	}{
+		{
+			newTestRuleWithLabels("no extra labels"),
+			[]datasource.Metric{
+				metricWithValueAndLabels(t, float64(start1h.Unix()),
+					"__name__", alertForStateMetricName,
+					alertNameLabel, "",
+				),
+			},
+			map[uint64]*notifier.Alert{
+				hash(datasource.Metric{}): {State: notifier.StatePending,
+					Start: start1h},
+			},
+		},
+		{
+			newTestRuleWithLabels("metric labels"),
+			[]datasource.Metric{
+				metricWithValueAndLabels(t, float64(start1h.Unix()),
+					"__name__", alertForStateMetricName,
+					alertNameLabel, "",
+					"foo", "bar",
+					"namespace", "baz",
+				),
+			},
+			map[uint64]*notifier.Alert{
+				hash(metricWithLabels(t,
+					"foo", "bar",
+					"namespace", "baz",
+				)): {State: notifier.StatePending,
+					Start: start1h},
+			},
+		},
+		{
+			newTestRuleWithLabels("rule labels", "source", "vm"),
+			[]datasource.Metric{
+				metricWithValueAndLabels(t, float64(start1h.Unix()),
+					"__name__", alertForStateMetricName,
+					alertNameLabel, "",
+					"foo", "bar",
+					"namespace", "baz",
+					// following pair supposed to be dropped
+					"source", "vm",
+				),
+			},
+			map[uint64]*notifier.Alert{
+				hash(metricWithLabels(t,
+					"foo", "bar",
+					"namespace", "baz",
+				)): {State: notifier.StatePending,
+					Start: start1h},
+			},
+		},
+		{
+			newTestRuleWithLabels("multiple alerts"),
+			[]datasource.Metric{
+				metricWithValueAndLabels(t, float64(start1h.Unix()),
+					"__name__", alertForStateMetricName,
+					"host", "localhost-1",
+				),
+				metricWithValueAndLabels(t, float64(start2h.Unix()),
+					"__name__", alertForStateMetricName,
+					"host", "localhost-2",
+				),
+				metricWithValueAndLabels(t, float64(start3h.Unix()),
+					"__name__", alertForStateMetricName,
+					"host", "localhost-3",
+				),
+			},
+			map[uint64]*notifier.Alert{
+				hash(metricWithLabels(t, "host", "localhost-1")): {State: notifier.StatePending,
+					Start: start1h},
+				hash(metricWithLabels(t, "host", "localhost-2")): {State: notifier.StatePending,
+					Start: start2h},
+				hash(metricWithLabels(t, "host", "localhost-3")): {State: notifier.StatePending,
+					Start: start3h},
+			},
+		},
+	}
+	fakeGroup := Group{Name: "TestRule_Exec"}
+	for _, tc := range testCases {
+		t.Run(tc.rule.Name, func(t *testing.T) {
+			fq := &fakeQuerier{}
+			tc.rule.GroupID = fakeGroup.ID()
+			fq.add(tc.metrics...)
+			if err := tc.rule.Restore(context.TODO(), fq, time.Hour); err != nil {
+				t.Fatalf("unexpected err: %s", err)
+			}
+			if len(tc.rule.alerts) != len(tc.expAlerts) {
+				t.Fatalf("expected %d alerts; got %d", len(tc.expAlerts), len(tc.rule.alerts))
+			}
+			for key, exp := range tc.expAlerts {
+				got, ok := tc.rule.alerts[key]
+				if !ok {
+					t.Fatalf("expected to have key %d", key)
+				}
+				if got.State != exp.State {
+					t.Fatalf("expected state %d; got %d", exp.State, got.State)
+				}
+				// Equal compares the instant only; `!=` on time.Time would also
+				// compare the location pointer and the monotonic clock reading.
+				if !got.Start.Equal(exp.Start) {
+					t.Fatalf("expected Start %v; got %v", exp.Start, got.Start)
+				}
+			}
+		})
+	}
+}
+
+// newTestRuleWithLabels returns a test alerting rule with a zero `for`
+// duration whose Labels map is filled from the flat key, value, key, value...
+// argument list. The list must have an even length.
+func newTestRuleWithLabels(name string, labels ...string) *AlertingRule {
+	rule := newTestAlertingRule(name, 0)
+	pairs := make(map[string]string)
+	for i := 0; i < len(labels); i += 2 {
+		key, value := labels[i], labels[i+1]
+		pairs[key] = value
+	}
+	rule.Labels = pairs
+	return rule
+}
+
+// newTestAlertingRule returns an alerting rule with the given name and `for`
+// duration and an empty, ready-to-use alerts map.
+func newTestAlertingRule(name string, waitFor time.Duration) *AlertingRule {
+	rule := &AlertingRule{
+		Name: name,
+		For:  waitFor,
+	}
+	rule.alerts = make(map[uint64]*notifier.Alert)
+	return rule
+}
--- a/app/vmalert/config.go
+++ b/app/vmalert/config.go
@@ -1,70 +0,0 @@
-package main
-
-import (
-	"fmt"
-	"gopkg.in/yaml.v2"
-	"io/ioutil"
-	"path/filepath"
-	"strings"
-
-	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
-)
-
-// Parse parses rule configs from given file patterns
-func Parse(pathPatterns []string, validateAnnotations bool) ([]Group, error) {
-	var fp []string
-	for _, pattern := range pathPatterns {
-		matches, err := filepath.Glob(pattern)
-		if err != nil {
-			return nil, fmt.Errorf("error reading file patther %s:%v", pattern, err)
-		}
-		fp = append(fp, matches...)
-	}
-	var groups []Group
-	for _, file := range fp {
-		groupsNames := map[string]struct{}{}
-		gr, err := parseFile(file)
-		if err != nil {
-			return nil, fmt.Errorf("file %s: %w", file, err)
-		}
-		for _, group := range gr {
-			if _, ok := groupsNames[group.Name]; ok {
-				return nil, fmt.Errorf("one file can not contain groups with the same name %s, filepath:%s", file, group.Name)
-			}
-			groupsNames[group.Name] = struct{}{}
-			for _, rule := range group.Rules {
-				if err = rule.Validate(); err != nil {
-					return nil, fmt.Errorf("invalid rule filepath:%s, group %s:%w", file, group.Name, err)
-				}
-				// TODO: this init looks weird here
-				rule.alerts = make(map[uint64]*notifier.Alert)
-				if validateAnnotations {
-					if err = notifier.ValidateTemplates(rule.Annotations); err != nil {
-						return nil, fmt.Errorf("invalid annotations filepath:%s, group %s:%w", file, group.Name, err)
-					}
-					if err = notifier.ValidateTemplates(rule.Labels); err != nil {
-						return nil, fmt.Errorf("invalid labels filepath:%s, group %s:%w", file, group.Name, err)
-					}
-				}
-				rule.group = &group
-			}
-		}
-		groups = append(groups, gr...)
-	}
-	if len(groups) < 1 {
-		return nil, fmt.Errorf("no groups found in %s", strings.Join(pathPatterns, ";"))
-	}
-	return groups, nil
-}
-
-func parseFile(path string) ([]Group, error) {
-	data, err := ioutil.ReadFile(path)
-	if err != nil {
-		return nil, fmt.Errorf("error reading alert rule file: %w", err)
-	}
-	g := struct {
-		Groups []Group `yaml:"groups"`
-	}{}
-	err = yaml.Unmarshal(data, &g)
-	return g.Groups, err
-}
--- a/app/vmalert/config/config.go
+++ b/app/vmalert/config/config.go
@@ -0,0 +1,195 @@
+package config
+
+import (
+	"fmt"
+	"hash/fnv"
+	"io/ioutil"
+	"path/filepath"
+	"sort"
+	"strings"
+	"time"
+
+	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
+	"github.com/VictoriaMetrics/metricsql"
+	"gopkg.in/yaml.v2"
+)
+
+// Group contains a list of Rules grouped into
+// an entity with one name and evaluation interval.
+type Group struct {
+	// File is the path of the rules file the group was read from;
+	// Parse sets it after the group has passed validation.
+	File        string
+	Name        string        `yaml:"name"`
+	Interval    time.Duration `yaml:"interval,omitempty"`
+	Rules       []Rule        `yaml:"rules"`
+	Concurrency int           `yaml:"concurrency"`
+
+	// Catches all undefined fields and must be empty after parsing.
+	// Validate reports any key collected here as an unknown field.
+	XXX map[string]interface{} `yaml:",inline"`
+}
+
+// Validate checks the Group and each of its Rules for configuration errors.
+//
+// It returns an error when the group name is empty, when the group has no
+// rules, when two rules share the same ID, when a rule fails Rule.Validate,
+// or when unknown YAML fields were captured in XXX.
+// With validateExpressions set, every rule expression must parse as MetricsQL.
+// With validateAnnotations set, annotation and label templates must be valid.
+func (g *Group) Validate(validateAnnotations, validateExpressions bool) error {
+	if g.Name == "" {
+		return fmt.Errorf("group name must be set")
+	}
+	if len(g.Rules) == 0 {
+		return fmt.Errorf("group %q can't contain no rules", g.Name)
+	}
+	uniqueRules := map[uint64]struct{}{}
+	for _, r := range g.Rules {
+		ruleName := r.Record
+		if r.Alert != "" {
+			ruleName = r.Alert
+		}
+		id := r.ID
+		if id == 0 {
+			// ID is filled in by Rule.UnmarshalYAML only. Rules built in code
+			// still carry the zero value, so derive the hash here instead of
+			// reporting every second rule of such a group as a duplicate.
+			id = HashRule(r)
+		}
+		if _, ok := uniqueRules[id]; ok {
+			return fmt.Errorf("rule %q duplicate", ruleName)
+		}
+		uniqueRules[id] = struct{}{}
+		if err := r.Validate(); err != nil {
+			return fmt.Errorf("invalid rule %q.%q: %s", g.Name, ruleName, err)
+		}
+		if validateExpressions {
+			if _, err := metricsql.Parse(r.Expr); err != nil {
+				return fmt.Errorf("invalid expression for rule %q.%q: %s", g.Name, ruleName, err)
+			}
+		}
+		if validateAnnotations {
+			if err := notifier.ValidateTemplates(r.Annotations); err != nil {
+				return fmt.Errorf("invalid annotations for rule %q.%q: %s", g.Name, ruleName, err)
+			}
+			if err := notifier.ValidateTemplates(r.Labels); err != nil {
+				return fmt.Errorf("invalid labels for rule %q.%q: %s", g.Name, ruleName, err)
+			}
+		}
+	}
+	return checkOverflow(g.XXX, fmt.Sprintf("group %q", g.Name))
+}
+
+// Rule describes an entity that represents either
+// a recording rule or an alerting rule.
+// Rule.Validate requires exactly one of Record and Alert to be set.
+type Rule struct {
+	// ID is set by UnmarshalYAML to HashRule of the decoded rule.
+	ID          uint64
+	Record      string            `yaml:"record,omitempty"`
+	Alert       string            `yaml:"alert,omitempty"`
+	Expr        string            `yaml:"expr"`
+	For         time.Duration     `yaml:"for,omitempty"`
+	Labels      map[string]string `yaml:"labels,omitempty"`
+	Annotations map[string]string `yaml:"annotations,omitempty"`
+
+	// Catches all undefined fields and must be empty after parsing.
+	// Validate reports any key collected here as an unknown field.
+	XXX map[string]interface{} `yaml:",inline"`
+}
+
+// UnmarshalYAML implements the yaml.Unmarshaler interface.
+// After a successful decode it stores HashRule of the rule in r.ID.
+func (r *Rule) UnmarshalYAML(unmarshal func(interface{}) error) error {
+	// plain has Rule's fields but not its methods, so decoding into it
+	// does not call UnmarshalYAML again.
+	type plain Rule
+	err := unmarshal((*plain)(r))
+	if err == nil {
+		r.ID = HashRule(*r)
+	}
+	return err
+}
+
+// HashRule returns an FNV-1a 64-bit hash over the fields that identify a rule:
+// the expression, the rule kind together with its name, and all labels in
+// ascending key order. For, Annotations and ID do not take part in the hash.
+func HashRule(r Rule) uint64 {
+	kind, name := "alerting", r.Alert
+	if r.Record != "" {
+		kind, name = "recording", r.Record
+	}
+
+	h := fnv.New64a()
+	h.Write([]byte(r.Expr))
+	h.Write([]byte(kind))
+	h.Write([]byte(name))
+
+	// map iteration order is random, so feed labels in sorted key order
+	var labelNames []string
+	for k := range r.Labels {
+		labelNames = append(labelNames, k)
+	}
+	sort.Strings(labelNames)
+	for _, k := range labelNames {
+		h.Write([]byte(k))
+		h.Write([]byte(r.Labels[k]))
+		// a terminator byte follows every label pair
+		h.Write([]byte("\xff"))
+	}
+	return h.Sum64()
+}
+
+// Validate checks a single Rule for configuration errors: exactly one of
+// `record` and `alert` must be set, the expression must be non-empty and
+// no unknown YAML fields may have been captured in XXX.
+func (r *Rule) Validate() error {
+	hasRecord, hasAlert := r.Record != "", r.Alert != ""
+	if hasRecord == hasAlert {
+		// both set or both missing
+		return fmt.Errorf("either `record` or `alert` must be set")
+	}
+	if len(r.Expr) == 0 {
+		return fmt.Errorf("expression can't be empty")
+	}
+	return checkOverflow(r.XXX, "rule")
+}
+
+// Parse reads and validates rule groups from all files matching pathPatterns.
+//
+// Every pattern is expanded with filepath.Glob and each matched file is
+// parsed as YAML. Each group is validated with Group.Validate using the
+// validateAnnotations and validateExpressions flags, and group names must be
+// unique within one file. The File field of every returned group is set to
+// the path it was read from. An error is returned when no groups are found.
+//
+// Underlying errors are wrapped with %w so callers can inspect the cause
+// via errors.Is / errors.As; the message text is unchanged.
+func Parse(pathPatterns []string, validateAnnotations, validateExpressions bool) ([]Group, error) {
+	var fp []string
+	for _, pattern := range pathPatterns {
+		matches, err := filepath.Glob(pattern)
+		if err != nil {
+			return nil, fmt.Errorf("error reading file pattern %s: %w", pattern, err)
+		}
+		fp = append(fp, matches...)
+	}
+	var groups []Group
+	for _, file := range fp {
+		// group names only have to be unique per file
+		uniqueGroups := map[string]struct{}{}
+		gr, err := parseFile(file)
+		if err != nil {
+			return nil, fmt.Errorf("failed to parse file %q: %w", file, err)
+		}
+		for _, g := range gr {
+			if err := g.Validate(validateAnnotations, validateExpressions); err != nil {
+				return nil, fmt.Errorf("invalid group %q in file %q: %w", g.Name, file, err)
+			}
+			if _, ok := uniqueGroups[g.Name]; ok {
+				return nil, fmt.Errorf("group name %q duplicate in file %q", g.Name, file)
+			}
+			uniqueGroups[g.Name] = struct{}{}
+			g.File = file
+			groups = append(groups, g)
+		}
+	}
+	if len(groups) < 1 {
+		return nil, fmt.Errorf("no groups found in %s", strings.Join(pathPatterns, ";"))
+	}
+	return groups, nil
+}
+
+// parseFile reads the YAML file at path and returns the groups listed under
+// its top-level `groups` key. Any other top-level key is reported as an error.
+func parseFile(path string) ([]Group, error) {
+	raw, err := ioutil.ReadFile(path)
+	if err != nil {
+		return nil, fmt.Errorf("error reading alert rule file: %w", err)
+	}
+	var doc struct {
+		Groups []Group `yaml:"groups"`
+		// Catches all undefined fields and must be empty after parsing.
+		XXX map[string]interface{} `yaml:",inline"`
+	}
+	if err := yaml.Unmarshal(raw, &doc); err != nil {
+		return nil, err
+	}
+	return doc.Groups, checkOverflow(doc.XXX, "config")
+}
+
+// checkOverflow returns an error naming every key of m, or nil when m is
+// empty. m is expected to hold the YAML fields that matched no struct field;
+// ctx names the entity they were found in and is put into the message.
+// Keys are listed in sorted order so the message is deterministic.
+func checkOverflow(m map[string]interface{}, ctx string) error {
+	if len(m) == 0 {
+		return nil
+	}
+	keys := make([]string, 0, len(m))
+	for k := range m {
+		keys = append(keys, k)
+	}
+	// map iteration order is random; sort so the message is reproducible
+	sort.Strings(keys)
+	return fmt.Errorf("unknown fields in %s: %s", ctx, strings.Join(keys, ", "))
+}
--- a/app/vmalert/config/config_test.go
+++ b/app/vmalert/config/config_test.go
@@ -0,0 +1,326 @@
+package config
+
+import (
+	"net/url"
+	"os"
+	"strings"
+	"testing"
+	"time"
+
+	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
+)
+
+// TestMain initialises the notifier template functions with a fixed
+// external URL before running the tests of this package.
+func TestMain(m *testing.M) {
+	externalURL, _ := url.Parse("https://victoriametrics.com/path")
+	notifier.InitTemplateFunc(externalURL)
+	exitCode := m.Run()
+	os.Exit(exitCode)
+}
+
+// TestParseGood expects every `*good*` fixture to parse with both
+// annotation and expression validation enabled.
+func TestParseGood(t *testing.T) {
+	patterns := []string{"testdata/*good.rules", "testdata/dir/*good.*"}
+	_, err := Parse(patterns, true, true)
+	if err != nil {
+		t.Errorf("error parsing files %s", err)
+	}
+}
+
+// TestParseBad expects Parse to fail for every listed fixture with an error
+// message containing expErr. All cases are evaluated even if one of them
+// unexpectedly parses without an error.
+func TestParseBad(t *testing.T) {
+	testCases := []struct {
+		path   []string
+		expErr string
+	}{
+		{
+			[]string{"testdata/rules0-bad.rules"},
+			"unexpected token",
+		},
+		{
+			[]string{"testdata/dir/rules0-bad.rules"},
+			"error parsing annotation",
+		},
+		{
+			[]string{"testdata/dir/rules1-bad.rules"},
+			"duplicate in file",
+		},
+		{
+			[]string{"testdata/dir/rules2-bad.rules"},
+			"function \"value\" not defined",
+		},
+		{
+			[]string{"testdata/dir/rules3-bad.rules"},
+			"either `record` or `alert` must be set",
+		},
+		{
+			[]string{"testdata/dir/rules4-bad.rules"},
+			"either `record` or `alert` must be set",
+		},
+		{
+			[]string{"testdata/*.yaml"},
+			"no groups found",
+		},
+	}
+	for _, tc := range testCases {
+		_, err := Parse(tc.path, true, true)
+		if err == nil {
+			t.Errorf("expected to get error")
+			// keep going: returning here would silently skip the remaining cases
+			continue
+		}
+		if !strings.Contains(err.Error(), tc.expErr) {
+			t.Errorf("expected err to contain %q; got %q instead", tc.expErr, err)
+		}
+	}
+}
+
+// TestRule_Validate covers a rule without a name, a rule without an
+// expression and a minimal valid alerting rule.
+func TestRule_Validate(t *testing.T) {
+	unnamed := &Rule{}
+	if unnamed.Validate() == nil {
+		t.Errorf("exptected empty name error")
+	}
+
+	noExpr := &Rule{Alert: "alert"}
+	if noExpr.Validate() == nil {
+		t.Errorf("exptected empty expr error")
+	}
+
+	valid := &Rule{Alert: "alert", Expr: "test>0"}
+	if err := valid.Validate(); err != nil {
+		t.Errorf("exptected valid rule; got %s", err)
+	}
+}
+
+// TestGroup_Validate runs Group.Validate over hand-built groups.
+// An empty expErr means the group must validate without any error;
+// otherwise the returned error must contain expErr.
+func TestGroup_Validate(t *testing.T) {
+	testCases := []struct {
+		group               *Group
+		validateAnnotations bool
+		validateExpressions bool
+		expErr              string
+	}{
+		{
+			group:  &Group{},
+			expErr: "group name must be set",
+		},
+		{
+			group:  &Group{Name: "test"},
+			expErr: "contain no rules",
+		},
+		{
+			group: &Group{Name: "test",
+				Rules: []Rule{
+					{
+						Record: "record",
+						Expr:   "up | 0",
+					},
+				},
+			},
+			expErr: "",
+		},
+		{
+			group: &Group{Name: "test",
+				Rules: []Rule{
+					{
+						Record: "record",
+						Expr:   "up | 0",
+					},
+				},
+			},
+			expErr:              "invalid expression",
+			validateExpressions: true,
+		},
+		{
+			group: &Group{Name: "test",
+				Rules: []Rule{
+					{
+						Alert: "alert",
+						Expr:  "up == 1",
+						Labels: map[string]string{
+							"summary": "{{ value|query }}",
+						},
+					},
+				},
+			},
+			expErr: "",
+		},
+		{
+			group: &Group{Name: "test",
+				Rules: []Rule{
+					{
+						Alert: "alert",
+						Expr:  "up == 1",
+						Labels: map[string]string{
+							"summary": "{{ value|query }}",
+						},
+					},
+				},
+			},
+			expErr:              "error parsing annotation",
+			validateAnnotations: true,
+		},
+		{
+			group: &Group{Name: "test",
+				Rules: []Rule{
+					{
+						Alert: "alert",
+						Expr:  "up == 1",
+					},
+					{
+						Alert: "alert",
+						Expr:  "up == 1",
+					},
+				},
+			},
+			expErr: "duplicate",
+		},
+		{
+			group: &Group{Name: "test",
+				Rules: []Rule{
+					{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
+						"summary": "{{ value|query }}",
+					}},
+					{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
+						"summary": "{{ value|query }}",
+					}},
+				},
+			},
+			expErr: "duplicate",
+		},
+		{
+			group: &Group{Name: "test",
+				Rules: []Rule{
+					{Record: "record", Expr: "up == 1", Labels: map[string]string{
+						"summary": "{{ value|query }}",
+					}},
+					{Record: "record", Expr: "up == 1", Labels: map[string]string{
+						"summary": "{{ value|query }}",
+					}},
+				},
+			},
+			expErr: "duplicate",
+		},
+		{
+			group: &Group{Name: "test",
+				Rules: []Rule{
+					{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
+						"summary": "{{ value|query }}",
+					}},
+					{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
+						"description": "{{ value|query }}",
+					}},
+				},
+			},
+			expErr: "",
+		},
+		{
+			group: &Group{Name: "test",
+				Rules: []Rule{
+					{Record: "alert", Expr: "up == 1", Labels: map[string]string{
+						"summary": "{{ value|query }}",
+					}},
+					{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
+						"summary": "{{ value|query }}",
+					}},
+				},
+			},
+			expErr: "",
+		},
+	}
+	for _, tc := range testCases {
+		// Rules built from struct literals never go through UnmarshalYAML,
+		// so give them the same ID the YAML decoder would have assigned.
+		for i := range tc.group.Rules {
+			tc.group.Rules[i].ID = HashRule(tc.group.Rules[i])
+		}
+		err := tc.group.Validate(tc.validateAnnotations, tc.validateExpressions)
+		if tc.expErr == "" {
+			// strings.Contains(x, "") is always true, so the "no error"
+			// expectation has to be checked explicitly
+			if err != nil {
+				t.Errorf("unexpected err: %s", err)
+			}
+			continue
+		}
+		if err == nil {
+			t.Errorf("expected to get err %q; got nil insted", tc.expErr)
+			continue
+		}
+		if !strings.Contains(err.Error(), tc.expErr) {
+			t.Errorf("expected err to contain %q; got %q instead", tc.expErr, err)
+		}
+	}
+}
+
+// TestHashRule checks which pairs of rules HashRule maps to the same value.
+// Each case holds two rules and whether their hashes are expected to match.
+func TestHashRule(t *testing.T) {
+	testCases := []struct {
+		a, b  Rule
+		equal bool
+	}{
+		{
+			Rule{Record: "record", Expr: "up == 1"},
+			Rule{Record: "record", Expr: "up == 1"},
+			true,
+		},
+		{
+			Rule{Alert: "alert", Expr: "up == 1"},
+			Rule{Alert: "alert", Expr: "up == 1"},
+			true,
+		},
+		{
+			Rule{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
+				"foo": "bar",
+				"baz": "foo",
+			}},
+			Rule{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
+				"foo": "bar",
+				"baz": "foo",
+			}},
+			true,
+		},
+		{
+			// same labels written in a different order
+			Rule{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
+				"foo": "bar",
+				"baz": "foo",
+			}},
+			Rule{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
+				"baz": "foo",
+				"foo": "bar",
+			}},
+			true,
+		},
+		{
+			Rule{Alert: "record", Expr: "up == 1"},
+			Rule{Alert: "record", Expr: "up == 1"},
+			true,
+		},
+		{
+			// For is not part of the hash
+			Rule{Alert: "alert", Expr: "up == 1", For: time.Minute},
+			Rule{Alert: "alert", Expr: "up == 1"},
+			true,
+		},
+		{
+			// same name, but alerting vs recording rule
+			Rule{Alert: "record", Expr: "up == 1"},
+			Rule{Record: "record", Expr: "up == 1"},
+			false,
+		},
+		{
+			// different expressions
+			Rule{Record: "record", Expr: "up == 1"},
+			Rule{Record: "record", Expr: "up == 2"},
+			false,
+		},
+		{
+			// same label names, one differing value
+			Rule{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
+				"foo": "bar",
+				"baz": "foo",
+			}},
+			Rule{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
+				"baz": "foo",
+				"foo": "baz",
+			}},
+			false,
+		},
+		{
+			// second rule has only a subset of the labels
+			Rule{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
+				"foo": "bar",
+				"baz": "foo",
+			}},
+			Rule{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
+				"baz": "foo",
+			}},
+			false,
+		},
+		{
+			// labels vs no labels at all
+			Rule{Alert: "alert", Expr: "up == 1", Labels: map[string]string{
+				"foo": "bar",
+				"baz": "foo",
+			}},
+			Rule{Alert: "alert", Expr: "up == 1"},
+			false,
+		},
+	}
+	for i, tc := range testCases {
+		aID, bID := HashRule(tc.a), HashRule(tc.b)
+		// fail when hash equality disagrees with the expectation
+		if tc.equal != (aID == bID) {
+			t.Fatalf("missmatch for rule %d", i)
+		}
+	}
+}
--- a/app/vmalert/config/testdata/dir/rules0-bad.rules
+++ b/app/vmalert/config/testdata/dir/rules0-bad.rules
--- a/app/vmalert/config/testdata/dir/rules0-good.rules
+++ b/app/vmalert/config/testdata/dir/rules0-good.rules
@@ -6,6 +6,7 @@ groups:
        expr: vm_rows > 0
        labels:
          label: bar
+          expr: "{{ $expr|queryEscape }}"
        annotations:
          summary: "{{ $value|humanize }}"
          description: "{{$labels}}"
--- a/app/vmalert/config/testdata/dir/rules1-bad.rules
+++ b/app/vmalert/config/testdata/dir/rules1-bad.rules
--- a/app/vmalert/config/testdata/dir/rules1-good.rules
+++ b/app/vmalert/config/testdata/dir/rules1-good.rules
@@ -9,5 +9,3 @@ groups:
        annotations:
          summary: "{{ $value }}"
          description: "{{$labels}}"
-
-
--- a/app/vmalert/config/testdata/dir/rules2-bad.rules
+++ b/app/vmalert/config/testdata/dir/rules2-bad.rules
--- a/app/vmalert/config/testdata/dir/rules3-bad.rules
+++ b/app/vmalert/config/testdata/dir/rules3-bad.rules
@@ -0,0 +1,5 @@
+groups:
+  - name: group
+    rules:
+      - for: 5m
+        expr: vm_rows > 0
--- a/app/vmalert/config/testdata/dir/rules4-bad.rules
+++ b/app/vmalert/config/testdata/dir/rules4-bad.rules
@@ -0,0 +1,7 @@
+groups:
+  - name: group
+    rules:
+      - alert: rows
+        record: record
+        for: 5m
+        expr: vm_rows > 0
--- a/app/vmalert/config/testdata/dir/rules5-bad.rules
+++ b/app/vmalert/config/testdata/dir/rules5-bad.rules
@@ -0,0 +1,7 @@
+groups:
+  - name: group
+    rules:
+      - alert: rows
+        expr: vm_rows > 0
+      - record: rows
+        expr: sum(vm_rows)
--- a/app/vmalert/config/testdata/kube-good.rules
+++ b/app/vmalert/config/testdata/kube-good.rules
--- a/app/vmalert/config/testdata/rules0-bad.rules
+++ b/app/vmalert/config/testdata/rules0-bad.rules
--- a/app/vmalert/config/testdata/rules0-good.rules
+++ b/app/vmalert/config/testdata/rules0-good.rules
@@ -6,7 +6,7 @@ groups:
        expr: vm_rows > 0
        labels:
          label: bar
-          template: "{{ $value|humanize }}"
+          host: "{{ $labels.instance }}"
        annotations:
          summary: "{{ $value|humanize }}"
          description: "{{$labels}}"
--- a/app/vmalert/config/testdata/rules1-good.rules
+++ b/app/vmalert/config/testdata/rules1-good.rules
@@ -0,0 +1,11 @@
+groups:
+  - name: groupTest
+    rules:
+      - alert: VMRows
+        for: 1ms
+        expr: vm_rows > 0
+        labels:
+          label: bar
+          host: "{{ $labels.instance }}"
+        annotations:
+          summary: "{{ $value }}"
--- a/app/vmalert/config/testdata/rules2-good.rules
+++ b/app/vmalert/config/testdata/rules2-good.rules
@@ -0,0 +1,35 @@
+groups:
+  - name: TestGroup
+    interval: 2s
+    concurrency: 2
+    rules:
+      - alert: Conns
+        expr: sum(vm_tcplistener_conns) by(instance) > 1
+        for: 3m
+        annotations:
+          summary: "Too high connection number for {{$labels.instance}}"
+          description: "It is {{ $value }} connections for {{$labels.instance}}"
+      - alert: ExampleAlertAlwaysFiring
+        expr: sum by(job)
+          (up == 1)
+      - record: handler:requests:rate5m
+        expr: sum(rate(prometheus_http_requests_total[5m])) by (handler)
+        labels:
+          recording: true
+      - record: code:requests:rate5m
+        expr: sum(rate(promhttp_metric_handler_requests_total[5m])) by (code)
+        labels:
+          env: dev
+          recording: true
+      - record: code:requests:rate5m
+        expr: sum(rate(promhttp_metric_handler_requests_total[5m])) by (code)
+        labels:
+          env: staging
+          recording: true
+      - record: successful_requests:ratio_rate5m
+        labels:
+          recording: true
+        expr: |2
+            sum(code:requests:rate5m{code="200"})
+          /
+            sum(code:requests:rate5m)
--- a/app/vmalert/config_test.go
+++ b/app/vmalert/config_test.go
@@ -1,39 +0,0 @@
-package main
-
-import (
-	"net/url"
-	"os"
-	"testing"
-
-	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
-)
-
-func TestMain(m *testing.M) {
-	u, _ := url.Parse("https://victoriametrics.com/path")
-	notifier.InitTemplateFunc(u)
-	os.Exit(m.Run())
-}
-
-func TestParseGood(t *testing.T) {
-	if _, err := Parse([]string{"testdata/*good.rules", "testdata/dir/*good.*"}, true); err != nil {
-		t.Errorf("error parsing files %s", err)
-	}
-}
-
-func TestParseBad(t *testing.T) {
-	if _, err := Parse([]string{"testdata/rules0-bad.rules"}, true); err == nil {
-		t.Errorf("expected syntaxt error")
-	}
-	if _, err := Parse([]string{"testdata/dir/rules0-bad.rules"}, true); err == nil {
-		t.Errorf("expected template annotation error")
-	}
-	if _, err := Parse([]string{"testdata/dir/rules1-bad.rules"}, true); err == nil {
-		t.Errorf("expected same group error")
-	}
-	if _, err := Parse([]string{"testdata/dir/rules2-bad.rules"}, true); err == nil {
-		t.Errorf("expected template label error")
-	}
-	if _, err := Parse([]string{"testdata/*.yaml"}, true); err == nil {
-		t.Errorf("expected empty group")
-	}
-}
--- a/app/vmalert/group.go
+++ b/app/vmalert/group.go
@@ -0,0 +1,295 @@
+package main
+
+import (
+	"context"
+	"fmt"
+	"hash/fnv"
+	"sync"
+	"time"
+
+	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config"
+	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
+	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
+	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/remotewrite"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
+	"github.com/VictoriaMetrics/metrics"
+)
+
+// Group is an entity for grouping rules.
+// The rules of a group are executed periodically by Group.start.
+type Group struct {
+	// mu is held by start while an update received via updateCh is applied
+	mu          sync.RWMutex
+	Name        string
+	File        string
+	Rules       []Rule
+	Interval    time.Duration
+	Concurrency int
+
+	// doneCh is closed by close() to ask start to return
+	doneCh     chan struct{}
+	// finishedCh is closed when start returns; close() waits on it
+	finishedCh chan struct{}
+	// channel accepts new Group obj
+	// which supposed to update current group
+	updateCh chan *Group
+}
+
+// newGroup builds a runnable Group from its parsed configuration.
+// A zero cfg.Interval falls back to defaultInterval and a cfg.Concurrency
+// below 1 is raised to 1; every config rule is converted via g.newRule.
+func newGroup(cfg config.Group, defaultInterval time.Duration) *Group {
+	interval := cfg.Interval
+	if interval == 0 {
+		interval = defaultInterval
+	}
+	concurrency := cfg.Concurrency
+	if concurrency < 1 {
+		concurrency = 1
+	}
+	g := &Group{
+		Name:        cfg.Name,
+		File:        cfg.File,
+		Interval:    interval,
+		Concurrency: concurrency,
+		doneCh:      make(chan struct{}),
+		finishedCh:  make(chan struct{}),
+		updateCh:    make(chan *Group),
+	}
+	converted := make([]Rule, 0, len(cfg.Rules))
+	for _, ruleCfg := range cfg.Rules {
+		converted = append(converted, g.newRule(ruleCfg))
+	}
+	g.Rules = converted
+	return g
+}
+
+// newRule converts a config rule into an alerting rule when its Alert name
+// is set and into a recording rule otherwise. The rule receives the group ID.
+func (g *Group) newRule(rule config.Rule) Rule {
+	if rule.Alert == "" {
+		return newRecordingRule(g.ID(), rule)
+	}
+	return newAlertingRule(g.ID(), rule)
+}
+
+// ID returns the group identifier: an FNV-1a 64-bit hash of the rules file
+// path and the group name, separated by a 0xff byte.
+func (g *Group) ID() uint64 {
+	h := fnv.New64a()
+	for _, part := range []string{g.File, "\xff", g.Name} {
+		h.Write([]byte(part))
+	}
+	return h.Sum64()
+}
+
+// Restore restores the alerts state of the group's alerting rules by calling
+// Restore on each of them with the given querier and lookback.
+// It stops at the first rule that fails and returns its error.
+func (g *Group) Restore(ctx context.Context, q datasource.Querier, lookback time.Duration) error {
+	for _, rule := range g.Rules {
+		ar, isAlerting := rule.(*AlertingRule)
+		if !isAlerting || ar.For < 1 {
+			// recording rules and alerting rules without `for` are skipped
+			continue
+		}
+		err := ar.Restore(ctx, q, lookback)
+		if err != nil {
+			return fmt.Errorf("error while restoring rule %q: %s", rule, err)
+		}
+	}
+	return nil
+}
+
+// updateWith updates existing group with
+// passed group object. This function ignores group
+// evaluation interval change. It supposed to be updated
+// in group.start function.
+// Not thread-safe.
+//
+// Rules are matched by ID: a rule present in both groups is kept and updated
+// in place via UpdateWith, a rule missing from newGroup is dropped, and a rule
+// only present in newGroup is appended.
+// If UpdateWith fails, the error is returned and g.Rules is left untouched,
+// so the group never ends up holding nil rules. Rules already processed
+// before the failure keep the changes applied by their UpdateWith call.
+func (g *Group) updateWith(newGroup *Group) error {
+	rulesRegistry := make(map[uint64]Rule)
+	for _, nr := range newGroup.Rules {
+		rulesRegistry[nr.ID()] = nr
+	}
+
+	// Collect survivors in a separate slice instead of nil-ing entries of
+	// g.Rules, so an early return cannot leave holes in the live rule list.
+	var newRules []Rule
+	for _, or := range g.Rules {
+		nr, ok := rulesRegistry[or.ID()]
+		if !ok {
+			// old rule is not present in the new list, so it is dropped
+			continue
+		}
+		if err := or.UpdateWith(nr); err != nil {
+			return err
+		}
+		delete(rulesRegistry, nr.ID())
+		newRules = append(newRules, or)
+	}
+	// add the rest of rules from registry
+	for _, nr := range rulesRegistry {
+		newRules = append(newRules, nr)
+	}
+	g.Concurrency = newGroup.Concurrency
+	g.Rules = newRules
+	return nil
+}
+
+var (
+	// updated by Group.start on every ticker iteration
+	iterationTotal    = metrics.NewCounter(`vmalert_iteration_total`)
+	iterationDuration = metrics.NewSummary(`vmalert_iteration_duration_seconds`)
+
+	// updated by executor.exec for every executed rule
+	execTotal    = metrics.NewCounter(`vmalert_execution_total`)
+	execErrors   = metrics.NewCounter(`vmalert_execution_errors_total`)
+	execDuration = metrics.NewSummary(`vmalert_execution_duration_seconds`)
+
+	// alertsSent is increased by the number of alerts handed to the notifier
+	// before Send is called; alertsSendErrors counts failed Send calls.
+	// NOTE(review): alertsFired is not updated in this file - presumably
+	// incremented by the alerting rule; confirm.
+	alertsFired      = metrics.NewCounter(`vmalert_alerts_fired_total`)
+	alertsSent       = metrics.NewCounter(`vmalert_alerts_sent_total`)
+	alertsSendErrors = metrics.NewCounter(`vmalert_alerts_send_errors_total`)
+
+	// remoteWriteSent is increased by the number of series before they are
+	// pushed; remoteWriteErrors counts failed Push calls.
+	remoteWriteSent   = metrics.NewCounter(`vmalert_remotewrite_sent_total`)
+	remoteWriteErrors = metrics.NewCounter(`vmalert_remotewrite_errors_total`)
+)
+
+// close signals start to stop by closing doneCh and then blocks until
+// finishedCh is closed. It is a no-op for a group without doneCh.
+func (g *Group) close() {
+	if g.doneCh != nil {
+		close(g.doneCh)
+		<-g.finishedCh
+	}
+}
+
+// start runs the evaluation loop of the group until ctx is cancelled or
+// doneCh is closed, and closes finishedCh on return.
+//
+// On every tick of the interval ticker all group rules are executed through
+// an executor built from querier, nr and rw, and execution errors are logged.
+// A group received on updateCh is applied with updateWith under g.mu; when
+// its interval differs, the ticker is replaced by one with the new interval.
+func (g *Group) start(ctx context.Context, querier datasource.Querier, nr notifier.Notifier, rw *remotewrite.Client) {
+	defer func() { close(g.finishedCh) }()
+	logger.Infof("group %q started; interval=%v; concurrency=%d", g.Name, g.Interval, g.Concurrency)
+	e := &executor{querier, nr, rw}
+	t := time.NewTicker(g.Interval)
+	// The closure reads t when start returns, so it stops whichever ticker is
+	// current. A plain `defer t.Stop()` would bind the initial ticker and leave
+	// a ticker created after an interval change running.
+	defer func() { t.Stop() }()
+	for {
+		select {
+		case <-ctx.Done():
+			logger.Infof("group %q: context cancelled", g.Name)
+			return
+		case <-g.doneCh:
+			logger.Infof("group %q: received stop signal", g.Name)
+			return
+		case ng := <-g.updateCh:
+			g.mu.Lock()
+			err := g.updateWith(ng)
+			if err != nil {
+				logger.Errorf("group %q: failed to update: %s", g.Name, err)
+				g.mu.Unlock()
+				continue
+			}
+			if g.Interval != ng.Interval {
+				// updateWith ignores the interval, so it is applied here
+				g.Interval = ng.Interval
+				t.Stop()
+				t = time.NewTicker(g.Interval)
+			}
+			g.mu.Unlock()
+			logger.Infof("group %q re-started; interval=%v; concurrency=%d", g.Name, g.Interval, g.Concurrency)
+		case <-t.C:
+			iterationTotal.Inc()
+			iterationStart := time.Now()
+
+			// the channel is closed once all rules were executed,
+			// so the loop below ends after draining it
+			errs := e.execConcurrently(ctx, g.Rules, g.Concurrency, g.Interval)
+			for err := range errs {
+				if err != nil {
+					logger.Errorf("group %q: %s", g.Name, err)
+				}
+			}
+
+			iterationDuration.UpdateDuration(iterationStart)
+		}
+	}
+}
+
+// executor bundles the dependencies needed to execute a rule.
+type executor struct {
+	// querier is passed to Rule.Exec
+	querier  datasource.Querier
+	// notifier receives firing and inactive alerts of alerting rules
+	notifier notifier.Notifier
+	// rw receives the time series returned by Rule.Exec; may be nil,
+	// in which case no series are requested or pushed
+	rw       *remotewrite.Client
+}
+
+// execConcurrently executes all rules and returns a channel that carries one
+// result (nil or an error) per rule and is closed after the last one.
+// With concurrency == 1 the rules run sequentially before the call returns;
+// otherwise they run in background goroutines, at most `concurrency` at once.
+func (e *executor) execConcurrently(ctx context.Context, rules []Rule, concurrency int, interval time.Duration) chan error {
+	// buffered for every rule, so writers never block on a slow reader
+	results := make(chan error, len(rules))
+	// series are only requested when there is a remote write client
+	returnSeries := e.rw != nil
+
+	if concurrency == 1 {
+		// fast path
+		for i := range rules {
+			results <- e.exec(ctx, rules[i], returnSeries, interval)
+		}
+		close(results)
+		return results
+	}
+
+	limiter := make(chan struct{}, concurrency)
+	go func() {
+		var pending sync.WaitGroup
+		for i := range rules {
+			// blocks while `concurrency` executions are in flight
+			limiter <- struct{}{}
+			pending.Add(1)
+			go func(r Rule) {
+				results <- e.exec(ctx, r, returnSeries, interval)
+				<-limiter
+				pending.Done()
+			}(rules[i])
+		}
+		pending.Wait()
+		close(results)
+	}()
+	return results
+}
+
+// exec executes a single rule and delivers its results.
+//
+// The series returned by rule.Exec are pushed to the remote write client when
+// one is configured. For alerting rules, alerts in firing or inactive state
+// are then sent to the notifier. The first failure is returned as an error
+// and stops further processing of the rule.
+func (e *executor) exec(ctx context.Context, rule Rule, returnSeries bool, interval time.Duration) error {
+	execTotal.Inc()
+	execStart := time.Now()
+	defer func() {
+		execDuration.UpdateDuration(execStart)
+	}()
+
+	tss, err := rule.Exec(ctx, e.querier, returnSeries)
+	if err != nil {
+		execErrors.Inc()
+		return fmt.Errorf("rule %q: failed to execute: %s", rule, err)
+	}
+
+	if len(tss) > 0 && e.rw != nil {
+		// counted up front, i.e. before any push has succeeded
+		remoteWriteSent.Add(len(tss))
+		for _, ts := range tss {
+			if err := e.rw.Push(ts); err != nil {
+				remoteWriteErrors.Inc()
+				return fmt.Errorf("rule %q: remote write failure: %s", rule, err)
+			}
+		}
+	}
+
+	// only alerting rules have alerts to notify about
+	ar, ok := rule.(*AlertingRule)
+	if !ok {
+		return nil
+	}
+	var alerts []notifier.Alert
+	for _, a := range ar.alerts {
+		switch a.State {
+		case notifier.StateFiring:
+			// set End to the current time + 3 intervals
+			// so notifier can resolve it automatically if `vmalert`
+			// won't be able to send resolve for some reason
+			a.End = time.Now().Add(3 * interval)
+			alerts = append(alerts, *a)
+		case notifier.StateInactive:
+			// set End to the current time to notify
+			// that it was just resolved
+			a.End = time.Now()
+			alerts = append(alerts, *a)
+		}
+	}
+	if len(alerts) < 1 {
+		return nil
+	}
+	alertsSent.Add(len(alerts))
+	if err := e.notifier.Send(ctx, alerts); err != nil {
+		alertsSendErrors.Inc()
+		return fmt.Errorf("rule %q: failed to send alerts: %s", rule, err)
+	}
+	return nil
+}
--- a/app/vmalert/group_test.go
+++ b/app/vmalert/group_test.go
@@ -0,0 +1,207 @@
+package main
+
+import (
+	"context"
+	"sort"
+	"testing"
+	"time"
+
+	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config"
+	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
+)
+
+// TestUpdateWith checks that Group.updateWith makes the receiver's rule set
+// match the rule set of the group passed to it.
+//
+// Each case builds a "current" group and a "new" group from config rules,
+// calls current.updateWith(new) and then compares the two rule lists
+// pairwise after sorting both by rule ID.
+func TestUpdateWith(t *testing.T) {
+	testCases := []struct {
+		name         string
+		currentRules []config.Rule
+		newRules     []config.Rule
+	}{
+		{
+			"new rule",
+			nil,
+			[]config.Rule{{Alert: "bar"}},
+		},
+		{
+			"update alerting rule",
+			[]config.Rule{{
+				Alert: "foo",
+				Expr:  "up > 0",
+				For:   time.Second,
+				Labels: map[string]string{
+					"bar": "baz",
+				},
+				Annotations: map[string]string{
+					"summary":     "{{ $value|humanize }}",
+					"description": "{{$labels}}",
+				},
+			}},
+			[]config.Rule{{
+				Alert: "foo",
+				Expr:  "up > 10",
+				For:   time.Second,
+				Labels: map[string]string{
+					"baz": "bar",
+				},
+				Annotations: map[string]string{
+					"summary": "none",
+				},
+			}},
+		},
+		{
+			"update recording rule",
+			[]config.Rule{{
+				Record: "foo",
+				Expr:   "max(up)",
+				Labels: map[string]string{
+					"bar": "baz",
+				},
+			}},
+			[]config.Rule{{
+				Record: "foo",
+				Expr:   "min(up)",
+				Labels: map[string]string{
+					"baz": "bar",
+				},
+			}},
+		},
+		{
+			"empty rule",
+			[]config.Rule{{Alert: "foo"}, {Record: "bar"}},
+			nil,
+		},
+		{
+			"multiple rules",
+			[]config.Rule{
+				{Alert: "bar"},
+				{Alert: "baz"},
+				{Alert: "foo"},
+			},
+			[]config.Rule{
+				{Alert: "baz"},
+				{Record: "foo"},
+			},
+		},
+		{
+			"replace rule",
+			[]config.Rule{{Alert: "foo1"}},
+			[]config.Rule{{Alert: "foo2"}},
+		},
+		{
+			"replace multiple rules",
+			[]config.Rule{
+				{Alert: "foo1"},
+				{Record: "foo2"},
+				{Alert: "foo3"},
+			},
+			[]config.Rule{
+				{Alert: "foo3"},
+				{Alert: "foo4"},
+				{Record: "foo5"},
+			},
+		},
+	}
+
+	for _, tc := range testCases {
+		t.Run(tc.name, func(t *testing.T) {
+			g := &Group{Name: "test"}
+			for _, r := range tc.currentRules {
+				r.ID = config.HashRule(r)
+				g.Rules = append(g.Rules, g.newRule(r))
+			}
+
+			ng := &Group{Name: "test"}
+			for _, r := range tc.newRules {
+				r.ID = config.HashRule(r)
+				ng.Rules = append(ng.Rules, ng.newRule(r))
+			}
+
+			err := g.updateWith(ng)
+			if err != nil {
+				t.Fatal(err)
+			}
+
+			// the expected count comes from the test case, the actual
+			// count from the updated group
+			if len(g.Rules) != len(tc.newRules) {
+				t.Fatalf("expected to have %d rules; got: %d",
+					len(tc.newRules), len(g.Rules))
+			}
+			// updateWith gives no ordering guarantee visible here, so both
+			// lists are sorted by ID before the pairwise comparison
+			sort.Slice(g.Rules, func(i, j int) bool {
+				return g.Rules[i].ID() < g.Rules[j].ID()
+			})
+			sort.Slice(ng.Rules, func(i, j int) bool {
+				return ng.Rules[i].ID() < ng.Rules[j].ID()
+			})
+			for i, r := range g.Rules {
+				got, want := r, ng.Rules[i]
+				if got.ID() != want.ID() {
+					t.Fatalf("expected to have rule %q; got %q", want, got)
+				}
+				if err := compareRules(t, got, want); err != nil {
+					t.Fatalf("comparsion error: %s", err)
+				}
+			}
+		})
+	}
+}
+
+// TestGroupStart starts the first group parsed from the test rules file with
+// a fake querier and a fake notifier, then checks that the alerts received
+// by the notifier follow the metrics the querier currently returns: two
+// firing alerts while two metrics are served, one after the data is reduced
+// to a single metric.
+func TestGroupStart(t *testing.T) {
+	const (
+		evalInterval      = time.Millisecond
+		inst1, inst2, job = "foo", "bar", "baz"
+	)
+
+	// TODO: make parsing from string instead of file
+	parsed, err := config.Parse([]string{"config/testdata/rules1-good.rules"}, true, true)
+	if err != nil {
+		t.Fatalf("failed to parse rules: %s", err)
+	}
+	g := newGroup(parsed[0], evalInterval)
+	g.Concurrency = 2
+
+	notifierStub := &fakeNotifier{}
+	querierStub := &fakeQuerier{}
+
+	m1 := metricWithLabels(t, "instance", inst1, "job", job)
+	m2 := metricWithLabels(t, "instance", inst2, "job", job)
+
+	// build the alerts the first rule is expected to produce for m1 and m2
+	rule := g.Rules[0].(*AlertingRule)
+	alert1, err := rule.newAlert(m1, time.Now())
+	if err != nil {
+		t.Fatalf("faield to create alert: %s", err)
+	}
+	alert1.ID = hash(m1)
+	alert1.State = notifier.StateFiring
+
+	alert2, err := rule.newAlert(m2, time.Now())
+	if err != nil {
+		t.Fatalf("faield to create alert: %s", err)
+	}
+	alert2.ID = hash(m2)
+	alert2.State = notifier.StateFiring
+
+	done := make(chan struct{})
+	querierStub.add(m1, m2)
+	go func() {
+		g.start(context.Background(), querierStub, notifierStub, nil)
+		close(done)
+	}()
+
+	// give the group time for multiple evaluations
+	settle := 20 * evalInterval
+	time.Sleep(settle)
+
+	received := notifierStub.getAlerts()
+	compareAlerts(t, []notifier.Alert{*alert1, *alert2}, received)
+
+	// drop the previous data and serve only one datapoint from now on
+	querierStub.reset()
+	querierStub.add(m1)
+
+	// give the group time for multiple evaluations
+	time.Sleep(settle)
+
+	received = notifierStub.getAlerts()
+	compareAlerts(t, []notifier.Alert{*alert1}, received)
+
+	g.close()
+	<-done
+}
--- a/app/vmalert/helpers_test.go
+++ b/app/vmalert/helpers_test.go
@@ -0,0 +1,232 @@
+package main
+
+import (
+	"context"
+	"fmt"
+	"reflect"
+	"sort"
+	"sync"
+	"testing"
+
+	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
+	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
+)
+
+// fakeQuerier is a test stub whose Query method returns either a configured
+// error or a copy of the metrics added so far. All access to its fields is
+// guarded by the embedded mutex.
+type fakeQuerier struct {
+	sync.Mutex
+	metrics []datasource.Metric
+	err     error
+}
+
+// setErr makes subsequent Query calls return err.
+func (fq *fakeQuerier) setErr(err error) {
+	fq.Lock()
+	defer fq.Unlock()
+	fq.err = err
+}
+
+// reset clears both the configured error and the stored metrics.
+func (fq *fakeQuerier) reset() {
+	fq.Lock()
+	defer fq.Unlock()
+	fq.metrics = fq.metrics[:0]
+	fq.err = nil
+}
+
+// add appends metrics to the set returned by Query.
+func (fq *fakeQuerier) add(metrics ...datasource.Metric) {
+	fq.Lock()
+	defer fq.Unlock()
+	fq.metrics = append(fq.metrics, metrics...)
+}
+
+// Query ignores its arguments. It returns the configured error if one is
+// set, otherwise a fresh copy of the stored metrics.
+func (fq *fakeQuerier) Query(_ context.Context, _ string) ([]datasource.Metric, error) {
+	fq.Lock()
+	defer fq.Unlock()
+	if fq.err != nil {
+		return nil, fq.err
+	}
+	snapshot := make([]datasource.Metric, len(fq.metrics))
+	copy(snapshot, fq.metrics)
+	return snapshot, nil
+}
+
+// fakeNotifier is a test stub that remembers the alerts passed to the most
+// recent Send call. Access to the stored slice is guarded by the embedded mutex.
+type fakeNotifier struct {
+	sync.Mutex
+	alerts []notifier.Alert
+}
+
+// Send replaces the stored alerts with the given slice (no copy is made)
+// and always succeeds.
+func (fn *fakeNotifier) Send(_ context.Context, alerts []notifier.Alert) error {
+	fn.Lock()
+	fn.alerts = alerts
+	fn.Unlock()
+	return nil
+}
+
+// getAlerts returns the slice stored by the last Send call.
+func (fn *fakeNotifier) getAlerts() []notifier.Alert {
+	fn.Lock()
+	latest := fn.alerts
+	fn.Unlock()
+	return latest
+}
+
+// metricWithValueAndLabels builds a metric from name/value label pairs via
+// metricWithLabels and sets its Value field to value.
+func metricWithValueAndLabels(t *testing.T, value float64, labels ...string) datasource.Metric {
+	t.Helper()
+	metric := metricWithLabels(t, labels...)
+	metric.Value = value
+	return metric
+}
+
+// metricWithLabels builds a metric whose labels are taken from the given
+// flat list of name, value, name, value, ... strings. The test is failed
+// when the list is empty or has an odd number of elements.
+func metricWithLabels(t *testing.T, labels ...string) datasource.Metric {
+	t.Helper()
+	if n := len(labels); n == 0 || n%2 != 0 {
+		t.Fatalf("expected to get even number of labels")
+	}
+	var metric datasource.Metric
+	// walk the pairs by the index of their value; the name sits right before it
+	for valueIdx := 1; valueIdx < len(labels); valueIdx += 2 {
+		pair := datasource.Label{
+			Name:  labels[valueIdx-1],
+			Value: labels[valueIdx],
+		}
+		metric.Labels = append(metric.Labels, pair)
+	}
+	return metric
+}
+
+// compareGroups fails the test when group b (the actual one) differs from
+// group a (the expected one) in name, file, interval, number of rules or in
+// any rule, with rules compared pairwise in slice order.
+func compareGroups(t *testing.T, a, b *Group) {
+	t.Helper()
+	if a.Name != b.Name {
+		t.Fatalf("expected group name %q; got %q", a.Name, b.Name)
+	}
+	if a.File != b.File {
+		t.Fatalf("expected group %q file name %q; got %q", a.Name, a.File, b.File)
+	}
+	if a.Interval != b.Interval {
+		t.Fatalf("expected group %q interval %v; got %v", a.Name, a.Interval, b.Interval)
+	}
+	if len(a.Rules) != len(b.Rules) {
+		t.Fatalf("expected group %s to have %d rules; got: %d",
+			a.Name, len(a.Rules), len(b.Rules))
+	}
+	// a holds the expected state everywhere in this helper, so the expected
+	// rule is taken from a and the actual one from b
+	for i, want := range a.Rules {
+		got := b.Rules[i]
+		// NOTE(review): this check compares the IDs of the groups, not of
+		// the current rule pair. A per-rule ID comparison was probably
+		// intended, but it is stricter and may fail callers that build
+		// expected rules without IDs - confirm before tightening. Until
+		// then the message reports the values that are actually compared.
+		if a.ID() != b.ID() {
+			t.Fatalf("expected group %q to have ID %v; got %v", a.Name, a.ID(), b.ID())
+		}
+		if err := compareRules(t, want, got); err != nil {
+			t.Fatalf("comparsion error: %s", err)
+		}
+	}
+}
+
+// compareRules compares rule b against rule a, where a defines the expected
+// concrete type. It returns an error when b has a different concrete type,
+// when a is neither *AlertingRule nor *RecordingRule, or when the
+// type-specific comparison finds a difference.
+func compareRules(t *testing.T, a, b Rule) error {
+	t.Helper()
+	if wantAlerting, ok := a.(*AlertingRule); ok {
+		gotAlerting, sameType := b.(*AlertingRule)
+		if !sameType {
+			return fmt.Errorf("rule %q supposed to be of type AlertingRule", b.ID())
+		}
+		return compareAlertingRules(t, wantAlerting, gotAlerting)
+	}
+	if wantRecording, ok := a.(*RecordingRule); ok {
+		gotRecording, sameType := b.(*RecordingRule)
+		if !sameType {
+			return fmt.Errorf("rule %q supposed to be of type RecordingRule", b.ID())
+		}
+		return compareRecordingRules(t, wantRecording, gotRecording)
+	}
+	return fmt.Errorf("unexpected rule type received %T", a)
+}
+
+// compareRecordingRules returns an error describing the first difference
+// between the expected rule a and the actual rule b. Expr is checked first,
+// then Labels; nil is returned when both match.
+func compareRecordingRules(t *testing.T, a, b *RecordingRule) error {
+	t.Helper()
+	switch {
+	case a.Expr != b.Expr:
+		return fmt.Errorf("expected to have expression %q; got %q", a.Expr, b.Expr)
+	case !reflect.DeepEqual(a.Labels, b.Labels):
+		return fmt.Errorf("expected to have labels %#v; got %#v", a.Labels, b.Labels)
+	}
+	return nil
+}
+
+// compareAlertingRules returns an error describing the first difference
+// between the expected rule a and the actual rule b. Fields are checked in
+// the order Expr, For, Annotations, Labels; nil is returned when all match.
+func compareAlertingRules(t *testing.T, a, b *AlertingRule) error {
+	t.Helper()
+	switch {
+	case a.Expr != b.Expr:
+		return fmt.Errorf("expected to have expression %q; got %q", a.Expr, b.Expr)
+	case a.For != b.For:
+		return fmt.Errorf("expected to have for %q; got %q", a.For, b.For)
+	case !reflect.DeepEqual(a.Annotations, b.Annotations):
+		return fmt.Errorf("expected to have annotations %#v; got %#v", a.Annotations, b.Annotations)
+	case !reflect.DeepEqual(a.Labels, b.Labels):
+		return fmt.Errorf("expected to have labels %#v; got %#v", a.Labels, b.Labels)
+	}
+	return nil
+}
+
+// compareTimeSeries returns an error describing the first difference between
+// the expected series a and the actual series b. Series are matched by
+// position; for each pair the number of samples, the sample values, the
+// number of labels and the label names/values are checked, in that order.
+func compareTimeSeries(t *testing.T, a, b []prompbmarshal.TimeSeries) error {
+	t.Helper()
+	if len(a) != len(b) {
+		return fmt.Errorf("expected number of timeseries %d; got %d", len(a), len(b))
+	}
+	for tsIdx, want := range a {
+		got := b[tsIdx]
+
+		if len(want.Samples) != len(got.Samples) {
+			return fmt.Errorf("expected number of samples %d; got %d", len(want.Samples), len(got.Samples))
+		}
+		// sample timestamps are deliberately not compared: timestamp
+		// validation isn't always correct for now and must be improved
+		// with time mock
+		for sampleIdx := range want.Samples {
+			wantValue, gotValue := want.Samples[sampleIdx].Value, got.Samples[sampleIdx].Value
+			if gotValue != wantValue {
+				return fmt.Errorf("expected value %.2f; got %.2f", wantValue, gotValue)
+			}
+		}
+
+		if len(want.Labels) != len(got.Labels) {
+			return fmt.Errorf("expected number of labels %d; got %d", len(want.Labels), len(got.Labels))
+		}
+		for labelIdx := range want.Labels {
+			wantLabel, gotLabel := want.Labels[labelIdx], got.Labels[labelIdx]
+			if gotLabel.Name != wantLabel.Name {
+				return fmt.Errorf("expected label name %q; got %q", wantLabel.Name, gotLabel.Name)
+			}
+			if gotLabel.Value != wantLabel.Value {
+				return fmt.Errorf("expected label value %q; got %q", wantLabel.Value, gotLabel.Value)
+			}
+		}
+	}
+	return nil
+}
+
+// compareAlerts fails the test when the actual alerts bs differ from the
+// expected alerts as. Both slices are sorted by ID in place (the callers'
+// slices are reordered) and then compared pairwise on Name, State, Value,
+// Annotations and Labels.
+func compareAlerts(t *testing.T, as, bs []notifier.Alert) {
+	t.Helper()
+	if len(as) != len(bs) {
+		t.Fatalf("expected to have length %d; got %d", len(as), len(bs))
+	}
+	sortByID := func(alerts []notifier.Alert) {
+		sort.Slice(alerts, func(i, j int) bool {
+			return alerts[i].ID < alerts[j].ID
+		})
+	}
+	sortByID(as)
+	sortByID(bs)
+	for idx, want := range as {
+		got := bs[idx]
+		if want.Name != got.Name {
+			t.Fatalf("expected t have Name %q; got %q", want.Name, got.Name)
+		}
+		if want.State != got.State {
+			t.Fatalf("expected t have State %q; got %q", want.State, got.State)
+		}
+		if want.Value != got.Value {
+			t.Fatalf("expected t have Value %f; got %f", want.Value, got.Value)
+		}
+		if !reflect.DeepEqual(want.Annotations, got.Annotations) {
+			t.Fatalf("expected to have annotations %#v; got %#v", want.Annotations, got.Annotations)
+		}
+		if !reflect.DeepEqual(want.Labels, got.Labels) {
+			t.Fatalf("expected to have labels %#v; got %#v", want.Labels, got.Labels)
+		}
+	}
+}
--- a/app/vmalert/main.go
+++ b/app/vmalert/main.go
@@ -2,13 +2,16 @@ package main

 import (
 	"context"
+	"crypto/tls"
+	"crypto/x509"
 	"flag"
 	"fmt"
+	"io/ioutil"
 	"net/http"
 	"net/url"
 	"os"
+	"strconv"
 	"strings"
-	"sync"
 	"time"

 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
@@ -16,10 +19,12 @@ import (
 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/remotewrite"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/envflag"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
+	"github.com/VictoriaMetrics/fasthttp"
 	"github.com/VictoriaMetrics/metrics"
 )

@@ -30,21 +35,72 @@ Examples:
 -rule /path/to/file. Path to a single file with alerting rules
 -rule dir/*.yaml -rule /*.yaml. Relative path to all .yaml files in "dir" folder, 
 absolute path to all .yaml files in root.`)
-	validateTemplates = flag.Bool("rule.validateTemplates", true, "Indicates to validate annotation and label templates")
-	httpListenAddr    = flag.String("httpListenAddr", ":8880", "Address to listen for http connections")
-	datasourceURL     = flag.String("datasource.url", "", "Victoria Metrics or VMSelect url. Required parameter. e.g. http://127.0.0.1:8428")
-	basicAuthUsername = flag.String("datasource.basicAuth.username", "", "Optional basic auth username to use for -datasource.url")
-	basicAuthPassword = flag.String("datasource.basicAuth.password", "", "Optional basic auth password to use for -datasource.url")
-	remoteWriteURL    = flag.String("remotewrite.url", "", "Optional URL to remote-write compatible storage where to write timeseries"+
-		"based on active alerts. E.g. http://127.0.0.1:8428")
-	evaluationInterval = flag.Duration("evaluationInterval", 1*time.Minute, "How often to evaluate the rules. Default 1m")
-	notifierURL        = flag.String("notifier.url", "", "Prometheus alertmanager URL. Required parameter. e.g. http://127.0.0.1:9093")
-	externalURL        = flag.String("external.url", "", "External URL is used as alert's source for sent alerts to the notifier")
+
+	validateTemplates   = flag.Bool("rule.validateTemplates", true, "Whether to validate annotation and label templates")
+	validateExpressions = flag.Bool("rule.validateExpressions", true, "Whether to validate rules expressions via MetricsQL engine")
+
+	httpListenAddr = flag.String("httpListenAddr", ":8880", "Address to listen for http connections")
+
+	datasourceURL = flag.String("datasource.url", "", "Victoria Metrics or VMSelect url. Required parameter."+
+		" E.g. http://127.0.0.1:8428")
+	basicAuthUsername               = flag.String("datasource.basicAuth.username", "", "Optional basic auth username for -datasource.url")
+	basicAuthPassword               = flag.String("datasource.basicAuth.password", "", "Optional basic auth password for -datasource.url")
+	datasourceTLSInsecureSkipVerify = flag.Bool("datasource.tlsInsecureSkipVerify", false, "Whether to skip tls verification when connecting to -datasource.url")
+	datasourceTLSCertFile           = flag.String("datasource.tlsCertFile", "", "Optional path to client-side TLS certificate file to use when connecting to -datasource.url")
+	datasourceTLSKeyFile            = flag.String("datasource.tlsKeyFile", "", "Optional path to client-side TLS certificate key to use when connecting to -datasource.url")
+	datasourceTLSCAFile             = flag.String("datasource.tlsCAFile", "", "Optional path to TLS CA file to use for verifying connections to -datasource.url. "+
+		"By default system CA is used")
+	datasourceTLSServerName = flag.String("datasource.tlsServerName", "", "Optional TLS server name to use for connections to -datasource.url. "+
+		"By default the server name from -datasource.url is used")
+
+	remoteWriteURL = flag.String("remoteWrite.url", "", "Optional URL to Victoria Metrics or VMInsert where to persist alerts state"+
+		" and recording rules results in form of timeseries. E.g. http://127.0.0.1:8428")
+	remoteWriteUsername              = flag.String("remoteWrite.basicAuth.username", "", "Optional basic auth username for -remoteWrite.url")
+	remoteWritePassword              = flag.String("remoteWrite.basicAuth.password", "", "Optional basic auth password for -remoteWrite.url")
+	remoteWriteMaxQueueSize          = flag.Int("remoteWrite.maxQueueSize", 1e5, "Defines the max number of pending datapoints to remote write endpoint")
+	remoteWriteMaxBatchSize          = flag.Int("remoteWrite.maxBatchSize", 1e3, "Defines defines max number of timeseries to be flushed at once")
+	remoteWriteConcurrency           = flag.Int("remoteWrite.concurrency", 1, "Defines number of writers for concurrent writing into remote storage")
+	remoteWriteTLSInsecureSkipVerify = flag.Bool("remoteWrite.tlsInsecureSkipVerify", false, "Whether to skip tls verification when connecting to -remoteWrite.url")
+	remoteWriteTLSCertFile           = flag.String("remoteWrite.tlsCertFile", "", "Optional path to client-side TLS certificate file to use when connecting to -remoteWrite.url")
+	remoteWriteTLSKeyFile            = flag.String("remoteWrite.tlsKeyFile", "", "Optional path to client-side TLS certificate key to use when connecting to -remoteWrite.url")
+	remoteWriteTLSCAFile             = flag.String("remoteWrite.tlsCAFile", "", "Optional path to TLS CA file to use for verifying connections to -remoteWrite.url. "+
+		"By default system CA is used")
+	remoteWriteTLSServerName = flag.String("remoteWrite.tlsServerName", "", "Optional TLS server name to use for connections to -remoteWrite.url. "+
+		"By default the server name from -remoteWrite.url is used")
+
+	remoteReadURL = flag.String("remoteRead.url", "", "Optional URL to Victoria Metrics or VMSelect that will be used to restore alerts"+
+		" state. This configuration makes sense only if `vmalert` was configured with `remoteWrite.url` before and has been successfully persisted its state."+
+		" E.g. http://127.0.0.1:8428")
+	remoteReadUsername = flag.String("remoteRead.basicAuth.username", "", "Optional basic auth username for -remoteRead.url")
+	remoteReadPassword = flag.String("remoteRead.basicAuth.password", "", "Optional basic auth password for -remoteRead.url")
+	remoteReadLookBack = flag.Duration("remoteRead.lookback", time.Hour, "Lookback defines how far to look into past for alerts timeseries."+
+		" For example, if lookback=1h then range from now() to now()-1h will be scanned.")
+	remoteReadTLSInsecureSkipVerify = flag.Bool("remoteRead.tlsInsecureSkipVerify", false, "Whether to skip tls verification when connecting to -remoteRead.url")
+	remoteReadTLSCertFile           = flag.String("remoteRead.tlsCertFile", "", "Optional path to client-side TLS certificate file to use when connecting to -remoteRead.url")
+	remoteReadTLSKeyFile            = flag.String("remoteRead.tlsKeyFile", "", "Optional path to client-side TLS certificate key to use when connecting to -remoteRead.url")
+	remoteReadTLSCAFile             = flag.String("remoteRead.tlsCAFile", "", "Optional path to TLS CA file to use for verifying connections to -remoteRead.url. "+
+		"By default system CA is used")
+	remoteReadTLSServerName = flag.String("remoteRead.tlsServerName", "", "Optional TLS server name to use for connections to -remoteRead.url. "+
+		"By default the server name from -remoteRead.url is used")
+
+	evaluationInterval            = flag.Duration("evaluationInterval", time.Minute, "How often to evaluate the rules")
+	notifierURL                   = flag.String("notifier.url", "", "Prometheus alertmanager URL. Required parameter. e.g. http://127.0.0.1:9093")
+	notifierTLSInsecureSkipVerify = flag.Bool("notifier.tlsInsecureSkipVerify", false, "Whether to skip tls verification when connecting to -notifier.url")
+	notifierTLSCertFile           = flag.String("notifier.tlsCertFile", "", "Optional path to client-side TLS certificate file to use when connecting to -notifier.url")
+	notifierTLSKeyFile            = flag.String("notifier.tlsKeyFile", "", "Optional path to client-side TLS certificate key to use when connecting to -notifier.url")
+	notifierTLSCAFile             = flag.String("notifier.tlsCAFile", "", "Optional path to TLS CA file to use for verifying connections to -notifier.url. "+
+		"By default system CA is used")
+	notifierTLSServerName = flag.String("notifier.tlsServerName", "", "Optional TLS server name to use for connections to -notifier.url. "+
+		"By default the server name from -notifier.url is used")
+	externalURL         = flag.String("external.url", "", "External URL is used as alert's source for sent alerts to the notifier")
+	externalAlertSource = flag.String("external.alert.source", "", `External Alert Source allows to override the Source link for alerts sent to AlertManager for cases where you want to build a custom link to Grafana, Prometheus or any other service.
+eg. 'explore?orgId=1&left=[\"now-1h\",\"now\",\"VictoriaMetrics\",{\"expr\": \"{{$expr|quotesEscape|pathEscape}}\"},{\"mode\":\"Metrics\"},{\"ui\":[true,true,true,\"none\"]}]'.If empty '/api/v1/:groupID/alertID/status' is used`)
 )

-// TODO: hot configuration reload
-// TODO: alerts state persistence
 func main() {
+	// Write flags and help message to stdout, since it is easier to grep or pipe.
+	flag.CommandLine.SetOutput(os.Stdout)
+	flag.Usage = usage
 	envflag.Parse()
 	buildinfo.Init()
 	logger.Init()
@@ -52,44 +108,87 @@ func main() {
 	ctx, cancel := context.WithCancel(context.Background())
 	eu, err := getExternalURL(*externalURL, *httpListenAddr, httpserver.IsTLS())
 	if err != nil {
-		logger.Fatalf("can not get external url:%s ", err)
+		logger.Fatalf("can not get external url: %s ", err)
 	}
 	notifier.InitTemplateFunc(eu)
-
-	logger.Infof("reading alert rules configuration file from %s", strings.Join(*rulePath, ";"))
-	groups, err := Parse(*rulePath, *validateTemplates)
+	aug, err := getAlertURLGenerator(eu, *externalAlertSource, *validateTemplates)
 	if err != nil {
-		logger.Fatalf("cannot parse configuration file: %s", err)
+		logger.Fatalf("URL generator error: %s", err)
 	}

-	w := &watchdog{
-		storage: datasource.NewVMStorage(*datasourceURL, *basicAuthUsername, *basicAuthPassword, &http.Client{}),
-		alertProvider: notifier.NewAlertManager(*notifierURL, func(group, name string) string {
-			return fmt.Sprintf("%s/api/v1/%s/%s/status", eu, group, name)
-		}, &http.Client{}),
+	dst, err := getTransport(datasourceURL, datasourceTLSCertFile, datasourceTLSKeyFile, datasourceTLSCAFile, datasourceTLSServerName, datasourceTLSInsecureSkipVerify)
+	if err != nil {
+		logger.Fatalf("cannot create datasource transport: %s", err)
 	}

+	nt, err := getTransport(notifierURL, notifierTLSCertFile, notifierTLSKeyFile, notifierTLSCAFile, notifierTLSServerName, notifierTLSInsecureSkipVerify)
+	if err != nil {
+		logger.Fatalf("cannot create notifier transport: %s", err)
+	}
+
+	manager := &manager{
+		groups:   make(map[uint64]*Group),
+		storage:  datasource.NewVMStorage(*datasourceURL, *basicAuthUsername, *basicAuthPassword, &http.Client{Transport: dst}),
+		notifier: notifier.NewAlertManager(*notifierURL, aug, &http.Client{Transport: nt}),
+	}
 	if *remoteWriteURL != "" {
+		t, err := getTransport(remoteWriteURL, remoteWriteTLSCertFile, remoteWriteTLSKeyFile, remoteWriteTLSCAFile, remoteWriteTLSServerName, remoteWriteTLSInsecureSkipVerify)
+		if err != nil {
+			logger.Fatalf("cannot create remoteWrite transport: %s", err)
+		}
+
 		c, err := remotewrite.NewClient(ctx, remotewrite.Config{
 			Addr:          *remoteWriteURL,
+			Concurrency:   *remoteWriteConcurrency,
+			MaxQueueSize:  *remoteWriteMaxQueueSize,
+			MaxBatchSize:  *remoteWriteMaxBatchSize,
 			FlushInterval: *evaluationInterval,
+			BasicAuthUser: *remoteWriteUsername,
+			BasicAuthPass: *remoteWritePassword,
+			Transport:     t,
 		})
 		if err != nil {
 			logger.Fatalf("failed to init remotewrite client: %s", err)
 		}
-		w.rw = c
+		manager.rw = c
 	}

-	wg := sync.WaitGroup{}
-	for i := range groups {
-		wg.Add(1)
-		go func(group Group) {
-			w.run(ctx, group, *evaluationInterval)
-			wg.Done()
-		}(groups[i])
+	if *remoteReadURL != "" {
+		t, err := getTransport(remoteReadURL, remoteReadTLSCertFile, remoteReadTLSKeyFile, remoteReadTLSCAFile, remoteReadTLSServerName, remoteReadTLSInsecureSkipVerify)
+		if err != nil {
+			logger.Fatalf("cannot create remoteRead transport: %s", err)
+		}
+
+		manager.rr = datasource.NewVMStorage(*remoteReadURL, *remoteReadUsername, *remoteReadPassword, &http.Client{Transport: t})
 	}

-	go httpserver.Serve(*httpListenAddr, (&requestHandler{groups: groups}).handler)
+	if err := manager.start(ctx, *rulePath, *validateTemplates, *validateExpressions); err != nil {
+		logger.Fatalf("failed to start: %s", err)
+	}
+
+	go func() {
+		// init reload metrics with positive values to improve alerting conditions
+		configSuccess.Set(1)
+		configTimestamp.Set(fasttime.UnixTimestamp())
+		sigHup := procutil.NewSighupChan()
+		for {
+			<-sigHup
+			configReloads.Inc()
+			logger.Infof("SIGHUP received. Going to reload rules %q ...", *rulePath)
+			if err := manager.update(ctx, *rulePath, *validateTemplates, *validateExpressions, false); err != nil {
+				configReloadErrors.Inc()
+				configSuccess.Set(0)
+				logger.Errorf("error while reloading rules: %s", err)
+				continue
+			}
+			configSuccess.Set(1)
+			configTimestamp.Set(fasttime.UnixTimestamp())
+			logger.Infof("Rules reloaded successfully from %q", *rulePath)
+		}
+	}()
+
+	rh := &requestHandler{m: manager}
+	go httpserver.Serve(*httpListenAddr, rh.handler)

 	sig := procutil.WaitForSigterm()
 	logger.Infof("service received signal %s", sig)
@@ -97,91 +196,16 @@ func main() {
 		logger.Fatalf("cannot stop the webservice: %s", err)
 	}
 	cancel()
-	if w.rw != nil {
-		err := w.rw.Close()
-		if err != nil {
-			logger.Fatalf("cannot stop the remotewrite: %s", err)
-		}
-	}
-	wg.Wait()
-}
-
-type watchdog struct {
-	storage       *datasource.VMStorage
-	alertProvider notifier.Notifier
-	rw            *remotewrite.Client
+	manager.close()
 }

 var (
-	iterationTotal    = metrics.NewCounter(`vmalert_iteration_total`)
-	iterationDuration = metrics.NewSummary(`vmalert_iteration_duration_seconds`)
-
-	execTotal    = metrics.NewCounter(`vmalert_execution_total`)
-	execErrors   = metrics.NewCounter(`vmalert_execution_errors_total`)
-	execDuration = metrics.NewSummary(`vmalert_execution_duration_seconds`)
-
-	alertsFired      = metrics.NewCounter(`vmalert_alerts_fired_total`)
-	alertsSent       = metrics.NewCounter(`vmalert_alerts_sent_total`)
-	alertsSendErrors = metrics.NewCounter(`vmalert_alerts_send_errors_total`)
-
-	remoteWriteSent   = metrics.NewCounter(`vmalert_remotewrite_sent_total`)
-	remoteWriteErrors = metrics.NewCounter(`vmalert_remotewrite_errors_total`)
+	configReloads      = metrics.NewCounter(`vmalert_config_last_reload_total`)
+	configReloadErrors = metrics.NewCounter(`vmalert_config_last_reload_errors_total`)
+	configSuccess      = metrics.NewCounter(`vmalert_config_last_reload_successful`)
+	configTimestamp    = metrics.NewCounter(`vmalert_config_last_reload_success_timestamp_seconds`)
 )

-func (w *watchdog) run(ctx context.Context, group Group, evaluationInterval time.Duration) {
-	logger.Infof("watchdog for %s has been started", group.Name)
-	t := time.NewTicker(evaluationInterval)
-	defer t.Stop()
-	for {
-
-		select {
-		case <-t.C:
-			iterationTotal.Inc()
-			iterationStart := time.Now()
-			for _, rule := range group.Rules {
-				execTotal.Inc()
-
-				execStart := time.Now()
-				err := rule.Exec(ctx, w.storage)
-				execDuration.UpdateDuration(execStart)
-
-				if err != nil {
-					execErrors.Inc()
-					logger.Errorf("failed to execute rule %q.%q: %s", group.Name, rule.Name, err)
-					continue
-				}
-
-				var alertsToSend []notifier.Alert
-				for _, a := range rule.alerts {
-					if a.State != notifier.StatePending {
-						alertsToSend = append(alertsToSend, *a)
-					}
-					if a.State == notifier.StateInactive || w.rw == nil {
-						continue
-					}
-					tss := rule.AlertToTimeSeries(a, execStart)
-					for _, ts := range tss {
-						remoteWriteSent.Inc()
-						if err := w.rw.Push(ts); err != nil {
-							remoteWriteErrors.Inc()
-							logger.Errorf("failed to push timeseries to remotewrite: %s", err)
-						}
-					}
-				}
-				alertsSent.Add(len(alertsToSend))
-				if err := w.alertProvider.Send(alertsToSend); err != nil {
-					alertsSendErrors.Inc()
-					logger.Errorf("failed to send alert for rule %q.%q: %s", group.Name, rule.Name, err)
-				}
-			}
-			iterationDuration.UpdateDuration(iterationStart)
-		case <-ctx.Done():
-			logger.Infof("%s received stop signal", group.Name)
-			return
-		}
-	}
-}
-
 func getExternalURL(externalURL, httpListenAddr string, isSecure bool) (*url.URL, error) {
 	if externalURL != "" {
 		return url.Parse(externalURL)
@@ -201,6 +225,82 @@ func getExternalURL(externalURL, httpListenAddr string, isSecure bool) (*url.URL
 	return url.Parse(fmt.Sprintf("%s%s%s", schema, hname, port))
 }

+// getAlertURLGenerator returns the function used to build the source link
+// of an alert.
+//
+// With an empty externalAlertSource the link is
+// <externalURL>/api/v1/<groupID>/<alertID>/status. Otherwise
+// externalAlertSource is treated as a template that is executed for every
+// alert and appended to externalURL; when validateTemplate is set the
+// template is validated first and a validation failure is returned as an error.
+func getAlertURLGenerator(externalURL *url.URL, externalAlertSource string, validateTemplate bool) (notifier.AlertURLGenerator, error) {
+	if externalAlertSource == "" {
+		statusLink := func(alert notifier.Alert) string {
+			groupID := strconv.FormatUint(alert.GroupID, 10)
+			alertID := strconv.FormatUint(alert.ID, 10)
+			return fmt.Sprintf("%s/api/v1/%s/%s/status", externalURL, groupID, alertID)
+		}
+		return statusLink, nil
+	}
+
+	if validateTemplate {
+		toValidate := map[string]string{
+			"tpl": externalAlertSource,
+		}
+		if err := notifier.ValidateTemplates(toValidate); err != nil {
+			return nil, fmt.Errorf("error validating source template %s:%w", externalAlertSource, err)
+		}
+	}
+
+	// the template set executed for every alert by the generator below
+	sourceTpl := map[string]string{
+		"tpl": externalAlertSource,
+	}
+	return func(alert notifier.Alert) string {
+		rendered, execErr := alert.ExecTemplate(sourceTpl)
+		if execErr != nil {
+			// the error is only logged; the link is still built from
+			// whatever ExecTemplate returned
+			logger.Errorf("can not exec source template %s", execErr)
+		}
+		return fmt.Sprintf("%s/%s", externalURL, rendered["tpl"])
+	}, nil
+}
+
+// getTLSConfig builds a client-side tls.Config from the given flag values.
+//
+// A client certificate is loaded from certFile/keyFile only when certFile is
+// non-empty. A root CA pool is loaded from CAFile only when it is non-empty;
+// otherwise RootCAs stays nil. serverName and insecureSkipVerify are copied
+// into the config as is.
+//
+// Errors from loading the key pair or reading the CA file are wrapped with
+// %w, so callers can inspect the cause with errors.Is / errors.As
+// (e.g. os.ErrNotExist for a missing file).
+func getTLSConfig(certFile, keyFile, CAFile, serverName *string, insecureSkipVerify *bool) (*tls.Config, error) {
+	var certs []tls.Certificate
+	if *certFile != "" {
+		cert, err := tls.LoadX509KeyPair(*certFile, *keyFile)
+		if err != nil {
+			return nil, fmt.Errorf("cannot load TLS certificate from `cert_file`=%q, `key_file`=%q: %w", *certFile, *keyFile, err)
+		}
+
+		certs = []tls.Certificate{cert}
+	}
+
+	var rootCAs *x509.CertPool
+	if *CAFile != "" {
+		pem, err := ioutil.ReadFile(*CAFile)
+		if err != nil {
+			return nil, fmt.Errorf("cannot read `ca_file` %q: %w", *CAFile, err)
+		}
+
+		rootCAs = x509.NewCertPool()
+		// AppendCertsFromPEM reports false when no certificate could be parsed
+		if !rootCAs.AppendCertsFromPEM(pem) {
+			return nil, fmt.Errorf("cannot parse data from `ca_file` %q", *CAFile)
+		}
+	}
+
+	return &tls.Config{
+		Certificates:       certs,
+		InsecureSkipVerify: *insecureSkipVerify,
+		RootCAs:            rootCAs,
+		ServerName:         *serverName,
+	}, nil
+}
+
+// getTransport returns the HTTP transport to use for requests to URL.
+//
+// The result is always a non-nil clone of http.DefaultTransport. When the
+// scheme of URL is "https", the clone additionally gets a TLS client config
+// built by getTLSConfig from the remaining arguments; an error from
+// getTLSConfig is returned unchanged.
+func getTransport(URL, certFile, keyFile, CAFile, serverName *string, insecureSkipVerify *bool) (*http.Transport, error) {
+	var u fasthttp.URI
+	u.Update(*URL)
+
+	// Never return a nil *http.Transport: once it is stored in
+	// http.Client.Transport it becomes a non-nil RoundTripper wrapping a nil
+	// pointer, so the client does not fall back to http.DefaultTransport
+	// and panics on the first request.
+	t := http.DefaultTransport.(*http.Transport).Clone()
+	if string(u.Scheme()) != "https" {
+		return t, nil
+	}
+
+	tlsCfg, err := getTLSConfig(certFile, keyFile, CAFile, serverName, insecureSkipVerify)
+	if err != nil {
+		return nil, err
+	}
+	t.TLSClientConfig = tlsCfg
+
+	return t, nil
+}
+
 func checkFlags() {
 	if *notifierURL == "" {
 		flag.PrintDefaults()
@@ -211,3 +311,15 @@ func checkFlags() {
 		logger.Fatalf("datasource.url is empty")
 	}
 }
+
+// usage writes a short description of vmalert with a link to its docs to
+// the output of the default flag set, followed by the defaults of all
+// registered flags.
+func usage() {
+	const help = `
+vmalert processes alerts and recording rules.
+
+See the docs at https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmalert/README.md .
+`
+
+	out := flag.CommandLine.Output()
+	fmt.Fprintln(out, help)
+	flag.PrintDefaults()
+}
--- a/app/vmalert/main_test.go
+++ b/app/vmalert/main_test.go
@@ -0,0 +1,105 @@
+package main
+
+import (
+	"fmt"
+	"net/url"
+	"os"
+	"testing"
+
+	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
+)
+
+// TestGetExternalURL checks that an explicitly passed external URL is
+// returned as is, and that an empty one is derived from the hostname,
+// the listen port and the TLS switch.
+func TestGetExternalURL(t *testing.T) {
+	expURL := "https://vicotriametrics.com/path"
+	u, err := getExternalURL(expURL, "", false)
+	if err != nil {
+		// u must not be used after a failure, so stop the test right away
+		t.Fatalf("unexpected error %s", err)
+	}
+	if u.String() != expURL {
+		t.Errorf("unexpected url want %s, got %s", expURL, u.String())
+	}
+	h, err := os.Hostname()
+	if err != nil {
+		t.Fatalf("cannot obtain hostname: %s", err)
+	}
+	expURL = fmt.Sprintf("https://%s:4242", h)
+	u, err = getExternalURL("", "0.0.0.0:4242", true)
+	if err != nil {
+		t.Fatalf("unexpected error %s", err)
+	}
+	if u.String() != expURL {
+		t.Errorf("unexpected url want %s, got %s", expURL, u.String())
+	}
+}
+
+// TestGetAlertURLGenerator checks the default alert URL, the rejection of
+// an invalid source template and the URL built from a valid template.
+func TestGetAlertURLGenerator(t *testing.T) {
+	testAlert := notifier.Alert{GroupID: 42, ID: 2, Value: 4}
+	u, _ := url.Parse("https://victoriametrics.com/path")
+	fn, err := getAlertURLGenerator(u, "", false)
+	if err != nil {
+		// fn is called below, so it is pointless to continue without it
+		t.Fatalf("unexpected error %s", err)
+	}
+	if exp := "https://victoriametrics.com/path/api/v1/42/2/status"; exp != fn(testAlert) {
+		t.Errorf("unexpected url want %s, got %s", exp, fn(testAlert))
+	}
+	_, err = getAlertURLGenerator(nil, "foo?{{invalid}}", true)
+	if err == nil {
+		t.Errorf("expected template validation error got nil")
+	}
+	fn, err = getAlertURLGenerator(u, "foo?query={{$value}}", true)
+	if err != nil {
+		t.Fatalf("unexpected error %s", err)
+	}
+	if exp := "https://victoriametrics.com/path/foo?query=4"; exp != fn(testAlert) {
+		t.Errorf("unexpected url want %s, got %s", exp, fn(testAlert))
+	}
+}
+
+// TestGetTLSConfig checks that getTLSConfig propagates ServerName and
+// InsecureSkipVerify, and that it fails on unreadable cert and CA files.
+func TestGetTLSConfig(t *testing.T) {
+	var certFile, keyFile, CAFile, serverName string
+	var insecureSkipVerify bool
+	serverName = "test"
+	insecureSkipVerify = true
+	tlsCfg, err := getTLSConfig(&certFile, &keyFile, &CAFile, &serverName, &insecureSkipVerify)
+	if err != nil {
+		t.Fatalf("unexpected error %s", err)
+	}
+	if tlsCfg == nil {
+		// tlsCfg is dereferenced below, so a nil value must abort the test
+		t.Fatalf("expected tlsConfig to be set, got nil")
+	}
+	if tlsCfg.ServerName != serverName {
+		t.Errorf("unexpected ServerName, want %s, got %s", serverName, tlsCfg.ServerName)
+	}
+	if tlsCfg.InsecureSkipVerify != insecureSkipVerify {
+		t.Errorf("unexpected InsecureSkipVerify, want %v, got %v", insecureSkipVerify, tlsCfg.InsecureSkipVerify)
+	}
+	certFile = "/path/to/nonexisting/cert/file"
+	_, err = getTLSConfig(&certFile, &keyFile, &CAFile, &serverName, &insecureSkipVerify)
+	if err == nil {
+		t.Errorf("expected keypair error, got nil")
+	}
+	certFile = ""
+	CAFile = "/path/to/nonexisting/cert/file"
+	_, err = getTLSConfig(&certFile, &keyFile, &CAFile, &serverName, &insecureSkipVerify)
+	if err == nil {
+		t.Errorf("expected read error, got nil")
+	}
+}
+
+// TestGetTransport checks that no transport is returned for a plain http
+// URL and that an https URL yields a transport with TLSClientConfig set.
+func TestGetTransport(t *testing.T) {
+	var certFile, keyFile, CAFile, serverName string
+	var insecureSkipVerify bool
+	URL := "http://victoriametrics.com"
+	tr, err := getTransport(&URL, &certFile, &keyFile, &CAFile, &serverName, &insecureSkipVerify)
+	if err != nil {
+		t.Errorf("unexpected error %s", err)
+	}
+	if tr != nil {
+		t.Errorf("expected Transport to be nil, got %v", tr)
+	}
+	URL = "https://victoriametrics.com"
+	tr, err = getTransport(&URL, &certFile, &keyFile, &CAFile, &serverName, &insecureSkipVerify)
+	if err != nil {
+		t.Fatalf("unexpected error %s", err)
+	}
+	if tr == nil {
+		// tr is dereferenced below, so a nil value must abort the test
+		t.Fatalf("expected Transport to be set, got nil")
+	}
+	if tr.TLSClientConfig == nil {
+		t.Errorf("expected TLSClientConfig to be set, got nil")
+	}
+}
--- a/app/vmalert/manager.go
+++ b/app/vmalert/manager.go
@@ -0,0 +1,135 @@
+package main
+
+import (
+	"context"
+	"fmt"
+	"strings"
+	"sync"
+
+	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config"
+	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
+	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
+	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/remotewrite"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
+)
+
+// manager controls group states: it owns the set of running groups
+// and the clients which are handed to them on start.
+type manager struct {
+	// storage and notifier are passed to every group when it is started
+	storage  datasource.Querier
+	notifier notifier.Notifier
+
+	// rw is passed to every started group and is closed in close();
+	// rr is used for restoring the group state in startGroup.
+	// Both are checked for nil before use.
+	rw *remotewrite.Client
+	rr datasource.Querier
+
+	// wg tracks the goroutines of the started groups
+	wg sync.WaitGroup
+
+	// groupsMu guards groups, which holds the known groups keyed by Group.ID()
+	groupsMu sync.RWMutex
+	groups   map[uint64]*Group
+}
+
+// AlertAPI generates APIAlert object from alert by its ID(hash).
+//
+// gID selects the group and aID selects the alert inside it; only the
+// alerting rules of the group are inspected. An error is returned if
+// either the group or the alert can't be found.
+func (m *manager) AlertAPI(gID, aID uint64) (*APIAlert, error) {
+	m.groupsMu.RLock()
+	defer m.groupsMu.RUnlock()
+
+	g, ok := m.groups[gID]
+	if !ok {
+		// IDs are numeric, so they are printed with %d:
+		// %q would render them as quoted character literals
+		return nil, fmt.Errorf("can't find group with id %d", gID)
+	}
+	for _, rule := range g.Rules {
+		ar, ok := rule.(*AlertingRule)
+		if !ok {
+			// recording rules hold no alerts
+			continue
+		}
+		if apiAlert := ar.AlertAPI(aID); apiAlert != nil {
+			return apiAlert, nil
+		}
+	}
+	return nil, fmt.Errorf("can't find alert with id %d in group %q", aID, g.Name)
+}
+
+// start performs the initial configuration load: it is the same as update,
+// but with the state restore enabled.
+func (m *manager) start(ctx context.Context, path []string, validateTpl, validateExpr bool) error {
+	const restoreState = true
+	return m.update(ctx, path, validateTpl, validateExpr, restoreState)
+}
+
+// close stops the remote write client, if there is one, and then waits
+// until all the group goroutines tracked by m.wg are finished.
+// A failure to stop the remote write client is fatal.
+func (m *manager) close() {
+	if rw := m.rw; rw != nil {
+		if err := rw.Close(); err != nil {
+			logger.Fatalf("cannot stop the remotewrite: %s", err)
+		}
+	}
+	m.wg.Wait()
+}
+
+// startGroup launches the given group in its own goroutine tracked by m.wg
+// and registers it in m.groups under its ID.
+//
+// If restore is set and m.rr is configured, the group state is restored
+// first; a failed restore is logged and doesn't prevent the start.
+// m.groups is written without locking here - presumably the caller must
+// hold m.groupsMu, as update does.
+func (m *manager) startGroup(ctx context.Context, group *Group, restore bool) {
+	if m.rr != nil && restore {
+		if err := group.Restore(ctx, m.rr, *remoteReadLookBack); err != nil {
+			logger.Errorf("error while restoring state for group %q: %s", group.Name, err)
+		}
+	}
+
+	m.wg.Add(1)
+	// the ID is taken before the group goroutine is launched
+	gid := group.ID()
+	go func() {
+		group.start(ctx, m.storage, m.notifier, m.rw)
+		m.wg.Done()
+	}()
+	m.groups[gid] = group
+}
+
+// update re-reads the rules configuration from path and reconciles
+// the running groups with it:
+//   - groups missing in the new configuration are closed and deleted;
+//   - groups present in both are sent their new version via updateCh;
+//   - groups which are new are started, with the state restore if restore is set.
+//
+// If the configuration can't be parsed, an error is returned and
+// the running groups are left untouched.
+func (m *manager) update(ctx context.Context, path []string, validateTpl, validateExpr, restore bool) error {
+	logger.Infof("reading rules configuration file from %q", strings.Join(path, ";"))
+	groupsCfg, err := config.Parse(path, validateTpl, validateExpr)
+	if err != nil {
+		return fmt.Errorf("cannot parse configuration file: %s", err)
+	}
+
+	groupsRegistry := make(map[uint64]*Group)
+	for _, cfg := range groupsCfg {
+		ng := newGroup(cfg, *evaluationInterval)
+		groupsRegistry[ng.ID()] = ng
+	}
+
+	m.groupsMu.Lock()
+	// deferred, so the lock is released even if something below panics
+	defer m.groupsMu.Unlock()
+
+	for _, og := range m.groups {
+		ng, ok := groupsRegistry[og.ID()]
+		if !ok {
+			// old group is not present in new list
+			// and must be stopped and deleted
+			og.close()
+			delete(m.groups, og.ID())
+			continue
+		}
+		og.updateCh <- ng
+		// what is left in groupsRegistry after the loop are the new groups
+		delete(groupsRegistry, ng.ID())
+	}
+
+	for _, ng := range groupsRegistry {
+		m.startGroup(ctx, ng, restore)
+	}
+	return nil
+}
+
+// toAPI converts the group into its APIGroup representation,
+// with its rules split into alerting and recording ones.
+func (g *Group) toAPI() APIGroup {
+	var ag APIGroup
+	// encode as strings to avoid rounding
+	ag.ID = fmt.Sprintf("%d", g.ID())
+	ag.Name = g.Name
+	ag.File = g.File
+	ag.Interval = g.Interval.String()
+	ag.Concurrency = g.Concurrency
+
+	for _, r := range g.Rules {
+		if ar, ok := r.(*AlertingRule); ok {
+			ag.AlertingRules = append(ag.AlertingRules, ar.RuleAPI())
+			continue
+		}
+		if rr, ok := r.(*RecordingRule); ok {
+			ag.RecordingRules = append(ag.RecordingRules, rr.RuleAPI())
+		}
+	}
+	return ag
+}
--- a/app/vmalert/manager_test.go
+++ b/app/vmalert/manager_test.go
@@ -0,0 +1,211 @@
+package main
+
+import (
+	"context"
+	"math/rand"
+	"net/url"
+	"os"
+	"strings"
+	"sync"
+	"testing"
+	"time"
+
+	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
+)
+
+// TestMain initializes the notifier template functions with a fixed
+// external URL before running the tests of the package.
+func TestMain(m *testing.M) {
+	const externalURL = "https://victoriametrics.com/path"
+	u, _ := url.Parse(externalURL)
+	notifier.InitTemplateFunc(u)
+	code := m.Run()
+	os.Exit(code)
+}
+
+// TestManagerUpdateError checks that update fails with
+// the expected error for a path which holds no groups.
+func TestManagerUpdateError(t *testing.T) {
+	const expErr = "no groups found"
+	mgr := &manager{groups: make(map[uint64]*Group)}
+	err := mgr.update(context.Background(), []string{"foo/bar"}, true, true, false)
+	switch {
+	case err == nil:
+		t.Fatalf("expected to have err; got nil instead")
+	case !strings.Contains(err.Error(), expErr):
+		t.Fatalf("expected to got err %s; got %s", expErr, err)
+	}
+}
+
+// TestManagerUpdateConcurrent is supposed to test concurrent
+// execution of configuration updates.
+// Should be executed with the -race flag.
+func TestManagerUpdateConcurrent(t *testing.T) {
+	m := &manager{
+		groups:   make(map[uint64]*Group),
+		storage:  &fakeQuerier{},
+		notifier: &fakeNotifier{},
+	}
+	paths := []string{
+		"config/testdata/dir/rules0-good.rules",
+		"config/testdata/dir/rules0-bad.rules",
+		"config/testdata/dir/rules1-good.rules",
+		"config/testdata/dir/rules1-bad.rules",
+		"config/testdata/rules0-good.rules",
+		"config/testdata/rules1-good.rules",
+		"config/testdata/rules2-good.rules",
+	}
+	// evaluationInterval is shared by the whole package: restore it on exit,
+	// so the tiny interval doesn't leak into the tests executed afterwards
+	prevEvalInterval := *evaluationInterval
+	*evaluationInterval = time.Millisecond
+	defer func() {
+		*evaluationInterval = prevEvalInterval
+	}()
+	if err := m.start(context.Background(), []string{paths[0]}, true, true); err != nil {
+		t.Fatalf("failed to start: %s", err)
+	}
+
+	const workers = 500
+	const iterations = 10
+	wg := sync.WaitGroup{}
+	wg.Add(workers)
+	for i := 0; i < workers; i++ {
+		go func() {
+			defer wg.Done()
+			for j := 0; j < iterations; j++ {
+				rnd := rand.Intn(len(paths))
+				path := []string{paths[rnd]}
+				// the result is ignored on purpose: the test is only about
+				// data races, and a part of the paths are *-bad.rules files
+				_ = m.update(context.Background(), path, true, true, false)
+			}
+		}()
+	}
+	wg.Wait()
+}
+
+// TestManagerUpdate tests sequential configuration
+// updates: every case loads initPath into a fresh manager,
+// then applies updatePath and compares the resulting groups with want.
+func TestManagerUpdate(t *testing.T) {
+	// pin the evaluation interval for the test and restore it on exit
+	const defaultEvalInterval = time.Second * 30
+	currentEvalInterval := *evaluationInterval
+	*evaluationInterval = defaultEvalInterval
+	defer func() {
+		*evaluationInterval = currentEvalInterval
+	}()
+
+	// rules shared by the expectations of several test cases
+	var (
+		VMRows = &AlertingRule{
+			Name: "VMRows",
+			Expr: "vm_rows > 0",
+			For:  10 * time.Second,
+			Labels: map[string]string{
+				"label": "bar",
+				"host":  "{{ $labels.instance }}",
+			},
+			Annotations: map[string]string{
+				"summary":     "{{ $value|humanize }}",
+				"description": "{{$labels}}",
+			},
+		}
+		Conns = &AlertingRule{
+			Name: "Conns",
+			Expr: "sum(vm_tcplistener_conns) by(instance) > 1",
+			Annotations: map[string]string{
+				"summary":     "Too high connection number for {{$labels.instance}}",
+				"description": "It is {{ $value }} connections for {{$labels.instance}}",
+			},
+		}
+		ExampleAlertAlwaysFiring = &AlertingRule{
+			Name: "ExampleAlertAlwaysFiring",
+			Expr: "sum by(job) (up == 1)",
+		}
+	)
+
+	testCases := []struct {
+		name       string
+		initPath   string
+		updatePath string
+		want       []*Group
+	}{
+		{
+			name:       "update good rules",
+			initPath:   "config/testdata/rules0-good.rules",
+			updatePath: "config/testdata/dir/rules1-good.rules",
+			want: []*Group{
+				{
+					File:     "config/testdata/dir/rules1-good.rules",
+					Name:     "duplicatedGroupDiffFiles",
+					Interval: defaultEvalInterval,
+					Rules: []Rule{
+						&AlertingRule{
+							Name:   "VMRows",
+							Expr:   "vm_rows > 0",
+							For:    5 * time.Minute,
+							Labels: map[string]string{"label": "bar"},
+							Annotations: map[string]string{
+								"summary":     "{{ $value }}",
+								"description": "{{$labels}}",
+							},
+						},
+					},
+				},
+			},
+		},
+		{
+			name:       "update good rules from 1 to 2 groups",
+			initPath:   "config/testdata/dir/rules1-good.rules",
+			updatePath: "config/testdata/rules0-good.rules",
+			want: []*Group{
+				{
+					File:     "config/testdata/rules0-good.rules",
+					Name:     "groupGorSingleAlert",
+					Rules:    []Rule{VMRows},
+					Interval: defaultEvalInterval,
+				},
+				{
+					File:     "config/testdata/rules0-good.rules",
+					Interval: defaultEvalInterval,
+					Name:     "TestGroup", Rules: []Rule{
+						Conns,
+						ExampleAlertAlwaysFiring,
+					}},
+			},
+		},
+		{
+			// the failed update must leave the groups of initPath in place
+			name:       "update with one bad rule file",
+			initPath:   "config/testdata/rules0-good.rules",
+			updatePath: "config/testdata/dir/rules2-bad.rules",
+			want: []*Group{
+				{
+					File:     "config/testdata/rules0-good.rules",
+					Name:     "groupGorSingleAlert",
+					Interval: defaultEvalInterval,
+					Rules:    []Rule{VMRows},
+				},
+				{
+					File:     "config/testdata/rules0-good.rules",
+					Interval: defaultEvalInterval,
+					Name:     "TestGroup", Rules: []Rule{
+						Conns,
+						ExampleAlertAlwaysFiring,
+					}},
+			},
+		},
+	}
+	for _, tc := range testCases {
+		t.Run(tc.name, func(t *testing.T) {
+			ctx, cancel := context.WithCancel(context.TODO())
+			m := &manager{groups: make(map[uint64]*Group), storage: &fakeQuerier{}}
+			path := []string{tc.initPath}
+			if err := m.update(ctx, path, true, true, false); err != nil {
+				t.Fatalf("failed to complete initial rules update: %s", err)
+			}
+
+			// the error of the second update is ignored on purpose:
+			// only the resulting set of groups is verified below
+			path = []string{tc.updatePath}
+			_ = m.update(ctx, path, true, true, false)
+			if len(tc.want) != len(m.groups) {
+				t.Fatalf("\nwant number of groups: %d;\ngot: %d ", len(tc.want), len(m.groups))
+			}
+
+			for _, wantG := range tc.want {
+				gotG, ok := m.groups[wantG.ID()]
+				if !ok {
+					t.Fatalf("expected to have group %q", wantG.Name)
+				}
+				compareGroups(t, wantG, gotG)
+			}
+
+			// cancel the context passed to the groups, then wait in close()
+			// for their goroutines to finish
+			cancel()
+			m.close()
+		})
+	}
+}
--- a/app/vmalert/notifier/alert.go
+++ b/app/vmalert/notifier/alert.go
@@ -12,12 +12,13 @@ import (
 // Alert the triggered alert
 // TODO: Looks like alert name isn't unique
 type Alert struct {
-	Group       string
+	GroupID     uint64
 	Name        string
 	Labels      map[string]string
 	Annotations map[string]string
 	State       AlertState

+	Expr  string
 	Start time.Time
 	End   time.Time
 	Value float64
@@ -52,14 +53,15 @@ func (as AlertState) String() string {
 type alertTplData struct {
 	Labels map[string]string
 	Value  float64
+	Expr   string
 }

-const tplHeader = `{{ $value := .Value }}{{ $labels := .Labels }}`
+const tplHeader = `{{ $value := .Value }}{{ $labels := .Labels }}{{ $expr := .Expr }}`

 // ExecTemplate executes the Alert template for give
 // map of annotations.
 func (a *Alert) ExecTemplate(annotations map[string]string) (map[string]string, error) {
-	tplData := alertTplData{Value: a.Value, Labels: a.Labels}
+	tplData := alertTplData{Value: a.Value, Labels: a.Labels, Expr: a.Expr}
 	return templateAnnotations(annotations, tplHeader, tplData)
 }

@@ -85,7 +87,7 @@ func templateAnnotations(annotations map[string]string, header string, data aler
 		builder.WriteString(header)
 		builder.WriteString(text)
 		if err := templateAnnotation(&buf, builder.String(), data); err != nil {
-			eg.errs = append(eg.errs, fmt.Sprintf("key %s, template %s:%s", key, text, err))
+			eg.errs = append(eg.errs, fmt.Sprintf("key %q, template %q: %s", key, text, err))
 			continue
 		}
 		r[key] = buf.String()
@@ -96,10 +98,10 @@ func templateAnnotations(annotations map[string]string, header string, data aler
 func templateAnnotation(dst io.Writer, text string, data alertTplData) error {
 	tpl, err := template.New("").Funcs(tmplFunc).Option("missingkey=zero").Parse(text)
 	if err != nil {
-		return fmt.Errorf("error parsing annotation:%w", err)
+		return fmt.Errorf("error parsing annotation: %w", err)
 	}
 	if err = tpl.Execute(dst, data); err != nil {
-		return fmt.Errorf("error evaluating annotation template:%w", err)
+		return fmt.Errorf("error evaluating annotation template: %w", err)
 	}
 	return nil
 }
--- a/app/vmalert/notifier/alert_test.go
+++ b/app/vmalert/notifier/alert_test.go
@@ -1,22 +1,24 @@
 package notifier

 import (
-	"fmt"
 	"testing"
 )

 func TestAlert_ExecTemplate(t *testing.T) {
 	testCases := []struct {
+		name        string
 		alert       *Alert
 		annotations map[string]string
 		expTpl      map[string]string
 	}{
 		{
+			name:        "empty-alert",
 			alert:       &Alert{},
 			annotations: map[string]string{},
 			expTpl:      map[string]string{},
 		},
 		{
+			name: "no-template",
 			alert: &Alert{
 				Value: 1e4,
 				Labels: map[string]string{
@@ -27,6 +29,7 @@ func TestAlert_ExecTemplate(t *testing.T) {
 			expTpl:      map[string]string{},
 		},
 		{
+			name: "label-template",
 			alert: &Alert{
 				Value: 1e4,
 				Labels: map[string]string{
@@ -43,10 +46,24 @@ func TestAlert_ExecTemplate(t *testing.T) {
 				"description": "It is 10000 connections for localhost",
 			},
 		},
+		{
+			name: "expression-template",
+			alert: &Alert{
+				Expr: `vm_rows{"label"="bar"}>0`,
+			},
+			annotations: map[string]string{
+				"exprEscapedQuery": "{{ $expr|quotesEscape|queryEscape }}",
+				"exprEscapedPath":  "{{ $expr|quotesEscape|pathEscape }}",
+			},
+			expTpl: map[string]string{
+				"exprEscapedQuery": "vm_rows%7B%5C%22label%5C%22%3D%5C%22bar%5C%22%7D%3E0",
+				"exprEscapedPath":  "vm_rows%7B%5C%22label%5C%22=%5C%22bar%5C%22%7D%3E0",
+			},
+		},
 	}

-	for i, tc := range testCases {
-		t.Run(fmt.Sprintf("%d", i), func(t *testing.T) {
+	for _, tc := range testCases {
+		t.Run(tc.name, func(t *testing.T) {
 			tpl, err := tc.alert.ExecTemplate(tc.annotations)
 			if err != nil {
 				t.Fatal(err)
--- a/app/vmalert/notifier/alertmanager.go
+++ b/app/vmalert/notifier/alertmanager.go
@@ -2,6 +2,7 @@ package notifier

 import (
 	"bytes"
+	"context"
 	"fmt"
 	"io/ioutil"
 	"net/http"
@@ -17,13 +18,21 @@ type AlertManager struct {
 }

 // Send an alert or resolve message
-func (am *AlertManager) Send(alerts []Alert) error {
+func (am *AlertManager) Send(ctx context.Context, alerts []Alert) error {
 	b := &bytes.Buffer{}
 	writeamRequest(b, alerts, am.argFunc)
-	resp, err := am.client.Post(am.alertURL, "application/json", b)
+
+	req, err := http.NewRequest("POST", am.alertURL, b)
 	if err != nil {
 		return err
 	}
+	req.Header.Set("Content-Type", "application/json")
+	req = req.WithContext(ctx)
+	resp, err := am.client.Do(req)
+	if err != nil {
+		return err
+	}
+
 	defer func() { _ = resp.Body.Close() }()

 	if resp.StatusCode != http.StatusOK {
@@ -37,7 +46,7 @@ func (am *AlertManager) Send(alerts []Alert) error {
 }

 // AlertURLGenerator returns URL to single alert by given name
-type AlertURLGenerator func(group, id string) string
+type AlertURLGenerator func(Alert) string

 const alertManagerPath = "/api/v2/alerts"

--- a/app/vmalert/notifier/alertmanager_request.qtpl
+++ b/app/vmalert/notifier/alertmanager_request.qtpl
@@ -1,15 +1,14 @@
 {% import (
-    "strconv"
    "time"
 ) %}
 {% stripspace %}

-{% func amRequest(alerts []Alert, generatorURL func(string, string) string) %}
+{% func amRequest(alerts []Alert, generatorURL func(Alert) string) %}
 [
 {% for i, alert := range alerts %}
 {
-	"startsAt":{%q= alert.Start.Format(time.RFC3339Nano) %},
-	"generatorURL": {%q= generatorURL(alert.Group, strconv.FormatUint(alert.ID, 10)) %},
+    "startsAt":{%q= alert.Start.Format(time.RFC3339Nano) %},
+    "generatorURL": {%q= generatorURL(alert) %},
    {% if !alert.End.IsZero() %}
    "endsAt":{%q= alert.End.Format(time.RFC3339Nano) %},
    {% endif %}
--- a/app/vmalert/notifier/alertmanager_request.qtpl.go
+++ b/app/vmalert/notifier/alertmanager_request.qtpl.go
@@ -6,126 +6,125 @@ package notifier

 //line app/vmalert/notifier/alertmanager_request.qtpl:1
 import (
-	"strconv"
 	"time"
 )

-//line app/vmalert/notifier/alertmanager_request.qtpl:7
+//line app/vmalert/notifier/alertmanager_request.qtpl:6
 import (
 	qtio422016 "io"

 	qt422016 "github.com/valyala/quicktemplate"
 )

-//line app/vmalert/notifier/alertmanager_request.qtpl:7
+//line app/vmalert/notifier/alertmanager_request.qtpl:6
 var (
 	_ = qtio422016.Copy
 	_ = qt422016.AcquireByteBuffer
 )

-//line app/vmalert/notifier/alertmanager_request.qtpl:7
-func streamamRequest(qw422016 *qt422016.Writer, alerts []Alert, generatorURL func(string, string) string) {
-//line app/vmalert/notifier/alertmanager_request.qtpl:7
+//line app/vmalert/notifier/alertmanager_request.qtpl:6
+func streamamRequest(qw422016 *qt422016.Writer, alerts []Alert, generatorURL func(Alert) string) {
+//line app/vmalert/notifier/alertmanager_request.qtpl:6
 	qw422016.N().S(`[`)
-//line app/vmalert/notifier/alertmanager_request.qtpl:9
+//line app/vmalert/notifier/alertmanager_request.qtpl:8
 	for i, alert := range alerts {
-//line app/vmalert/notifier/alertmanager_request.qtpl:9
+//line app/vmalert/notifier/alertmanager_request.qtpl:8
 		qw422016.N().S(`{"startsAt":`)
-//line app/vmalert/notifier/alertmanager_request.qtpl:11
+//line app/vmalert/notifier/alertmanager_request.qtpl:10
 		qw422016.N().Q(alert.Start.Format(time.RFC3339Nano))
-//line app/vmalert/notifier/alertmanager_request.qtpl:11
+//line app/vmalert/notifier/alertmanager_request.qtpl:10
 		qw422016.N().S(`,"generatorURL":`)
-//line app/vmalert/notifier/alertmanager_request.qtpl:12
-		qw422016.N().Q(generatorURL(alert.Group, strconv.FormatUint(alert.ID, 10)))
-//line app/vmalert/notifier/alertmanager_request.qtpl:12
+//line app/vmalert/notifier/alertmanager_request.qtpl:11
+		qw422016.N().Q(generatorURL(alert))
+//line app/vmalert/notifier/alertmanager_request.qtpl:11
 		qw422016.N().S(`,`)
-//line app/vmalert/notifier/alertmanager_request.qtpl:13
+//line app/vmalert/notifier/alertmanager_request.qtpl:12
 		if !alert.End.IsZero() {
-//line app/vmalert/notifier/alertmanager_request.qtpl:13
+//line app/vmalert/notifier/alertmanager_request.qtpl:12
 			qw422016.N().S(`"endsAt":`)
-//line app/vmalert/notifier/alertmanager_request.qtpl:14
+//line app/vmalert/notifier/alertmanager_request.qtpl:13
 			qw422016.N().Q(alert.End.Format(time.RFC3339Nano))
+//line app/vmalert/notifier/alertmanager_request.qtpl:13
+			qw422016.N().S(`,`)
 //line app/vmalert/notifier/alertmanager_request.qtpl:14
-			qw422016.N().S(`,`)
-//line app/vmalert/notifier/alertmanager_request.qtpl:15
 		}
-//line app/vmalert/notifier/alertmanager_request.qtpl:15
+//line app/vmalert/notifier/alertmanager_request.qtpl:14
 		qw422016.N().S(`"labels": {"alertname":`)
-//line app/vmalert/notifier/alertmanager_request.qtpl:17
+//line app/vmalert/notifier/alertmanager_request.qtpl:16
 		qw422016.N().Q(alert.Name)
-//line app/vmalert/notifier/alertmanager_request.qtpl:18
+//line app/vmalert/notifier/alertmanager_request.qtpl:17
 		for k, v := range alert.Labels {
-//line app/vmalert/notifier/alertmanager_request.qtpl:18
+//line app/vmalert/notifier/alertmanager_request.qtpl:17
 			qw422016.N().S(`,`)
-//line app/vmalert/notifier/alertmanager_request.qtpl:19
+//line app/vmalert/notifier/alertmanager_request.qtpl:18
 			qw422016.N().Q(k)
-//line app/vmalert/notifier/alertmanager_request.qtpl:19
+//line app/vmalert/notifier/alertmanager_request.qtpl:18
 			qw422016.N().S(`:`)
-//line app/vmalert/notifier/alertmanager_request.qtpl:19
+//line app/vmalert/notifier/alertmanager_request.qtpl:18
 			qw422016.N().Q(v)
-//line app/vmalert/notifier/alertmanager_request.qtpl:20
+//line app/vmalert/notifier/alertmanager_request.qtpl:19
 		}
-//line app/vmalert/notifier/alertmanager_request.qtpl:20
+//line app/vmalert/notifier/alertmanager_request.qtpl:19
 		qw422016.N().S(`},"annotations": {`)
-//line app/vmalert/notifier/alertmanager_request.qtpl:23
+//line app/vmalert/notifier/alertmanager_request.qtpl:22
 		c := len(alert.Annotations)

-//line app/vmalert/notifier/alertmanager_request.qtpl:24
+//line app/vmalert/notifier/alertmanager_request.qtpl:23
 		for k, v := range alert.Annotations {
-//line app/vmalert/notifier/alertmanager_request.qtpl:25
+//line app/vmalert/notifier/alertmanager_request.qtpl:24
 			c = c - 1

-//line app/vmalert/notifier/alertmanager_request.qtpl:26
+//line app/vmalert/notifier/alertmanager_request.qtpl:25
 			qw422016.N().Q(k)
-//line app/vmalert/notifier/alertmanager_request.qtpl:26
+//line app/vmalert/notifier/alertmanager_request.qtpl:25
 			qw422016.N().S(`:`)
-//line app/vmalert/notifier/alertmanager_request.qtpl:26
+//line app/vmalert/notifier/alertmanager_request.qtpl:25
 			qw422016.N().Q(v)
-//line app/vmalert/notifier/alertmanager_request.qtpl:26
+//line app/vmalert/notifier/alertmanager_request.qtpl:25
 			if c > 0 {
-//line app/vmalert/notifier/alertmanager_request.qtpl:26
+//line app/vmalert/notifier/alertmanager_request.qtpl:25
 				qw422016.N().S(`,`)
-//line app/vmalert/notifier/alertmanager_request.qtpl:26
+//line app/vmalert/notifier/alertmanager_request.qtpl:25
 			}
-//line app/vmalert/notifier/alertmanager_request.qtpl:27
+//line app/vmalert/notifier/alertmanager_request.qtpl:26
 		}
-//line app/vmalert/notifier/alertmanager_request.qtpl:27
+//line app/vmalert/notifier/alertmanager_request.qtpl:26
 		qw422016.N().S(`}}`)
-//line app/vmalert/notifier/alertmanager_request.qtpl:30
+//line app/vmalert/notifier/alertmanager_request.qtpl:29
 		if i != len(alerts)-1 {
-//line app/vmalert/notifier/alertmanager_request.qtpl:30
+//line app/vmalert/notifier/alertmanager_request.qtpl:29
 			qw422016.N().S(`,`)
-//line app/vmalert/notifier/alertmanager_request.qtpl:30
+//line app/vmalert/notifier/alertmanager_request.qtpl:29
 		}
-//line app/vmalert/notifier/alertmanager_request.qtpl:31
+//line app/vmalert/notifier/alertmanager_request.qtpl:30
 	}
-//line app/vmalert/notifier/alertmanager_request.qtpl:31
+//line app/vmalert/notifier/alertmanager_request.qtpl:30
 	qw422016.N().S(`]`)
-//line app/vmalert/notifier/alertmanager_request.qtpl:33
+//line app/vmalert/notifier/alertmanager_request.qtpl:32
 }

-//line app/vmalert/notifier/alertmanager_request.qtpl:33
-func writeamRequest(qq422016 qtio422016.Writer, alerts []Alert, generatorURL func(string, string) string) {
-//line app/vmalert/notifier/alertmanager_request.qtpl:33
+//line app/vmalert/notifier/alertmanager_request.qtpl:32
+func writeamRequest(qq422016 qtio422016.Writer, alerts []Alert, generatorURL func(Alert) string) {
+//line app/vmalert/notifier/alertmanager_request.qtpl:32
 	qw422016 := qt422016.AcquireWriter(qq422016)
-//line app/vmalert/notifier/alertmanager_request.qtpl:33
+//line app/vmalert/notifier/alertmanager_request.qtpl:32
 	streamamRequest(qw422016, alerts, generatorURL)
-//line app/vmalert/notifier/alertmanager_request.qtpl:33
+//line app/vmalert/notifier/alertmanager_request.qtpl:32
 	qt422016.ReleaseWriter(qw422016)
-//line app/vmalert/notifier/alertmanager_request.qtpl:33
+//line app/vmalert/notifier/alertmanager_request.qtpl:32
 }

-//line app/vmalert/notifier/alertmanager_request.qtpl:33
-func amRequest(alerts []Alert, generatorURL func(string, string) string) string {
-//line app/vmalert/notifier/alertmanager_request.qtpl:33
+//line app/vmalert/notifier/alertmanager_request.qtpl:32
+func amRequest(alerts []Alert, generatorURL func(Alert) string) string {
+//line app/vmalert/notifier/alertmanager_request.qtpl:32
 	qb422016 := qt422016.AcquireByteBuffer()
-//line app/vmalert/notifier/alertmanager_request.qtpl:33
+//line app/vmalert/notifier/alertmanager_request.qtpl:32
 	writeamRequest(qb422016, alerts, generatorURL)
-//line app/vmalert/notifier/alertmanager_request.qtpl:33
+//line app/vmalert/notifier/alertmanager_request.qtpl:32
 	qs422016 := string(qb422016.B)
-//line app/vmalert/notifier/alertmanager_request.qtpl:33
+//line app/vmalert/notifier/alertmanager_request.qtpl:32
 	qt422016.ReleaseByteBuffer(qb422016)
-//line app/vmalert/notifier/alertmanager_request.qtpl:33
+//line app/vmalert/notifier/alertmanager_request.qtpl:32
 	return qs422016
-//line app/vmalert/notifier/alertmanager_request.qtpl:33
+//line app/vmalert/notifier/alertmanager_request.qtpl:32
 }
--- a/app/vmalert/notifier/alertmanager_test.go
+++ b/app/vmalert/notifier/alertmanager_test.go
@@ -1,9 +1,11 @@
 package notifier

 import (
+	"context"
 	"encoding/json"
 	"net/http"
 	"net/http/httptest"
+	"strconv"
 	"testing"
 	"time"
 )
@@ -40,8 +42,8 @@ func TestAlertManager_Send(t *testing.T) {
 			if len(a) != 1 {
 				t.Errorf("expected 1 alert in array got %d", len(a))
 			}
-			if a[0].GeneratorURL != "group0" {
-				t.Errorf("exptected alert0 as generatorURL got %s", a[0].GeneratorURL)
+			if a[0].GeneratorURL != "0/0" {
+				t.Errorf("exptected 0/0 as generatorURL got %s", a[0].GeneratorURL)
 			}
 			if a[0].Labels["alertname"] != "alert0" {
 				t.Errorf("exptected alert0 as alert name got %s", a[0].Labels["alertname"])
@@ -56,17 +58,17 @@ func TestAlertManager_Send(t *testing.T) {
 	})
 	srv := httptest.NewServer(mux)
 	defer srv.Close()
-	am := NewAlertManager(srv.URL, func(group, name string) string {
-		return group + name
+	am := NewAlertManager(srv.URL, func(alert Alert) string {
+		return strconv.FormatUint(alert.GroupID, 10) + "/" + strconv.FormatUint(alert.ID, 10)
 	}, srv.Client())
-	if err := am.Send([]Alert{{}, {}}); err == nil {
+	if err := am.Send(context.Background(), []Alert{{}, {}}); err == nil {
 		t.Error("expected connection error got nil")
 	}
-	if err := am.Send([]Alert{}); err == nil {
+	if err := am.Send(context.Background(), []Alert{}); err == nil {
 		t.Error("expected wrong http code error got nil")
 	}
-	if err := am.Send([]Alert{{
-		Group:       "group",
+	if err := am.Send(context.Background(), []Alert{{
+		GroupID:     0,
 		Name:        "alert0",
 		Start:       time.Now().UTC(),
 		End:         time.Now().UTC(),
--- a/app/vmalert/notifier/notifier.go
+++ b/app/vmalert/notifier/notifier.go
@@ -1,6 +1,8 @@
 package notifier

+import "context"
+
 // Notifier is common interface for alert manager provider
 type Notifier interface {
-	Send(alerts []Alert) error
+	Send(ctx context.Context, alerts []Alert) error
 }
--- a/app/vmalert/notifier/package_test.go
+++ b/app/vmalert/notifier/package_test.go
@@ -0,0 +1,13 @@
+package notifier
+
+import (
+	"net/url"
+	"os"
+	"testing"
+)
+
+// TestMain initializes the template functions with a fixed
+// external URL before running the tests of the package.
+func TestMain(m *testing.M) {
+	const externalURL = "https://victoriametrics.com/path"
+	u, _ := url.Parse(externalURL)
+	InitTemplateFunc(u)
+	code := m.Run()
+	os.Exit(code)
+}
--- a/app/vmalert/notifier/template_func.go
+++ b/app/vmalert/notifier/template_func.go
@@ -142,6 +142,15 @@ func InitTemplateFunc(externalURL *url.URL) {
 		"externalURL": func() string {
 			return externalURL.String()
 		},
+		"pathEscape": func(u string) string {
+			return url.PathEscape(u)
+		},
+		"queryEscape": func(q string) string {
+			return url.QueryEscape(q)
+		},
+		"quotesEscape": func(q string) string {
+			return strings.Replace(q, `"`, `\"`, -1)
+		},
 	}
 }

--- a/app/vmalert/notifier/utils.go
+++ b/app/vmalert/notifier/utils.go
@@ -17,5 +17,5 @@ func (eg *errGroup) err() error {
 }

 func (eg *errGroup) Error() string {
-	return fmt.Sprintf("errors:%s", strings.Join(eg.errs, "\n"))
+	return fmt.Sprintf("errors: %s", strings.Join(eg.errs, "\n"))
 }
--- a/app/vmalert/recording.go
+++ b/app/vmalert/recording.go
@@ -0,0 +1,149 @@
+package main
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"hash/fnv"
+	"sort"
+	"sync"
+	"time"
+
+	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config"
+	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
+)
+
+// RecordingRule is a Rule that is supposed
+// to evaluate the configured Expression and
+// return TimeSeries as the result.
+type RecordingRule struct {
+	// RuleID is returned by ID(); Name is set as the `__name__` label
+	// of every produced time series; Expr is the query passed to the Querier;
+	// Labels override the labels of the query result;
+	// GroupID is the ID of the group passed to newRecordingRule.
+	RuleID  uint64
+	Name    string
+	Expr    string
+	Labels  map[string]string
+	GroupID uint64
+
+	// guard status fields
+	mu sync.RWMutex
+	// stores last moment of time Exec was called
+	lastExecTime time.Time
+	// stores last error that happened in Exec func
+	// resets on every successful Exec
+	// may be used as Health state
+	lastExecError error
+}
+
+// String implements Stringer interface
+// by returning the name of the rule.
+func (rr *RecordingRule) String() string {
+	return rr.Name
+}
+
+// ID returns unique Rule ID
+// within the parent Group.
+// The value is the RuleID field, which newRecordingRule takes from the rule config.
+func (rr *RecordingRule) ID() uint64 {
+	return rr.RuleID
+}
+
+// newRecordingRule creates a RecordingRule from the given rule config
+// and binds it to the group with the given gID.
+func newRecordingRule(gID uint64, cfg config.Rule) *RecordingRule {
+	rr := new(RecordingRule)
+	rr.GroupID = gID
+	rr.RuleID = cfg.ID
+	rr.Name = cfg.Record
+	rr.Expr = cfg.Expr
+	rr.Labels = cfg.Labels
+	return rr
+}
+
+// errDuplicate is returned by Exec when two resulting series
+// end up with the same set of labels.
+var errDuplicate = errors.New("result contains metrics with the same labelset after applying rule labels")
+
+// Exec executes RecordingRule expression via the given Querier.
+//
+// It is a no-op returning (nil, nil) if series is false. Otherwise every
+// metric of the query result is converted into a time series stamped with
+// the execution time. The time and the error of the execution are stored
+// in the status fields of the rule.
+func (rr *RecordingRule) Exec(ctx context.Context, q datasource.Querier, series bool) ([]prompbmarshal.TimeSeries, error) {
+	if !series {
+		return nil, nil
+	}
+
+	// the query is executed outside of the lock,
+	// which guards only the status fields
+	qMetrics, err := q.Query(ctx, rr.Expr)
+
+	rr.mu.Lock()
+	defer rr.mu.Unlock()
+
+	now := time.Now()
+	rr.lastExecTime = now
+	rr.lastExecError = err
+	if err != nil {
+		return nil, fmt.Errorf("failed to execute query %q: %s", rr.Expr, err)
+	}
+
+	// hashes of the labelsets which were already produced
+	seen := make(map[uint64]struct{}, len(qMetrics))
+	var result []prompbmarshal.TimeSeries
+	for i := range qMetrics {
+		ts := rr.toTimeSeries(qMetrics[i], now)
+		key := hashTimeSeries(ts)
+		if _, dup := seen[key]; dup {
+			rr.lastExecError = errDuplicate
+			return nil, errDuplicate
+		}
+		seen[key] = struct{}{}
+		result = append(result, ts)
+	}
+	return result, nil
+}
+
+// hashTimeSeries returns the FNV-1a hash of the labels of ts,
+// which doesn't depend on their original order.
+//
+// Note that the labels are sorted by name in place: ts is passed
+// by value, but its Labels slice shares the backing array with the caller.
+func hashTimeSeries(ts prompbmarshal.TimeSeries) uint64 {
+	hash := fnv.New64a()
+	labels := ts.Labels
+	sort.Slice(labels, func(i, j int) bool {
+		return labels[i].Name < labels[j].Name
+	})
+	// The separator goes after the name as well as after the value,
+	// otherwise {a="bc"} and {ab="c"} produce the same hash.
+	// The 0xff byte never occurs in valid UTF-8 text.
+	sep := []byte("\xff")
+	for _, l := range labels {
+		hash.Write([]byte(l.Name))
+		hash.Write(sep)
+		hash.Write([]byte(l.Value))
+		hash.Write(sep)
+	}
+	return hash.Sum64()
+}
+
+// toTimeSeries converts the given metric into a time series with the given
+// timestamp. The `__name__` label is set to the name of the rule, then the
+// labels configured for the rule are applied on top of everything else.
+func (rr *RecordingRule) toTimeSeries(m datasource.Metric, timestamp time.Time) prompbmarshal.TimeSeries {
+	merged := make(map[string]string, len(m.Labels)+len(rr.Labels)+1)
+	for i := range m.Labels {
+		merged[m.Labels[i].Name] = m.Labels[i].Value
+	}
+	merged["__name__"] = rr.Name
+	// override existing labels with configured ones
+	for name, value := range rr.Labels {
+		merged[name] = value
+	}
+	return newTimeSeries(m.Value, merged, timestamp)
+}
+
+// UpdateWith copies all significant fields - Expr and Labels -
+// from the given rule, which must be a *RecordingRule.
+// The ID, the name and the execution status of the rule are left as is.
+func (rr *RecordingRule) UpdateWith(r Rule) error {
+	nr, ok := r.(*RecordingRule)
+	if !ok {
+		return fmt.Errorf("BUG: attempt to update recording rule with wrong type %#v", r)
+	}
+	rr.Expr = nr.Expr
+	rr.Labels = nr.Labels
+	return nil
+}
+
+// RuleAPI returns Rule representation in form
+// of APIRecordingRule.
+// LastError is empty if the last Exec didn't end with an error.
+func (rr *RecordingRule) RuleAPI() APIRecordingRule {
+	// the status fields are written by Exec under rr.mu,
+	// so they must be read under the same lock
+	rr.mu.RLock()
+	defer rr.mu.RUnlock()
+
+	var lastErr string
+	if rr.lastExecError != nil {
+		lastErr = rr.lastExecError.Error()
+	}
+	return APIRecordingRule{
+		// encode as strings to avoid rounding
+		ID:         fmt.Sprintf("%d", rr.ID()),
+		GroupID:    fmt.Sprintf("%d", rr.GroupID),
+		Name:       rr.Name,
+		Expression: rr.Expr,
+		LastError:  lastErr,
+		LastExec:   rr.lastExecTime,
+		Labels:     rr.Labels,
+	}
+}
--- a/app/vmalert/recording_test.go
+++ b/app/vmalert/recording_test.go
@@ -0,0 +1,121 @@
+package main
+
+import (
+	"context"
+	"errors"
+	"strings"
+	"testing"
+	"time"
+
+	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
+)
+
+func TestRecoridngRule_ToTimeSeries(t *testing.T) { // checks the series Exec returns for a RecordingRule
+	timestamp := time.Now()
+	testCases := []struct {
+		rule    *RecordingRule             // rule under test
+		metrics []datasource.Metric        // series served by the fake querier
+		expTS   []prompbmarshal.TimeSeries // series Exec is expected to return
+	}{
+		{ // the metric's own __name__ is replaced by the rule name
+			&RecordingRule{Name: "foo"},
+			[]datasource.Metric{metricWithValueAndLabels(t, 10,
+				"__name__", "bar",
+			)},
+			[]prompbmarshal.TimeSeries{
+				newTimeSeries(10, map[string]string{
+					"__name__": "foo",
+				}, timestamp),
+			},
+		},
+		{ // each input series yields one output series; labels other than __name__ are kept
+			&RecordingRule{Name: "foobarbaz"},
+			[]datasource.Metric{
+				metricWithValueAndLabels(t, 1, "__name__", "foo", "job", "foo"),
+				metricWithValueAndLabels(t, 2, "__name__", "bar", "job", "bar"),
+				metricWithValueAndLabels(t, 3, "__name__", "baz", "job", "baz"),
+			},
+			[]prompbmarshal.TimeSeries{
+				newTimeSeries(1, map[string]string{
+					"__name__": "foobarbaz",
+					"job":      "foo",
+				}, timestamp),
+				newTimeSeries(2, map[string]string{
+					"__name__": "foobarbaz",
+					"job":      "bar",
+				}, timestamp),
+				newTimeSeries(3, map[string]string{
+					"__name__": "foobarbaz",
+					"job":      "baz",
+				}, timestamp),
+			},
+		},
+		{ // labels configured on the rule are attached to every series
+			&RecordingRule{Name: "job:foo", Labels: map[string]string{
+				"source": "test",
+			}},
+			[]datasource.Metric{
+				metricWithValueAndLabels(t, 2, "__name__", "foo", "job", "foo"),
+				metricWithValueAndLabels(t, 1, "__name__", "bar", "job", "bar")},
+			[]prompbmarshal.TimeSeries{
+				newTimeSeries(2, map[string]string{
+					"__name__": "job:foo",
+					"job":      "foo",
+					"source":   "test",
+				}, timestamp),
+				newTimeSeries(1, map[string]string{
+					"__name__": "job:foo",
+					"job":      "bar",
+					"source":   "test",
+				}, timestamp),
+			},
+		},
+	}
+	for _, tc := range testCases {
+		t.Run(tc.rule.Name, func(t *testing.T) { // one subtest per case, named after the rule
+			fq := &fakeQuerier{}
+			fq.add(tc.metrics...)
+			tss, err := tc.rule.Exec(context.TODO(), fq, true) // true: ask Exec to return the produced series
+			if err != nil {
+				t.Fatalf("unexpected Exec err: %s", err)
+			}
+			if err := compareTimeSeries(t, tc.expTS, tss); err != nil {
+				t.Fatalf("timeseries missmatch: %s", err)
+			}
+		})
+	}
+}
+
+func TestRecoridngRule_ToTimeSeriesNegative(t *testing.T) { // covers two cases in which Exec must return an error
+	rr := &RecordingRule{Name: "job:foo", Labels: map[string]string{
+		"job": "test",
+	}}
+
+	fq := &fakeQuerier{}
+	expErr := "connection reset by peer"
+	fq.setErr(errors.New(expErr)) // presumably makes the fake querier fail with this error - confirm in fakeQuerier
+
+	_, err := rr.Exec(context.TODO(), fq, true)
+	if err == nil {
+		t.Fatalf("expected to get err; got nil")
+	}
+	if !strings.Contains(err.Error(), expErr) { // the querier error text must be part of the Exec error
+		t.Fatalf("expected to get err %q; got %q insterad", expErr, err)
+	}
+
+	fq.reset()
+
+	// add metrics that differ only in the `job` label; the rule overrides `job`,
+	// so both end up with the same label set and Exec must report errDuplicate
+	fq.add(metricWithValueAndLabels(t, 1, "__name__", "foo", "job", "foo"))
+	fq.add(metricWithValueAndLabels(t, 2, "__name__", "foo", "job", "bar"))
+
+	_, err = rr.Exec(context.TODO(), fq, true)
+	if err == nil {
+		t.Fatalf("expected to get err; got nil")
+	}
+	if !strings.Contains(err.Error(), errDuplicate.Error()) {
+		t.Fatalf("expected to get err %q; got %q insterad", errDuplicate, err)
+	}
+}
--- a/app/vmalert/remotewrite/remotewrite.go
+++ b/app/vmalert/remotewrite/remotewrite.go
@@ -30,6 +30,7 @@ type Client struct {
 	doneCh chan struct{}
 }

+// Config is config for remote write.
 type Config struct {
 	// Addr of remote storage
 	Addr string
@@ -37,23 +38,30 @@ type Config struct {
 	BasicAuthUser string
 	BasicAuthPass string

+	// Concurrency defines number of readers that
+	// concurrently read from the queue and flush data
+	Concurrency int
 	// MaxBatchSize defines max number of timeseries
 	// to be flushed at once
 	MaxBatchSize int
 	// MaxQueueSize defines max length of input queue
-	// populated by Push method
+	// populated by Push method.
+	// Push will be rejected once queue is full.
 	MaxQueueSize int
 	// FlushInterval defines time interval for flushing batches
 	FlushInterval time.Duration
 	// WriteTimeout defines timeout for HTTP write request
 	// to remote storage
 	WriteTimeout time.Duration
+	// Transport will be used by the underlying http.Client
+	Transport *http.Transport
 }

 const (
+	defaultConcurrency   = 4
 	defaultMaxBatchSize  = 1e3
-	defaultMaxQueueSize  = 100
-	defaultFlushInterval = 5 * time.Second
+	defaultMaxQueueSize  = 1e5
+	defaultFlushInterval = time.Second
 	defaultWriteTimeout  = 30 * time.Second
 )

@@ -79,7 +87,8 @@ func NewClient(ctx context.Context, cfg Config) (*Client, error) {
 	}
 	c := &Client{
 		c: &http.Client{
-			Timeout: cfg.WriteTimeout,
+			Timeout:   cfg.WriteTimeout,
+			Transport: cfg.Transport,
 		},
 		addr:          strings.TrimSuffix(cfg.Addr, "/") + writePath,
 		baUser:        cfg.BasicAuthUser,
@@ -89,7 +98,13 @@ func NewClient(ctx context.Context, cfg Config) (*Client, error) {
 		doneCh:        make(chan struct{}),
 		input:         make(chan prompbmarshal.TimeSeries, cfg.MaxQueueSize),
 	}
-	c.run(ctx)
+	cc := defaultConcurrency
+	if cfg.Concurrency > 0 {
+		cc = cfg.Concurrency
+	}
+	for i := 0; i < cc; i++ {
+		c.run(ctx)
+	}
 	return c, nil
 }

@@ -102,7 +117,8 @@ func (c *Client) Push(s prompbmarshal.TimeSeries) error {
 	case c.input <- s:
 		return nil
 	default:
-		return fmt.Errorf("failed to push timeseries - queue is full (%d entries)",
+		return fmt.Errorf("failed to push timeseries - queue is full (%d entries). "+
+			"Queue size is controlled by -remoteWrite.maxQueueSize flag",
 			c.maxQueueSize)
 	}
 }
@@ -126,7 +142,10 @@ func (c *Client) run(ctx context.Context) {
 		for ts := range c.input {
 			wr.Timeseries = append(wr.Timeseries, ts)
 		}
-		lastCtx, cancel := context.WithTimeout(context.Background(), time.Second*10)
+		if len(wr.Timeseries) < 1 {
+			return
+		}
+		lastCtx, cancel := context.WithTimeout(context.Background(), defaultWriteTimeout)
 		c.flush(lastCtx, wr)
 		cancel()
 	}
--- a/app/vmalert/rule.go
+++ b/app/vmalert/rule.go
@@ -2,268 +2,23 @@ package main

 import (
 	"context"
-	"errors"
-	"fmt"
-	"hash/fnv"
-	"sort"
-	"strconv"
-	"sync"
-	"time"

 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
-	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
-	"github.com/VictoriaMetrics/VictoriaMetrics/lib/metricsql"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
 )

-// Group grouping array of alert
-type Group struct {
-	Name  string
-	Rules []*Rule
-}
-
-// Rule is basic alert entity
-type Rule struct {
-	Name        string            `yaml:"alert"`
-	Expr        string            `yaml:"expr"`
-	For         time.Duration     `yaml:"for"`
-	Labels      map[string]string `yaml:"labels"`
-	Annotations map[string]string `yaml:"annotations"`
-
-	group *Group
-
-	// guard status fields
-	mu sync.RWMutex
-	// stores list of active alerts
-	alerts map[uint64]*notifier.Alert
-	// stores last moment of time Exec was called
-	lastExecTime time.Time
-	// stores last error that happened in Exec func
-	// resets on every successful Exec
-	// may be used as Health state
-	lastExecError error
-}
-
-// Validate validates rule
-func (r *Rule) Validate() error {
-	if r.Name == "" {
-		return errors.New("rule name can not be empty")
-	}
-	if r.Expr == "" {
-		return fmt.Errorf("expression for rule %q can't be empty", r.Name)
-	}
-	if _, err := metricsql.Parse(r.Expr); err != nil {
-		return fmt.Errorf("invalid expression for rule %q: %w", r.Name, err)
-	}
-	return nil
-}
-
-// Exec executes Rule expression via the given Querier.
-// Based on the Querier results Rule maintains notifier.Alerts
-func (r *Rule) Exec(ctx context.Context, q datasource.Querier) error {
-	qMetrics, err := q.Query(ctx, r.Expr)
-	r.mu.Lock()
-	defer r.mu.Unlock()
-
-	r.lastExecError = err
-	r.lastExecTime = time.Now()
-	if err != nil {
-		return fmt.Errorf("failed to execute query %q: %s", r.Expr, err)
-	}
-
-	for h, a := range r.alerts {
-		// cleanup inactive alerts from previous Eval
-		if a.State == notifier.StateInactive {
-			delete(r.alerts, h)
-		}
-	}
-
-	updated := make(map[uint64]struct{})
-	// update list of active alerts
-	for _, m := range qMetrics {
-		h := hash(m)
-		updated[h] = struct{}{}
-		if _, ok := r.alerts[h]; ok {
-			continue
-		}
-		a, err := r.newAlert(m)
-		if err != nil {
-			r.lastExecError = err
-			return fmt.Errorf("failed to create alert: %s", err)
-		}
-		a.ID = h
-		a.State = notifier.StatePending
-		r.alerts[h] = a
-	}
-
-	for h, a := range r.alerts {
-		// if alert wasn't updated in this iteration
-		// means it is resolved already
-		if _, ok := updated[h]; !ok {
-			a.State = notifier.StateInactive
-			// set endTime to last execution time
-			// so it can be sent by notifier on next step
-			a.End = r.lastExecTime
-			continue
-		}
-		if a.State == notifier.StatePending && time.Since(a.Start) >= r.For {
-			a.State = notifier.StateFiring
-			alertsFired.Inc()
-		}
-		if a.State == notifier.StateFiring {
-			a.End = r.lastExecTime.Add(3 * *evaluationInterval)
-		}
-	}
-	return nil
-}
-
-// TODO: consider hashing algorithm in VM
-func hash(m datasource.Metric) uint64 {
-	hash := fnv.New64a()
-	labels := m.Labels
-	sort.Slice(labels, func(i, j int) bool {
-		return labels[i].Name < labels[j].Name
-	})
-	for _, l := range labels {
-		hash.Write([]byte(l.Name))
-		hash.Write([]byte(l.Value))
-		hash.Write([]byte("\xff"))
-	}
-	return hash.Sum64()
-}
-
-func (r *Rule) newAlert(m datasource.Metric) (*notifier.Alert, error) {
-	a := &notifier.Alert{
-		Group:  r.group.Name,
-		Name:   r.Name,
-		Labels: map[string]string{},
-		Value:  m.Value,
-		Start:  time.Now(),
-		// TODO: support End time
-	}
-
-	// 1. use data labels
-	for _, l := range m.Labels {
-		a.Labels[l.Name] = l.Value
-	}
-
-	// 2. template rule labels with data labels
-	rLabels, err := a.ExecTemplate(r.Labels)
-	if err != nil {
-		return a, err
-	}
-
-	// 3. merge data labels and rule labels
-	// metric labels may be overridden by
-	// rule labels
-	for k, v := range rLabels {
-		a.Labels[k] = v
-	}
-
-	// 4. template merged labels
-	a.Labels, err = a.ExecTemplate(a.Labels)
-	if err != nil {
-		return a, err
-	}
-
-	a.Annotations, err = a.ExecTemplate(r.Annotations)
-	return a, err
-}
-
-// AlertAPI generates APIAlert object from alert by its id(hash)
-func (r *Rule) AlertAPI(id uint64) *APIAlert {
-	r.mu.RLock()
-	defer r.mu.RUnlock()
-	a, ok := r.alerts[id]
-	if !ok {
-		return nil
-	}
-	return r.newAlertAPI(*a)
-}
-
-// AlertsAPI generates list of APIAlert objects from existing alerts
-func (r *Rule) AlertsAPI() []*APIAlert {
-	var alerts []*APIAlert
-	r.mu.RLock()
-	for _, a := range r.alerts {
-		alerts = append(alerts, r.newAlertAPI(*a))
-	}
-	r.mu.RUnlock()
-	return alerts
-}
-
-func (r *Rule) newAlertAPI(a notifier.Alert) *APIAlert {
-	return &APIAlert{
-		ID:          a.ID,
-		Name:        a.Name,
-		Group:       a.Group,
-		Expression:  r.Expr,
-		Labels:      a.Labels,
-		Annotations: a.Annotations,
-		State:       a.State.String(),
-		ActiveAt:    a.Start,
-		Value:       strconv.FormatFloat(a.Value, 'e', -1, 64),
-	}
-}
-
-const (
-	// AlertMetricName is the metric name for synthetic alert timeseries.
-	alertMetricName = "ALERTS"
-	// AlertForStateMetricName is the metric name for 'for' state of alert.
-	alertForStateMetricName = "ALERTS_FOR_STATE"
-
-	// AlertNameLabel is the label name indicating the name of an alert.
-	alertNameLabel = "alertname"
-	// AlertStateLabel is the label name indicating the state of an alert.
-	alertStateLabel = "alertstate"
-)
-
-func (r *Rule) AlertToTimeSeries(a *notifier.Alert, timestamp time.Time) []prompbmarshal.TimeSeries {
-	var tss []prompbmarshal.TimeSeries
-	tss = append(tss, alertToTimeSeries(r.Name, a, timestamp))
-	if r.For > 0 {
-		tss = append(tss, alertForToTimeSeries(r.Name, a, timestamp))
-	}
-	return tss
-}
-
-func alertToTimeSeries(name string, a *notifier.Alert, timestamp time.Time) prompbmarshal.TimeSeries {
-	labels := make(map[string]string)
-	for k, v := range a.Labels {
-		labels[k] = v
-	}
-	labels["__name__"] = alertMetricName
-	labels[alertNameLabel] = name
-	labels[alertStateLabel] = a.State.String()
-	return newTimeSeries(1, labels, timestamp)
-}
-
-func alertForToTimeSeries(name string, a *notifier.Alert, timestamp time.Time) prompbmarshal.TimeSeries {
-	labels := make(map[string]string)
-	for k, v := range a.Labels {
-		labels[k] = v
-	}
-	labels["__name__"] = alertForStateMetricName
-	labels[alertNameLabel] = name
-	return newTimeSeries(float64(a.Start.Unix()), labels, timestamp)
-}
-
-func newTimeSeries(value float64, labels map[string]string, timestamp time.Time) prompbmarshal.TimeSeries {
-	ts := prompbmarshal.TimeSeries{}
-	ts.Samples = append(ts.Samples, prompbmarshal.Sample{
-		Value:     value,
-		Timestamp: timestamp.UnixNano() / 1e6,
-	})
-	keys := make([]string, 0, len(labels))
-	for k := range labels {
-		keys = append(keys, k)
-	}
-	sort.Strings(keys)
-	for _, key := range keys {
-		ts.Labels = append(ts.Labels, prompbmarshal.Label{
-			Name:  key,
-			Value: labels[key],
-		})
-	}
-	return ts
+// Rule represents an alerting or recording rule
+// that has a unique ID, can be executed and
+// can be updated from another Rule.
+type Rule interface {
+	// ID returns a unique ID that may be used for
+	// identifying this Rule among others.
+	ID() uint64
+	// Exec executes the rule with the given context
+	// and Querier. If returnSeries is true, Exec
+	// may return TimeSeries as the result of execution.
+	Exec(ctx context.Context, q datasource.Querier, returnSeries bool) ([]prompbmarshal.TimeSeries, error)
+	// UpdateWith modifies the current Rule
+	// with the fields of the given Rule.
+	UpdateWith(Rule) error
 }
--- a/app/vmalert/rule_test.go
+++ b/app/vmalert/rule_test.go
@@ -1,399 +0,0 @@
-package main
-
-import (
-	"context"
-	"testing"
-	"time"
-
-	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
-	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
-	"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
-)
-
-func TestRule_Validate(t *testing.T) {
-	if err := (&Rule{}).Validate(); err == nil {
-		t.Errorf("exptected empty name error")
-	}
-	if err := (&Rule{Name: "alert"}).Validate(); err == nil {
-		t.Errorf("exptected empty expr error")
-	}
-	if err := (&Rule{Name: "alert", Expr: "test{"}).Validate(); err == nil {
-		t.Errorf("exptected invalid expr error")
-	}
-	if err := (&Rule{Name: "alert", Expr: "test>0"}).Validate(); err != nil {
-		t.Errorf("exptected valid rule got %s", err)
-	}
-}
-
-func TestRule_AlertToTimeSeries(t *testing.T) {
-	timestamp := time.Now()
-	testCases := []struct {
-		rule  *Rule
-		alert *notifier.Alert
-		expTS []prompbmarshal.TimeSeries
-	}{
-		{
-			newTestRule("instant", 0),
-			&notifier.Alert{State: notifier.StateFiring},
-			[]prompbmarshal.TimeSeries{
-				newTimeSeries(1, map[string]string{
-					"__name__":      alertMetricName,
-					alertStateLabel: notifier.StateFiring.String(),
-					alertNameLabel:  "instant",
-				}, timestamp),
-			},
-		},
-		{
-			newTestRule("instant extra labels", 0),
-			&notifier.Alert{State: notifier.StateFiring, Labels: map[string]string{
-				"job":      "foo",
-				"instance": "bar",
-			}},
-			[]prompbmarshal.TimeSeries{
-				newTimeSeries(1, map[string]string{
-					"__name__":      alertMetricName,
-					alertStateLabel: notifier.StateFiring.String(),
-					alertNameLabel:  "instant extra labels",
-					"job":           "foo",
-					"instance":      "bar",
-				}, timestamp),
-			},
-		},
-		{
-			newTestRule("instant labels override", 0),
-			&notifier.Alert{State: notifier.StateFiring, Labels: map[string]string{
-				alertStateLabel: "foo",
-				"__name__":      "bar",
-			}},
-			[]prompbmarshal.TimeSeries{
-				newTimeSeries(1, map[string]string{
-					"__name__":      alertMetricName,
-					alertStateLabel: notifier.StateFiring.String(),
-					alertNameLabel:  "instant labels override",
-				}, timestamp),
-			},
-		},
-		{
-			newTestRule("for", time.Second),
-			&notifier.Alert{State: notifier.StateFiring, Start: timestamp.Add(time.Second)},
-			[]prompbmarshal.TimeSeries{
-				newTimeSeries(1, map[string]string{
-					"__name__":      alertMetricName,
-					alertStateLabel: notifier.StateFiring.String(),
-					alertNameLabel:  "for",
-				}, timestamp),
-				newTimeSeries(float64(timestamp.Add(time.Second).Unix()), map[string]string{
-					"__name__":     alertForStateMetricName,
-					alertNameLabel: "for",
-				}, timestamp),
-			},
-		},
-		{
-			newTestRule("for pending", 10*time.Second),
-			&notifier.Alert{State: notifier.StatePending, Start: timestamp.Add(time.Second)},
-			[]prompbmarshal.TimeSeries{
-				newTimeSeries(1, map[string]string{
-					"__name__":      alertMetricName,
-					alertStateLabel: notifier.StatePending.String(),
-					alertNameLabel:  "for pending",
-				}, timestamp),
-				newTimeSeries(float64(timestamp.Add(time.Second).Unix()), map[string]string{
-					"__name__":     alertForStateMetricName,
-					alertNameLabel: "for pending",
-				}, timestamp),
-			},
-		},
-	}
-	for _, tc := range testCases {
-		t.Run(tc.rule.Name, func(t *testing.T) {
-			tss := tc.rule.AlertToTimeSeries(tc.alert, timestamp)
-			if len(tc.expTS) != len(tss) {
-				t.Fatalf("expected number of timeseries %d; got %d", len(tc.expTS), len(tss))
-			}
-			for i := range tc.expTS {
-				expTS, gotTS := tc.expTS[i], tss[i]
-				if len(expTS.Samples) != len(gotTS.Samples) {
-					t.Fatalf("expected number of samples %d; got %d", len(expTS.Samples), len(gotTS.Samples))
-				}
-				for i, exp := range expTS.Samples {
-					got := gotTS.Samples[i]
-					if got.Value != exp.Value {
-						t.Errorf("expected value %.2f; got %.2f", exp.Value, got.Value)
-					}
-					if got.Timestamp != exp.Timestamp {
-						t.Errorf("expected timestamp %d; got %d", exp.Timestamp, got.Timestamp)
-					}
-				}
-				if len(expTS.Labels) != len(gotTS.Labels) {
-					t.Fatalf("expected number of labels %d; got %d", len(expTS.Labels), len(gotTS.Labels))
-				}
-				for i, exp := range expTS.Labels {
-					got := gotTS.Labels[i]
-					if got.Name != exp.Name {
-						t.Errorf("expected label name %q; got %q", exp.Name, got.Name)
-					}
-					if got.Value != exp.Value {
-						t.Errorf("expected label value %q; got %q", exp.Value, got.Value)
-					}
-				}
-			}
-		})
-	}
-}
-
-func newTestRule(name string, waitFor time.Duration) *Rule {
-	return &Rule{Name: name, alerts: make(map[uint64]*notifier.Alert), For: waitFor}
-}
-
-func TestRule_Exec(t *testing.T) {
-	testCases := []struct {
-		rule      *Rule
-		steps     [][]datasource.Metric
-		expAlerts map[uint64]*notifier.Alert
-	}{
-		{
-			newTestRule("empty", 0),
-			[][]datasource.Metric{},
-			map[uint64]*notifier.Alert{},
-		},
-		{
-			newTestRule("single-firing", 0),
-			[][]datasource.Metric{
-				{metricWithLabels(t, "__name__", "foo")},
-			},
-			map[uint64]*notifier.Alert{
-				hash(metricWithLabels(t, "__name__", "foo")): {State: notifier.StateFiring},
-			},
-		},
-		{
-			newTestRule("single-firing=>inactive", 0),
-			[][]datasource.Metric{
-				{metricWithLabels(t, "__name__", "foo")},
-				{},
-			},
-			map[uint64]*notifier.Alert{
-				hash(metricWithLabels(t, "__name__", "foo")): {State: notifier.StateInactive},
-			},
-		},
-		{
-			newTestRule("single-firing=>inactive=>firing", 0),
-			[][]datasource.Metric{
-				{metricWithLabels(t, "__name__", "foo")},
-				{},
-				{metricWithLabels(t, "__name__", "foo")},
-			},
-			map[uint64]*notifier.Alert{
-				hash(metricWithLabels(t, "__name__", "foo")): {State: notifier.StateFiring},
-			},
-		},
-		{
-			newTestRule("single-firing=>inactive=>firing=>inactive", 0),
-			[][]datasource.Metric{
-				{metricWithLabels(t, "__name__", "foo")},
-				{},
-				{metricWithLabels(t, "__name__", "foo")},
-				{},
-			},
-			map[uint64]*notifier.Alert{
-				hash(metricWithLabels(t, "__name__", "foo")): {State: notifier.StateInactive},
-			},
-		},
-		{
-			newTestRule("single-firing=>inactive=>firing=>inactive=>empty", 0),
-			[][]datasource.Metric{
-				{metricWithLabels(t, "__name__", "foo")},
-				{},
-				{metricWithLabels(t, "__name__", "foo")},
-				{},
-				{},
-			},
-			map[uint64]*notifier.Alert{},
-		},
-		{
-			newTestRule("single-firing=>inactive=>firing=>inactive=>empty=>firing", 0),
-			[][]datasource.Metric{
-				{metricWithLabels(t, "__name__", "foo")},
-				{},
-				{metricWithLabels(t, "__name__", "foo")},
-				{},
-				{},
-				{metricWithLabels(t, "__name__", "foo")},
-			},
-			map[uint64]*notifier.Alert{
-				hash(metricWithLabels(t, "__name__", "foo")): {State: notifier.StateFiring},
-			},
-		},
-		{
-			newTestRule("multiple-firing", 0),
-			[][]datasource.Metric{
-				{
-					metricWithLabels(t, "__name__", "foo"),
-					metricWithLabels(t, "__name__", "foo1"),
-					metricWithLabels(t, "__name__", "foo2"),
-				},
-			},
-			map[uint64]*notifier.Alert{
-				hash(metricWithLabels(t, "__name__", "foo")):  {State: notifier.StateFiring},
-				hash(metricWithLabels(t, "__name__", "foo1")): {State: notifier.StateFiring},
-				hash(metricWithLabels(t, "__name__", "foo2")): {State: notifier.StateFiring},
-			},
-		},
-		{
-			newTestRule("multiple-steps-firing", 0),
-			[][]datasource.Metric{
-				{metricWithLabels(t, "__name__", "foo")},
-				{metricWithLabels(t, "__name__", "foo1")},
-				{metricWithLabels(t, "__name__", "foo2")},
-			},
-			// 1: fire first alert
-			// 2: fire second alert, set first inactive
-			// 3: fire third alert, set second inactive, delete first one
-			map[uint64]*notifier.Alert{
-				hash(metricWithLabels(t, "__name__", "foo1")): {State: notifier.StateInactive},
-				hash(metricWithLabels(t, "__name__", "foo2")): {State: notifier.StateFiring},
-			},
-		},
-		{
-			newTestRule("duplicate", 0),
-			[][]datasource.Metric{
-				{
-					// metrics with the same labelset should result in one alert
-					metricWithLabels(t, "__name__", "foo", "type", "bar"),
-					metricWithLabels(t, "type", "bar", "__name__", "foo"),
-				},
-			},
-			map[uint64]*notifier.Alert{
-				hash(metricWithLabels(t, "__name__", "foo", "type", "bar")): {State: notifier.StateFiring},
-			},
-		},
-		{
-			newTestRule("for-pending", time.Minute),
-			[][]datasource.Metric{
-				{metricWithLabels(t, "__name__", "foo")},
-			},
-			map[uint64]*notifier.Alert{
-				hash(metricWithLabels(t, "__name__", "foo")): {State: notifier.StatePending},
-			},
-		},
-		{
-			newTestRule("for-fired", time.Millisecond),
-			[][]datasource.Metric{
-				{metricWithLabels(t, "__name__", "foo")},
-				{metricWithLabels(t, "__name__", "foo")},
-			},
-			map[uint64]*notifier.Alert{
-				hash(metricWithLabels(t, "__name__", "foo")): {State: notifier.StateFiring},
-			},
-		},
-		{
-			newTestRule("for-pending=>inactive", time.Millisecond),
-			[][]datasource.Metric{
-				{metricWithLabels(t, "__name__", "foo")},
-				{metricWithLabels(t, "__name__", "foo")},
-				// empty step to reset pending alerts
-				{},
-			},
-			map[uint64]*notifier.Alert{
-				hash(metricWithLabels(t, "__name__", "foo")): {State: notifier.StateInactive},
-			},
-		},
-		{
-			newTestRule("for-pending=>firing=>inactive", time.Millisecond),
-			[][]datasource.Metric{
-				{metricWithLabels(t, "__name__", "foo")},
-				{metricWithLabels(t, "__name__", "foo")},
-				// empty step to reset pending alerts
-				{},
-			},
-			map[uint64]*notifier.Alert{
-				hash(metricWithLabels(t, "__name__", "foo")): {State: notifier.StateInactive},
-			},
-		},
-		{
-			newTestRule("for-pending=>firing=>inactive=>pending", time.Millisecond),
-			[][]datasource.Metric{
-				{metricWithLabels(t, "__name__", "foo")},
-				{metricWithLabels(t, "__name__", "foo")},
-				// empty step to reset pending alerts
-				{},
-				{metricWithLabels(t, "__name__", "foo")},
-			},
-			map[uint64]*notifier.Alert{
-				hash(metricWithLabels(t, "__name__", "foo")): {State: notifier.StatePending},
-			},
-		},
-		{
-			newTestRule("for-pending=>firing=>inactive=>pending=>firing", time.Millisecond),
-			[][]datasource.Metric{
-				{metricWithLabels(t, "__name__", "foo")},
-				{metricWithLabels(t, "__name__", "foo")},
-				// empty step to reset pending alerts
-				{},
-				{metricWithLabels(t, "__name__", "foo")},
-				{metricWithLabels(t, "__name__", "foo")},
-			},
-			map[uint64]*notifier.Alert{
-				hash(metricWithLabels(t, "__name__", "foo")): {State: notifier.StateFiring},
-			},
-		},
-	}
-	fakeGroup := &Group{Name: "TestRule_Exec"}
-	for _, tc := range testCases {
-		t.Run(tc.rule.Name, func(t *testing.T) {
-			fq := &fakeQuerier{}
-			tc.rule.group = fakeGroup
-			for _, step := range tc.steps {
-				fq.reset()
-				fq.add(t, step...)
-				if err := tc.rule.Exec(context.TODO(), fq); err != nil {
-					t.Fatalf("unexpected err: %s", err)
-				}
-				// artificial delay between applying steps
-				time.Sleep(time.Millisecond)
-			}
-			if len(tc.rule.alerts) != len(tc.expAlerts) {
-				t.Fatalf("expected %d alerts; got %d", len(tc.expAlerts), len(tc.rule.alerts))
-			}
-			for key, exp := range tc.expAlerts {
-				got, ok := tc.rule.alerts[key]
-				if !ok {
-					t.Fatalf("expected to have key %d", key)
-				}
-				if got.State != exp.State {
-					t.Fatalf("expected state %d; got %d", exp.State, got.State)
-				}
-			}
-		})
-	}
-}
-
-func metricWithLabels(t *testing.T, labels ...string) datasource.Metric {
-	t.Helper()
-	if len(labels) == 0 || len(labels)%2 != 0 {
-		t.Fatalf("expected to get even number of labels")
-	}
-	m := datasource.Metric{}
-	for i := 0; i < len(labels); i += 2 {
-		m.Labels = append(m.Labels, datasource.Label{
-			Name:  labels[i],
-			Value: labels[i+1],
-		})
-	}
-	return m
-}
-
-type fakeQuerier struct {
-	metrics []datasource.Metric
-}
-
-func (fq *fakeQuerier) reset() {
-	fq.metrics = fq.metrics[:0]
-}
-
-func (fq *fakeQuerier) add(t *testing.T, metrics ...datasource.Metric) {
-	fq.metrics = append(fq.metrics, metrics...)
-}
-
-func (fq fakeQuerier) Query(ctx context.Context, query string) ([]datasource.Metric, error) {
-	return fq.metrics, nil
-}
--- a/app/vmalert/utils.go
+++ b/app/vmalert/utils.go
@@ -0,0 +1,27 @@
+package main
+
+import (
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
+	"sort"
+	"time"
+)
+
+// newTimeSeries builds a TimeSeries that carries a single sample and the given labels.
+// The sample timestamp is stored in milliseconds; labels are emitted sorted by name,
+// since map iteration order is not deterministic.
+func newTimeSeries(value float64, labels map[string]string, timestamp time.Time) prompbmarshal.TimeSeries {
+	sample := prompbmarshal.Sample{Value: value, Timestamp: timestamp.UnixNano() / 1e6}
+	ts := prompbmarshal.TimeSeries{Samples: []prompbmarshal.Sample{sample}}
+	names := make([]string, 0, len(labels))
+	for name := range labels {
+		names = append(names, name)
+	}
+	sort.Strings(names)
+	for _, name := range names {
+		ts.Labels = append(ts.Labels, prompbmarshal.Label{
+			Name:  name,
+			Value: labels[name],
+		})
+	}
+	return ts
+}
--- a/app/vmalert/web.go
+++ b/app/vmalert/web.go
@@ -7,33 +7,23 @@ import (
 	"sort"
 	"strconv"
 	"strings"
-	"time"

 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
 )

-// APIAlert has info for an alert.
-type APIAlert struct {
-	ID          uint64            `json:"id"`
-	Name        string            `json:"name"`
-	Group       string            `json:"group"`
-	Expression  string            `json:"expression"`
-	State       string            `json:"state"`
-	Value       string            `json:"value"`
-	Labels      map[string]string `json:"labels"`
-	Annotations map[string]string `json:"annotations"`
-	ActiveAt    time.Time         `json:"activeAt"`
-}
-
 type requestHandler struct {
-	groups []Group
+	m *manager
 }

 var pathList = [][]string{
+	{"/api/v1/groups", "list all loaded groups and rules"},
 	{"/api/v1/alerts", "list all active alerts"},
-	{"/api/v1/groupName/alertID/status", "get alert status by ID"},
+	{"/api/v1/groupID/alertID/status", "get alert status by ID"},
 	// /metrics is served by httpserver by default
 	{"/metrics", "list of application metrics"},
+	{"/-/reload", "reload configuration"},
 }

 func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
@@ -45,8 +35,16 @@ func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
 			fmt.Fprintf(w, "<a href='%s'>%q</a> - %s<br/>", p, p, doc)
 		}
 		return true
+	case "/api/v1/groups":
+		resph.handle(rh.listGroups())
+		return true
 	case "/api/v1/alerts":
-		resph.handle(rh.list())
+		resph.handle(rh.listAlerts())
+		return true
+	case "/-/reload":
+		logger.Infof("api config reload was called, sending sighup")
+		procutil.SelfSIGHUP()
+		w.WriteHeader(http.StatusOK)
 		return true
 	default:
 		// /api/v1/<groupName>/<alertID>/status
@@ -58,6 +56,37 @@ func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
 	}
 }

+// listGroupsResponse is the JSON envelope returned by listGroups.
+type listGroupsResponse struct {
+	Data struct {
+		Groups []APIGroup `json:"groups"`
+	} `json:"data"`
+	Status string `json:"status"`
+}
+
+// listGroups returns all groups of the manager, sorted by name, encoded as JSON.
+func (rh *requestHandler) listGroups() ([]byte, error) {
+	rh.m.groupsMu.RLock()
+	defer rh.m.groupsMu.RUnlock()
+	lr := listGroupsResponse{Status: "success"}
+	for _, g := range rh.m.groups {
+		lr.Data.Groups = append(lr.Data.Groups, g.toAPI())
+	}
+	// sort list of groups for deterministic output
+	sort.Slice(lr.Data.Groups, func(i, j int) bool {
+		return lr.Data.Groups[i].Name < lr.Data.Groups[j].Name
+	})
+
+	b, err := json.Marshal(lr)
+	if err != nil {
+		return nil, &httpserver.ErrorWithStatusCode{
+			Err:        fmt.Errorf(`error encoding list of groups: %s`, err),
+			StatusCode: http.StatusInternalServerError,
+		}
+	}
+	return b, nil
+}
+
 type listAlertsResponse struct {
 	Data struct {
 		Alerts []*APIAlert `json:"alerts"`
@@ -65,17 +94,24 @@ type listAlertsResponse struct {
 	Status string `json:"status"`
 }

-func (rh *requestHandler) list() ([]byte, error) {
+func (rh *requestHandler) listAlerts() ([]byte, error) {
+	rh.m.groupsMu.RLock()
+	defer rh.m.groupsMu.RUnlock()
+
 	lr := listAlertsResponse{Status: "success"}
-	for _, g := range rh.groups {
+	for _, g := range rh.m.groups {
 		for _, r := range g.Rules {
-			lr.Data.Alerts = append(lr.Data.Alerts, r.AlertsAPI()...)
+			a, ok := r.(*AlertingRule)
+			if !ok {
+				continue
+			}
+			lr.Data.Alerts = append(lr.Data.Alerts, a.AlertsAPI()...)
 		}
 	}

 	// sort list of alerts for deterministic output
 	sort.Slice(lr.Data.Alerts, func(i, j int) bool {
-		return lr.Data.Alerts[i].Name < lr.Data.Alerts[j].Name
+		return lr.Data.Alerts[i].ID < lr.Data.Alerts[j].ID
 	})

 	b, err := json.Marshal(lr)
@@ -89,6 +125,9 @@ func (rh *requestHandler) list() ([]byte, error) {
 }

 func (rh *requestHandler) alert(path string) ([]byte, error) {
+	rh.m.groupsMu.RLock()
+	defer rh.m.groupsMu.RUnlock()
+
 	parts := strings.SplitN(strings.TrimPrefix(path, "/api/v1/"), "/", 3)
 	if len(parts) != 3 {
 		return nil, &httpserver.ErrorWithStatusCode{
@@ -96,29 +135,20 @@ func (rh *requestHandler) alert(path string) ([]byte, error) {
 			StatusCode: http.StatusBadRequest,
 		}
 	}
-	group := strings.TrimRight(parts[0], "/")
-	idStr := strings.TrimRight(parts[1], "/")
-	id, err := strconv.ParseUint(idStr, 10, 0)
+
+	groupID, err := uint64FromPath(parts[0])
 	if err != nil {
-		return nil, &httpserver.ErrorWithStatusCode{
-			Err:        fmt.Errorf(`cannot parse int from %q`, idStr),
-			StatusCode: http.StatusBadRequest,
-		}
+		return nil, badRequest(fmt.Errorf(`cannot parse groupID: %s`, err))
 	}
-	for _, g := range rh.groups {
-		if g.Name != group {
-			continue
-		}
-		for i := range g.Rules {
-			if apiAlert := g.Rules[i].AlertAPI(id); apiAlert != nil {
-				return json.Marshal(apiAlert)
-			}
-		}
+	alertID, err := uint64FromPath(parts[1])
+	if err != nil {
+		return nil, badRequest(fmt.Errorf(`cannot parse alertID: %s`, err))
 	}
-	return nil, &httpserver.ErrorWithStatusCode{
-		Err:        fmt.Errorf(`cannot find alert %s in %q`, idStr, group),
-		StatusCode: http.StatusNotFound,
+	resp, err := rh.m.AlertAPI(groupID, alertID)
+	if err != nil {
+		return nil, errResponse(err, http.StatusNotFound)
 	}
+	return json.Marshal(resp)
 }

 // responseHandler wrapper on http.ResponseWriter with sugar
@@ -132,3 +162,25 @@ func (w responseHandler) handle(b []byte, err error) {
 	w.Header().Set("Content-Type", "application/json")
 	w.Write(b)
 }
+
+// uint64FromPath parses a base-10 uint64 identifier from a single URL path
+// segment, ignoring any trailing slashes.
+func uint64FromPath(path string) (uint64, error) {
+	s := strings.TrimRight(path, "/")
+	// bitSize must be 64: a bitSize of 0 means the platform `uint`, which is
+	// only 32 bits wide on 32-bit builds and would reject valid 64-bit IDs.
+	return strconv.ParseUint(s, 10, 64)
+}
+
+// badRequest wraps err into an error response with the 400 Bad Request status.
+func badRequest(err error) *httpserver.ErrorWithStatusCode {
+	return errResponse(err, http.StatusBadRequest)
+}
+
+// errResponse wraps err into an error response carrying the HTTP status code sc.
+func errResponse(err error, sc int) *httpserver.ErrorWithStatusCode {
+	return &httpserver.ErrorWithStatusCode{
+		Err:        err,
+		StatusCode: sc,
+	}
+}
--- a/app/vmalert/web_test.go
+++ b/app/vmalert/web_test.go
@@ -11,18 +11,20 @@ import (
 )

 func TestHandler(t *testing.T) {
-	rule := &Rule{
+	ar := &AlertingRule{
 		Name: "alert",
 		alerts: map[uint64]*notifier.Alert{
 			0: {},
 		},
 	}
-	rh := &requestHandler{
-		groups: []Group{{
-			Name:  "group",
-			Rules: []*Rule{rule},
-		}},
+	g := &Group{
+		Name:  "group",
+		Rules: []Rule{ar},
 	}
+	m := &manager{groups: make(map[uint64]*Group)}
+	m.groups[0] = g
+	rh := &requestHandler{m: m}
+
 	getResp := func(url string, to interface{}, code int) {
 		t.Helper()
 		resp, err := http.Get(url)
@@ -52,19 +54,26 @@ func TestHandler(t *testing.T) {
 			t.Errorf("expected 1 alert got %d", length)
 		}
 	})
-	t.Run("/api/v1/group/0/status", func(t *testing.T) {
+	t.Run("/api/v1/groups", func(t *testing.T) {
+		lr := listGroupsResponse{}
+		getResp(ts.URL+"/api/v1/groups", &lr, 200)
+		if length := len(lr.Data.Groups); length != 1 {
+			t.Errorf("expected 1 group got %d", length)
+		}
+	})
+	t.Run("/api/v1/0/0/status", func(t *testing.T) {
 		alert := &APIAlert{}
-		getResp(ts.URL+"/api/v1/group/0/status", alert, 200)
-		expAlert := rule.newAlertAPI(*rule.alerts[0])
+		getResp(ts.URL+"/api/v1/0/0/status", alert, 200)
+		expAlert := ar.newAlertAPI(*ar.alerts[0])
 		if !reflect.DeepEqual(alert, expAlert) {
 			t.Errorf("expected %v is equal to %v", alert, expAlert)
 		}
 	})
-	t.Run("/api/v1/group/1/status", func(t *testing.T) {
-		getResp(ts.URL+"/api/v1/group/1/status", nil, 404)
+	t.Run("/api/v1/0/1/status", func(t *testing.T) {
+		getResp(ts.URL+"/api/v1/0/1/status", nil, 404)
 	})
-	t.Run("/api/v1/unknown-group/0/status", func(t *testing.T) {
-		getResp(ts.URL+"/api/v1/unknown-group/0/status", nil, 404)
+	t.Run("/api/v1/1/0/status", func(t *testing.T) {
+		getResp(ts.URL+"/api/v1/1/0/status", nil, 404)
 	})
 	t.Run("/", func(t *testing.T) {
 		getResp(ts.URL, nil, 200)
--- a/app/vmalert/web_types.go
+++ b/app/vmalert/web_types.go
@@ -0,0 +1,54 @@
+package main
+
+import (
+	"time"
+)
+
+// APIAlert represents the state of a notifier.Alert
+// for WEB view
+type APIAlert struct {
+	ID          string            `json:"id"`
+	Name        string            `json:"name"`
+	GroupID     string            `json:"group_id"`
+	Expression  string            `json:"expression"`
+	State       string            `json:"state"`
+	Value       string            `json:"value"`
+	Labels      map[string]string `json:"labels"`
+	Annotations map[string]string `json:"annotations"`
+	ActiveAt    time.Time         `json:"activeAt"`
+}
+
+// APIGroup represents Group for WEB view
+type APIGroup struct {
+	Name           string             `json:"name"`
+	ID             string             `json:"id"`
+	File           string             `json:"file"`
+	Interval       string             `json:"interval"`
+	Concurrency    int                `json:"concurrency"`
+	AlertingRules  []APIAlertingRule  `json:"alerting_rules"`
+	RecordingRules []APIRecordingRule `json:"recording_rules"`
+}
+
+// APIAlertingRule represents AlertingRule for WEB view
+type APIAlertingRule struct {
+	ID          string            `json:"id"`
+	Name        string            `json:"name"`
+	GroupID     string            `json:"group_id"`
+	Expression  string            `json:"expression"`
+	For         string            `json:"for"`
+	LastError   string            `json:"last_error"`
+	LastExec    time.Time         `json:"last_exec"`
+	Labels      map[string]string `json:"labels"`
+	Annotations map[string]string `json:"annotations"`
+}
+
+// APIRecordingRule represents RecordingRule for WEB view
+type APIRecordingRule struct {
+	ID         string            `json:"id"`
+	Name       string            `json:"name"`
+	GroupID    string            `json:"group_id"`
+	Expression string            `json:"expression"`
+	LastError  string            `json:"last_error"`
+	LastExec   time.Time         `json:"last_exec"`
+	Labels     map[string]string `json:"labels"`
+}
--- a/app/vmauth/Makefile
+++ b/app/vmauth/Makefile
@@ -0,0 +1,76 @@
+# All these commands must run from repository root.
+
+vmauth:
+	APP_NAME=vmauth $(MAKE) app-local
+
+vmauth-race:
+	APP_NAME=vmauth RACE=-race $(MAKE) app-local
+
+vmauth-prod:
+	APP_NAME=vmauth $(MAKE) app-via-docker
+
+vmauth-pure-prod:
+	APP_NAME=vmauth $(MAKE) app-via-docker-pure
+
+vmauth-amd64-prod:
+	APP_NAME=vmauth $(MAKE) app-via-docker-amd64
+
+vmauth-arm-prod:
+	APP_NAME=vmauth $(MAKE) app-via-docker-arm
+
+vmauth-arm64-prod:
+	APP_NAME=vmauth $(MAKE) app-via-docker-arm64
+
+vmauth-ppc64le-prod:
+	APP_NAME=vmauth $(MAKE) app-via-docker-ppc64le
+
+vmauth-386-prod:
+	APP_NAME=vmauth $(MAKE) app-via-docker-386
+
+package-vmauth:
+	APP_NAME=vmauth $(MAKE) package-via-docker
+
+package-vmauth-pure:
+	APP_NAME=vmauth $(MAKE) package-via-docker-pure
+
+package-vmauth-amd64:
+	APP_NAME=vmauth $(MAKE) package-via-docker-amd64
+
+package-vmauth-arm:
+	APP_NAME=vmauth $(MAKE) package-via-docker-arm
+
+package-vmauth-arm64:
+	APP_NAME=vmauth $(MAKE) package-via-docker-arm64
+
+package-vmauth-ppc64le:
+	APP_NAME=vmauth $(MAKE) package-via-docker-ppc64le
+
+package-vmauth-386:
+	APP_NAME=vmauth $(MAKE) package-via-docker-386
+
+publish-vmauth:
+	APP_NAME=vmauth $(MAKE) publish-via-docker
+
+run-vmauth:
+	APP_NAME=vmauth \
+	DOCKER_OPTS='-v $(shell pwd)/app/vmauth/:/app/vmauth' \
+	ARGS='-auth.config=app/vmauth/example_config.yml' \
+	$(MAKE) run-via-docker
+
+vmauth-amd64:
+	CGO_ENABLED=1 GOOS=linux GOARCH=amd64 GO111MODULE=on go build -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/vmauth-amd64 ./app/vmauth
+
+vmauth-arm:
+	CGO_ENABLED=0 GOOS=linux GOARCH=arm GO111MODULE=on go build -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/vmauth-arm ./app/vmauth
+
+vmauth-arm64:
+	CGO_ENABLED=0 GOOS=linux GOARCH=arm64 GO111MODULE=on go build -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/vmauth-arm64 ./app/vmauth
+
+vmauth-ppc64le:
+	CGO_ENABLED=0 GOOS=linux GOARCH=ppc64le GO111MODULE=on go build -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/vmauth-ppc64le ./app/vmauth
+
+vmauth-386:
+	CGO_ENABLED=0 GOOS=linux GOARCH=386 GO111MODULE=on go build -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/vmauth-386 ./app/vmauth
+
+vmauth-pure:
+	APP_NAME=vmauth $(MAKE) app-local-pure
--- a/app/vmauth/README.md
+++ b/app/vmauth/README.md
@@ -0,0 +1,139 @@
+## vmauth
+
+`vmauth` is a simple auth proxy and router for [VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics).
+It reads username and password from [Basic Auth headers](https://en.wikipedia.org/wiki/Basic_access_authentication),
+matches them against the config pointed to by the `-auth.config` command-line flag and proxies incoming HTTP requests to the configured per-user `url_prefix` on successful match.
+
+
+### Quick start
+
+Just download `vmutils-*` archive from [releases page](https://github.com/VictoriaMetrics/VictoriaMetrics/releases), unpack it
+and pass the following flag to `vmauth` binary in order to start authorizing and routing requests:
+
+```
+/path/to/vmauth -auth.config=/path/to/auth/config.yml
+```
+
+After that `vmauth` starts accepting HTTP requests on port `8427` and routing them according to the provided [-auth.config](#auth-config).
+The port can be modified via `-httpListenAddr` command-line flag.
+
+The auth config can be reloaded by passing `SIGHUP` signal to `vmauth`.
+
+Docker images for `vmauth` are available [here](https://hub.docker.com/r/victoriametrics/vmauth/tags).
+
+Pass `-help` to `vmauth` in order to see all the supported command-line flags with their descriptions.
+
+Feel free to [contact us](mailto:info@victoriametrics.com) if you need a customized auth proxy for VictoriaMetrics with support for LDAP, SSO, RBAC, SAML, accounting, limits, etc.
+
+
+### Auth config
+
+Auth config is represented in the following simple `yml` format:
+
+```yml
+
+# Arbitrary number of usernames may be put here.
+# Usernames must be unique.
+
+users:
+
+  # The user for querying local single-node VictoriaMetrics.
+  # All the requests to http://vmauth:8427 with the given Basic Auth (username:password)
+  # will be routed to http://localhost:8428 .
+  # For example, http://vmauth:8427/api/v1/query is routed to http://localhost:8428/api/v1/query
+- username: "local-single-node"
+  password: "***"
+  url_prefix: "http://localhost:8428"
+
+  # The user for querying account 123 in VictoriaMetrics cluster
+  # See https://github.com/VictoriaMetrics/VictoriaMetrics/blob/cluster/README.md#url-format
+  # All the requests to http://vmauth:8427 with the given Basic Auth (username:password)
+  # will be routed to http://vmselect:8481/select/123/prometheus .
+  # For example, http://vmauth:8427/api/v1/query is routed to http://vmselect:8481/select/123/prometheus/api/v1/query
+- username: "cluster-select-account-123"
+  password: "***"
+  url_prefix: "http://vmselect:8481/select/123/prometheus"
+
+  # The user for inserting Prometheus data into VictoriaMetrics cluster under account 42
+  # See https://github.com/VictoriaMetrics/VictoriaMetrics/blob/cluster/README.md#url-format
+  # All the requests to http://vmauth:8427 with the given Basic Auth (username:password)
+  # will be routed to http://vminsert:8480/insert/42/prometheus .
+  # For example, http://vmauth:8427/api/v1/write is routed to http://vminsert:8480/insert/42/prometheus/api/v1/write
+- username: "cluster-insert-account-42"
+  password: "***"
+  url_prefix: "http://vminsert:8480/insert/42/prometheus"
+```
+
+
+### Security
+
+Do not transfer Basic Auth headers in plaintext over untrusted networks. Enable https. This can be done by passing the following `-tls*` command-line flags to `vmauth`:
+
+```
+  -tls
+    	Whether to enable TLS (aka HTTPS) for incoming requests. -tlsCertFile and -tlsKeyFile must be set if -tls is set
+  -tlsCertFile string
+    	Path to file with TLS certificate. Used only if -tls is set. Prefer ECDSA certs instead of RSA certs, since RSA certs are slow
+  -tlsKeyFile string
+    	Path to file with TLS key. Used only if -tls is set
+```
+
+Alternatively, [https termination proxy](https://en.wikipedia.org/wiki/TLS_termination_proxy) may be put in front of `vmauth`.
+
+
+### Monitoring
+
+`vmauth` exports various metrics in Prometheus exposition format at `http://vmauth-host:8427/metrics` page. It is recommended to set up regular scraping of this page
+either via [vmagent](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmagent/README.md) or via Prometheus, so the exported metrics could be analyzed later.
+
+
+### How to build from sources
+
+It is recommended to use [binary releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases) - `vmauth` is located in `vmutils-*` archives there.
+
+
+#### Development build
+
+1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.13.
+2. Run `make vmauth` from the root folder of the repository.
+   It builds `vmauth` binary and puts it into the `bin` folder.
+
+#### Production build
+
+1. [Install docker](https://docs.docker.com/install/).
+2. Run `make vmauth-prod` from the root folder of the repository.
+   It builds `vmauth-prod` binary and puts it into the `bin` folder.
+
+#### Building docker images
+
+Run `make package-vmauth`. It builds `victoriametrics/vmauth:<PKG_TAG>` docker image locally.
+`<PKG_TAG>` is auto-generated image tag, which depends on source code in the repository.
+The `<PKG_TAG>` may be manually set via `PKG_TAG=foobar make package-vmauth`.
+
+By default the image is built on top of `scratch` image. It is possible to build the package on top of any other base image
+by setting it via `<ROOT_IMAGE>` environment variable. For example, the following command builds the image on top of `alpine:3.11` image:
+
+```bash
+ROOT_IMAGE=alpine:3.11 make package-vmauth
+```
+
+
+### Profiling
+
+`vmauth` provides handlers for collecting the following [Go profiles](https://blog.golang.org/profiling-go-programs):
+
+* Memory profile. It can be collected with the following command:
+
+```bash
+curl -s http://<vmauth-host>:8427/debug/pprof/heap > mem.pprof
+```
+
+* CPU profile. It can be collected with the following command:
+
+```bash
+curl -s http://<vmauth-host>:8427/debug/pprof/profile > cpu.pprof
+```
+
+The command for collecting CPU profile waits for 30 seconds before returning.
+
+The collected profiles may be analyzed with [go tool pprof](https://github.com/google/pprof).
--- a/app/vmauth/auth_config.go
+++ b/app/vmauth/auth_config.go
@@ -0,0 +1,129 @@
+package main
+
+import (
+	"flag"
+	"fmt"
+	"io/ioutil"
+	"net/url"
+	"strings"
+	"sync"
+	"sync/atomic"
+
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
+	"github.com/VictoriaMetrics/metrics"
+	"gopkg.in/yaml.v2"
+)
+
+var (
+	authConfigPath = flag.String("auth.config", "", "Path to auth config. See https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmauth/README.md "+
+		"for details on the format of this auth config")
+)
+
+// AuthConfig represents auth config.
+type AuthConfig struct {
+	Users []UserInfo `yaml:"users"`
+}
+
+// UserInfo is user information read from authConfigPath
+type UserInfo struct {
+	Username  string `yaml:"username"`
+	Password  string `yaml:"password"`
+	URLPrefix string `yaml:"url_prefix"`
+
+	requests *metrics.Counter
+}
+
+func initAuthConfig() {
+	if len(*authConfigPath) == 0 {
+		logger.Fatalf("missing required `-auth.config` command-line flag")
+	}
+	m, err := readAuthConfig(*authConfigPath)
+	if err != nil {
+		logger.Fatalf("cannot load auth config from `-auth.config=%s`: %s", *authConfigPath, err)
+	}
+	authConfig.Store(m)
+	stopCh = make(chan struct{})
+	authConfigWG.Add(1)
+	go func() {
+		defer authConfigWG.Done()
+		authConfigReloader()
+	}()
+}
+
+func stopAuthConfig() {
+	close(stopCh)
+	authConfigWG.Wait()
+}
+
+func authConfigReloader() {
+	sighupCh := procutil.NewSighupChan()
+	for {
+		select {
+		case <-stopCh:
+			return
+		case <-sighupCh:
+			logger.Infof("SIGHUP received; loading -auth.config=%q", *authConfigPath)
+			m, err := readAuthConfig(*authConfigPath)
+			if err != nil {
+				logger.Errorf("failed to load -auth.config=%q; using the last successfully loaded config; error: %s", *authConfigPath, err)
+				continue
+			}
+			authConfig.Store(m)
+			logger.Infof("Successfully reloaded -auth.config=%q", *authConfigPath)
+		}
+	}
+}
+
+var authConfig atomic.Value
+var authConfigWG sync.WaitGroup
+var stopCh chan struct{}
+
+func readAuthConfig(path string) (map[string]*UserInfo, error) {
+	data, err := ioutil.ReadFile(path)
+	if err != nil {
+		return nil, fmt.Errorf("cannot read %q: %s", path, err)
+	}
+	m, err := parseAuthConfig(data)
+	if err != nil {
+		return nil, fmt.Errorf("cannot parse %q: %s", path, err)
+	}
+	logger.Infof("Loaded information about %d users from %q", len(m), path)
+	return m, nil
+}
+
+func parseAuthConfig(data []byte) (map[string]*UserInfo, error) {
+	var ac AuthConfig
+	if err := yaml.UnmarshalStrict(data, &ac); err != nil {
+		return nil, fmt.Errorf("cannot unmarshal AuthConfig data: %s", err)
+	}
+	uis := ac.Users
+	if len(uis) == 0 {
+		return nil, fmt.Errorf("`users` section cannot be empty in AuthConfig")
+	}
+	m := make(map[string]*UserInfo, len(uis))
+	for i := range uis {
+		ui := &uis[i]
+		if m[ui.Username] != nil {
+			return nil, fmt.Errorf("duplicate username found; username: %q", ui.Username)
+		}
+		urlPrefix := ui.URLPrefix
+		// Remove trailing '/' from urlPrefix
+		for strings.HasSuffix(urlPrefix, "/") {
+			urlPrefix = urlPrefix[:len(urlPrefix)-1]
+		}
+		// Validate urlPrefix
+		target, err := url.Parse(urlPrefix)
+		if err != nil {
+			return nil, fmt.Errorf("invalid `url_prefix: %q`: %s", urlPrefix, err)
+		}
+		if target.Scheme != "http" && target.Scheme != "https" {
+			return nil, fmt.Errorf("unsupported scheme for `url_prefix: %q`: %q; must be `http` or `https`", urlPrefix, target.Scheme)
+		}
+
+		ui.URLPrefix = urlPrefix
+		ui.requests = metrics.GetOrCreateCounter(fmt.Sprintf(`vmauth_user_requests_total{username=%q}`, ui.Username))
+		m[ui.Username] = ui
+	}
+	return m, nil
+}
--- a/app/vmauth/auth_config_test.go
+++ b/app/vmauth/auth_config_test.go
@@ -0,0 +1,112 @@
+package main
+
+import (
+	"reflect"
+	"testing"
+)
+
+func TestParseAuthConfigFailure(t *testing.T) {
+	f := func(s string) {
+		t.Helper()
+		_, err := parseAuthConfig([]byte(s))
+		if err == nil {
+			t.Fatalf("expecting non-nil error")
+		}
+	}
+
+	// Empty config
+	f(``)
+
+	// Invalid entry
+	f(`foobar`)
+	f(`foobar: baz`)
+
+	// Empty users
+	f(`users: []`)
+
+	// Missing url_prefix
+	f(`
+users:
+- username: foo
+`)
+
+	// Invalid url_prefix
+	f(`
+users:
+- username: foo
+  url_prefix: bar
+`)
+	f(`
+users:
+- username: foo
+  url_prefix: ftp://bar
+`)
+	f(`
+users:
+- username: foo
+  url_prefix: //bar
+`)
+
+	// Duplicate users
+	f(`
+users:
+- username: foo
+  url_prefix: http://foo.bar
+- username: bar
+  url_prefix: http://xxx.yyy
+- username: foo
+  url_prefix: https://sss.sss
+`)
+}
+
+func TestParseAuthConfigSuccess(t *testing.T) {
+	f := func(s string, expectedAuthConfig map[string]*UserInfo) {
+		t.Helper()
+		m, err := parseAuthConfig([]byte(s))
+		if err != nil {
+			t.Fatalf("unexpected error: %s", err)
+		}
+		removeMetrics(m)
+		if !reflect.DeepEqual(m, expectedAuthConfig) {
+			t.Fatalf("unexpected auth config\ngot\n%v\nwant\n%v", m, expectedAuthConfig)
+		}
+	}
+
+	// Single user
+	f(`
+users:
+- username: foo
+  password: bar
+  url_prefix: http://aaa:343/bbb
+`, map[string]*UserInfo{
+		"foo": {
+			Username:  "foo",
+			Password:  "bar",
+			URLPrefix: "http://aaa:343/bbb",
+		},
+	})
+
+	// Multiple users
+	f(`
+users:
+- username: foo
+  url_prefix: http://foo
+- username: bar
+  url_prefix: https://bar/x///
+`, map[string]*UserInfo{
+		"foo": {
+			Username:  "foo",
+			URLPrefix: "http://foo",
+		},
+		"bar": {
+			Username:  "bar",
+			URLPrefix: "https://bar/x",
+		},
+	})
+}
+
+func removeMetrics(m map[string]*UserInfo) {
+	for _, info := range m {
+		info.requests = nil
+	}
+}
--- a/app/vmauth/deployment/Dockerfile
+++ b/app/vmauth/deployment/Dockerfile
@@ -0,0 +1,8 @@
+ARG base_image
+FROM $base_image
+
+EXPOSE 8427
+
+ENTRYPOINT ["/vmauth-prod"]
+ARG src_binary
+COPY $src_binary ./vmauth-prod
--- a/app/vmauth/example_config.yml
+++ b/app/vmauth/example_config.yml
@@ -0,0 +1,31 @@
+# Arbitrary number of usernames may be put here.
+# Usernames must be unique.
+
+users:
+
+  # The user for querying local single-node VictoriaMetrics.
+  # All the requests to http://vmauth:8427 with the given Basic Auth (username:password)
+  # will be routed to http://localhost:8428 .
+  # For example, http://vmauth:8427/api/v1/query is routed to http://localhost:8428/api/v1/query
+- username: "local-single-node"
+  password: "***"
+  url_prefix: "http://localhost:8428"
+
+  # The user for querying account 123 in VictoriaMetrics cluster
+  # See https://github.com/VictoriaMetrics/VictoriaMetrics/blob/cluster/README.md#url-format
+  # All the requests to http://vmauth:8427 with the given Basic Auth (username:password)
+  # will be routed to http://vmselect:8481/select/123/prometheus .
+  # For example, http://vmauth:8427/api/v1/query is routed to http://vmselect:8481/select/123/prometheus/api/v1/query
+- username: "cluster-select-account-123"
+  password: "***"
+  url_prefix: "http://vmselect:8481/select/123/prometheus"
+
+  # The user for inserting Prometheus data into VictoriaMetrics cluster under account 42
+  # See https://github.com/VictoriaMetrics/VictoriaMetrics/blob/cluster/README.md#url-format
+  # All the requests to http://vmauth:8427 with the given Basic Auth (username:password)
+  # will be routed to http://vminsert:8480/insert/42/prometheus .
+  # For example, http://vmauth:8427/api/v1/write is routed to http://vminsert:8480/insert/42/prometheus/api/v1/write
+- username: "cluster-insert-account-42"
+  password: "***"
+  url_prefix: "http://vminsert:8480/insert/42/prometheus"
+
--- a/app/vmauth/main.go
+++ b/app/vmauth/main.go
@@ -0,0 +1,121 @@
+package main
+
+import (
+	"crypto/subtle"
+	"flag"
+	"fmt"
+	"net/http"
+	"net/http/httputil"
+	"net/url"
+	"os"
+	"time"
+
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/envflag"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
+)
+
+var (
+	httpListenAddr = flag.String("httpListenAddr", ":8427", "TCP address to listen for http connections")
+)
+
+// main starts the HTTP auth proxy, waits for a termination signal
+// and then gracefully shuts down the web server and the config reloader.
+func main() {
+	// Write flags and help message to stdout, since it is easier to grep or pipe.
+	flag.CommandLine.SetOutput(os.Stdout)
+	flag.Usage = usage
+	envflag.Parse()
+	buildinfo.Init()
+	logger.Init()
+	logger.Infof("starting vmauth at %q...", *httpListenAddr)
+	startTime := time.Now()
+	initAuthConfig()
+	go httpserver.Serve(*httpListenAddr, requestHandler)
+	logger.Infof("started vmauth in %.3f seconds", time.Since(startTime).Seconds())
+
+	sig := procutil.WaitForSigterm()
+	logger.Infof("received signal %s", sig)
+
+	startTime = time.Now()
+	logger.Infof("gracefully shutting down webservice at %q", *httpListenAddr)
+	if err := httpserver.Stop(*httpListenAddr); err != nil {
+		logger.Fatalf("cannot stop the webservice: %s", err)
+	}
+	logger.Infof("successfully shut down the webservice in %.3f seconds", time.Since(startTime).Seconds())
+	stopAuthConfig()
+	logger.Infof("successfully stopped vmauth in %.3f seconds", time.Since(startTime).Seconds())
+}
+
+// requestHandler authenticates the request with Basic Auth credentials against
+// the currently loaded auth config and proxies it to the matching `url_prefix`.
+//
+// It always returns true, since every request is answered here.
+func requestHandler(w http.ResponseWriter, r *http.Request) bool {
+	username, password, ok := r.BasicAuth()
+	if !ok {
+		unauthorized(w, "Missing `Authorization: Basic *` header")
+		return true
+	}
+	ac := authConfig.Load().(map[string]*UserInfo)
+	info := ac[username]
+	// Compare passwords in constant time, so the response time doesn't reveal
+	// how many leading bytes of the supplied password are correct.
+	if info == nil || subtle.ConstantTimeCompare([]byte(info.Password), []byte(password)) != 1 {
+		unauthorized(w, fmt.Sprintf("Cannot find the provided username %q or password in config", username))
+		return true
+	}
+	info.requests.Inc()
+
+	targetURL := createTargetURL(info.URLPrefix, r.URL)
+	if _, err := url.Parse(targetURL); err != nil {
+		httpserver.Errorf(w, "Invalid targetURL=%q: %s", targetURL, err)
+		return true
+	}
+	// The target URL is handed over to reverseProxy.Director via a request header.
+	r.Header.Set("vm-target-url", targetURL)
+	reverseProxy.ServeHTTP(w, r)
+	return true
+}
+
+// unauthorized replies with `401 Unauthorized` and a Basic Auth challenge,
+// so HTTP clients know that they must (re)send credentials.
+func unauthorized(w http.ResponseWriter, msg string) {
+	w.Header().Set("WWW-Authenticate", `Basic realm="Restricted"`)
+	http.Error(w, msg, http.StatusUnauthorized)
+}
+
+var reverseProxy = &httputil.ReverseProxy{
+	Director: func(r *http.Request) {
+		targetURL := r.Header.Get("vm-target-url")
+		target, err := url.Parse(targetURL)
+		if err != nil {
+			logger.Panicf("BUG: unexpected error when parsing targetURL=%q: %s", targetURL, err)
+		}
+		r.URL = target
+	},
+	Transport: func() *http.Transport {
+		tr := http.DefaultTransport.(*http.Transport).Clone()
+		// Automatic compression must be disabled in order to fix https://github.com/VictoriaMetrics/VictoriaMetrics/issues/535
+		tr.DisableCompression = true
+		// Disable HTTP/2.0, since VictoriaMetrics components don't support HTTP/2.0 (because there is no sense in this).
+		tr.ForceAttemptHTTP2 = false
+		return tr
+	}(),
+	FlushInterval: time.Second,
+	ErrorLog:      logger.StdErrorLogger(),
+}
+
+func usage() {
+	const s = `
+vmauth authenticates and authorizes incoming requests and proxies them to VictoriaMetrics.
+
+See the docs at https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmauth/README.md .
+`
+
+	f := flag.CommandLine.Output()
+	fmt.Fprintf(f, "%s\n", s)
+	flag.PrintDefaults()
+}
--- a/app/vmauth/target_url.go
+++ b/app/vmauth/target_url.go
@@ -0,0 +1,23 @@
+package main
+
+import (
+	"net/url"
+	"path"
+	"strings"
+)
+
+// createTargetURL returns prefix joined with the cleaned request URI of u
+// (path plus query string).
+//
+// u itself is left untouched: the path is normalized on a copy, so callers
+// may keep using the original URL.
+func createTargetURL(prefix string, u *url.URL) string {
+	// Work on a shallow copy in order to avoid modifying the caller's URL.
+	uCopy := *u
+	// Prevent from attacks with using `..` in r.URL.Path
+	uCopy.Path = path.Clean(uCopy.Path)
+	if !strings.HasPrefix(uCopy.Path, "/") {
+		uCopy.Path = "/" + uCopy.Path
+	}
+	return prefix + uCopy.RequestURI()
+}
--- a/app/vmauth/target_url_test.go
+++ b/app/vmauth/target_url_test.go
@@ -0,0 +1,26 @@
+package main
+
+import (
+	"net/url"
+	"testing"
+)
+
+func TestCreateTargetURL(t *testing.T) {
+	f := func(prefix, requestURI, expectedTarget string) {
+		t.Helper()
+		u, err := url.Parse(requestURI)
+		if err != nil {
+			t.Fatalf("cannot parse %q: %s", requestURI, err)
+		}
+		target := createTargetURL(prefix, u)
+		if target != expectedTarget {
+			t.Fatalf("unexpected target; got %q; want %q", target, expectedTarget)
+		}
+	}
+	f("http://foo.bar", "", "http://foo.bar/.")
+	f("http://foo.bar", "/", "http://foo.bar/")
+	f("http://foo.bar", "a/b?c=d", "http://foo.bar/a/b?c=d")
+	f("https://sss:3894/x/y", "/z", "https://sss:3894/x/y/z")
+	f("https://sss:3894/x/y", "/../../aaa", "https://sss:3894/x/y/aaa")
+	f("https://sss:3894/x/y", "/./asd/../../aaa?a=d&s=s/../d", "https://sss:3894/x/y/aaa?a=d&s=s/../d")
+}
--- a/app/vmbackup/README.md
+++ b/app/vmbackup/README.md
@@ -19,6 +19,9 @@ Backed up data can be restored with [vmrestore](https://github.com/VictoriaMetri

 See [this article](https://medium.com/@valyala/speeding-up-backups-for-big-time-series-databases-533c1a927883) for more details.

+See also [vmbackuper](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/466) tool built on top of `vmbackup`. This tool simplifies
+creation of hourly, daily, weekly and monthly backups.
+

 ### Use cases

@@ -86,6 +89,8 @@ or from any day (`YYYYMMDD` backups). Note that hourly backup shouldn't run when

 Do not forget removing old snapshots and backups when they are no longer needed for saving storage costs.

+See also [vmbackuper tool](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/466) for automating smart backups.
+

 ### How does it work?

@@ -118,6 +123,8 @@ See [this article](https://medium.com/@valyala/speeding-up-backups-for-big-time-
 * If the backup is slow, then try setting higher value for `-concurrency` flag. This will increase the number of concurrent workers that upload data to backup storage.
 * If `vmbackup` eats all the network bandwidth, then set `-maxBytesPerSecond` to the desired value.
 * If `vmbackup` has been interrupted due to temporary error, then just restart it with the same args. It will resume the backup process.
+* Backups created from [single-node VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/README.md) cannot be restored
+  at [cluster VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/cluster/README.md) and vice versa.


 ### Advanced usage
--- a/app/vmbackup/main.go
+++ b/app/vmbackup/main.go
@@ -31,6 +31,8 @@ var (
 )

 func main() {
+	// Write flags and help message to stdout, since it is easier to grep or pipe.
+	flag.CommandLine.SetOutput(os.Stdout)
 	flag.Usage = usage
 	envflag.Parse()
 	buildinfo.Init()
--- a/app/vminsert/main.go
+++ b/app/vminsert/main.go
@@ -5,6 +5,7 @@ import (
 	"fmt"
 	"net/http"
 	"strings"
+	"sync/atomic"

 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/csvimport"
 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/graphite"
@@ -19,6 +20,7 @@ import (
 	influxserver "github.com/VictoriaMetrics/VictoriaMetrics/lib/ingestserver/influx"
 	opentsdbserver "github.com/VictoriaMetrics/VictoriaMetrics/lib/ingestserver/opentsdb"
 	opentsdbhttpserver "github.com/VictoriaMetrics/VictoriaMetrics/lib/ingestserver/opentsdbhttp"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
@@ -130,6 +132,11 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
 		w.Header().Set("Content-Type", "text/plain")
 		promscrape.WriteHumanReadableTargetsStatus(w)
 		return true
+	case "/-/reload":
+		promscrapeConfigReloadRequests.Inc()
+		procutil.SelfSIGHUP()
+		w.WriteHeader(http.StatusNoContent)
+		return true
 	default:
 		// This is not our link
 		return false
@@ -152,4 +159,16 @@ var (
 	influxQueryRequests = metrics.NewCounter(`vm_http_requests_total{path="/query", protocol="influx"}`)

 	promscrapeTargetsRequests = metrics.NewCounter(`vm_http_requests_total{path="/targets"}`)
+
+	promscrapeConfigReloadRequests = metrics.NewCounter(`vm_http_requests_total{path="/-/reload"}`)
+
+	_ = metrics.NewGauge(`vm_metrics_with_dropped_labels_total`, func() float64 {
+		return float64(atomic.LoadUint64(&storage.MetricsWithDroppedLabels))
+	})
+	_ = metrics.NewGauge(`vm_too_long_label_names_total`, func() float64 {
+		return float64(atomic.LoadUint64(&storage.TooLongLabelNames))
+	})
+	_ = metrics.NewGauge(`vm_too_long_label_values_total`, func() float64 {
+		return float64(atomic.LoadUint64(&storage.TooLongLabelValues))
+	})
 )
--- a/app/vmrestore/README.md
+++ b/app/vmrestore/README.md
@@ -21,7 +21,8 @@ vmrestore -src=gcs://<bucket>/<path/to/backup> -storageDataPath=<local/path/to/r
 * `<local/path/to/restore>` is the path to folder where data will be restored. This folder must be passed
  to VictoriaMetrics in `-storageDataPath` command-line flag after the restore process is complete.

-The original `-storageDataPath` directory may contain old files. They will be susbstituted by the files from backup.
+The original `-storageDataPath` directory may contain old files. They will be substituted by the files from backup,
+i.e. the end result would be similar to [rsync --delete](https://askubuntu.com/questions/476041/how-do-i-make-rsync-delete-files-that-have-been-deleted-from-the-source-folder).


 ### Troubleshooting
@@ -52,7 +53,7 @@ Run `vmrestore -help` in order to see all the available options:
  -envflag.prefix string
    	Prefix for environment variables if -envflag.enable is set
  -fs.disableMmap
-    	Whether to use pread() instead of mmap() for reading data files
+    	Whether to use pread() instead of mmap() for reading data files. By default mmap() is used for 64-bit arches and pread() is used for 32-bit arches, since they cannot read data files bigger than 2^32 bytes in memory
  -loggerFormat string
    	Format for logs. Possible values: default, json (default "default")
  -loggerLevel string
@@ -68,8 +69,8 @@ Run `vmrestore -help` in order to see all the available options:
  -src string
    	Source path with backup on the remote storage. Example: gcs://bucket/path/to/backup/dir, s3://bucket/path/to/backup/dir or fs:///path/to/local/backup/dir
  -storageDataPath string
-    	Destination path where backup must be restored. VictoriaMetrics must be stopped when restoring from backup. -storageDataPath dir can be non-empty. In this case only missing data is downloaded from backup (default "victoria-metrics-data")
-  -version
+    	Destination path where backup must be restored. VictoriaMetrics must be stopped when restoring from backup. -storageDataPath dir can be non-empty. In this case the contents of -storageDataPath dir is synchronized with -src contents, i.e. it works like 'rsync --delete' (default "victoria-metrics-data")
+ -version
    	Show VictoriaMetrics version
 ```

--- a/app/vmrestore/main.go
+++ b/app/vmrestore/main.go
@@ -3,6 +3,7 @@ package main
 import (
 	"flag"
 	"fmt"
+	"os"

 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/backup/actions"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/backup/common"
@@ -16,13 +17,16 @@ var (
 	src = flag.String("src", "", "Source path with backup on the remote storage. "+
 		"Example: gcs://bucket/path/to/backup/dir, s3://bucket/path/to/backup/dir or fs:///path/to/local/backup/dir")
 	storageDataPath = flag.String("storageDataPath", "victoria-metrics-data", "Destination path where backup must be restored. "+
-		"VictoriaMetrics must be stopped when restoring from backup. -storageDataPath dir can be non-empty. In this case only missing data is downloaded from backup")
+		"VictoriaMetrics must be stopped when restoring from backup. -storageDataPath dir can be non-empty. In this case the contents of -storageDataPath dir "+
+		"is synchronized with -src contents, i.e. it works like 'rsync --delete'")
 	concurrency             = flag.Int("concurrency", 10, "The number of concurrent workers. Higher concurrency may reduce restore duration")
 	maxBytesPerSecond       = flag.Int("maxBytesPerSecond", 0, "The maximum download speed. There is no limit if it is set to 0")
 	skipBackupCompleteCheck = flag.Bool("skipBackupCompleteCheck", false, "Whether to skip checking for 'backup complete' file in -src. This may be useful for restoring from old backups, which were created without 'backup complete' file")
 )

 func main() {
+	// Write flags and help message to stdout, since it is easier to grep or pipe.
+	flag.CommandLine.SetOutput(os.Stdout)
 	flag.Usage = usage
 	envflag.Parse()
 	buildinfo.Init()
--- a/app/vmselect/netstorage/netstorage.go
+++ b/app/vmselect/netstorage/netstorage.go
@@ -7,13 +7,12 @@ import (
 	"runtime"
 	"sort"
 	"sync"
-	"sync/atomic"
 	"time"

 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmstorage"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/decimal"
-	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
 	"github.com/VictoriaMetrics/metrics"
 )
@@ -72,6 +71,50 @@ func (rss *Results) mustClose() {
 	rss.sr = nil
 }

+var timeseriesWorkCh = make(chan *timeseriesWork, gomaxprocs)
+
+type timeseriesWork struct {
+	rss    *Results
+	pts    *packedTimeseries
+	f      func(rs *Result, workerID uint)
+	doneCh chan error
+
+	rowsProcessed int
+}
+
+func init() {
+	for i := 0; i < gomaxprocs; i++ {
+		go timeseriesWorker(uint(i))
+	}
+}
+
+func timeseriesWorker(workerID uint) { // serves work items from timeseriesWorkCh; workerID is passed through to every callback
+	var rs Result // scratch Result reused across work items
+	var rsLastResetTime uint64 // fasttime.UnixTimestamp() reading taken at the last rs reset
+	for tsw := range timeseriesWorkCh {
+		rss := tsw.rss
+		if time.Until(rss.deadline.Deadline) < 0 {
+			tsw.doneCh <- fmt.Errorf("timeout exceeded during query execution: %s", rss.deadline.String())
+			continue
+		}
+		if err := tsw.pts.Unpack(&rs, rss.tr, rss.fetchData); err != nil {
+			tsw.doneCh <- fmt.Errorf("error during time series unpacking: %s", err)
+			continue
+		}
+		if len(rs.Timestamps) > 0 || !rss.fetchData { // skip the callback for empty results when data was requested
+			tsw.f(&rs, workerID)
+		}
+		tsw.rowsProcessed = len(rs.Values) // set before signalling doneCh
+		tsw.doneCh <- nil
+		currentTime := fasttime.UnixTimestamp()
+		if cap(rs.Values) > 1024*1024 && 4*len(rs.Values) < cap(rs.Values) && currentTime-rsLastResetTime > 10 {
+			// Reset rs in order to reduce memory usage after processing big time series with millions of rows.
+			rs = Result{}
+			rsLastResetTime = currentTime
+		}
+	}
+}
+
 // RunParallel runs in parallel f for all the results from rss.
 //
 // f shouldn't hold references to rs after returning.
@@ -81,72 +124,36 @@ func (rss *Results) mustClose() {
 func (rss *Results) RunParallel(f func(rs *Result, workerID uint)) error {
 	defer rss.mustClose()

-	workersCount := 1 + len(rss.packedTimeseries)/32
-	if workersCount > gomaxprocs {
-		workersCount = gomaxprocs
-	}
-	if workersCount == 0 {
-		logger.Panicf("BUG: workersCount cannot be zero")
-	}
-	workCh := make(chan *packedTimeseries, workersCount)
-	doneCh := make(chan error)
-
-	// Start workers.
-	rowsProcessedTotal := uint64(0)
-	for i := 0; i < workersCount; i++ {
-		go func(workerID uint) {
-			rs := getResult()
-			defer putResult(rs)
-			maxWorkersCount := gomaxprocs / workersCount
-
-			var err error
-			rowsProcessed := 0
-			for pts := range workCh {
-				if time.Until(rss.deadline.Deadline) < 0 {
-					err = fmt.Errorf("timeout exceeded during query execution: %s", rss.deadline.String())
-					break
-				}
-				if err = pts.Unpack(rs, rss.tr, rss.fetchData, maxWorkersCount); err != nil {
-					break
-				}
-				if len(rs.Timestamps) == 0 && rss.fetchData {
-					// Skip empty blocks.
-					continue
-				}
-				rowsProcessed += len(rs.Values)
-				f(rs, workerID)
-			}
-			atomic.AddUint64(&rowsProcessedTotal, uint64(rowsProcessed))
-			// Drain the remaining work
-			for range workCh {
-			}
-			doneCh <- err
-		}(uint(i))
-	}
-
 	// Feed workers with work.
+	tsws := make([]*timeseriesWork, len(rss.packedTimeseries))
 	for i := range rss.packedTimeseries {
-		workCh <- &rss.packedTimeseries[i]
+		tsw := &timeseriesWork{
+			rss:    rss,
+			pts:    &rss.packedTimeseries[i],
+			f:      f,
+			doneCh: make(chan error, 1),
+		}
+		timeseriesWorkCh <- tsw
+		tsws[i] = tsw
 	}
 	seriesProcessedTotal := len(rss.packedTimeseries)
 	rss.packedTimeseries = rss.packedTimeseries[:0]
-	close(workCh)

-	// Wait until workers finish.
-	var errors []error
-	for i := 0; i < workersCount; i++ {
-		if err := <-doneCh; err != nil {
-			errors = append(errors, err)
+	// Wait until work is complete.
+	var firstErr error
+	rowsProcessedTotal := 0
+	for _, tsw := range tsws {
+		if err := <-tsw.doneCh; err != nil && firstErr == nil {
+			// Return just the first error, since other errors
+			// are likely duplicates of the first error.
+			firstErr = err
 		}
+		rowsProcessedTotal += tsw.rowsProcessed
 	}
+
 	perQueryRowsProcessed.Update(float64(rowsProcessedTotal))
 	perQuerySeriesProcessed.Update(float64(seriesProcessedTotal))
-	if len(errors) > 0 {
-		// Return just the first error, since other errors
-		// is likely duplicate the first error.
-		return errors[0]
-	}
-	return nil
+	return firstErr
 }

 var perQueryRowsProcessed = metrics.NewHistogram(`vm_per_query_rows_processed_count`)
@@ -159,70 +166,74 @@ type packedTimeseries struct {
 	brs        []storage.BlockRef
 }

+var unpackWorkCh = make(chan *unpackWork, gomaxprocs)
+
+type unpackWork struct {
+	br        storage.BlockRef
+	tr        storage.TimeRange
+	fetchData bool
+	doneCh    chan error
+	sb        *sortBlock
+}
+
+func init() {
+	for i := 0; i < gomaxprocs; i++ {
+		go unpackWorker()
+	}
+}
+
+func unpackWorker() { // serves work items from unpackWorkCh
+	for upw := range unpackWorkCh {
+		sb := getSortBlock()
+		if err := sb.unpackFrom(upw.br, upw.tr, upw.fetchData); err != nil {
+			putSortBlock(sb) // hand the block back; upw.sb is not set for failed items
+			upw.doneCh <- fmt.Errorf("cannot unpack block: %s", err)
+			continue
+		}
+		upw.sb = sb // the unpacked block travels with the work item to whoever receives from doneCh
+		upw.doneCh <- nil
+	}
+}
+
 // Unpack unpacks pts to dst.
-func (pts *packedTimeseries) Unpack(dst *Result, tr storage.TimeRange, fetchData bool, maxWorkersCount int) error {
+func (pts *packedTimeseries) Unpack(dst *Result, tr storage.TimeRange, fetchData bool) error {
 	dst.reset()

 	if err := dst.MetricName.Unmarshal(bytesutil.ToUnsafeBytes(pts.metricName)); err != nil {
 		return fmt.Errorf("cannot unmarshal metricName %q: %s", pts.metricName, err)
 	}

-	workersCount := 1 + len(pts.brs)/32
-	if workersCount > maxWorkersCount {
-		workersCount = maxWorkersCount
-	}
-	if workersCount == 0 {
-		logger.Panicf("BUG: workersCount cannot be zero")
-	}
-
-	sbs := make([]*sortBlock, 0, len(pts.brs))
-	var sbsLock sync.Mutex
-
-	workCh := make(chan storage.BlockRef, workersCount)
-	doneCh := make(chan error)
-
-	// Start workers
-	for i := 0; i < workersCount; i++ {
-		go func() {
-			var err error
-			for br := range workCh {
-				sb := getSortBlock()
-				if err = sb.unpackFrom(br, tr, fetchData); err != nil {
-					break
-				}
-
-				sbsLock.Lock()
-				sbs = append(sbs, sb)
-				sbsLock.Unlock()
-			}
-
-			// Drain the remaining work
-			for range workCh {
-			}
-			doneCh <- err
-		}()
-	}
-
 	// Feed workers with work
-	for _, br := range pts.brs {
-		workCh <- br
+	upws := make([]*unpackWork, len(pts.brs))
+	for i, br := range pts.brs {
+		upw := &unpackWork{
+			br:        br,
+			tr:        tr,
+			fetchData: fetchData,
+			doneCh:    make(chan error, 1),
+		}
+		unpackWorkCh <- upw
+		upws[i] = upw
 	}
 	pts.brs = pts.brs[:0]
-	close(workCh)

-	// Wait until workers finish
-	var errors []error
-	for i := 0; i < workersCount; i++ {
-		if err := <-doneCh; err != nil {
-			errors = append(errors, err)
+	// Wait until work is complete, collecting unpacked blocks in submission order.
+	sbs := make([]*sortBlock, 0, len(upws)) // pts.brs was truncated to zero length above, so size from upws
+	var firstErr error
+	for _, upw := range upws {
+		if err := <-upw.doneCh; err != nil && firstErr == nil {
+			// Return the first error only, since other errors are likely the same.
+			firstErr = err
+		}
+		if firstErr == nil {
+			sbs = append(sbs, upw.sb)
+		} else if upw.sb != nil { // a failed work item carries no sortBlock, so there is nothing to hand back for it
+			putSortBlock(upw.sb)
 		}
 	}
-	if len(errors) > 0 {
-		// Return the first error only, since other errors are likely the same.
-		return errors[0]
+	if firstErr != nil {
+		return firstErr // NOTE(review): blocks appended to sbs before the first error are not handed back via putSortBlock
 	}
-
-	// Merge blocks
 	mergeSortBlocks(dst, sbs)
 	return nil
 }
@@ -537,25 +548,6 @@ func ProcessSearchQuery(sq *storage.SearchQuery, fetchData bool, deadline Deadli
 	return &rss, nil
 }

-func getResult() *Result {
-	v := rsPool.Get()
-	if v == nil {
-		return &Result{}
-	}
-	return v.(*Result)
-}
-
-func putResult(rs *Result) {
-	if len(rs.Values) > 8192 {
-		// Do not pool big results, since they may occupy too much memory.
-		return
-	}
-	rs.reset()
-	rsPool.Put(rs)
-}
-
-var rsPool sync.Pool
-
 func setupTfss(tagFilterss [][]storage.TagFilter) ([]*storage.TagFilters, error) {
 	tfss := make([]*storage.TagFilters, 0, len(tagFilterss))
 	for _, tagFilters := range tagFilterss {
--- a/app/vmselect/prometheus/prometheus.go
+++ b/app/vmselect/prometheus/prometheus.go
@@ -15,10 +15,11 @@ import (

 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/netstorage"
 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/promql"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
-	"github.com/VictoriaMetrics/VictoriaMetrics/lib/metricsql"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
 	"github.com/VictoriaMetrics/metrics"
+	"github.com/VictoriaMetrics/metricsql"
 	"github.com/valyala/fastjson/fastfloat"
 	"github.com/valyala/quicktemplate"
 )
@@ -395,14 +396,14 @@ func TSDBStatusHandler(startTime time.Time, w http.ResponseWriter, r *http.Reque
 	if err := r.ParseForm(); err != nil {
 		return fmt.Errorf("cannot parse form values: %s", err)
 	}
-	date := time.Now().Unix() / secsPerDay
+	date := fasttime.UnixDate()
 	dateStr := r.FormValue("date")
 	if len(dateStr) > 0 {
 		t, err := time.Parse("2006-01-02", dateStr)
 		if err != nil {
 			return fmt.Errorf("cannot parse `date` arg %q: %s", dateStr, err)
 		}
-		date = t.Unix() / secsPerDay
+		date = uint64(t.Unix()) / secsPerDay
 	}
 	topN := 10
 	topNStr := r.FormValue("topN")
@@ -419,7 +420,7 @@ func TSDBStatusHandler(startTime time.Time, w http.ResponseWriter, r *http.Reque
 		}
 		topN = n
 	}
-	status, err := netstorage.GetTSDBStatusForDate(deadline, uint64(date), topN)
+	status, err := netstorage.GetTSDBStatusForDate(deadline, date, topN)
 	if err != nil {
 		return fmt.Errorf(`cannot obtain tsdb status for date=%d, topN=%d: %s`, date, topN, err)
 	}
@@ -992,7 +993,7 @@ func getBool(r *http.Request, argKey string) bool {
 }

 func currentTime() int64 {
-	return int64(time.Now().UTC().Unix()) * 1e3
+	return int64(fasttime.UnixTimestamp() * 1000)
 }

 func getTagFilterssFromMatches(matches []string) ([][]storage.TagFilter, error) {
--- a/app/vmselect/promql/aggr.go
+++ b/app/vmselect/promql/aggr.go
@@ -8,9 +8,9 @@ import (
 	"strings"

 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
-	"github.com/VictoriaMetrics/VictoriaMetrics/lib/metricsql"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
 	"github.com/VictoriaMetrics/metrics"
+	"github.com/VictoriaMetrics/metricsql"
 	"github.com/valyala/histogram"
 )

@@ -43,6 +43,8 @@ var aggrFuncs = map[string]aggrFunc{
 	"bottomk_max":    newAggrFuncRangeTopK(maxValue, true),
 	"bottomk_avg":    newAggrFuncRangeTopK(avgValue, true),
 	"bottomk_median": newAggrFuncRangeTopK(medianValue, true),
+	"any":            newAggrFunc(aggrFuncAny),
+	"outliersk":      aggrFuncOutliersK,
 }

 type aggrFunc func(afa *aggrFuncArg) ([]*timeseries, error)
@@ -64,7 +66,7 @@ func newAggrFunc(afe func(tss []*timeseries) []*timeseries) aggrFunc {
 		if err := expectTransformArgsNum(args, 1); err != nil {
 			return nil, err
 		}
-		return aggrFuncExt(afe, args[0], &afa.ae.Modifier, false)
+		return aggrFuncExt(afe, args[0], &afa.ae.Modifier, afa.ae.Limit, false)
 	}
 }

@@ -80,7 +82,7 @@ func removeGroupTags(metricName *storage.MetricName, modifier *metricsql.Modifie
 	}
 }

-func aggrFuncExt(afe func(tss []*timeseries) []*timeseries, argOrig []*timeseries, modifier *metricsql.ModifierExpr, keepOriginal bool) ([]*timeseries, error) {
+func aggrFuncExt(afe func(tss []*timeseries) []*timeseries, argOrig []*timeseries, modifier *metricsql.ModifierExpr, maxSeries int, keepOriginal bool) ([]*timeseries, error) {
 	arg := copyTimeseriesMetricNames(argOrig)

 	// Perform grouping.
@@ -92,7 +94,13 @@ func aggrFuncExt(afe func(tss []*timeseries) []*timeseries, argOrig []*timeserie
 		if keepOriginal {
 			ts = argOrig[i]
 		}
-		m[string(bb.B)] = append(m[string(bb.B)], ts)
+		tss := m[string(bb.B)]
+		if tss == nil && maxSeries > 0 && len(m) >= maxSeries {
+			// We already reached time series limit after grouping. Skip other time series.
+			continue
+		}
+		tss = append(tss, ts)
+		m[string(bb.B)] = tss
 	}
 	bbPool.Put(bb)

@@ -112,6 +120,10 @@ func aggrFuncExt(afe func(tss []*timeseries) []*timeseries, argOrig []*timeserie
 	return rvs, nil
 }

+func aggrFuncAny(tss []*timeseries) []*timeseries {
+	return tss[:1]
+}
+
 func aggrFuncSum(tss []*timeseries) []*timeseries {
 	if len(tss) == 1 {
 		// Fast path - nothing to sum.
@@ -441,7 +453,7 @@ func aggrFuncCountValues(afa *aggrFuncArg) ([]*timeseries, error) {
 		}
 		return rvs
 	}
-	return aggrFuncExt(afe, args[1], &afa.ae.Modifier, false)
+	return aggrFuncExt(afe, args[1], &afa.ae.Modifier, afa.ae.Limit, false)
 }

 func newAggrFuncTopK(isReverse bool) aggrFunc {
@@ -468,15 +480,10 @@ func newAggrFuncTopK(isReverse bool) aggrFunc {
 			}
 			return removeNaNs(tss)
 		}
-		return aggrFuncExt(afe, args[1], &afa.ae.Modifier, true)
+		return aggrFuncExt(afe, args[1], &afa.ae.Modifier, afa.ae.Limit, true)
 	}
 }

-type tsWithValue struct {
-	ts    *timeseries
-	value float64
-}
-
 func newAggrFuncRangeTopK(f func(values []float64) float64, isReverse bool) aggrFunc {
 	return func(afa *aggrFuncArg) ([]*timeseries, error) {
 		args := afa.args
@@ -488,34 +495,42 @@ func newAggrFuncRangeTopK(f func(values []float64) float64, isReverse bool) aggr
 			return nil, err
 		}
 		afe := func(tss []*timeseries) []*timeseries {
-			maxs := make([]tsWithValue, len(tss))
-			for i, ts := range tss {
-				value := f(ts.Values)
-				maxs[i] = tsWithValue{
-					ts:    ts,
-					value: value,
-				}
-			}
-			sort.Slice(maxs, func(i, j int) bool {
-				a := maxs[i].value
-				b := maxs[j].value
-				if isReverse {
-					a, b = b, a
-				}
-				return lessWithNaNs(a, b)
-			})
-			for i := range maxs {
-				tss[i] = maxs[i].ts
-			}
-			for i, k := range ks {
-				fillNaNsAtIdx(i, k, tss)
-			}
-			return removeNaNs(tss)
+			return getRangeTopKTimeseries(tss, ks, f, isReverse)
 		}
-		return aggrFuncExt(afe, args[1], &afa.ae.Modifier, true)
+		return aggrFuncExt(afe, args[1], &afa.ae.Modifier, afa.ae.Limit, true)
 	}
 }

+func getRangeTopKTimeseries(tss []*timeseries, ks []float64, f func(values []float64) float64, isReverse bool) []*timeseries {
+	type tsWithValue struct { // pairs a series with its score f(ts.Values)
+		ts    *timeseries
+		value float64
+	}
+	maxs := make([]tsWithValue, len(tss))
+	for i, ts := range tss {
+		value := f(ts.Values) // one score per series, computed over all of its points
+		maxs[i] = tsWithValue{
+			ts:    ts,
+			value: value,
+		}
+	}
+	sort.Slice(maxs, func(i, j int) bool { // order by score via lessWithNaNs; isReverse swaps the operands
+		a := maxs[i].value
+		b := maxs[j].value
+		if isReverse {
+			a, b = b, a
+		}
+		return lessWithNaNs(a, b)
+	})
+	for i := range maxs {
+		tss[i] = maxs[i].ts // rewrite the caller's slice in sorted order
+	}
+	for i, k := range ks {
+		fillNaNsAtIdx(i, k, tss) // i is the point index, k the k value for that point; NOTE(review): presumably blanks all but k series at that point — confirm in fillNaNsAtIdx
+	}
+	return removeNaNs(tss)
+}
+
 func fillNaNsAtIdx(idx int, k float64, tss []*timeseries) {
 	if math.IsNaN(k) {
 		k = 0
@@ -577,16 +592,54 @@ func avgValue(values []float64) float64 {
 func medianValue(values []float64) float64 {
 	h := histogram.GetFast()
 	for _, v := range values {
-		if math.IsNaN(v) {
-			continue
+		if !math.IsNaN(v) {
+			h.Update(v)
 		}
-		h.Update(v)
 	}
 	value := h.Quantile(0.5)
 	histogram.PutFast(h)
 	return value
 }

+func aggrFuncOutliersK(afa *aggrFuncArg) ([]*timeseries, error) { // registered as "outliersk"; expects two args: k and the series to rank
+	args := afa.args
+	if err := expectTransformArgsNum(args, 2); err != nil {
+		return nil, err
+	}
+	ks, err := getScalar(args[0], 0) // presumably one k value per output point — confirm in getScalar
+	if err != nil {
+		return nil, err
+	}
+	afe := func(tss []*timeseries) []*timeseries {
+		// Calculate medians for each point across tss, ignoring NaN values.
+		medians := make([]float64, len(ks))
+		h := histogram.GetFast()
+		for n := range ks {
+			h.Reset()
+			for j := range tss {
+				v := tss[j].Values[n] // assumes every series has at least len(ks) points — TODO confirm
+				if !math.IsNaN(v) {
+					h.Update(v)
+				}
+			}
+			medians[n] = h.Quantile(0.5)
+		}
+		histogram.PutFast(h)
+
+		// Return topK time series with the highest sum of squared deviations from the per-point median.
+		f := func(values []float64) float64 {
+			sum2 := float64(0)
+			for n, v := range values {
+				d := v - medians[n] // NOTE(review): a NaN point (or NaN median) makes d, and therefore the whole sum, NaN
+				sum2 += d * d
+			}
+			return sum2
+		}
+		return getRangeTopKTimeseries(tss, ks, f, false)
+	}
+	return aggrFuncExt(afe, args[1], &afa.ae.Modifier, afa.ae.Limit, true)
+}
+
 func aggrFuncLimitK(afa *aggrFuncArg) ([]*timeseries, error) {
 	args := afa.args
 	if err := expectTransformArgsNum(args, 2); err != nil {
@@ -618,7 +671,7 @@ func aggrFuncLimitK(afa *aggrFuncArg) ([]*timeseries, error) {
 		}
 		return tss
 	}
-	return aggrFuncExt(afe, args[1], &afa.ae.Modifier, true)
+	return aggrFuncExt(afe, args[1], &afa.ae.Modifier, afa.ae.Limit, true)
 }

 func aggrFuncQuantile(afa *aggrFuncArg) ([]*timeseries, error) {
@@ -631,7 +684,7 @@ func aggrFuncQuantile(afa *aggrFuncArg) ([]*timeseries, error) {
 		return nil, err
 	}
 	afe := newAggrQuantileFunc(phis)
-	return aggrFuncExt(afe, args[1], &afa.ae.Modifier, false)
+	return aggrFuncExt(afe, args[1], &afa.ae.Modifier, afa.ae.Limit, false)
 }

 func aggrFuncMedian(afa *aggrFuncArg) ([]*timeseries, error) {
@@ -641,30 +694,24 @@ func aggrFuncMedian(afa *aggrFuncArg) ([]*timeseries, error) {
 	}
 	phis := evalNumber(afa.ec, 0.5)[0].Values
 	afe := newAggrQuantileFunc(phis)
-	return aggrFuncExt(afe, args[0], &afa.ae.Modifier, false)
+	return aggrFuncExt(afe, args[0], &afa.ae.Modifier, afa.ae.Limit, false)
 }

 func newAggrQuantileFunc(phis []float64) func(tss []*timeseries) []*timeseries {
 	return func(tss []*timeseries) []*timeseries {
 		dst := tss[0]
+		h := histogram.GetFast()
+		defer histogram.PutFast(h)
 		for n := range dst.Values {
-			sort.Slice(tss, func(i, j int) bool {
-				a := tss[i].Values[n]
-				b := tss[j].Values[n]
-				return lessWithNaNs(a, b)
-			})
+			h.Reset()
+			for j := range tss {
+				v := tss[j].Values[n]
+				if !math.IsNaN(v) {
+					h.Update(v)
+				}
+			}
 			phi := phis[n]
-			if math.IsNaN(phi) {
-				phi = 1
-			}
-			if phi < 0 {
-				phi = 0
-			}
-			if phi > 1 {
-				phi = 1
-			}
-			idx := int(math.Round(float64(len(tss)-1) * phi))
-			dst.Values[n] = tss[idx].Values[n]
+			dst.Values[n] = h.Quantile(phi)
 		}
 		tss[0] = dst
 		return tss[:1]
--- a/app/vmselect/promql/aggr_incremental.go
+++ b/app/vmselect/promql/aggr_incremental.go
@@ -5,7 +5,7 @@ import (
 	"strings"
 	"sync"

-	"github.com/VictoriaMetrics/VictoriaMetrics/lib/metricsql"
+	"github.com/VictoriaMetrics/metricsql"
 )

 // callbacks for optimized incremental calculations for aggregate functions
@@ -48,6 +48,11 @@ var incrementalAggrFuncCallbacksMap = map[string]*incrementalAggrFuncCallbacks{
 		mergeAggrFunc:    mergeAggrGeomean,
 		finalizeAggrFunc: finalizeAggrGeomean,
 	},
+	"any": {
+		updateAggrFunc:   updateAggrAny,
+		mergeAggrFunc:    mergeAggrAny,
+		finalizeAggrFunc: finalizeAggrCommon,
+	},
 }

 type incrementalAggrFuncContext struct {
@@ -81,6 +86,10 @@ func (iafc *incrementalAggrFuncContext) updateTimeseries(ts *timeseries, workerI
 	bb.B = marshalMetricNameSorted(bb.B[:0], &ts.MetricName)
 	iac := m[string(bb.B)]
 	if iac == nil {
+		if iafc.ae.Limit > 0 && len(m) >= iafc.ae.Limit {
+			// Skip this time series, since the limit on the number of output time series has been already reached.
+			return
+		}
 		tsAggr := &timeseries{
 			Values:     make([]float64, len(ts.Values)),
 			Timestamps: ts.Timestamps,
@@ -106,6 +115,10 @@ func (iafc *incrementalAggrFuncContext) finalizeTimeseries() []*timeseries {
 		for k, iac := range m {
 			iacGlobal := mGlobal[k]
 			if iacGlobal == nil {
+				if iafc.ae.Limit > 0 && len(mGlobal) >= iafc.ae.Limit {
+					// Skip this time series, since the limit on the number of output time series has been already reached.
+					continue
+				}
 				mGlobal[k] = iac
 				continue
 			}
@@ -450,3 +463,25 @@ func finalizeAggrGeomean(iac *incrementalAggrContext) {
 		dstValues[i] = math.Pow(dstValues[i], 1/v)
 	}
 }
+
+func updateAggrAny(iac *incrementalAggrContext, values []float64) { // keeps the first series it is given and ignores later ones
+	dstCounts := iac.values // used here as per-point "already filled" markers
+	if dstCounts[0] > 0 {
+		return // a series was already captured
+	}
+	for i := range values {
+		dstCounts[i] = 1 // mark every point as filled
+	}
+	iac.ts.Values = append(iac.ts.Values[:0], values...) // overwrite ts.Values with a copy of values
+}
+
+func mergeAggrAny(dst, src *incrementalAggrContext) { // adopts src's series only when dst has not captured one yet
+	srcValues := src.ts.Values
+	srcCounts := src.values
+	dstCounts := dst.values
+	if dstCounts[0] > 0 {
+		return // dst already holds a series; keep it
+	}
+	copy(dstCounts, srcCounts) // carry over every per-point marker, as updateAggrAny sets them all; copying only index 0 left the rest at zero
+	dst.ts.Values = append(dst.ts.Values[:0], srcValues...)
+}
--- a/app/vmselect/promql/aggr_incremental_test.go
+++ b/app/vmselect/promql/aggr_incremental_test.go
@@ -8,7 +8,7 @@ import (
 	"sync"
 	"testing"

-	"github.com/VictoriaMetrics/VictoriaMetrics/lib/metricsql"
+	"github.com/VictoriaMetrics/metricsql"
 )

 func TestIncrementalAggr(t *testing.T) {
--- a/app/vmselect/promql/binary_op.go
+++ b/app/vmselect/promql/binary_op.go
@@ -6,9 +6,9 @@ import (
 	"strings"

 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
-	"github.com/VictoriaMetrics/VictoriaMetrics/lib/metricsql"
-	"github.com/VictoriaMetrics/VictoriaMetrics/lib/metricsql/binaryop"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
+	"github.com/VictoriaMetrics/metricsql"
+	"github.com/VictoriaMetrics/metricsql/binaryop"
 )

 var binaryOpFuncs = map[string]binaryOpFunc{
@@ -206,7 +206,7 @@ func groupJoin(singleTimeseriesSide string, be *metricsql.BinaryOpExpr, rvsLeft,
 		resetMetricGroupIfRequired(be, tsLeft)
 		if len(tssRight) == 1 {
 			// Easy case - right part contains only a single matching time series.
-			tsLeft.MetricName.AddMissingTags(joinTags, &tssRight[0].MetricName)
+			tsLeft.MetricName.SetTags(joinTags, &tssRight[0].MetricName)
 			rvsLeft = append(rvsLeft, tsLeft)
 			rvsRight = append(rvsRight, tssRight[0])
 			continue
@@ -225,7 +225,7 @@ func groupJoin(singleTimeseriesSide string, be *metricsql.BinaryOpExpr, rvsLeft,
 		for _, tsRight := range tssRight {
 			var tsCopy timeseries
 			tsCopy.CopyFromShallowTimestamps(tsLeft)
-			tsCopy.MetricName.AddMissingTags(joinTags, &tsRight.MetricName)
+			tsCopy.MetricName.SetTags(joinTags, &tsRight.MetricName)
 			bb.B = marshalMetricTagsSorted(bb.B[:0], &tsCopy.MetricName)
 			if tsExisting := m[string(bb.B)]; tsExisting != nil {
 				// Try merging tsExisting with tsRight if they don't overlap.
@@ -310,9 +310,22 @@ func binaryOpOr(bfa *binaryOpFuncArg) ([]*timeseries, error) {
 	for _, tss := range mLeft {
 		rvs = append(rvs, tss...)
 	}
-	for k, tss := range mRight {
-		if mLeft[k] == nil {
-			rvs = append(rvs, tss...)
+	for k, tssRight := range mRight {
+		tssLeft := mLeft[k]
+		if tssLeft == nil {
+			rvs = append(rvs, tssRight...)
+			continue
+		}
+		// Fill gaps in tssLeft with values from tssRight as Prometheus does.
+		// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/552
+		valuesRight := tssRight[0].Values
+		for _, tsLeft := range tssLeft {
+			valuesLeft := tsLeft.Values
+			for i, v := range valuesLeft {
+				if math.IsNaN(v) {
+					valuesLeft[i] = valuesRight[i]
+				}
+			}
 		}
 	}
 	return rvs, nil
--- a/app/vmselect/promql/eval.go
+++ b/app/vmselect/promql/eval.go
@@ -11,9 +11,9 @@ import (
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/memory"
-	"github.com/VictoriaMetrics/VictoriaMetrics/lib/metricsql"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
 	"github.com/VictoriaMetrics/metrics"
+	"github.com/VictoriaMetrics/metricsql"
 )

 var (
@@ -663,13 +663,21 @@ func evalRollupFuncWithMetricExpr(ec *EvalConfig, name string, rf rollupFunc,
 	pointsPerTimeseries := 1 + (ec.End-ec.Start)/ec.Step
 	timeseriesLen := rssLen
 	if iafc != nil {
-		// Incremental aggregates require hold only GOMAXPROCS timeseries in memory.
+		// Incremental aggregates require holding only GOMAXPROCS timeseries in memory.
 		timeseriesLen = runtime.GOMAXPROCS(-1)
 		if iafc.ae.Modifier.Op != "" {
-			// Increase the number of timeseries for non-empty group list: `aggr() by (something)`,
-			// since each group can have own set of time series in memory.
-			// Estimate the number of such groups is lower than 1000 :)
-			timeseriesLen *= 1000
+			if iafc.ae.Limit > 0 {
+				// There is an explicit limit on the number of output time series.
+				timeseriesLen *= iafc.ae.Limit
+			} else {
+				// Increase the number of timeseries for non-empty group list: `aggr() by (something)`,
+				// since each group can have own set of time series in memory.
+				timeseriesLen *= 1000
+			}
+		}
+		// The maximum number of output time series is limited by rssLen.
+		if timeseriesLen > rssLen {
+			timeseriesLen = rssLen
 		}
 	}
 	rollupPoints := mulNoOverflow(pointsPerTimeseries, int64(timeseriesLen*len(rcs)))
@@ -680,7 +688,7 @@ func evalRollupFuncWithMetricExpr(ec *EvalConfig, name string, rf rollupFunc,
 		return nil, fmt.Errorf("not enough memory for processing %d data points across %d time series with %d points in each time series; "+
 			"possible solutions are: reducing the number of matching time series; switching to node with more RAM; "+
 			"increasing -memory.allowedPercent; increasing `step` query arg (%gs)",
-			rollupPoints, rssLen*len(rcs), pointsPerTimeseries, float64(ec.Step)/1e3)
+			rollupPoints, timeseriesLen*len(rcs), pointsPerTimeseries, float64(ec.Step)/1e3)
 	}
 	defer rml.Put(uint64(rollupMemorySize))

--- a/app/vmselect/promql/exec.go
+++ b/app/vmselect/promql/exec.go
@@ -11,8 +11,8 @@ import (

 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/netstorage"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
-	"github.com/VictoriaMetrics/VictoriaMetrics/lib/metricsql"
 	"github.com/VictoriaMetrics/metrics"
+	"github.com/VictoriaMetrics/metricsql"
 )

 var logSlowQueryDuration = flag.Duration("search.logSlowQueryDuration", 5*time.Second, "Log queries with execution time exceeding this value. Zero disables slow query logging")
--- a/app/vmselect/promql/exec_test.go
+++ b/app/vmselect/promql/exec_test.go
@@ -1480,7 +1480,7 @@ func TestExecSuccess(t *testing.T) {
 		resultExpected := []netstorage.Result{r}
 		f(q, resultExpected)
 	})
-	t.Run(`label_replace(mismatch)`, func(t *testing.T) {
+	t.Run(`label_replace(nonexisting_src)`, func(t *testing.T) {
 		t.Parallel()
 		q := `label_replace(time(), "__name__", "x${1}y", "foo", ".+")`
 		r := netstorage.Result{
@@ -1491,6 +1491,21 @@ func TestExecSuccess(t *testing.T) {
 		resultExpected := []netstorage.Result{r}
 		f(q, resultExpected)
 	})
+	t.Run(`label_replace(mismatch)`, func(t *testing.T) {
+		t.Parallel()
+		q := `label_replace(label_set(time(), "foo", "foobar"), "__name__", "x${1}y", "foo", "bar(.+)")`
+		r := netstorage.Result{
+			MetricName: metricNameExpected,
+			Values:     []float64{1000, 1200, 1400, 1600, 1800, 2000},
+			Timestamps: timestampsExpected,
+		}
+		r.MetricName.Tags = []storage.Tag{{
+			Key:   []byte("foo"),
+			Value: []byte("foobar"),
+		}}
+		resultExpected := []netstorage.Result{r}
+		f(q, resultExpected)
+	})
 	t.Run(`label_replace(match)`, func(t *testing.T) {
 		t.Parallel()
 		q := `label_replace(time(), "__name__", "x${1}y", "foo", ".*")`
@@ -1849,6 +1864,17 @@ func TestExecSuccess(t *testing.T) {
 		resultExpected := []netstorage.Result{r}
 		f(q, resultExpected)
 	})
+	t.Run(`scalar or scalar`, func(t *testing.T) {
+		t.Parallel()
+		q := `time() > 1400 or 123`
+		r := netstorage.Result{
+			MetricName: metricNameExpected,
+			Values:     []float64{123, 123, 123, 1600, 1800, 2000},
+			Timestamps: timestampsExpected,
+		}
+		resultExpected := []netstorage.Result{r}
+		f(q, resultExpected)
+	})
 	t.Run(`timseries-with-tags unless 2`, func(t *testing.T) {
 		t.Parallel()
 		q := `label_set(time(), "foo", "bar") unless 2`
@@ -1988,25 +2014,37 @@ func TestExecSuccess(t *testing.T) {
 	})
 	t.Run(`scalar * ignoring(foo) group_right vector`, func(t *testing.T) {
 		t.Parallel()
-		q := `sort_desc(2 * ignoring(foo) group_right(a,foo) (label_set(time(), "foo", "bar") or label_set(10, "foo", "qwert")))`
+		q := `sort_desc(label_set(2, "a", "2") * ignoring(foo,a) group_right(a) (label_set(time(), "foo", "bar", "a", "1"), label_set(10, "foo", "qwert")))`
 		r1 := netstorage.Result{
 			MetricName: metricNameExpected,
 			Values:     []float64{2000, 2400, 2800, 3200, 3600, 4000},
 			Timestamps: timestampsExpected,
 		}
-		r1.MetricName.Tags = []storage.Tag{{
-			Key:   []byte("foo"),
-			Value: []byte("bar"),
-		}}
+		r1.MetricName.Tags = []storage.Tag{
+			{
+				Key:   []byte("a"),
+				Value: []byte("2"),
+			},
+			{
+				Key:   []byte("foo"),
+				Value: []byte("bar"),
+			},
+		}
 		r2 := netstorage.Result{
 			MetricName: metricNameExpected,
 			Values:     []float64{20, 20, 20, 20, 20, 20},
 			Timestamps: timestampsExpected,
 		}
-		r2.MetricName.Tags = []storage.Tag{{
-			Key:   []byte("foo"),
-			Value: []byte("qwert"),
-		}}
+		r2.MetricName.Tags = []storage.Tag{
+			{
+				Key:   []byte("a"),
+				Value: []byte("2"),
+			},
+			{
+				Key:   []byte("foo"),
+				Value: []byte("qwert"),
+			},
+		}
 		resultExpected := []netstorage.Result{r1, r2}
 		f(q, resultExpected)
 	})
@@ -2321,9 +2359,9 @@ func TestExecSuccess(t *testing.T) {
 	t.Run(`vector + vector on group_left matching`, func(t *testing.T) {
 		t.Parallel()
 		q := `sort_desc(
-			(label_set(time(), "t1", "v123", "t2", "v3") or label_set(10, "t2", "v3", "xxx", "yy"))
+			(label_set(time(), "t1", "v123", "t2", "v3"), label_set(10, "t2", "v3", "xxx", "yy"))
 			+ on (foo, t2) group_left (t1, noxxx)
-			(label_set(100, "t1", "v1") or label_set(time(), "t2", "v3", "noxxx", "aa"))
+			(label_set(100, "t1", "v1"), label_set(time(), "t2", "v3", "noxxx", "aa"))
 		)`
 		r1 := netstorage.Result{
 			MetricName: metricNameExpected,
@@ -2335,10 +2373,6 @@ func TestExecSuccess(t *testing.T) {
 				Key:   []byte("noxxx"),
 				Value: []byte("aa"),
 			},
-			{
-				Key:   []byte("t1"),
-				Value: []byte("v123"),
-			},
 			{
 				Key:   []byte("t2"),
 				Value: []byte("v3"),
@@ -3496,6 +3530,21 @@ func TestExecSuccess(t *testing.T) {
 		resultExpected := []netstorage.Result{r1, r2}
 		f(q, resultExpected)
 	})
+	t.Run(`sum(multi-vector) by (known-tag) limit 1`, func(t *testing.T) {
+		t.Parallel()
+		q := `sum(label_set(10, "foo", "bar") or label_set(time()/100, "baz", "sss")) by (foo) limit 1`
+		r := netstorage.Result{
+			MetricName: metricNameExpected,
+			Values:     []float64{10, 10, 10, 10, 10, 10},
+			Timestamps: timestampsExpected,
+		}
+		r.MetricName.Tags = []storage.Tag{{
+			Key:   []byte("foo"),
+			Value: []byte("bar"),
+		}}
+		resultExpected := []netstorage.Result{r}
+		f(q, resultExpected)
+	})
 	t.Run(`sum(multi-vector) by (known-tags)`, func(t *testing.T) {
 		t.Parallel()
 		q := `sum(label_set(10, "foo", "bar", "baz", "sss", "x", "y") or label_set(time()/100, "baz", "sss", "foo", "bar")) by (foo, baz, foo)`
@@ -3562,7 +3611,7 @@ func TestExecSuccess(t *testing.T) {
 		q := `sort(histogram_over_time(alias(label_set(rand(0)*1.3+1.1, "foo", "bar"), "xxx")[200s:5s]))`
 		r1 := netstorage.Result{
 			MetricName: metricNameExpected,
-			Values:     []float64{14, 15, 12, 13, 15, 11},
+			Values:     []float64{14, 16, 12, 13, 15, 11},
 			Timestamps: timestampsExpected,
 		}
 		r1.MetricName.Tags = []storage.Tag{
@@ -3592,7 +3641,7 @@ func TestExecSuccess(t *testing.T) {
 		}
 		r3 := netstorage.Result{
 			MetricName: metricNameExpected,
-			Values:     []float64{13, 11, 16, 19, 13, 16},
+			Values:     []float64{13, 10, 16, 19, 13, 16},
 			Timestamps: timestampsExpected,
 		}
 		r3.MetricName.Tags = []storage.Tag{
@@ -3613,7 +3662,7 @@ func TestExecSuccess(t *testing.T) {
 		q := `sort(sum(histogram_over_time(alias(label_set(rand(0)*1.3+1.1, "foo", "bar"), "xxx")[200s:5s])) by (vmrange))`
 		r1 := netstorage.Result{
 			MetricName: metricNameExpected,
-			Values:     []float64{14, 15, 12, 13, 15, 11},
+			Values:     []float64{14, 16, 12, 13, 15, 11},
 			Timestamps: timestampsExpected,
 		}
 		r1.MetricName.Tags = []storage.Tag{
@@ -3635,7 +3684,7 @@ func TestExecSuccess(t *testing.T) {
 		}
 		r3 := netstorage.Result{
 			MetricName: metricNameExpected,
-			Values:     []float64{13, 11, 16, 19, 13, 16},
+			Values:     []float64{13, 10, 16, 19, 13, 16},
 			Timestamps: timestampsExpected,
 		}
 		r3.MetricName.Tags = []storage.Tag{
@@ -3663,7 +3712,7 @@ func TestExecSuccess(t *testing.T) {
 		q := `topk_max(1, histogram_over_time(alias(label_set(rand(0)*1.3+1.1, "foo", "bar"), "xxx")[200s:5s]))`
 		r := netstorage.Result{
 			MetricName: metricNameExpected,
-			Values:     []float64{13, 11, 16, 19, 13, 16},
+			Values:     []float64{13, 10, 16, 19, 13, 16},
 			Timestamps: timestampsExpected,
 		}
 		r.MetricName.Tags = []storage.Tag{
@@ -3768,6 +3817,17 @@ func TestExecSuccess(t *testing.T) {
 		resultExpected := []netstorage.Result{r1, r2}
 		f(q, resultExpected)
 	})
+	t.Run(`any()`, func(t *testing.T) {
+		t.Parallel()
+		q := `any(label_set(10, "foo", "bar") or label_set(time()/150, "baz", "sss"))`
+		r := netstorage.Result{
+			MetricName: metricNameExpected,
+			Values:     []float64{10, 10, 10, 10, 10, 10},
+			Timestamps: timestampsExpected,
+		}
+		resultExpected := []netstorage.Result{r}
+		f(q, resultExpected)
+	})
 	t.Run(`topk(-1)`, func(t *testing.T) {
 		t.Parallel()
 		q := `sort(topk(-1, label_set(10, "foo", "bar") or label_set(time()/150, "baz", "sss")))`
@@ -4170,14 +4230,63 @@ func TestExecSuccess(t *testing.T) {
 	t.Run(`quantile(NaN)`, func(t *testing.T) {
 		t.Parallel()
 		q := `quantile(NaN, label_set(10, "foo", "bar") or label_set(time()/150, "baz", "sss"))`
+		resultExpected := []netstorage.Result{}
+		f(q, resultExpected)
+	})
+	t.Run(`outliersk(0)`, func(t *testing.T) {
+		t.Parallel()
+		q := `outliersk(0, (
+			label_set(1300, "foo", "bar"),
+			label_set(time(), "baz", "sss"),
+		))`
+		resultExpected := []netstorage.Result{}
+		f(q, resultExpected)
+	})
+	t.Run(`outliersk(1)`, func(t *testing.T) {
+		t.Parallel()
+		q := `outliersk(1, (
+			label_set(2000, "foo", "bar"),
+			label_set(time(), "baz", "sss"),
+		))`
 		r := netstorage.Result{
 			MetricName: metricNameExpected,
-			Values:     []float64{10, 10, 10, 10.666666666666666, 12, 13.333333333333334},
+			Values:     []float64{1000, 1200, 1400, 1600, 1800, 2000},
 			Timestamps: timestampsExpected,
 		}
+		r.MetricName.Tags = []storage.Tag{{
+			Key:   []byte("baz"),
+			Value: []byte("sss"),
+		}}
 		resultExpected := []netstorage.Result{r}
 		f(q, resultExpected)
 	})
+	t.Run(`outliersk(3)`, func(t *testing.T) {
+		t.Parallel()
+		q := `sort_desc(outliersk(3, (
+			label_set(1300, "foo", "bar"),
+			label_set(time(), "baz", "sss"),
+		)))`
+		r1 := netstorage.Result{
+			MetricName: metricNameExpected,
+			Values:     []float64{1000, 1200, 1400, 1600, 1800, 2000},
+			Timestamps: timestampsExpected,
+		}
+		r1.MetricName.Tags = []storage.Tag{{
+			Key:   []byte("baz"),
+			Value: []byte("sss"),
+		}}
+		r2 := netstorage.Result{
+			MetricName: metricNameExpected,
+			Values:     []float64{1300, 1300, 1300, 1300, 1300, 1300},
+			Timestamps: timestampsExpected,
+		}
+		r2.MetricName.Tags = []storage.Tag{{
+			Key:   []byte("foo"),
+			Value: []byte("bar"),
+		}}
+		resultExpected := []netstorage.Result{r1, r2}
+		f(q, resultExpected)
+	})
 	t.Run(`range_quantile(0.5)`, func(t *testing.T) {
 		t.Parallel()
 		q := `range_quantile(0.5, time())`
@@ -5505,6 +5614,8 @@ func TestExecError(t *testing.T) {
 	f(`hoeffding_bound_upper()`)
 	f(`hoeffding_bound_upper(1)`)
 	f(`hoeffding_bound_upper(0.99, foo, 1)`)
+	f(`outliersk()`)
+	f(`outliersk(1)`)

 	// Invalid argument type
 	f(`median_over_time({}, 2)`)
@@ -5544,6 +5655,7 @@ func TestExecError(t *testing.T) {
 	f(`alias(1, 2)`)
 	f(`aggr_over_time(1, 2)`)
 	f(`aggr_over_time(("foo", "bar"), 3)`)
+	f(`outliersk((label_set(1, "foo", "bar"), label_set(2, "x", "y")), 123)`)

 	// Duplicate timeseries
 	f(`(label_set(1, "foo", "bar") or label_set(2, "foo", "baz"))
--- a/app/vmselect/promql/parser.go
+++ b/app/vmselect/promql/parser.go
@@ -3,8 +3,8 @@ package promql
 import (
 	"fmt"

-	"github.com/VictoriaMetrics/VictoriaMetrics/lib/metricsql"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
+	"github.com/VictoriaMetrics/metricsql"
 )

 // IsRollup verifies whether s is a rollup with non-empty window.
--- a/app/vmselect/promql/rollup.go
+++ b/app/vmselect/promql/rollup.go
@@ -9,9 +9,9 @@ import (

 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/decimal"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
-	"github.com/VictoriaMetrics/VictoriaMetrics/lib/metricsql"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
 	"github.com/VictoriaMetrics/metrics"
+	"github.com/VictoriaMetrics/metricsql"
 	"github.com/valyala/histogram"
 )

@@ -72,6 +72,8 @@ var rollupFuncs = map[string]newRollupFunc{
 	"aggr_over_time":        newRollupFuncTwoArgs(rollupFake),
 	"hoeffding_bound_upper": newRollupHoeffdingBoundUpper,
 	"hoeffding_bound_lower": newRollupHoeffdingBoundLower,
+	"ascent_over_time":      newRollupFuncOneArg(rollupAscentOverTime),
+	"descent_over_time":     newRollupFuncOneArg(rollupDescentOverTime),

 	// `timestamp` function must return timestamp for the last datapoint on the current window
 	// in order to properly handle offset and timestamps unaligned to the current step.
@@ -116,6 +118,9 @@ var rollupAggrFuncs = map[string]rollupFunc{
 	"scrape_interval":     rollupScrapeInterval,
 	"tmin_over_time":      rollupTmin,
 	"tmax_over_time":      rollupTmax,
+	"ascent_over_time":    rollupAscentOverTime,
+	"descent_over_time":   rollupDescentOverTime,
+	"timestamp":           rollupTimestamp,
 }

 var rollupFuncsCannotAdjustWindow = map[string]bool{
@@ -138,6 +143,8 @@ var rollupFuncsCannotAdjustWindow = map[string]bool{
 	"increases_over_time": true,
 	"decreases_over_time": true,
 	"integrate":           true,
+	"ascent_over_time":    true,
+	"descent_over_time":   true,
 }

 var rollupFuncsRemoveCounterResets = map[string]bool{
@@ -1527,6 +1534,52 @@ func rollupTimestamp(rfa *rollupFuncArg) float64 {
 	return float64(timestamps[len(timestamps)-1]) / 1e3
 }

+func rollupAscentOverTime(rfa *rollupFuncArg) float64 {
+	// Returns the sum of positive deltas between adjacent values, starting from rfa.prevValue.
+	// NaNs in values need no handling here, since they must be cleaned up before calling rollup funcs.
+	values := rfa.values
+	prevValue := rfa.prevValue
+	if math.IsNaN(prevValue) {
+		if len(values) == 0 {
+			return nan
+		}
+		prevValue = values[0] // no previous value: seed with the first one and skip it below
+		values = values[1:]
+	}
+	s := float64(0)
+	for _, v := range values {
+		d := v - prevValue
+		if d > 0 {
+			s += d
+		}
+		prevValue = v
+	}
+	return s
+}
+
+func rollupDescentOverTime(rfa *rollupFuncArg) float64 {
+	// Returns the sum of drops (prevValue - v > 0) between adjacent values, starting from rfa.prevValue.
+	// NaNs in values need no handling here, since they must be cleaned up before calling rollup funcs.
+	values := rfa.values
+	prevValue := rfa.prevValue
+	if math.IsNaN(prevValue) {
+		if len(values) == 0 {
+			return nan
+		}
+		prevValue = values[0] // no previous value: seed with the first one and skip it below
+		values = values[1:]
+	}
+	s := float64(0)
+	for _, v := range values {
+		d := prevValue - v
+		if d > 0 {
+			s += d
+		}
+		prevValue = v
+	}
+	return s
+}
+
 func rollupFirst(rfa *rollupFuncArg) float64 {
 	// There is no need in handling NaNs here, since they must be cleaned up
 	// before calling rollup funcs.
--- a/app/vmselect/promql/rollup_result_cache.go
+++ b/app/vmselect/promql/rollup_result_cache.go
@@ -10,12 +10,13 @@ import (

 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/memory"
-	"github.com/VictoriaMetrics/VictoriaMetrics/lib/metricsql"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/workingsetcache"
 	"github.com/VictoriaMetrics/fastcache"
 	"github.com/VictoriaMetrics/metrics"
+	"github.com/VictoriaMetrics/metricsql"
 )

 var (
@@ -64,18 +65,18 @@ func InitRollupResultCache(cachePath string) {

 	stats := &fastcache.Stats{}
 	var statsLock sync.Mutex
-	var statsLastUpdate time.Time
+	var statsLastUpdate uint64
 	fcs := func() *fastcache.Stats {
 		statsLock.Lock()
 		defer statsLock.Unlock()

-		if time.Since(statsLastUpdate) < time.Second {
+		if fasttime.UnixTimestamp()-statsLastUpdate < 2 {
 			return stats
 		}
 		var fcs fastcache.Stats
 		c.UpdateStats(&fcs)
 		stats = &fcs
-		statsLastUpdate = time.Now()
+		statsLastUpdate = fasttime.UnixTimestamp()
 		return stats
 	}
 	if len(rollupResultCachePath) > 0 {
--- a/app/vmselect/promql/rollup_result_cache_test.go
+++ b/app/vmselect/promql/rollup_result_cache_test.go
@@ -3,8 +3,8 @@ package promql
 import (
 	"testing"

-	"github.com/VictoriaMetrics/VictoriaMetrics/lib/metricsql"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
+	"github.com/VictoriaMetrics/metricsql"
 )

 func TestRollupResultCache(t *testing.T) {
--- a/app/vmselect/promql/rollup_test.go
+++ b/app/vmselect/promql/rollup_test.go
@@ -4,7 +4,7 @@ import (
 	"math"
 	"testing"

-	"github.com/VictoriaMetrics/VictoriaMetrics/lib/metricsql"
+	"github.com/VictoriaMetrics/metricsql"
 )

 var (
@@ -389,6 +389,9 @@ func TestRollupNewRollupFuncSuccess(t *testing.T) {
 	f("ideriv", 0)
 	f("decreases_over_time", 5)
 	f("increases_over_time", 5)
+	f("ascent_over_time", 142)
+	f("descent_over_time", 231)
+	f("timestamp", 0.13)
 }

 func TestRollupNewRollupFuncError(t *testing.T) {
--- a/app/vmselect/promql/transform.go
+++ b/app/vmselect/promql/transform.go
@@ -12,8 +12,8 @@ import (

 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/decimal"
-	"github.com/VictoriaMetrics/VictoriaMetrics/lib/metricsql"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
+	"github.com/VictoriaMetrics/metricsql"
 	"github.com/valyala/histogram"
 )

@@ -1219,6 +1219,9 @@ func labelReplace(tss []*timeseries, srcLabel string, r *regexp.Regexp, dstLabel
 		mn := &ts.MetricName
 		dstValue := getDstValue(mn, dstLabel)
 		srcValue := mn.GetTagValue(srcLabel)
+		if !r.Match(srcValue) {
+			continue
+		}
 		b := r.ReplaceAll(srcValue, replacementBytes)
 		*dstValue = append((*dstValue)[:0], b...)
 		if len(b) == 0 {
--- a/app/vmstorage/main.go
+++ b/app/vmstorage/main.go
@@ -409,6 +409,16 @@ func registerStorageMetrics() {
 		return float64(m().AddRowsConcurrencyCurrent)
 	})

+	metrics.NewGauge(`vm_slow_row_inserts_total`, func() float64 {
+		return float64(m().SlowRowInserts)
+	})
+	metrics.NewGauge(`vm_slow_per_day_index_inserts_total`, func() float64 {
+		return float64(m().SlowPerDayIndexInserts)
+	})
+	metrics.NewGauge(`vm_slow_metric_name_loads_total`, func() float64 {
+		return float64(m().SlowMetricNameLoads)
+	})
+
 	metrics.NewGauge(`vm_rows{type="storage/big"}`, func() float64 {
 		return float64(tm().BigRowsCount)
 	})
@@ -452,6 +462,9 @@ func registerStorageMetrics() {
 	metrics.NewGauge(`vm_cache_entries{type="storage/hour_metric_ids"}`, func() float64 {
 		return float64(m().HourMetricIDCacheSize)
 	})
+	metrics.NewGauge(`vm_cache_entries{type="storage/next_day_metric_ids"}`, func() float64 {
+		return float64(m().NextDayMetricIDCacheSize)
+	})
 	metrics.NewGauge(`vm_cache_entries{type="storage/bigIndexBlocks"}`, func() float64 {
 		return float64(tm().BigIndexBlocksCacheSize)
 	})
@@ -492,6 +505,9 @@ func registerStorageMetrics() {
 	metrics.NewGauge(`vm_cache_size_bytes{type="storage/hour_metric_ids"}`, func() float64 {
 		return float64(m().HourMetricIDCacheSizeBytes)
 	})
+	metrics.NewGauge(`vm_cache_size_bytes{type="storage/next_day_metric_ids"}`, func() float64 {
+		return float64(m().NextDayMetricIDCacheSizeBytes)
+	})
 	metrics.NewGauge(`vm_cache_size_bytes{type="indexdb/tagFilters"}`, func() float64 {
 		return float64(idbm().TagCacheSizeBytes)
 	})
--- a/dashboards/victoriametrics.json
+++ b/dashboards/victoriametrics.json
--- a/deployment/docker/Makefile
+++ b/deployment/docker/Makefile
@@ -2,9 +2,9 @@

 DOCKER_NAMESPACE := victoriametrics

-ROOT_IMAGE ?= scratch
-CERTS_IMAGE := alpine:3.11
-GO_BUILDER_IMAGE := golang:1.14.2
+ROOT_IMAGE ?= alpine:3.12
+CERTS_IMAGE := alpine:3.12
+GO_BUILDER_IMAGE := golang:1.14.4
 BUILDER_IMAGE := local/builder:2.0.0-$(shell echo $(GO_BUILDER_IMAGE) | tr : _)
 BASE_IMAGE := local/base:1.1.1-$(shell echo $(ROOT_IMAGE) | tr : _)-$(shell echo $(CERTS_IMAGE) | tr : _)

@@ -32,7 +32,9 @@ app-via-docker: package-base package-builder
 		--env GO111MODULE=on \
 		$(DOCKER_OPTS) \
 		$(BUILDER_IMAGE) \
-		go build $(RACE) -mod=vendor -trimpath -ldflags "-s -w -extldflags '-static' $(GO_BUILDINFO)" -tags 'netgo osusergo' \
+		go build $(RACE) -mod=vendor -trimpath \
+			-ldflags "-extldflags '-static' $(GO_BUILDINFO)" \
+			-tags 'netgo osusergo nethttpomithttp2' \
 			-o bin/$(APP_NAME)$(APP_SUFFIX)-prod $(PKG_PREFIX)/app/$(APP_NAME)

 package-via-docker:
--- a/deployment/docker/docker-compose.yml
+++ b/deployment/docker/docker-compose.yml
@@ -2,7 +2,7 @@ version: '3.5'
 services:
  prometheus:
    container_name: prometheus
-    image: prom/prometheus:v2.17.2
+    image: prom/prometheus:v2.19.1
    depends_on:
      - "victoriametrics"
    ports:
@@ -35,7 +35,7 @@ services:
    restart: always
  grafana:
    container_name: grafana
-    image: grafana/grafana:6.7.2
+    image: grafana/grafana:7.0.3
    entrypoint: >
      /bin/sh -c "
      cd /var/lib/grafana &&
--- a/docs/Articles.md
+++ b/docs/Articles.md
@@ -26,9 +26,15 @@
 * [Billy: how VictoriaMetrics deals with more than 500 billion rows](https://medium.com/@valyala/billy-how-victoriametrics-deals-with-more-than-500-billion-rows-e82ff8f725da)


-## Third-party articles
+## Third-party articles and slides

 * [Better Prometheus rate() function with VictoriaMetrics](https://www.percona.com/blog/2020/02/28/better-prometheus-rate-function-with-victoriametrics/)
+* [Infrastructure monitoring with Prometheus at Zerodha](https://zerodha.tech/blog/infra-monitoring-at-zerodha/)
+* [Sismology: Iguana Solutions’ Monitoring System](https://medium.com/@IG1.com/sismology-iguana-solutions-monitoring-system-f46e4170447f)
+* [Monitoring K8S with VictoriaMetrics](https://docs.google.com/presentation/d/1g7yUyVEaAp4tPuRy-MZbPXKqJ1z78_5VKuV841aQfsg/edit)
+* [CMS monitoring R&D: Real-time monitoring and alerts](https://indico.cern.ch/event/877333/contributions/3696707/attachments/1972189/3281133/CMS_mon_RD_for_opInt.pdf)
 * [Disk usage: VictoriaMetrics vs Prometheus](https://stas.starikevich.com/posts/disk-usage-for-vm-versus-prometheus/)
 * [Benchmarking time series workloads on Apache Kudu using TSBS](https://blog.cloudera.com/benchmarking-time-series-workloads-on-apache-kudu-using-tsbs/)
 * [What are Open Source Time Series Databases?](https://www.iunera.com/kraken/fabric/time-series-database/)
+* [Evaluating performance and correctness](https://www.robustperception.io/evaluating-performance-and-correctness)
+* [Running VictoriaMetrics on Raspberry PI](https://stas.starikevich.com/posts/raspberry-pi-4-prometheus/)
--- a/docs/CaseStudies.md
+++ b/docs/CaseStudies.md
@@ -3,6 +3,9 @@
 Below are approved public case studies and talks from VictoriaMetrics users. Join our [community Slack channel](http://slack.victoriametrics.com/)
 and feel free asking for references, reviews and additional case studies from real VictoriaMetrics users there.

+See also [articles about VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/Articles).
+
+
 ## Adidas

 See [slides](https://promcon.io/2019-munich/slides/remote-write-storage-wars.pdf) and [video](https://youtu.be/OsH6gPdxR4s)
@@ -10,6 +13,20 @@ from [Remote Write Storage Wars](https://promcon.io/2019-munich/talks/remote-wri
 VictoriaMetrics is compared to Thanos, Corex and M3DB in the talk.


+## CERN
+
+The European Organization for Nuclear Research known as [CERN](https://home.cern/) uses VictoriaMetrics for real-time monitoring
+of the [CMS](https://home.cern/science/experiments/cms) detector system.
+According to [published talk](https://indico.cern.ch/event/877333/contributions/3696707/attachments/1972189/3281133/CMS_mon_RD_for_opInt.pdf)
+VictoriaMetrics is used for the following purposes as a part of "CMS Monitoring cluster":
+
+* As long-term storage for messages consumed from the [NATS messaging system](https://nats.io/). Consumed messages are pushed directly to VictoriaMetrics via HTTP protocol
+* As long-term storage for Prometheus monitoring system (30 days retention policy, there are plans to increase it up to ½ year)
+* As a data source for visualizing metrics in Grafana.
+
+R&D topic: Evaluate VictoriaMetrics vs InfluxDB for large cardinality data.
+
+
 ## COLOPL

 [COLOPL](http://www.colopl.co.jp/en/) is Japaneese Game Development company. It started using VictoriaMetrics
@@ -24,6 +41,28 @@ See [slides](https://speakerdeck.com/inletorder/monitoring-platform-with-victori
 from `Large-scale, super-load system monitoring platform built with VictoriaMetrics` talk at [Prometheus Meetup Tokyo #3](https://prometheus.connpass.com/event/157721/).


+## Zerodha
+
+[Zerodha](https://zerodha.com/) is India's largest stock broker. The monitoring team at Zerodha faced the following requirements:
+
+* Multiple K8s clusters to monitor
+* Consistent monitoring infra for each cluster across the fleet
+* Ability to handle billions of timeseries events at any point of time
+* Easier to operate and cost effective
+
+Thanos, Cortex and VictoriaMetrics were evaluated as a long-term storage for Prometheus. VictoriaMetrics has been selected due to the following reasons:
+
+* Blazing fast benchmarks for a single node setup.
+* Single binary mode. Easy to scale vertically, very less operational headache.
+* Considerable [improvements on creating Histograms](https://medium.com/@valyala/improving-histogram-usability-for-prometheus-and-grafana-bc7e5df0e350).
+* [MetricsQL](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/MetricsQL) gives us the ability to extend PromQL with more aggregation operators.
+* API is compatible with Prometheus, almost all standard PromQL queries just work out of the box.
+* Handles storage well, with periodic compaction. Makes it easy to take snapshots.
+
+See [Monitoring K8S with VictoriaMetrics](https://docs.google.com/presentation/d/1g7yUyVEaAp4tPuRy-MZbPXKqJ1z78_5VKuV841aQfsg/edit) slides,
+[video](https://youtu.be/ZJQYW-cFOms) and [Infrastructure monitoring with Prometheus at Zerodha](https://zerodha.tech/blog/infra-monitoring-at-zerodha/) blog post for more details.
+
+
 ## Wix.com

 [Wix.com](https://en.wikipedia.org/wiki/Wix.com) is the leading web development platform.
@@ -194,7 +233,7 @@ Such a scheme has the following benefits comparing to Prometheus:

 Cons are the following:

- VictoriaMetrics doesn't support replication - we run extra instance of VictoriaMetrics and Promxy in front of VictoriaMetrics pair for high availability.
+- VictoriaMetrics didn't support replication (it [supports replication now](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/cluster/README.md#replication-and-data-safety)) - we run extra instance of VictoriaMetrics and Promxy in front of VictoriaMetrics pair for high availability.
 - VictoriaMetrics stores 1 extra month for defined retention (if retention is set to N months, then VM stores N+1 months of data), but this is still better than other solutions.

 Some numbers from our single-node VictoriaMetrics setup:
--- a/docs/Cluster-VictoriaMetrics.md
+++ b/docs/Cluster-VictoriaMetrics.md
@@ -1,7 +1,7 @@
-<img alt="Victoria Metrics" src="logo.png">
-
 # Cluster version

+<img alt="Victoria Metrics" src="logo.png">
+
 VictoriaMetrics is fast, cost-effective and scalable time series database. It can be used as a long-term remote storage for Prometheus.

 It is recommended using [single-node version](https://github.com/VictoriaMetrics/VictoriaMetrics) instead of cluster version
@@ -16,8 +16,9 @@ Join [our Slack](http://slack.victoriametrics.com/) or [contact us](mailto:info@
 ## Prominent features

 - Supports all the features of [single-node version](https://github.com/VictoriaMetrics/VictoriaMetrics).
- Performance and capacity scales horizontally.
- Supports multiple independent namespaces for time series data (aka multi-tenancy).
+- Performance and capacity scale horizontally. See [these docs for details](#cluster-resizing-and-scalability).
+- Supports multiple independent namespaces for time series data (aka multi-tenancy). See [these docs for details](#multitenancy).
+- Supports replication. See [these docs for details](#replication-and-data-safety).


 ## Architecture overview
@@ -36,6 +37,26 @@ It increases cluster availability, simplifies cluster maintenance and cluster sc
 <img src="https://docs.google.com/drawings/d/e/2PACX-1vTvk2raU9kFgZ84oF-OKolrGwHaePhHRsZEcfQ1I_EC5AB_XPWwB392XshxPramLJ8E4bqptTnFn5LL/pub?w=1104&amp;h=746">


+## Multitenancy
+
+VictoriaMetrics cluster supports multiple isolated tenants (aka namespaces).
+Tenants are identified by `accountID` or `accountID:projectID`, which are put inside request urls.
+See [these docs](#url-format) for details. Some facts about tenants in VictoriaMetrics:
+
+* Each `accountID` and `projectID` is identified by an arbitrary 32-bit integer in the range `[0 .. 2^32)`.
+If `projectID` is missing, then it is automatically assigned to `0`. It is expected that other information about tenants
+such as auth tokens, tenant names, limits, accounting, etc. is stored in a separate relational database. This database must be managed
+by a separate service sitting in front of VictoriaMetrics cluster such as [vmauth](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmauth/README.md).
+[Contact us](mailto:info@victoriametrics.com) if you need help with creating such a service.
+
+* Tenants are automatically created when the first data point is written into the given tenant.
+
+* Data for all the tenants is evenly spread among available `vmstorage` nodes. This guarantees even load among `vmstorage` nodes
+when different tenants have different amounts of data and different query load.
+
+* VictoriaMetrics doesn't support querying multiple tenants in a single request.
+
+
 ## Binaries

 Compiled binaries for cluster version are available in the `assets` section of [releases page](https://github.com/VictoriaMetrics/VictoriaMetrics/releases).
@@ -91,11 +112,11 @@ Run `make package`. It will build the following docker images locally:
 `<PKG_TAG>` is auto-generated image tag, which depends on source code in the repository.
 The `<PKG_TAG>` may be manually set via `PKG_TAG=foobar make package`.

-By default images are built on top of `scratch` image. It is possible to build on top of any other base image
-by setting it via `<ROOT_IMAGE>` environment variable. For example, the following command builds images on top of `alpine:3.11` image:
+By default images are built on top of `alpine` image in order to improve debuggability. It is possible to build an image on top of any other base image
+by setting it via `<ROOT_IMAGE>` environment variable. For example, the following command builds images on top of `scratch` image:

 ```bash
-ROOT_IMAGE=alpine:3.11 make package
+ROOT_IMAGE=scratch make package
 ```

 ## Operation
@@ -111,7 +132,7 @@ A minimal cluster must contain the following nodes:
 It is recommended to run at least two nodes for each service
 for high availability purposes.

-An http load balancer must be put in front of `vminsert` and `vmselect` nodes:
+An http load balancer such as `nginx` must be put in front of `vminsert` and `vmselect` nodes:
 - requests starting with `/insert` must be routed to port `8480` on `vminsert` nodes.
 - requests starting with `/select` must be routed to port `8481` on `vmselect` nodes.

@@ -137,7 +158,8 @@ By default the following TCP ports are used:
 - `vmselect` - 8481
 - `vmstorage` - 8482

-It is recommended setting up Prometheus to scrape `/metrics` pages from all the cluster components, so they can be monitored and analyzed
+It is recommended setting up [vmagent](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmagent/README.md)
+or Prometheus to scrape `/metrics` pages from all the cluster components, so they can be monitored and analyzed
 with [the official Grafana dashboard for VictoriaMetrics cluster](https://grafana.com/grafana/dashboards/11176)
 or [an alternative dashboard for VictoriaMetrics cluster](https://grafana.com/grafana/dashboards/11831).

@@ -145,23 +167,28 @@ or [an alternative dashboard for VictoriaMetrics cluster](https://grafana.com/gr
 ### URL format

 * URLs for data ingestion: `http://<vminsert>:8480/insert/<accountID>/<suffix>`, where:
-  - `<accountID>` is an arbitrary number identifying namespace for data ingestion (aka tenant)
+  - `<accountID>` is an arbitrary 32-bit integer identifying namespace for data ingestion (aka tenant). It is possible to set it as `accountID:projectID`,
+    where `projectID` is also an arbitrary 32-bit integer. If `projectID` isn't set, then it equals `0`.
  - `<suffix>` may have the following values:
-     - `prometheus` - for inserting data with [Prometheus remote write API](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#remote_write)
-     - `influx/write` or `influx/api/v2/write` - for inserting data with [Influx line protocol](https://docs.influxdata.com/influxdb/v1.7/write_protocols/line_protocol_tutorial/)
+     - `prometheus` and `prometheus/api/v1/write` - for inserting data with [Prometheus remote write API](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#remote_write)
+     - `influx/write` and `influx/api/v2/write` - for inserting data with [Influx line protocol](https://docs.influxdata.com/influxdb/v1.7/write_protocols/line_protocol_tutorial/).
     - `opentsdb/api/put` - for accepting [OpenTSDB HTTP /api/put requests](http://opentsdb.net/docs/build/html/api_http/put.html).
+       This handler is disabled by default. It is exposed on a distinct TCP address set via `-opentsdbHTTPListenAddr` command-line flag.
+       See [these docs](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/README.md#sending-opentsdb-data-via-http-apiput-requests) for details.
     - `prometheus/api/v1/import` - for importing data obtained via `api/v1/export` on `vmselect` (see below).
+     - `prometheus/api/v1/import/csv` - for importing arbitrary CSV data. See [these docs](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/README.md#how-to-import-csv-data) for details.

 * URLs for querying: `http://<vmselect>:8481/select/<accountID>/prometheus/<suffix>`, where:
  - `<accountID>` is an arbitrary number identifying data namespace for the query (aka tenant)
  - `<suffix>` may have the following values:
-    - `api/v1/query` - performs [PromQL instant query](https://prometheus.io/docs/prometheus/latest/querying/api/#instant-queries)
-    - `api/v1/query_range` - performs [PromQL range query](https://prometheus.io/docs/prometheus/latest/querying/api/#range-queries)
-    - `api/v1/series` - performs [series query](https://prometheus.io/docs/prometheus/latest/querying/api/#finding-series-by-label-matchers)
-    - `api/v1/labels` - returns a [list of label names](https://prometheus.io/docs/prometheus/latest/querying/api/#getting-label-names)
-    - `api/v1/label/<label_name>/values` - returns values for the given `<label_name>` according [to API](https://prometheus.io/docs/prometheus/latest/querying/api/#querying-label-values)
-    - `federate` - returns [federated metrics](https://prometheus.io/docs/prometheus/latest/federation/)
-    - `api/v1/export` - exports raw data. See [this article](https://medium.com/@valyala/analyzing-prometheus-data-with-external-tools-5f3e5e147639) for details
+    - `api/v1/query` - performs [PromQL instant query](https://prometheus.io/docs/prometheus/latest/querying/api/#instant-queries).
+    - `api/v1/query_range` - performs [PromQL range query](https://prometheus.io/docs/prometheus/latest/querying/api/#range-queries).
+    - `api/v1/series` - performs [series query](https://prometheus.io/docs/prometheus/latest/querying/api/#finding-series-by-label-matchers).
+    - `api/v1/labels` - returns a [list of label names](https://prometheus.io/docs/prometheus/latest/querying/api/#getting-label-names).
+    - `api/v1/label/<label_name>/values` - returns values for the given `<label_name>` according [to API](https://prometheus.io/docs/prometheus/latest/querying/api/#querying-label-values).
+    - `federate` - returns [federated metrics](https://prometheus.io/docs/prometheus/latest/federation/).
+    - `api/v1/export` - exports raw data. See [this article](https://medium.com/@valyala/analyzing-prometheus-data-with-external-tools-5f3e5e147639) for details.
+    - `api/v1/status/tsdb` - for time series stats. See [these docs](https://prometheus.io/docs/prometheus/latest/querying/api/#tsdb-stats) for details.

 * URL for time series deletion: `http://<vmselect>:8481/delete/<accountID>/prometheus/api/v1/admin/tsdb/delete_series?match[]=<timeseries_selector_for_delete>`.
  Note that the `delete_series` handler should be used only in exceptional cases such as deletion of accidentally ingested incorrect time series. It shouldn't
@@ -179,7 +206,7 @@ or [an alternative dashboard for VictoriaMetrics cluster](https://grafana.com/gr
  across `vmstorage` nodes.


-### Cluster resizing and scalability.
+### Cluster resizing and scalability

 Cluster performance and capacity scales with adding new nodes.

@@ -198,15 +225,6 @@ Steps to add `vmstorage` node:
 3. Gradually restart all the `vminsert` nodes with new `-storageNode` arg containing `<new_vmstorage_host>:8400`.


-### Cluster availability
-
-* HTTP load balancer must stop routing requests to unavailable `vminsert` and `vmselect` nodes.
-* The cluster remains available if at least a single `vmstorage` node exists:
-
-  - `vminsert` re-routes incoming data from unavailable `vmstorage` nodes to healthy `vmstorage` nodes
-  - `vmselect` continues serving partial responses if at least a single `vmstorage` node is available.
-
-
 ### Updating / reconfiguring cluster nodes

 All the node types - `vminsert`, `vmselect` and `vmstorage` - may be updated via graceful shutdown.
@@ -217,6 +235,17 @@ Cluster should remain in working state if at least a single node of each type re
 the update process. See [cluster availability](#cluster-availability) section for details.


+### Cluster availability
+
+* HTTP load balancer must stop routing requests to unavailable `vminsert` and `vmselect` nodes.
+* The cluster remains available if at least a single `vmstorage` node exists:
+
+  - `vminsert` re-routes incoming data from unavailable `vmstorage` nodes to healthy `vmstorage` nodes
+  - `vmselect` continues serving partial responses if at least a single `vmstorage` node is available.
+
+Data replication can be used for increasing storage durability. See [these docs](#replication-and-data-safety) for details.
+
+
 ### Capacity planning

 Each instance type - `vminsert`, `vmselect` and `vmstorage` - can run on the most suitable hardware.
@@ -226,6 +255,8 @@ Each instance type - `vminsert`, `vmselect` and `vmstorage` - can run on the mos
 * The recommended total number of vCPU cores for all the `vminsert` instances can be calculated from the ingestion rate: `vCPUs = ingestion_rate / 150K`.
 * The recommended number of vCPU cores per each `vminsert` instance should equal to the number of `vmstorage` instances in the cluster.
 * The amount of RAM per each `vminsert` instance should be 1GB or more. RAM is used as a buffer for spikes in ingestion rate.
+  The maximum amount of used RAM per `vminsert` node can be tuned with `-memory.allowedPercent` command-line flag. For instance, `-memory.allowedPercent=20`
+  limits the maximum amount of used RAM to 20% of the available RAM on the host system.
 * Sometimes `-rpc.disableCompression` command-line flag on `vminsert` instances could increase ingestion capacity at the cost
  of higher network bandwidth usage between `vminsert` and `vmstorage`.

@@ -243,6 +274,21 @@ The recommended hardware for `vmselect` instances highly depends on the type of
 small number of vCPU cores and small amount of RAM on `vmselect`, while heavy queries over big number of time series (>10K) usually require
 bigger number of vCPU cores and bigger amounts of RAM.

+In general it is recommended increasing the number of vCPU cores and RAM per `vmselect` node for higher query performance,
+while adding new `vmselect` nodes only when old nodes are overloaded with incoming query stream.
+
+
+### High availability
+
+It is recommended to run all the components for a single cluster in the same subnetwork with high bandwidth, low latency and low error rates.
+This improves cluster performance and availability.
+It isn't recommended spreading components for a single cluster across multiple availability zones, since cross-AZ network usually has lower bandwidth, higher latency
+and higher error rates compared to the network inside an AZ.
+
+If you need a multi-AZ setup, then it is recommended running independent clusters in each AZ and setting up
+[vmagent](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmagent/README.md) in front of these clusters, so it could replicate incoming data
+into all the clusters. Then [promxy](https://github.com/jacksontj/promxy) could be used for querying the data from multiple clusters.
+

 ### Helm

@@ -254,7 +300,18 @@ Upgrade follows `Cluster resizing procedure` under the hood.

 ### Replication and data safety

-VictoriaMetrics offloads replication to the underlying storage pointed by `-storageDataPath`.
+In order to enable application-level replication, `-replicationFactor=N` command-line flag must be passed to `vminsert`.
+This guarantees that all the data remains available for querying if up to `N-1` `vmstorage` nodes are unavailable.
+For example, when `-replicationFactor=3` is passed to `vminsert`, then it replicates all the ingested data to 3 distinct `vmstorage` nodes.
+
+When the replication is enabled, `-dedup.minScrapeInterval=1ms` command-line flag must be passed to `vmselect`
+in order to de-duplicate replicated data during queries. It is OK if `-dedup.minScrapeInterval` exceeds 1ms
+when [deduplication](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/README.md#deduplication) is used additionally to replication.
+
+Note that [replication doesn't save from disaster](https://medium.com/@valyala/speeding-up-backups-for-big-time-series-databases-533c1a927883),
+so it is recommended performing regular backups. See [these docs](#backups) for details.
+
+By default VictoriaMetrics offloads replication to the underlying storage pointed by `-storageDataPath`.
 It is recommended storing data on [Google Compute Engine persistent disks](https://cloud.google.com/compute/docs/disks/#pdspecs),
 since they are protected from data loss and data corruption. They also provide consistently high performance
 and [may be resized](https://cloud.google.com/compute/docs/disks/add-persistent-disk) without downtime.
@@ -262,8 +319,6 @@ HDD-based persistent disks should be enough for the majority of use cases.

 It is recommended using durable replicated persistent volumes in Kubernetes.

-Note that [replication doesn't save from disaster](https://medium.com/@valyala/speeding-up-backups-for-big-time-series-databases-533c1a927883).
-

 ### Backups

@@ -303,8 +358,7 @@ Due to `KISS` cluster version of VictoriaMetrics has no the following "features"

 - Fragile gossip protocols. See [failed attempt in Thanos](https://github.com/improbable-eng/thanos/blob/030bc345c12c446962225221795f4973848caab5/docs/proposals/completed/201809_gossip-removal.md).
 - Hard-to-understand-and-implement-properly [Paxos protocols](https://www.quora.com/In-distributed-systems-what-is-a-simple-explanation-of-the-Paxos-algorithm).
- Complex replication schemes, which may go nuts in unforesseen edge cases. The replication is offloaded to the underlying durable replicated storage
-  such as [persistent disks in Google Compute Engine](https://cloud.google.com/compute/docs/disks/#pdspecs).
+- Complex replication schemes, which may go nuts in unforeseen edge cases. See [replication docs](#replication-and-data-safety) for details.
 - Automatic data reshuffling between storage nodes, which may hurt cluster performance and availability.
 - Automatic cluster resizing, which may cost you a lot of money if improperly configured.
 - Automatic discovering and addition of new nodes in the cluster, which may mix data between dev and prod clusters :)
--- a/docs/FAQ.md
+++ b/docs/FAQ.md
@@ -2,64 +2,64 @@

 ### What is the main purpose of VictoriaMetrics?

-To provide the best long-term [remote storage](https://prometheus.io/docs/operating/integrations/#remote-endpoints-and-storage) solution for [Prometheus](https://prometheus.io/).
+To provide the best monitoring solution.
+
+
+### Who uses VictoriaMetrics?
+
+See [case studies](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/CaseStudies).


 ### Which features does VictoriaMetrics have?

-* Supports [Prometheus querying API](https://prometheus.io/docs/prometheus/latest/querying/api/), so it can be used as Prometheus drop-in replacement in Grafana.
-  Additionally, VictoriaMetrics extends PromQL with opt-in [useful features](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/MetricsQL).
-* High performance and good scalability for both [inserts](https://medium.com/@valyala/high-cardinality-tsdb-benchmarks-victoriametrics-vs-timescaledb-vs-influxdb-13e6ee64dd6b)
-  and [selects](https://medium.com/@valyala/when-size-matters-benchmarking-victoriametrics-vs-timescale-and-influxdb-6035811952d4).
-  [Outperforms InfluxDB and TimescaleDB by up to 20x](https://medium.com/@valyala/measuring-vertical-scalability-for-time-series-databases-in-google-cloud-92550d78d8ae).
-* [Uses 10x less RAM than InfluxDB](https://medium.com/@valyala/insert-benchmarks-with-inch-influxdb-vs-victoriametrics-e31a41ae2893) when working with millions of unique time series (aka high cardinality).
-* High data compression, so [up to 70x more data points](https://medium.com/@valyala/when-size-matters-benchmarking-victoriametrics-vs-timescale-and-influxdb-6035811952d4)
-  may be crammed into a limited storage comparing to TimescaleDB.
-* Optimized for storage with high-latency IO and low iops (HDD and network storage in AWS, Google Cloud, Microsoft Azure, etc). See [graphs from these benchmarks](https://medium.com/@valyala/high-cardinality-tsdb-benchmarks-victoriametrics-vs-timescaledb-vs-influxdb-13e6ee64dd6b).
-* A single-node VictoriaMetrics may substitute moderately sized clusters built with competing solutions such as Thanos, M3DB, Cortex, InfluxDB or TimescaleDB.
-  See [vertical scalability benchmarks](https://medium.com/@valyala/measuring-vertical-scalability-for-time-series-databases-in-google-cloud-92550d78d8ae)
-  and [comparing Thanos to VictoriaMetrics](https://medium.com/@valyala/comparing-thanos-to-victoriametrics-cluster-b193bea1683).
-* Easy operation:
-  * VictoriaMetrics consists of a single executable without external dependencies.
-  * All the configuration is done via explicit command-line flags with reasonable defaults.
-  * All the data is stored in a single directory pointed by `-storageDataPath` flag.
-  * Easy backups from [instant snapshots](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282).
-* Storage is protected from corruption on unclean shutdown (i.e. hardware reset or `kill -9`) thanks to [the storage architecture](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282).
-* Supports metrics' ingestion and backfilling via the following protocols:
-  * [Prometheus remote write API](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#remote_write)
-  * [InfluxDB line protocol](https://docs.influxdata.com/influxdb/v1.7/write_protocols/line_protocol_tutorial/)
-  * [Graphite plaintext protocol](https://graphite.readthedocs.io/en/latest/feeding-carbon.html) with [tags](https://graphite.readthedocs.io/en/latest/tags.html#carbon)
-    if `-graphiteListenAddr` is set.
-  * [OpenTSDB put message](http://opentsdb.net/docs/build/html/api_telnet/put.html) if `-opentsdbListenAddr` is set.
-* Ideally works with big amounts of time series data from IoT sensors, connected car sensors and industrial sensors.
-* Has open source [cluster version](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/cluster).
-
-
-### Which clients do you target?
-
-The following Prometheus users may be interested in VictoriaMetrics:
- Users who don't want to bother with Prometheus' local storage operational burden - backups, replication, capacity planning, scalability, etc.
- Users with multiple Prometheus instances who want performing arbitrary queries over all the metrics collected by their Prometheus instances (aka `global querying view`).
- Users who want reducing costs for storing huge amounts of time series data.
+See [these docs](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/README.md#prominent-features).


 ### How to start using VictoriaMetrics?

-Start with [single-node version](Single-server-VictoriaMetrics). It is easy to configure and operate. It should fit the majority of use cases.
+See [these docs](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/Quick-Start).


-### Is it safe to enable [remote write storage](https://prometheus.io/docs/operating/integrations/#remote-endpoints-and-storage) in Prometheus?
+### What is the difference between vmagent and Prometheus?

-Yes. Prometheus continues writing data to local storage after enabling remote storage write, so all the existing local storage data
+While both [vmagent](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmagent/README.md) and Prometheus may scrape Prometheus targets (aka `/metrics` pages)
+according to the provided Prometheus-compatible [scrape configs](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#scrape_config)
+and send data to multiple remote storage systems, vmagent has the following additional features:
+
+- vmagent usually requires lower amounts of CPU, RAM and disk IO comparing to Prometheus when scraping big number of targets (more than 1000)
+  or targets with big number of exposed metrics.
+- vmagent provides independent disk-backed buffers per each configured remote storage (aka `-remoteWrite.url`). This means that slow or temporarily unavailable storage
+  doesn't prevent from sending data to healthy storage in parallel. Prometheus uses a single shared buffer for all the configured remote storage systems (aka `remote_write->url`)
+  with the hardcoded retention of 2 hours.
+- vmagent may accept, relabel and filter data obtained via multiple data ingestion protocols additionally to data scraped from Prometheus targets.
+  I.e. it supports both `pull` and `push` protocols for data ingestion.
+  See [these docs](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmagent/README.md#features) for details.
+- vmagent may be used in different use cases:
+  - [IoT and edge monitoring](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmagent/README.md#iot-and-edge-monitoring)
+  - [Drop-in replacement for Prometheus](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmagent/README.md#drop-in-replacement-for-prometheus)
+  - [Replication and High Availability](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmagent/README.md#replication-and-high-availability)
+  - [Relabeling and Filtering](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmagent/README.md#relabeling-and-filtering)
+  - [Splitting data streams among multiple systems](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmagent/README.md#splitting-data-streams-among-multiple-systems)
+  - [Prometheus remote_write proxy](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmagent/README.md#prometheus-remote_write-proxy)
+
+
+### Is it safe to enable [remote write](https://prometheus.io/docs/operating/integrations/#remote-endpoints-and-storage) in Prometheus?
+
+Yes. Prometheus continues writing data to local storage after enabling remote write, so all the existing local storage data
 and new data is available for querying via Prometheus as usual.

+It is recommended using [vmagent](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmagent/README.md) for scraping Prometheus targets
+and writing data to VictoriaMetrics.
+

 ### How does VictoriaMetrics compare to other remote storage solutions for Prometheus such as [M3 from Uber](https://eng.uber.com/m3/), [Thanos](https://github.com/thanos-io/thanos), [Cortex](https://github.com/cortexproject/cortex), etc.?

-VictoriaMetrics is simpler, faster, more cost-effective and it provides [MetricsQL with useful extensions for PromQL](MetricsQL). The simplicity is twofold:
- It is simpler to configure and operate. There is no need in configuring third-party [sidecars](https://github.com/thanos-io/thanos/blob/master/docs/components/sidecar.md)
-  or fighting with [gossip protocol](https://github.com/improbable-eng/thanos/blob/030bc345c12c446962225221795f4973848caab5/docs/proposals/completed/201809_gossip-removal.md).
- VictoriaMetrics has simpler architecture, which means less bugs and more useful features in the long run comparing to competing TSDBs.
+VictoriaMetrics is simpler, faster, more cost-effective and it provides [MetricsQL query language](MetricsQL) based on PromQL. The simplicity is twofold:
+- It is simpler to configure and operate. There is no need in configuring [sidecars](https://github.com/thanos-io/thanos/blob/master/docs/components/sidecar.md),
+  fighting [gossip protocol](https://github.com/improbable-eng/thanos/blob/030bc345c12c446962225221795f4973848caab5/docs/proposals/completed/201809_gossip-removal.md)
+  or setting up third-party systems such as [Consul](https://github.com/cortexproject/cortex/issues/157), [Cassandra](https://cortexmetrics.io/docs/production/cassandra/),
+  [DynamoDB](https://cortexmetrics.io/docs/production/aws/) or [Memcached](https://cortexmetrics.io/docs/production/caching/).
+- VictoriaMetrics has simpler architecture. This means fewer bugs and more useful features in the long run comparing to competing TSDBs.

 See [comparing Thanos to VictoriaMetrics cluster](https://medium.com/@valyala/comparing-thanos-to-victoriametrics-cluster-b193bea1683)
 and [Remote Write Storage Wars](https://promcon.io/2019-munich/talks/remote-write-storage-wars/) talk from [PromCon 2019](https://promcon.io/2019-munich/talks/remote-write-storage-wars/).
@@ -70,55 +70,68 @@ VictoriaMetrics also [uses less RAM than Thanos components](https://github.com/t
 ### What is the difference between VictoriaMetrics and [Cortex](https://github.com/cortexproject/cortex)?

 VictoriaMetrics is similar to Cortex in the following aspects:
- Both systems accept data from Prometheus via standard [remote_write API](https://prometheus.io/docs/practices/remote_write/),
-  i.e. there is no need in running sidecars unlike in [Thanos](https://github.com/thanos-io/thanos) case.
- Both systems support multi-tenancy out of the box. See [the corresponding docs for VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/cluster/README.md#url-format).
+- Both systems accept data from [vmagent](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmagent/README.md) or Prometheus
+  via standard [remote_write API](https://prometheus.io/docs/practices/remote_write/), i.e. there is no need in running sidecars
+  unlike in [Thanos](https://github.com/thanos-io/thanos) case.
+- Both systems support multi-tenancy out of the box. See [the corresponding docs for VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/cluster/README.md#multitenancy).
+- Both systems support data replication. See [replication in Cortex](https://github.com/cortexproject/cortex/blob/fe56f1420099aa1bf1ce09316c186e05bddee879/docs/architecture.md#hashing) and [replication in VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/cluster/README.md#replication-and-data-safety).
+- Both systems scale horizontally to multiple nodes. See [these docs](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/cluster/README.md#cluster-resizing-and-scalability) for details.
+- Both systems support alerting and recording rules via the corresponding tools such as [vmalert](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmalert/README.md).
+

 The main differences between Cortex and VictoriaMetrics:
 - Cortex re-uses Prometheus source code, while VictoriaMetrics is written from scratch.
- Cortex provides [Ruler](https://github.com/cortexproject/cortex/blob/master/docs/architecture.md#ruler) and [Alertmanager](https://github.com/cortexproject/cortex/blob/master/docs/architecture.md#alertmanager) components,
-  which are currently missing in VictoriaMetrics. However, these components can be substituted by [Promxy](https://github.com/jacksontj/promxy#how-do-i-use-alertingrecording-rules-in-promxy).
 - Cortex heavily relies on third-party services such as Consul, Memcache, DynamoDB, BigTable, Cassandra, etc.
  This may increase operational complexity and reduce system reliability comparing to VictoriaMetrics' case,
  which doesn't use any external services. Compare [Cortex Architecture](https://github.com/cortexproject/cortex/blob/master/docs/architecture.md)
  to [VictoriaMetrics architecture](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/cluster/README.md#architecture-overview).
 - VictoriaMetrics provides [production-ready single-node solution](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/README.md),
  which is much easier to setup and operate than Cortex cluster.
- Cortex may lose up to 12 hours of recent data on Ingestor failure - see [the corresponding docs](https://github.com/cortexproject/cortex/blob/master/docs/architecture.md#ingesters-failure-and-data-loss).
+- Cortex may lose up to 12 hours of recent data on Ingestor failure - see [the corresponding docs](https://github.com/cortexproject/cortex/blob/fe56f1420099aa1bf1ce09316c186e05bddee879/docs/architecture.md#ingesters-failure-and-data-loss).
  VictoriaMetrics may lose only a few seconds of recent data, which isn't synced to persistent storage yet.
  See [this article for details](https://medium.com/@valyala/wal-usage-looks-broken-in-modern-time-series-databases-b62a627ab704).
- Cortex is usually slower and requires more CPU and RAM than VictoriaMetrics. See [this talk from Adidas at PromCon 2019](https://promcon.io/2019-munich/talks/remote-write-storage-wars/).
+- Cortex is usually slower and requires more CPU and RAM than VictoriaMetrics. See [this talk from Adidas at PromCon 2019](https://promcon.io/2019-munich/talks/remote-write-storage-wars/) and [other case studies](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/CaseStudies).
+- VictoriaMetrics accepts data in multiple popular data ingestion protocols additionally to Prometheus remote_write protocol - InfluxDB, OpenTSDB, Graphite, CSV.
+  See [these docs](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/README.md#how-to-import-time-series-data) for details.


 ### What is the difference between VictoriaMetrics and [Thanos](https://github.com/thanos-io/thanos)?

 - Thanos re-uses Prometheus source code, while VictoriaMetrics is written from scratch.
- Thanos provides [Ruler component](https://github.com/thanos-io/thanos/blob/master/docs/components/rule.md),
-  while VictoriaMetrics relies on [Promxy for alerting and recording rules](https://github.com/jacksontj/promxy#how-do-i-use-alertingrecording-rules-in-promxy).
 - VictoriaMetrics accepts data via [standard remote_write API for Prometheus](https://prometheus.io/docs/practices/remote_write/),
  while Thanos uses non-standard [Sidecar](https://github.com/thanos-io/thanos/blob/master/docs/components/sidecar.md), which must run alongside each Prometheus instance.
- Thanos Sidecar requires disabling data compaction in Prometheus, which may hurt Prometheus performance and increase RAM usage.
- Thanos stores data on object storage (Amazon S3 or Google GCS), while VictoriaMetrics stores data on block storage (GCP persistent disks, Amazon EBS or bare metal HDD).
+- Thanos Sidecar requires disabling data compaction in Prometheus, which may hurt Prometheus performance and increase RAM usage. See [these docs](https://thanos.io/components/sidecar.md/) for more details.
+- Thanos stores data in object storage (Amazon S3 or Google GCS), while VictoriaMetrics stores data in block storage
+  ([GCP persistent disks](https://cloud.google.com/compute/docs/disks#pdspecs), Amazon EBS or bare metal HDD).
+  While object storage is usually less expensive, block storage provides much lower latencies and higher throughput.
+  VictoriaMetrics works perfectly with HDD-based block storage - there is no need in using more expensive SSD or NVMe disks in most cases.
 - Thanos may lose up to 2 hours of recent data, which wasn't uploaded yet to object storage. VictoriaMetrics may lose only a few seconds of recent data,
  which isn't synced to persistent storage yet. See [this article for details](https://medium.com/@valyala/wal-usage-looks-broken-in-modern-time-series-databases-b62a627ab704).
+- VictoriaMetrics provides [production-ready single-node solution](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/README.md),
+  which is much easier to setup and operate than Thanos components.
 - Thanos may be harder to setup and operate comparing to VictoriaMetrics, since it has more moving parts, which can be connected with less reliable networks.
  See [this article for details](https://medium.com/faun/comparing-thanos-to-victoriametrics-cluster-b193bea1683).
 - Thanos is usually slower and requires more CPU and RAM than VictoriaMetrics. See [this talk from Adidas at PromCon 2019](https://promcon.io/2019-munich/talks/remote-write-storage-wars/).
+- VictoriaMetrics accepts data in multiple popular data ingestion protocols additionally to Prometheus remote_write protocol - InfluxDB, OpenTSDB, Graphite, CSV.
+  See [these docs](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/README.md#how-to-import-time-series-data) for details.


 ### How does VictoriaMetrics compare to [InfluxDB](https://www.influxdata.com/time-series-platform/influxdb/)?

-VictoriaMetrics requires [10x less RAM](https://medium.com/@valyala/insert-benchmarks-with-inch-influxdb-vs-victoriametrics-e31a41ae2893) and it [works faster](https://medium.com/@valyala/measuring-vertical-scalability-for-time-series-databases-in-google-cloud-92550d78d8ae).
-It is easier to configure and operate. It provides [better query language](https://medium.com/@valyala/promql-tutorial-for-beginners-9ab455142085) than InfluxQL or Flux.
+- VictoriaMetrics requires [10x less RAM](https://medium.com/@valyala/insert-benchmarks-with-inch-influxdb-vs-victoriametrics-e31a41ae2893) and it [works faster](https://medium.com/@valyala/measuring-vertical-scalability-for-time-series-databases-in-google-cloud-92550d78d8ae).
+- VictoriaMetrics provides [better query language](https://medium.com/@valyala/promql-tutorial-for-beginners-9ab455142085) than InfluxQL or Flux.
+- VictoriaMetrics accepts data in multiple popular data ingestion protocols additionally to InfluxDB - Prometheus remote_write, OpenTSDB, Graphite, CSV.
+  See [these docs](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/README.md#how-to-import-time-series-data) for details.


 ### How does VictoriaMetrics compare to [TimescaleDB](https://www.timescale.com/)?

-TimescaleDB insists on using SQL as a query language. While SQL is more powerful than PromQL, this power is rarely required during typical TSDB usage. Real-world queries usually [look clearer and simpler when written in PromQL than in SQL](https://medium.com/@valyala/promql-tutorial-for-beginners-9ab455142085).
-Additionally, VictoriaMetrics requires [up to 70x less storage space comparing to TimescaleDB](https://medium.com/@valyala/when-size-matters-benchmarking-victoriametrics-vs-timescale-and-influxdb-6035811952d4) for storing the same amount of time series data.
+- TimescaleDB insists on using SQL as a query language. While SQL is more powerful than PromQL, this power is rarely required during typical TSDB usage. Real-world queries usually [look clearer and simpler when written in PromQL than in SQL](https://medium.com/@valyala/promql-tutorial-for-beginners-9ab455142085).
+- VictoriaMetrics requires [up to 70x less storage space comparing to TimescaleDB](https://medium.com/@valyala/when-size-matters-benchmarking-victoriametrics-vs-timescale-and-influxdb-6035811952d4) for storing the same amount of time series data. The gap in storage space usage can be lowered from 70x to 3x if [compression in TimescaleDB is properly configured](https://docs.timescale.com/latest/using-timescaledb/compression) (it isn't an easy task in general case :)).
+- VictoriaMetrics accepts data in multiple popular data ingestion protocols - InfluxDB, OpenTSDB, Graphite, CSV, while TimescaleDB supports only SQL inserts.


-### Does VictoriaMetrics use Prometheus technologies like other clustered TSDBs built on top of Prometheus such as [Thanos](https://github.com/thanos-io/thanos), [Cortex](https://github.com/cortexproject/cortex)?
+### Does VictoriaMetrics use Prometheus technologies like other clustered TSDBs built on top of Prometheus such as [Thanos](https://github.com/thanos-io/thanos) or [Cortex](https://github.com/cortexproject/cortex)?

 No. VictoriaMetrics core is written in Go from scratch by [fasthttp](https://github.com/valyala/fasthttp) [author](https://github.com/valyala).
 The architecture is [optimized for storing and querying large amounts of time series data with high cardinality](https://medium.com/devopslinks/victoriametrics-creating-the-best-remote-storage-for-prometheus-5d92d66787ac). VictoriaMetrics storage uses [certain ideas from ClickHouse](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282). Special thanks to [Alexey Milovidov](https://github.com/alexey-milovidov).
@@ -136,6 +149,8 @@ Yes:
 * [TSBS benchmark on high-cardinality time series: VictoriaMetrics vs InfluxDB vs TimescaleDB](https://medium.com/@valyala/high-cardinality-tsdb-benchmarks-victoriametrics-vs-timescaledb-vs-influxdb-13e6ee64dd6b)
 * [Standard TSBS benchmark: VictoriaMetrics vs InfluxDB vs TimescaleDB](https://medium.com/@valyala/when-size-matters-benchmarking-victoriametrics-vs-timescale-and-influxdb-6035811952d4)

+See also [other articles about VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/Articles).
+

 ### What is the pricing for VictoriaMetrics?

@@ -145,11 +160,11 @@ The following versions are open source and free:

 We provide commercial support for both versions. [Contact us](mailto:info@victoriametrics.com) for the pricing.

-The following versions are commercial:
+The following commercial versions of VictoriaMetrics are planned:
 * Managed cluster in the Cloud.
 * SaaS version.

-[Contact us](mailto:info@victoriametrics.com) for the pricing.
+[Contact us](mailto:info@victoriametrics.com) for more information on our plans.


 ### Why VictoriaMetrics doesn't support [Prometheus remote read API](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#%3Cremote_read%3E)?
@@ -168,6 +183,11 @@ or via [Prometheus datasource in Grafana](http://docs.grafana.org/features/datas
 Yes. See [these docs](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/README.md#deduplication) for details.


+### Does VictoriaMetrics support replication?
+
+Yes. See [these docs](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/cluster/README.md#replication-and-data-safety) for details.
+
+
 ### Where is the source code of VictoriaMetrics?

 Source code for the following versions is available in the following places:
--- a/docs/MetricsQL.md
+++ b/docs/MetricsQL.md
@@ -2,7 +2,9 @@

 VictoriaMetrics implements MetricsQL - query language inspired by [PromQL](https://prometheus.io/docs/prometheus/latest/querying/basics/).
 It is backwards compatible with PromQL, so Grafana dashboards backed by Prometheus datasource should work the same after switching from Prometheus to VictoriaMetrics.
-[Standalone MetricsQL package](https://godoc.org/github.com/VictoriaMetrics/VictoriaMetrics/lib/metricsql) can be used for parsing MetricsQL in external apps.
+[Standalone MetricsQL package](https://godoc.org/github.com/VictoriaMetrics/metricsql) can be used for parsing MetricsQL in external apps.
+
+If you are unfamiliar with PromQL, then it is suggested reading [this tutorial for beginners](https://medium.com/@valyala/promql-tutorial-for-beginners-9ab455142085).

 The following functionality is implemented differently in MetricsQL comparing to PromQL in order to improve user experience:
 * MetricsQL takes into account the previous point before the window in square brackets for range functions such as `rate` and `increase`.
@@ -22,6 +24,8 @@ Feel free [filing a feature request](https://github.com/VictoriaMetrics/Victoria
 This functionality can be tried at [an editable Grafana dashboard](http://play-grafana.victoriametrics.com:3000/d/4ome8yJmz/node-exporter-on-victoriametrics-demo).

 - [`WITH` templates](https://play.victoriametrics.com/promql/expand-with-exprs). This feature simplifies writing and managing complex queries. Go to [`WITH` templates playground](https://victoriametrics.com/promql/expand-with-exprs) and try it.
+- All the aggregate functions support optional `limit N` suffix in order to limit the number of output series. For example, `sum(x) by (y) limit 10` limits
+  the number of output time series after the aggregation to 10. All the other time series are dropped.
 - Metric names and metric labels may contain escaped chars. For instance, `foo\-bar{baz\=aa="b"}` is valid expression. It returns time series with name `foo-bar` containing label `baz=aa` with value `b`. Additionally, `\xXX` escape sequence is supported, where `XX` is hexadecimal representation of escaped char.
 - `offset`, range duration and step value for range vector may refer to the current step aka `$__interval` value from Grafana.
  For instance, `rate(metric[10i] offset 5i)` would return per-second rate over a range covering 10 previous steps with the offset of 5 steps.
@@ -72,6 +76,8 @@ This functionality can be tried at [an editable Grafana dashboard](http://play-g
 - `median_over_time(m[d])` - calculates median values for `m` over `d` time window. Shorthand to `quantile_over_time(0.5, m[d])`.
 - `median(q)` - median aggregate. Shorthand to `quantile(0.5, q)`.
 - `limitk(k, q)` - limits the number of time series returned from `q` to `k`.
+- `any(q) by (x)` - returns any time series from `q` for each group in `x`. Note that `any()` removes all the labels except for those listed in `by (x)`.
+  Use `limitk(1, q)` if you need retaining all the labels from `q`.
 - `keep_last_value(q)` - fills missing data (gaps) in `q` with the previous non-empty value.
 - `keep_next_value(q)` - fills missing data (gaps) in `q` with the next non-empty value.
 - `distinct_over_time(m[d])` - returns distinct number of values for `m` data points over `d` duration.
@@ -110,3 +116,9 @@ This functionality can be tried at [an editable Grafana dashboard](http://play-g
  would calculate `min_over_time`, `max_over_time` and `rate` for `m[d]`.
 - `hoeffding_bound_upper(phi, m[d])` and `hoeffding_bound_lower(phi, m[d])` - return upper and lower [Hoeffding bounds](https://en.wikipedia.org/wiki/Hoeffding%27s_inequality)
  for the given `phi` in the range `[0..1]`.
+- `last_over_time(m[d])` - returns the last value for `m` on the time range `d`.
+- `first_over_time(m[d])` - returns the first value for `m` on the time range `d`.
+- `outliersk(N, m)` - returns up to `N` outlier time series for `m`. Outlier time series have the highest deviation from the `median(m)`.
+  This aggregate function is useful to detect anomalies across groups of similar time series.
+- `ascent_over_time(m[d])` - returns the sum of positive deltas between adjacent data points in `m` over `d`. Useful for tracking height gains in a GPS track.
+- `descent_over_time(m[d])` - returns the absolute sum of negative deltas between adjacent data points in `m` over `d`. Useful for tracking height loss in a GPS track.
--- a/docs/Quick-Start.md
+++ b/docs/Quick-Start.md
@@ -1,7 +1,7 @@
 # Quick Start

 1. Download the latest VictoriaMetrics release from [releases page](https://github.com/VictoriaMetrics/VictoriaMetrics/releases),
-   from [Docker hub](https://hub.docker.com/r/valyala/victoria-metrics/)
+   from [Docker hub](https://hub.docker.com/r/victoriametrics/victoria-metrics/)
   or [build it from sources](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/Single-server-VictoriaMetrics#how-to-build-from-sources).

 2. Run the binary or Docker image with the desired command-line flags. Pass `-help` in order to see description for all the available flags
@@ -17,8 +17,10 @@
   See [these instructions](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/43) in order to configure VictoriaMetrics as OS service.
   It is recommended setting up [VictoriaMetrics monitoring](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/README.md#monitoring).

-3. Configure all the Prometheus instances to write data to VictoriaMetrics.
-   See [these instructions](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/Single-server-VictoriaMetrics#prometheus-setup).
+3. Configure [vmagent](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmagent/README.md) or Prometheus to write data to VictoriaMetrics.
+   It is recommended to use `vmagent` instead of Prometheus, since it is more resource efficient. If you still prefer Prometheus, then
+   see [these instructions](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/Single-server-VictoriaMetrics#prometheus-setup)
+   for details on how to configure Prometheus.

 4. Configure Grafana to query VictoriaMetrics instead of Prometheus.
   See [these instructions](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/Single-server-VictoriaMetrics#grafana-setup).
--- a/docs/SampleSizeCalculations.md
+++ b/docs/SampleSizeCalculations.md
@@ -0,0 +1,74 @@
+# Sample size calculations
+
+These calculations are for the “Lowest sample size” graph at https://victoriametrics.com/ .
+
+How many metrics can be stored in 2tb disk for 2 years?
+
+Seconds in 2 years:
+2 years * 365 days * 24 hours * 60 minutes * 60 seconds = 63072000 seconds
+
+Resolution = 1 point per 10 seconds
+
+That means each metric will contain 6307200 points.
+
+2tb disk contains
+2 (tb) * 1024 (gb) * 1024 (mb) * 1024 (kb) * 1024 (b)  = 2199023255552 bytes
+
+# VictoriaMetrics
+Based on production data from our customers, sample size is 0.4 byte
+That means one metric with 10 seconds resolution will need
+6307200 points * 0.4 bytes/point = 2522880 bytes or 2.4 megabytes.
+Calculation for number of metrics can be stored in 2 tb disk:
+2199023255552 (disk size) / 2522880 (one metric for 2 years) = 871632 metrics
+So in 2tb we can store 871 632 metrics
+
+# Graphite
+Based on https://m30m.github.io/whisper-calculator/ sample size of graphite metrics is 12b + 28b for each metric
+That means, one metric with 10 second resolution will need 75686428 bytes or 72.18 megabytes
+Calculation for number of metrics can be stored in 2 tb disk:
+2199023255552 / 75686428 = 29 054 metrics
+
+# OpenTSDB
+Let's check official openTSDB site
+http://opentsdb.net/faq.html
+16 bytes of HBase overhead, 3 bytes for the metric, 4 bytes for the timestamp, 6 bytes per tag, 2 bytes of OpenTSDB overhead, up to 8 bytes for the value. Integers are stored with variable length encoding and can consume 1, 2, 4 or 8 bytes.
+That means, one metric with 10 second resolution will need
+6307200 * (1 + 4) + 3 + 16 + 2 = 31536021 bytes or 30 megabytes in the best scenario and
+6307200 * (8 + 4) + 3 + 16 + 2 = 75686421 bytes or 72 megabytes in the worst scenario.
+
+Calculation for number of metrics can be stored in 2 tb disk:
+
+2199023255552 / 31536021  = 69 730 metrics for best scenario
+2199023255552 / 75686421 = 29 054 metrics for worst scenario
+
+Also, openTSDB allows to use compression
+" LZO is able to achieve a compression factor of 4.2x "
+So, let's multiply the numbers by 4.2
+69 730 * 4.2 = 292 866 metrics for best scenario
+29 054 * 4.2 = 122 026 metrics for worst scenario
+# m3db
+Let's look at official m3db site https://m3db.github.io/m3/m3db/architecture/engine/
+They can achieve a sample size of 1.45 bytes/datapoint
+That means, one metric with 10 second resolution will need 9145440 bytes or 8.72177124 megabytes
+Calculation for number of metrics can be stored in 2 tb disk:
+2199023255552 / 9145440  = 240 450 metrics
+
+# InfluxDB
+Based on official influxDB site https://docs.influxdata.com/influxdb/v1.8/guides/hardware_sizing/#bytes-and-compression
+"Non-string values require approximately three bytes". That means, one metric with 10 second resolution will need
+6307200 * 3 = 18921600 bytes or 18 megabytes
+Calculation for number of metrics can be stored in 2 tb disk:
+
+2199023255552 / 18921600 = 116 217 metrics
+
+# Prometheus
+Let's check official site: https://prometheus.io/docs/prometheus/latest/storage/
+"On average, Prometheus uses only around 1-2 bytes per sample."
+That means, one metric with 10 second resolution will need
+6307200 * 1 = 6307200 bytes in best scenario
+6307200 * 2 = 12614400 bytes in worst scenario.
+
+Calculation for number of metrics can be stored in 2 tb disk:
+
+2199023255552 / 6307200  = 348 652 metrics for the best case
+2199023255552 / 12614400 = 174 326 metrics for the worst case
--- a/docs/Single-server-VictoriaMetrics.md
+++ b/docs/Single-server-VictoriaMetrics.md
@@ -10,17 +10,26 @@

 ## VictoriaMetrics

-VictoriaMetrics is fast, cost-effective and scalable time-series database. It can be used as long-term remote storage for Prometheus.
+VictoriaMetrics is fast, cost-effective and scalable time-series database.
+
 It is available in [binary releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases),
 [docker images](https://hub.docker.com/r/victoriametrics/victoria-metrics/) and
 in [source code](https://github.com/VictoriaMetrics/VictoriaMetrics). Just download VictoriaMetrics and see [how to start it](#how-to-start-victoriametrics).

 Cluster version is available [here](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/cluster).

+See our [Wiki](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki) for additional documentation.
+
+[Contact us](mailto:info@victoriametrics.com) if you need paid enterprise support for VictoriaMetrics.
+See [features available for enterprise customers](https://github.com/VictoriaMetrics/VictoriaMetrics/issues?q=is%3Aissue+label%3Aenterprise).
+
+
 ## Case studies and talks

 * [Adidas](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/CaseStudies#adidas)
+* [CERN](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/CaseStudies#cern)
 * [COLOPL](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/CaseStudies#colopl)
+* [Zerodha](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/CaseStudies#zerodha)
 * [Wix.com](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/CaseStudies#wixcom)
 * [Wedos.com](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/CaseStudies#wedoscom)
 * [Synthesio](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/CaseStudies#synthesio)
@@ -33,6 +42,8 @@ Cluster version is available [here](https://github.com/VictoriaMetrics/VictoriaM

 ## Prominent features

+* VictoriaMetrics can be used as long-term storage for Prometheus or for [vmagent](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmagent/README.md).
+  See [these docs](#prometheus-setup) for details.
 * Supports [Prometheus querying API](https://prometheus.io/docs/prometheus/latest/querying/api/), so it can be used as Prometheus drop-in replacement in Grafana.
  VictoriaMetrics implements [MetricsQL](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/MetricsQL) query language, which is inspired by PromQL.
 * Supports global query view. Multiple Prometheus instances may write data into VictoriaMetrics. Later this data may be used in a single query.
@@ -115,6 +126,8 @@ Cluster version is available [here](https://github.com/VictoriaMetrics/VictoriaM
 * [Monitoring](#monitoring)
 * [Troubleshooting](#troubleshooting)
 * [Backfilling](#backfilling)
+* [Replication](#replication)
+* [Backups](#backups)
 * [Profiling](#profiling)
 * [Integrations](#integrations)
 * [Third-party contributions](#third-party-contributions)
@@ -137,7 +150,9 @@ The following command-line flags are used the most:

 * `-storageDataPath` - path to data directory. VictoriaMetrics stores all the data in this directory. Default path is `victoria-metrics-data` in current working directory.
 * `-retentionPeriod` - retention period in months for the data. Older data is automatically deleted. Default period is 1 month.
-* `-httpListenAddr` - TCP address to listen to for http requests. By default, it listens port `8428` on all the network interfaces.
+
+Other flags have good enough default values, so set them only if you really need this.
+VictoriaMetrics accepts [Prometheus querying API requests](#prometheus-querying-api-usage) on port `8428` by default.

 Pass `-help` to see all the available flags with description and default values.

@@ -262,6 +277,8 @@ Currently the following [scrape_config](https://prometheus.io/docs/prometheus/la
 * [kubernetes_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#kubernetes_sd_config)
 * [ec2_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#ec2_sd_config)
 * [gce_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#gce_sd_config)
+* [consul_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#consul_sd_config)
+* [dns_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#dns_sd_config)

 In the future other `*_sd_config` types will be supported.

@@ -312,7 +329,7 @@ to local VictoriaMetrics using `curl`:
 curl -d 'measurement,tag1=value1,tag2=value2 field1=123,field2=1.23' -X POST 'http://localhost:8428/write'
 ```

-An arbitrary number of lines delimited by '\n' may be sent in a single request.
+An arbitrary number of lines delimited by '\n' (aka newline char) may be sent in a single request.
 After that the data may be read via [/api/v1/export](#how-to-export-time-series) endpoint:

 ```bash
@@ -348,7 +365,7 @@ echo "foo.bar.baz;tag1=value1;tag2=value2 123 `date +%s`" | nc -N localhost 2003
 ```

 VictoriaMetrics sets the current time if the timestamp is omitted.
-An arbitrary number of lines delimited by `\n` may be sent in one go.
+An arbitrary number of lines delimited by `\n` (aka newline char) may be sent in one go.
 After that the data may be read via [/api/v1/export](#how-to-export-time-series) endpoint:

 ```bash
@@ -389,7 +406,7 @@ Example for writing data with OpenTSDB protocol to local VictoriaMetrics using `
 echo "put foo.bar.baz `date +%s` 123 tag1=value1 tag2=value2" | nc -N localhost 4242
 ```

-An arbitrary number of lines delimited by `\n` may be sent in one go.
+An arbitrary number of lines delimited by `\n` (aka newline char) may be sent in one go.
 After that the data may be read via [/api/v1/export](#how-to-export-time-series) endpoint:

 ```bash
@@ -566,11 +583,11 @@ Run `make package-victoria-metrics`. It builds `victoriametrics/victoria-metrics
 `<PKG_TAG>` is auto-generated image tag, which depends on source code in the repository.
 The `<PKG_TAG>` may be manually set via `PKG_TAG=foobar make package-victoria-metrics`.

-By default the image is built on top of `scratch` image. It is possible to build the package on top of any other base image
-by setting it via `<ROOT_IMAGE>` environment variable. For example, the following command builds the image on top of `alpine:3.11` image:
+By default the image is built on top of `alpine` image for improved debuggability. It is possible to build the package on top of any other base image
+by setting it via `<ROOT_IMAGE>` environment variable. For example, the following command builds the image on top of `scratch` image:

 ```bash
-ROOT_IMAGE=alpine:3.11 make package-victoria-metrics
+ROOT_IMAGE=scratch make package-victoria-metrics
 ```

 ### Start with docker-compose
@@ -758,7 +775,13 @@ The required resources for query path:
 ### High availability

 1) Install multiple VictoriaMetrics instances in distinct datacenters (availability zones).
-2) Add addresses of these instances to `remote_write` section in Prometheus config:
+2) Pass addresses of these instances to [vmagent](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmagent/README.md) via `-remoteWrite.url` command-line flag:
+
+```bash
+/path/to/vmagent -remoteWrite.url=http://<victoriametrics-addr-1>:8428/api/v1/write -remoteWrite.url=http://<victoriametrics-addr-2>:8428/api/v1/write
+```
+
+Alternatively these addresses may be passed to `remote_write` section in Prometheus config:

 ```yml
 remote_write:
@@ -777,6 +800,8 @@ remote_write:
 kill -HUP `pidof prometheus`
 ```

+It is recommended to use [vmagent](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmagent/README.md) instead of Prometheus for highly loaded setups.
+
 4) Now Prometheus should write data into all the configured `remote_write` urls in parallel.
 5) Set up [Promxy](https://github.com/jacksontj/promxy) in front of all the VictoriaMetrics replicas.
 6) Set up Prometheus datasource in Grafana that points to Promxy.
@@ -787,6 +812,7 @@ to write data to `victoriametrics-addr-1`, while each `r2` should write data to
 Another option is to write data simultaneously from Prometheus HA pair to a pair of VictoriaMetrics instances
 with the enabled de-duplication. See [this section](#deduplication) for details.

+
 ### Deduplication

 VictoriaMetrics de-duplicates data points if `-dedup.minScrapeInterval` command-line flag
@@ -804,6 +830,8 @@ Data is split in per-month subdirectories inside `<-storageDataPath>/data/small`
 Directories for months outside the configured retention are deleted on the first day of new month.
 In order to keep data according to `-retentionPeriod` max disk space usage is going to be `-retentionPeriod` + 1 month.
 For example if `-retentionPeriod` is set to 1, data for January is deleted on March 1st.
+It is safe to extend `-retentionPeriod` on existing data. If `-retentionPeriod` is set to a lower
+value than before, then data outside the configured period will eventually be deleted.

 ### Multiple retentions

@@ -813,6 +841,11 @@ Just start multiple VictoriaMetrics instances with distinct values for the follo
 * `-storageDataPath`, so the data for each retention period is saved in a separate directory
 * `-httpListenAddr`, so clients may reach VictoriaMetrics instance with proper retention

+Then set up [vmauth](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmauth/README.md) in front of VictoriaMetrics instances,
+so it could route requests from particular user to VictoriaMetrics with the desired retention.
+The same scheme could be implemented for multiple tenants in [VictoriaMetrics cluster](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/cluster/README.md).
+
+
 ### Downsampling

 There is no downsampling support at the moment, but:
@@ -825,6 +858,10 @@ There is no downsampling support at the moment, but:
 These properties reduce the need of downsampling. We plan to implement downsampling in the future.
 See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/36) for details.

+It is possible to (ab)use [-dedup.minScrapeInterval](#deduplication) for basic downsampling.
+For instance, if interval between the ingested data points is 15s, then `-dedup.minScrapeInterval=5m` will leave
+only a single data point out of 20 initial data points per each 5m interval.
+
 ### Multi-tenancy

 Single-node VictoriaMetrics doesn't support multi-tenancy. Use [cluster version](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/cluster) instead.
@@ -841,11 +878,14 @@ horizontally scalable long-term remote storage for really large Prometheus deplo

 ### Alerting

-VictoriaMetrics doesn't support rule evaluation and alerting yet, so these actions can be performed at the following places:
+It is recommended using [vmalert](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmalert/README.md) for alerting.
+
+Additionally, alerting can be set up with the following tools:
+
+* With Prometheus - see [the corresponding docs](https://prometheus.io/docs/alerting/overview/).
+* With Promxy - see [the corresponding docs](https://github.com/jacksontj/promxy/blob/master/README.md#how-do-i-use-alertingrecording-rules-in-promxy).
+* With Grafana - see [the corresponding docs](https://grafana.com/docs/alerting/rules/).

-* At Prometheus - see [the corresponding docs](https://prometheus.io/docs/alerting/overview/).
-* At Promxy - see [the corresponding docs](https://github.com/jacksontj/promxy/blob/master/README.md#how-do-i-use-alertingrecording-rules-in-promxy).
-* At Grafana - see [the corresponding docs](https://grafana.com/docs/alerting/rules/).

 ### Security

@@ -862,6 +902,10 @@ Consider setting the following command-line flags:
 Explicitly set internal network interface for TCP and UDP ports for data ingestion with Graphite and OpenTSDB formats.
 For example, substitute `-graphiteListenAddr=:2003` with `-graphiteListenAddr=<internal_iface_ip>:2003`.

+Prefer authorizing all the incoming requests from untrusted networks with [vmauth](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmauth/README.md)
+or similar auth proxy.
+
+
 ### Tuning

 * There is no need for VictoriaMetrics tuning since it uses reasonable defaults for command-line flags,
@@ -881,7 +925,8 @@ mkfs.ext4 ... -O 64bit,huge_file,extent -T huge
 ### Monitoring

 VictoriaMetrics exports internal metrics in Prometheus format at `/metrics` page.
-These metrics may be collected via Prometheus by adding the corresponding scrape config to it.
+These metrics may be collected by [vmagent](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmagent/README.md)
+or Prometheus by adding the corresponding scrape config to it.
 Alternatively they can be self-scraped by setting `-selfScrapeInterval` command-line flag to duration greater than 0.
 For example, `-selfScrapeInterval=10s` would enable self-scraping of `/metrics` page with 10 seconds interval.

@@ -892,23 +937,32 @@ The most interesting metrics are:

 * `vm_cache_entries{type="storage/hour_metric_ids"}` - the number of time series with new data points during the last hour
  aka active time series.
-* `rate(vm_new_timeseries_created_total[5m])` - time series churn rate.
-* `vm_rows{type="indexdb"}` - the number of rows in inverted index. High value for this number usually mean high churn rate for time series.
-* Sum of `vm_rows{type="storage/big"}` and `vm_rows{type="storage/small"}` - total number of `(timestamp, value)` data points
-  in the database.
-* `vm_rows_inserted_total` - the total number of inserted rows since VictoriaMetrics start.
+* `increase(vm_new_timeseries_created_total[1h])` - time series churn rate during the previous hour.
+* `sum(vm_rows{type=~"storage/.*"})` - total number of `(timestamp, value)` data points in the database.
+* `sum(rate(vm_rows_inserted_total[5m]))` - ingestion rate, i.e. how many samples are inserted into the database per second.
 * `vm_free_disk_space_bytes` - free space left at `-storageDataPath`.
-* `sum(vm_data_size_bytes)` - the total data size on disk.
+* `sum(vm_data_size_bytes)` - the total size of data on disk.
+* `increase(vm_slow_row_inserts_total[5m])` - the number of slow inserts during the last 5 minutes.
+  If this number remains high during extended periods of time, then it is likely more RAM is needed for optimal handling
+  of the current number of active time series.
+* `increase(vm_slow_metric_name_loads_total[5m])` - the number of slow loads of metric names during the last 5 minutes.
+  If this number remains high during extended periods of time, then it is likely more RAM is needed for optimal handling
+  of the current number of active time series.
+

 ### Troubleshooting

 * It is recommended to use default command-line flag values (i.e. don't set them explicitly) until the need
  of tweaking these flag values arises.

+* It is recommended upgrading to the latest available release from [this page](https://github.com/VictoriaMetrics/VictoriaMetrics/releases),
+  since the issue could be already fixed there.
+
 * If VictoriaMetrics works slowly and eats more than a CPU core per 100K ingested data points per second,
  then it is likely you have too many active time series for the current amount of RAM.
+  VictoriaMetrics [exposes](#monitoring) `vm_slow_*` metrics, which could be used as an indicator of low amounts of RAM.
  It is recommended increasing the amount of RAM on the node with VictoriaMetrics in order to improve
-  ingestion performance.
+  ingestion and query performance in this case.
  Another option is to increase `-memory.allowedPercent` command-line flag value. Be careful with this
  option, since too big value for `-memory.allowedPercent` may result in high I/O usage.

@@ -939,6 +993,10 @@ The most interesting metrics are:
  VictoriaMetrics accepts optional `date=YYYY-MM-DD` and `topN=42` args on this page. By default `date` equals to the current date,
  while `topN` equals to 10.

+* VictoriaMetrics limits the number of labels per each metric with `-maxLabelsPerTimeseries` command-line flag.
+  This prevents ingestion of metrics with too many labels. It is recommended [monitoring](#monitoring) `vm_metrics_with_dropped_labels_total`
+  metric in order to determine whether `-maxLabelsPerTimeseries` must be adjusted for your workload.
+

 ### Backfilling

@@ -955,6 +1013,24 @@ the query cache, which could contain incomplete data cached during the backfilli
 Yet another solution is to increase `-search.cacheTimestampOffset` flag value in order to disable caching
 for data with timestamps close to the current time.

+
+### Replication
+
+Single-node VictoriaMetrics doesn't support application-level replication. Use cluster version instead.
+See [these docs](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/cluster/README.md#replication-and-data-safety) for details.
+
+Storage-level replication may be offloaded to durable persistent storage such as [Google Cloud disks](https://cloud.google.com/compute/docs/disks#pdspecs).
+
+See also [high availability docs](#high-availability) and [backup docs](#backups).
+
+
+### Backups
+
+VictoriaMetrics supports backups via [vmbackup](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmbackup/README.md)
+and [vmrestore](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmrestore/README.md) tools.
+We also provide `vmbackuper` tool for paid enterprise subscribers - see [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/466) for details.
+
+
 ### Profiling

 VictoriaMetrics provides handlers for collecting the following [Go profiles](https://blog.golang.org/profiling-go-programs):
--- a/Show More
+++ b/Show More