Merge remote-tracking branch 'origin/cluster' into series-update-api

app/vmselect: fixes an issue with slice reuse
Revert "add datadog /api/v2/series and /api/beta/sketches support (#5094 )"
2026-06-07 10:56:50 +03:00 · 2023-12-05 18:47:15 +03:00 · 2023-12-05 18:46:02 +03:00 · 2023-12-05 02:30:40 +02:00 · 2023-12-05 02:29:00 +02:00 · 2023-12-05 01:35:59 +02:00
2403 changed files with 145807 additions and 72930 deletions
--- a/.github/ISSUE_TEMPLATE/bug_report.yml
+++ b/.github/ISSUE_TEMPLATE/bug_report.yml
@@ -6,7 +6,7 @@ body:
    attributes:
      value: |
        Before filling a bug report it would be great to [upgrade](https://docs.victoriametrics.com/#how-to-upgrade) 
-        to [the latest available release](https://github.com/VictoriaMetrics/VictoriaMetrics/releases)
+        to [the latest available release](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest)
        and verify whether the bug is reproducible there.
        It's also recommended to read the [troubleshooting docs](https://docs.victoriametrics.com/Troubleshooting.html) first.
  - type: textarea
--- a/.github/workflows/check-licenses.yml
+++ b/.github/workflows/check-licenses.yml
@@ -14,13 +14,25 @@ jobs:
    name: Build
    runs-on: ubuntu-latest
    steps:
-      - name: Setup Go
-        uses: actions/setup-go@main
-        with:
-          go-version: 1.21.0
-        id: go
      - name: Code checkout
        uses: actions/checkout@master
+
+      - name: Setup Go
+        id: go
+        uses: actions/setup-go@v4
+        with:
+          go-version: stable
+          cache: false
+
+      - name: Cache Go artifacts
+        uses: actions/cache@v3
+        with:
+          path: |
+            ~/.cache/go-build
+            ~/go/pkg/mod
+            ~/go/bin
+          key: go-artifacts-${{ runner.os }}-check-licenses-${{ steps.go.outputs.go-version }}-${{ hashFiles('go.sum', 'Makefile', 'app/**/Makefile') }}
+          restore-keys: go-artifacts-${{ runner.os }}-check-licenses-
+
      - name: Check License
-        run: |
-          make check-licenses
+        run: make check-licenses
--- a/.github/workflows/codeql-analysis-js.yml
+++ b/.github/workflows/codeql-analysis-js.yml
@@ -33,7 +33,7 @@ jobs:

    steps:
      - name: Checkout repository
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4

      - name: Initialize CodeQL
        uses: github/codeql-action/init@v2
--- a/.github/workflows/codeql-analysis.yml
+++ b/.github/workflows/codeql-analysis.yml
@@ -52,14 +52,25 @@ jobs:

    steps:
      - name: Checkout repository
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4

      - name: Set up Go
+        id: go
        uses: actions/setup-go@v4
        with:
-          go-version: 1.21.0
-          check-latest: true
-          cache: true
+          go-version: stable
+          cache: false
+        if: ${{ matrix.language == 'go' }}
+
+      - name: Cache Go artifacts
+        uses: actions/cache@v3
+        with:
+          path: |
+            ~/.cache/go-build
+            ~/go/pkg/mod
+            ~/go/bin
+          key: go-artifacts-${{ runner.os }}-codeql-analyze-${{ steps.go.outputs.go-version }}-${{ hashFiles('go.sum', 'Makefile', 'app/**/Makefile') }}
+          restore-keys: go-artifacts-${{ runner.os }}-codeql-analyze-
        if: ${{ matrix.language == 'go' }}

      # Initializes the CodeQL tools for scanning.
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -7,6 +7,8 @@ on:
    paths-ignore:
      - "docs/**"
      - "**.md"
+      - "dashboards/**"
+      - "deployment/**.yml"
  pull_request:
    branches:
      - master
@@ -14,6 +16,8 @@ on:
    paths-ignore:
      - "docs/**"
      - "**.md"
+      - "dashboards/**"
+      - "deployment/**.yml"
 permissions:
  contents: read

@@ -27,21 +31,58 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Code checkout
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4

      - name: Setup Go
+        id: go
        uses: actions/setup-go@v4
        with:
-          go-version: 1.21.0
-          check-latest: true
-          cache: true
+          go-version: stable
+          cache: false

-      - name: Dependencies
+      - name: Cache Go artifacts
+        uses: actions/cache@v3
+        with:
+          path: |
+            ~/.cache/go-build
+            ~/go/pkg/mod
+            ~/go/bin
+          key: go-artifacts-${{ runner.os }}-check-all-${{ steps.go.outputs.go-version }}-${{ hashFiles('go.sum', 'Makefile', 'app/**/Makefile') }}
+          restore-keys: go-artifacts-${{ runner.os }}-check-all-
+
+      - name: Run check-all
        run: |
-          make install-golangci-lint
          make check-all
          git diff --exit-code

+  build:
+    needs: lint
+    name: build
+    runs-on: ubuntu-latest
+    steps:
+      - name: Code checkout
+        uses: actions/checkout@v4
+
+      - name: Setup Go
+        id: go
+        uses: actions/setup-go@v4
+        with:
+          go-version: stable
+          cache: false
+
+      - name: Cache Go artifacts
+        uses: actions/cache@v3
+        with:
+          path: |
+            ~/.cache/go-build
+            ~/go/pkg/mod
+            ~/go/bin
+          key: go-artifacts-${{ runner.os }}-crossbuild-${{ steps.go.outputs.go-version }}-${{ hashFiles('go.sum', 'Makefile', 'app/**/Makefile') }}
+          restore-keys: go-artifacts-${{ runner.os }}-crossbuild-
+
+      - name: Build
+        run: make crossbuild
+
  test:
    needs: lint
    strategy:
@@ -51,45 +92,29 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Code checkout
-        uses: actions/checkout@v3
-
-      - name: Setup Go
-        uses: actions/setup-go@v4
-        with:
-          go-version: 1.21.0
-          check-latest: true
-          cache: true
-
-      - name: run tests
-        run: |
-          make ${{ matrix.scenario}}
-
-      - name: Publish coverage
-        uses: codecov/codecov-action@v3
-        with:
-          file: ./coverage.txt
-
-  build:
-    needs: test
-    name: build
-    runs-on: ubuntu-latest
-    steps:
-      - name: Code checkout
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4

      - name: Setup Go
        id: go
        uses: actions/setup-go@v4
        with:
-          go-version: 1.21.0
-          check-latest: true
-          cache: true
+          go-version: stable
+          cache: false

-      - uses: actions/cache@v3
+      - name: Cache Go artifacts
+        uses: actions/cache@v3
        with:
-          path: gocache-for-docker
-          key:  gocache-docker-${{ runner.os }}-${{ steps.go.outputs.go-version }}-${{ hashFiles('go.mod') }}
+          path: |
+            ~/.cache/go-build
+            ~/go/pkg/mod
+            ~/go/bin
+          key: go-artifacts-${{ runner.os }}-${{ matrix.scenario }}-${{ steps.go.outputs.go-version }}-${{ hashFiles('go.sum', 'Makefile', 'app/**/Makefile') }}
+          restore-keys: go-artifacts-${{ runner.os }}-${{ matrix.scenario }}-

-      - name: Build
-        run: |
-          make vmcluster-crossbuild
+      - name: run tests
+        run: make ${{ matrix.scenario}}
+
+      - name: Publish coverage
+        uses: codecov/codecov-action@v3
+        with:
+          file: ./coverage.txt
--- a/.github/workflows/sync-docs.yml
+++ b/.github/workflows/sync-docs.yml
@@ -6,6 +6,9 @@ on:
    paths:
      - 'docs/**'
  workflow_dispatch: {}
+env:
+  PAGEFIND_VERSION: "1.0.4"
+  HUGO_VERSION: "latest"
 permissions:
  contents: read  # This is required for actions/checkout and to commit back image update
  deployments: write
@@ -15,16 +18,25 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Code checkout
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
        with:
          path: main
      - name: Checkout private code
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
        with:
          repository: VictoriaMetrics/vmdocs
          token: ${{ secrets.VM_BOT_GH_TOKEN }}
          path: docs
-
+      - uses: peaceiris/actions-hugo@v2
+        with:
+          hugo-version: ${{env.HUGO_VERSION}}
+          extended: true
+      - name: Install PageFind #install the static search engine for index build
+        uses: supplypike/setup-bin@v3
+        with:
+          uri: "https://github.com/CloudCannon/pagefind/releases/download/v${{env.PAGEFIND_VERSION}}/pagefind-v${{env.PAGEFIND_VERSION}}-x86_64-unknown-linux-musl.tar.gz"
+          name: "pagefind"
+          version: ${{env.PAGEFIND_VERSION}}
      - name: Import GPG key
        uses: crazy-max/ghaction-import-gpg@v5
        with:
@@ -45,6 +57,7 @@ jobs:
          rm -rf content
          cp -r ../main/docs content
          make clean-after-copy
+          make build-search-index
          git config --global user.name "${{ steps.import-gpg.outputs.email }}"
          git config --global user.email "${{ steps.import-gpg.outputs.email }}"
          git add .
--- a/.github/workflows/update-sandbox.yml
+++ b/.github/workflows/update-sandbox.yml
@@ -1,80 +0,0 @@
-name: sandbox-release
-on:
-  release:
-    types: [published]
-permissions:
-  contents: write
-jobs:
-  deploy-sandbox:
-    runs-on: ubuntu-latest
-    steps:
-      - name: check inputs
-        if: github.event.release.tag_name == ''
-        run: exit 1
-    
-      - name: Check out code
-        uses: actions/checkout@v3
-        with:
-          repository: VictoriaMetrics/ops
-          token: ${{ secrets.VM_BOT_GH_TOKEN }}
-
-      - name: Import GPG key
-        id: import-gpg
-        uses: crazy-max/ghaction-import-gpg@v5
-        with:
-          gpg_private_key: ${{ secrets.VM_BOT_GPG_PRIVATE_KEY }}
-          passphrase: ${{ secrets.VM_BOT_PASSPHRASE }}
-          git_user_signingkey: true
-          git_commit_gpgsign: true
-
-      - name: update image tag
-        uses: fjogeleit/yaml-update-action@main
-        with:
-          valueFile: 'gcp-test/sandbox/manifests/benchmark-vm/vmcluster.yaml'
-          commitChange: false
-          createPR: false
-          changes: |
-            {
-              "gcp-test/sandbox/manifests/benchmark-vm/vmcluster.yaml": {
-                "spec.vminsert.image.tag": "${{ github.event.release.tag_name }}-enterprise-cluster",
-                "spec.vmselect.image.tag": "${{ github.event.release.tag_name }}-enterprise-cluster",
-                "spec.vmstorage.image.tag": "${{ github.event.release.tag_name }}-enterprise-cluster"
-              },
-              "gcp-test/sandbox/manifests/benchmark-vm/vmsingle.yaml": {
-                "spec.image.tag": "${{ github.event.release.tag_name }}-enterprise"
-              },
-              "gcp-test/sandbox/manifests/monitoring/monitoring-vmagent.yaml": {
-                "spec.image.tag": "${{ github.event.release.tag_name }}"
-              },
-              "gcp-test/sandbox/manifests/monitoring/monitoring-vmcluster.yaml": {
-                "spec.vminsert.image.tag": "${{ github.event.release.tag_name }}-enterprise-cluster",
-                "spec.vmselect.image.tag": "${{ github.event.release.tag_name }}-enterprise-cluster",
-                "spec.vmstorage.image.tag": "${{ github.event.release.tag_name }}-enterprise-cluster"
-              },
-              "gcp-test/sandbox/manifests/monitoring/vmalert.yaml": {
-                "spec.image.tag": "${{ github.event.release.tag_name }}-enterprise"
-              }
-            }
-
-      - name: commit changes
-        run: |
-          git config --global user.name "${{ steps.import-gpg.outputs.email }}"
-          git config --global user.email "${{ steps.import-gpg.outputs.email }}"
-          git add .
-          git commit -S -m "Deploy image tag ${RELEASE_TAG} to sandbox"
-        env:
-          RELEASE_TAG: ${{ github.event.release.tag_name }}
-
-      - name: Create Pull Request
-        uses: peter-evans/create-pull-request@v5
-        with:
-          author: ${{ github.actor }} <${{ github.actor }}@users.noreply.github.com>
-          branch: release-automation
-          token: ${{ secrets.VM_BOT_GH_TOKEN }}
-          delete-branch: true
-          title: "release ${{ github.event.release.tag_name }}"
-          body: |
-            Release [${{ github.event.release.tag_name }}](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/${{ github.event.release.tag_name }}) to sandbox
-        
-            > Auto-generated by `Github Actions Bot`
-    
--- a/Makefile
+++ b/Makefile
@@ -1,5 +1,7 @@
 PKG_PREFIX := github.com/VictoriaMetrics/VictoriaMetrics

+MAKE_CONCURRENCY ?= $(shell cat /proc/cpuinfo | grep -c processor)
+MAKE_PARALLEL := $(MAKE) -j $(MAKE_CONCURRENCY)
 DATEINFO_TAG ?= $(shell date -u +'%Y%m%d-%H%M%S')
 BUILDINFO_TAG ?= $(shell echo $$(git describe --long --all | tr '/' '-')$$( \
 	      git diff-index --quiet HEAD -- || echo '-dirty-'$$(git diff-index -u HEAD | openssl sha1 | cut -d' ' -f2 | cut -c 1-8)))
@@ -15,7 +17,9 @@ GO_BUILDINFO = -X '$(PKG_PREFIX)/lib/buildinfo.Version=$(APP_NAME)-$(DATEINFO_TA
 .PHONY: $(MAKECMDGOALS)

 include app/*/Makefile
+include docs/Makefile
 include deployment/*/Makefile
+include dashboards/Makefile
 include package/release/Makefile

 all: \
@@ -71,16 +75,18 @@ vmcluster-windows-amd64: \
 	vmselect-windows-amd64 \
 	vmstorage-windows-amd64

-vmcluster-crossbuild: \
-	vmcluster-linux-amd64 \
-	vmcluster-linux-arm64 \
-	vmcluster-linux-arm \
-	vmcluster-linux-ppc64le \
-	vmcluster-linux-386 \
-	vmcluster-freebsd-amd64 \
-	vmcluster-openbsd-amd64
+crossbuild: vmcluster-crossbuild

-publish: package-base \
+vmcluster-crossbuild:
+	$(MAKE_PARALLEL) vmcluster-linux-amd64 \
+		vmcluster-linux-arm64 \
+		vmcluster-linux-arm \
+		vmcluster-linux-ppc64le \
+		vmcluster-linux-386 \
+		vmcluster-freebsd-amd64 \
+		vmcluster-openbsd-amd64
+
+publish: \
 	publish-vminsert \
 	publish-vmselect \
 	publish-vmstorage
@@ -91,13 +97,14 @@ package: \
 	package-vmstorage

 publish-release:
-	git checkout $(TAG) && LATEST_TAG=stable $(MAKE) release publish && \
-		git checkout $(TAG)-cluster && LATEST_TAG=cluster-stable $(MAKE) release publish && \
-		git checkout $(TAG)-enterprise && LATEST_TAG=enterprise-stable $(MAKE) release publish && \
-		git checkout $(TAG)-enterprise-cluster && LATEST_TAG=enterprise-cluster-stable $(MAKE) release publish
+	rm -rf bin/*
+	git checkout $(TAG) && $(MAKE) release && LATEST_TAG=stable $(MAKE) publish && \
+		git checkout $(TAG)-cluster && $(MAKE) release && LATEST_TAG=cluster-stable $(MAKE) publish && \
+		git checkout $(TAG)-enterprise && $(MAKE) release && LATEST_TAG=enterprise-stable $(MAKE) publish && \
+		git checkout $(TAG)-enterprise-cluster && $(MAKE) release && LATEST_TAG=enterprise-cluster-stable $(MAKE) publish

-release: \
-	release-vmcluster
+release:
+	$(MAKE_PARALLEL) release-vmcluster

 release-vmcluster: \
 	release-vmcluster-linux-amd64 \
@@ -198,7 +205,7 @@ benchmark-pure:
 vendor-update:
 	go get -u -d ./lib/...
 	go get -u -d ./app/...
-	go mod tidy -compat=1.19
+	go mod tidy -compat=1.20
 	go mod vendor

 app-local:
@@ -224,7 +231,7 @@ golangci-lint: install-golangci-lint
 	golangci-lint run

 install-golangci-lint:
-	which golangci-lint || curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(shell go env GOPATH)/bin v1.51.2
+	which golangci-lint || curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(shell go env GOPATH)/bin v1.55.1

 govulncheck: install-govulncheck
 	govulncheck ./...
@@ -266,11 +273,3 @@ copy-docs:
 # The rest of docs is ordered manually.
 docs-sync:
 	SRC=README.md DST=docs/Cluster-VictoriaMetrics.md OLD_URL='/Cluster-VictoriaMetrics.html' ORDER=2 TITLE='Cluster version' $(MAKE) copy-docs
-	SRC=app/vmagent/README.md DST=docs/vmagent.md OLD_URL='/vmagent.html' ORDER=3 TITLE=vmagent $(MAKE) copy-docs
-	SRC=app/vmalert/README.md DST=docs/vmalert.md OLD_URL='/vmalert.html' ORDER=4 TITLE=vmalert $(MAKE) copy-docs
-	SRC=app/vmauth/README.md DST=docs/vmauth.md OLD_URL='/vmauth.html' ORDER=5 TITLE=vmauth $(MAKE) copy-docs
-	SRC=app/vmbackup/README.md DST=docs/vmbackup.md OLD_URL='/vmbackup.html' ORDER=6 TITLE=vmbackup $(MAKE) copy-docs
-	SRC=app/vmrestore/README.md DST=docs/vmrestore.md OLD_URL='/vmrestore.html' ORDER=7 TITLE=vmrestore $(MAKE) copy-docs
-	SRC=app/vmctl/README.md DST=docs/vmctl.md OLD_URL='/vmctl.html' ORDER=8 TITLE=vmctl $(MAKE) copy-docs
-	SRC=app/vmgateway/README.md DST=docs/vmgateway.md OLD_URL='/vmgateway.html' ORDER=9 TITLE=vmgateway $(MAKE) copy-docs
-	SRC=app/vmbackupmanager/README.md DST=docs/vmbackupmanager.md OLD_URL='/vmbackupmanager.html' ORDER=10 TITLE=vmbackupmanager $(MAKE) copy-docs
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 # Cluster version

-<img alt="VictoriaMetrics" src="logo.png" width="300">
+<img src="docs/logo.webp" width="300">

 VictoriaMetrics is a fast, cost-effective and scalable time series database. It can be used as a long-term remote storage for Prometheus.

@@ -11,7 +11,19 @@ with the number of CPU cores, RAM and available storage space.
 The single-node version is easier to configure and operate compared to the cluster version, so think twice before choosing the cluster version.
 See [this question](https://docs.victoriametrics.com/FAQ.html#which-victoriametrics-type-is-recommended-for-use-in-production---single-node-or-cluster) for more details.

-Join [our Slack](https://slack.victoriametrics.com/) or [contact us](mailto:info@victoriametrics.com) with consulting and support questions.
+There is also a user-friendly database for logs - [VictoriaLogs](https://docs.victoriametrics.com/VictoriaLogs/).
+
+If you have questions about VictoriaMetrics, feel free to ask them in the [VictoriaMetrics community Slack chat](https://slack.victoriametrics.com/).
+
+[Contact us](mailto:info@victoriametrics.com) if you need enterprise support for VictoriaMetrics. 
+See [features available in enterprise package](https://docs.victoriametrics.com/enterprise.html).
+Enterprise binaries can be downloaded and evaluated for free 
+from [the releases page](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest).
+See how to request a free trial license [here](https://victoriametrics.com/products/enterprise/trial/).
+
+VictoriaMetrics is developed at a fast pace, so it is recommended to periodically check the [CHANGELOG](https://docs.victoriametrics.com/CHANGELOG.html) and perform [regular upgrades](#how-to-upgrade-victoriametrics).
+
+VictoriaMetrics has achieved security certifications for Database Software Development and Software-Based Monitoring Services. We apply strict security measures in everything we do. See our [Security page](https://victoriametrics.com/security/) for more details.

 ## Prominent features

@@ -34,7 +46,7 @@ This is a [shared nothing architecture](https://en.wikipedia.org/wiki/Shared-not
 It increases cluster availability, and simplifies cluster maintenance as well as cluster scaling.

 <p align="center">
-  <img src="docs/Cluster-VictoriaMetrics_cluster-scheme.png" width="800">
+  <img src="docs/Cluster-VictoriaMetrics_cluster-scheme.webp" width="800">
 </p>

 ## Multitenancy
@@ -87,13 +99,18 @@ while the `http_requests_total{path="/bar"} 34` would be stored in the tenant `a
 The `vm_account_id` and `vm_project_id` labels are extracted after applying the [relabeling](https://docs.victoriametrics.com/relabeling.html)
 set via `-relabelConfig` command-line flag, so these labels can be set at this stage.

+The `vm_account_id` and `vm_project_id` labels are also taken into account when ingesting data via non-http-based protocols
+such as [Graphite](https://docs.victoriametrics.com/#how-to-send-data-from-graphite-compatible-agents-such-as-statsd),
+[InfluxDB line protocol via TCP and UDP](https://docs.victoriametrics.com/#how-to-send-data-from-influxdb-compatible-agents-such-as-telegraf) and
+[OpenTSDB telnet put protocol](https://docs.victoriametrics.com/#sending-data-via-telnet-put-protocol).
+
 **Security considerations:** it is recommended restricting access to `multitenant` endpoints only to trusted sources,
 since untrusted source may break per-tenant data by writing unwanted samples to arbitrary tenants.


 ## Binaries

-Compiled binaries for the cluster version are available in the `assets` section of the [releases page](https://github.com/VictoriaMetrics/VictoriaMetrics/releases).
+Compiled binaries for the cluster version are available in the `assets` section of the [releases page](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest).
 Also see archives containing the word `cluster`.

 Docker images for the cluster version are available here:
@@ -129,7 +146,7 @@ vmstorage-prod

 ### Development Builds

-1. [Install go](https://golang.org/doc/install). The minimum supported version is Go 1.18.
+1. [Install go](https://golang.org/doc/install). The minimum supported version is Go 1.20.
 1. Run `make` from [the repository root](https://github.com/VictoriaMetrics/VictoriaMetrics). It should build `vmstorage`, `vmselect`
   and `vminsert` binaries and put them into the `bin` folder.

@@ -218,7 +235,7 @@ the following approaches for automatic discovery of `vmstorage` nodes:
  The list of discovered `vmstorage` nodes is automatically updated when the file contents changes.
  The update frequency can be controlled with `-storageNode.discoveryInterval` command-line flag.

- [dns+srv](https://en.wikipedia.org/wiki/SRV_record) - pass `dns+src:some-name` value to `-storageNode` command-line flag.
+- [dns+srv](https://en.wikipedia.org/wiki/SRV_record) - pass `dns+srv:some-name` value to `-storageNode` command-line flag.
  In this case the provided `dns+srv` names are resolved into tcp addresses of `vmstorage` nodes.
  The list of discovered `vmstorage` nodes is automatically updated at `vminsert` and `vmselect`
  when it changes behind the corresponding `dns+srv` names.
@@ -244,7 +261,7 @@ General security recommendations:
 - All the VictoriaMetrics cluster components must run in protected private network without direct access from untrusted networks such as Internet.
 - External clients must access `vminsert` and `vmselect` via auth proxy such as [vmauth](https://docs.victoriametrics.com/vmauth.html)
  or [vmgateway](https://docs.victoriametrics.com/vmgateway.html).
- The auth proxy must accept auth tokens from untrusted networks only via https in order to protect the auth tokens from eavesdropping.
+- The auth proxy must accept auth tokens from untrusted networks only via https in order to protect the auth tokens from MitM attacks.
 - It is recommended using distinct auth tokens for distinct [tenants](#multitenancy) in order to reduce potential damage in case of compromised auth token for some tenants.
 - Prefer using lists of allowed [API endpoints](#url-format), while disallowing access to other endpoints when configuring auth proxy in front of `vminsert` and `vmselect`.
  This minimizes attack surface.
@@ -267,7 +284,8 @@ When `vmselect` runs with `-clusternativeListenAddr` command-line option, then i

 See [these docs](https://gist.github.com/f41gh7/76ed8e5fb1ebb9737fe746bae9175ee6) on how to set up mTLS in VictoriaMetrics cluster.

-[Enterprise version of VictoriaMetrics](https://docs.victoriametrics.com/enterprise.html) can be downloaded and evaluated for free from [the releases page](https://github.com/VictoriaMetrics/VictoriaMetrics/releases).
+[Enterprise version of VictoriaMetrics](https://docs.victoriametrics.com/enterprise.html) can be downloaded and evaluated for free from [the releases page](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest).
+See how to request a free trial license [here](https://victoriametrics.com/products/enterprise/trial/).

 ## Monitoring

@@ -283,7 +301,7 @@ or Prometheus to scrape `/metrics` pages from all the cluster components, so the
 with [the official Grafana dashboard for VictoriaMetrics cluster](https://grafana.com/grafana/dashboards/11176-victoriametrics-cluster/)
 or [an alternative dashboard for VictoriaMetrics cluster](https://grafana.com/grafana/dashboards/11831). Graphs on these dashboards contain useful hints - hover the `i` icon at the top left corner of each graph in order to read it.

-It is recommended setting up alerts in [vmalert](https://docs.victoriametrics.com/vmalert.html) or in Prometheus from [this config](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/cluster/deployment/docker/alerts.yml).
+It is recommended setting up alerts in [vmalert](https://docs.victoriametrics.com/vmalert.html) or in Prometheus from [this list](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/master/deployment/docker#alerts).
 See more details in the article [VictoriaMetrics Monitoring](https://victoriametrics.com/blog/victoriametrics-monitoring/).

 ## Cardinality limiter
@@ -299,7 +317,11 @@ See more details about cardinality limiter in [these docs](https://docs.victoria

 ## Troubleshooting

-See [troubleshooting docs](https://docs.victoriametrics.com/Troubleshooting.html).
+- If your VictoriaMetrics cluster experiences data ingestion delays during
+  [rolling restarts and configuration updates](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#updating--reconfiguring-cluster-nodes),
+  then see [these docs](#improving-re-routing-performance-during-restart).
+
+[Troubleshooting docs for single-node VictoriaMetrics](https://docs.victoriametrics.com/Troubleshooting.html) apply to VictoriaMetrics cluster as well.

 ## Readonly mode

@@ -329,9 +351,10 @@ Check practical examples of VictoriaMetrics API [here](https://docs.victoriametr
    - `prometheus/api/v1/import/native` - for importing data obtained via `api/v1/export/native` on `vmselect` (see below).
    - `prometheus/api/v1/import/csv` - for importing arbitrary CSV data. See [these docs](https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#how-to-import-csv-data) for details.
    - `prometheus/api/v1/import/prometheus` - for importing data in [Prometheus text exposition format](https://github.com/prometheus/docs/blob/master/content/docs/instrumenting/exposition_formats.md#text-based-format) and in [OpenMetrics format](https://github.com/OpenObservability/OpenMetrics/blob/master/specification/OpenMetrics.md). This endpoint also supports [Pushgateway protocol](https://github.com/prometheus/pushgateway#url). See [these docs](https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#how-to-import-data-in-prometheus-exposition-format) for details.
-    - `opentemetry/api/v1/push` - for ingesting data via [OpenTelemetry protocol for metrics](https://github.com/open-telemetry/opentelemetry-specification/blob/ffddc289462dfe0c2041e3ca42a7b1df805706de/specification/metrics/data-model.md). See [these docs](https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#sending-data-via-opentelemetry).
+    - `opentelemetry/api/v1/push` - for ingesting data via [OpenTelemetry protocol for metrics](https://github.com/open-telemetry/opentelemetry-specification/blob/ffddc289462dfe0c2041e3ca42a7b1df805706de/specification/metrics/data-model.md). See [these docs](https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#sending-data-via-opentelemetry).
    - `datadog/api/v1/series` - for ingesting data with [DataDog submit metrics API](https://docs.datadoghq.com/api/latest/metrics/#submit-metrics). See [these docs](https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#how-to-send-data-from-datadog-agent) for details.
    - `influx/write` and `influx/api/v2/write` - for ingesting data with [InfluxDB line protocol](https://docs.influxdata.com/influxdb/v1.7/write_protocols/line_protocol_tutorial/). See [these docs](https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#how-to-send-data-from-influxdb-compatible-agents-such-as-telegraf) for details.
+    - `newrelic/infra/v2/metrics/events/bulk` - for accepting data from [NewRelic infrastructure agent](https://docs.newrelic.com/docs/infrastructure/install-infrastructure-agent). See [these docs](https://docs.victoriametrics.com/#how-to-send-data-from-newrelic-agent) for details.
    - `opentsdb/api/put` - for accepting [OpenTSDB HTTP /api/put requests](http://opentsdb.net/docs/build/html/api_http/put.html). This handler is disabled by default. It is exposed on a distinct TCP address set via `-opentsdbHTTPListenAddr` command-line flag. See [these docs](https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#sending-opentsdb-data-via-http-apiput-requests) for details.

 - URLs for [Prometheus querying API](https://prometheus.io/docs/prometheus/latest/querying/api/): `http://<vmselect>:8481/select/<accountID>/prometheus/<suffix>`, where:
@@ -445,8 +468,22 @@ This strategy allows upgrading the cluster without downtime if the following con
 - The updated config / upgraded binary is compatible with the remaining components in the cluster.
  See the [CHANGELOG](https://docs.victoriametrics.com/CHANGELOG.html) for compatibility notes between different releases.

-  If at least a single condition isn't met, then the rolling restart may result in cluster unavailability
-  during the config update / version upgrade. In this case the following strategy is recommended.
+If at least a single condition isn't met, then the rolling restart may result in cluster unavailability
+during the config update / version upgrade. In this case the following strategy is recommended.
+
+#### Improving re-routing performance during restart
+
+`vmstorage` nodes may experience increased usage for CPU, RAM and disk IO during
+[rolling restarts](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#no-downtime-strategy),
+since they need to process higher load when some of `vmstorage` nodes are temporarily unavailable in the cluster.
+It is possible to reduce resource usage spikes by running more `vminsert` nodes and by passing bigger values
+to `-storage.vminsertConnsShutdownDuration` (available from [v1.95.0](https://docs.victoriametrics.com/CHANGELOG.html#v1950))
+command-line flag at `vmstorage` nodes.
+In this case `vmstorage` increases the interval between gradual closing of `vminsert` connections during graceful shutdown.
+This reduces data ingestion slowdown during rolling restarts.
+
+Make sure that the `-storage.vminsertConnsShutdownDuration` is smaller than the graceful shutdown timeout configured at the system which manages `vmstorage`
+(e.g. Docker, Kubernetes, systemd, etc.). Otherwise the system may kill `vmstorage` node before it finishes gradual closing of `vminsert` connections.

 ### Minimum downtime strategy

@@ -720,13 +757,24 @@ For example, the following config sets retention to 5 days for time series with

 See also [these docs](https://docs.victoriametrics.com/#retention-filters) for additional details on retention filters.

-Enterprise binaries can be downloaded and evaluated for free from [the releases page](https://github.com/VictoriaMetrics/VictoriaMetrics/releases).
+Enterprise binaries can be downloaded and evaluated for free from [the releases page](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest).
+See how to request a free trial license [here](https://victoriametrics.com/products/enterprise/trial/).

 ## Downsampling

-Downsampling is available in [enterprise version of VictoriaMetrics](https://docs.victoriametrics.com/enterprise.html). It is configured with `-downsampling.period` command-line flag. The same flag value must be passed to both `vmstorage` and `vmselect` nodes. See [these docs](https://docs.victoriametrics.com/#downsampling) for details.
+Downsampling is available in [enterprise version of VictoriaMetrics](https://docs.victoriametrics.com/enterprise.html).
+It is configured with `-downsampling.period` command-line flag according to [these docs](https://docs.victoriametrics.com/#downsampling).

-Enterprise binaries can be downloaded and evaluated for free from [the releases page](https://github.com/VictoriaMetrics/VictoriaMetrics/releases).
+The same flag value must be passed to both `vmstorage` and `vmselect` nodes. Configuring `vmselect` node with `-downsampling.period`
+command-line flag makes query results more consistent, because `vmselect` uses the maximum configured downsampling interval
+on the requested time range if this time range covers multiple downsampling levels.
+For example, if `-downsampling.period=30d:5m` and the query requests the last 60 days of data, then `vmselect`
+downsamples all the [raw samples](https://docs.victoriametrics.com/keyConcepts.html#raw-samples) on the requested time range
+using 5 minute interval. If `-downsampling.period` command-line flag isn't set at `vmselect`,
+then query results can be less consistent because of mixing raw and downsampled data.
+
+Enterprise binaries can be downloaded and evaluated for free from [the releases page](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest).
+See how to request a free trial license [here](https://victoriametrics.com/products/enterprise/trial/).

 ## Profiling

@@ -771,6 +819,17 @@ For accessing vmalerts UI through vmselect configure `-vmalert.proxyURL` flag an

 ## Community and contributions

+Feel free to ask any questions regarding VictoriaMetrics:
+
+* [Slack](https://slack.victoriametrics.com/)
+* [Twitter](https://twitter.com/VictoriaMetrics/)
+* [Linkedin](https://www.linkedin.com/company/victoriametrics/)
+* [Reddit](https://www.reddit.com/r/VictoriaMetrics/)
+* [Telegram-en](https://t.me/VictoriaMetrics_en)
+* [Telegram-ru](https://t.me/VictoriaMetrics_ru1)
+* [Google groups](https://groups.google.com/forum/#!forum/victorametrics-users)
+* [Mastodon](https://mastodon.social/@victoriametrics/)
+
 We are open to third-party pull requests provided they follow the [KISS design principle](https://en.wikipedia.org/wiki/KISS_principle):

 - Prefer simple code and architecture.
@@ -810,21 +869,23 @@ Below is the output for `/path/to/vminsert -help`:
  -cacheExpireDuration duration
     Items are removed from in-memory caches after they aren't accessed for this duration. Lower values may reduce memory usage at the cost of higher CPU usage. See also -prevCacheRemovalPercent (default 30m0s)
  -cluster.tls
-     Whether to use TLS for connections to -storageNode. See https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#mtls-protection . This flag is available only in enterprise version of VictoriaMetrics
+     Whether to use TLS for connections to -storageNode. See https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#mtls-protection . This flag is available only in VictoriaMetrics enterprise. See https://docs.victoriametrics.com/enterprise.html
  -cluster.tlsCAFile string
-     Path to TLS CA file to use for verifying certificates provided by -storageNode if -cluster.tls flag is set. By default system CA is used. See https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#mtls-protection . This flag is available only in enterprise version of VictoriaMetrics
+     Path to TLS CA file to use for verifying certificates provided by -storageNode if -cluster.tls flag is set. By default system CA is used. See https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#mtls-protection . This flag is available only in VictoriaMetrics enterprise. See https://docs.victoriametrics.com/enterprise.html
  -cluster.tlsCertFile string
-     Path to client-side TLS certificate file to use when connecting to -storageNode if -cluster.tls flag is set. See https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#mtls-protection . This flag is available only in enterprise version of VictoriaMetrics
+     Path to client-side TLS certificate file to use when connecting to -storageNode if -cluster.tls flag is set. See https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#mtls-protection . This flag is available only in VictoriaMetrics enterprise. See https://docs.victoriametrics.com/enterprise.html
  -cluster.tlsInsecureSkipVerify
-     Whether to skip verification of TLS certificates provided by -storageNode nodes if -cluster.tls flag is set. Note that disabled TLS certificate verification breaks security. This flag is available only in enterprise version of VictoriaMetrics
+     Whether to skip verification of TLS certificates provided by -storageNode nodes if -cluster.tls flag is set. Note that disabled TLS certificate verification breaks security. This flag is available only in VictoriaMetrics enterprise. See https://docs.victoriametrics.com/enterprise.html
  -cluster.tlsKeyFile string
-     Path to client-side TLS key file to use when connecting to -storageNode if -cluster.tls flag is set. See https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#mtls-protection . This flag is available only in enterprise version of VictoriaMetrics
+     Path to client-side TLS key file to use when connecting to -storageNode if -cluster.tls flag is set. See https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#mtls-protection . This flag is available only in VictoriaMetrics enterprise. See https://docs.victoriametrics.com/enterprise.html
+  -clusternative.vminsertConnsShutdownDuration duration
+     The time needed for gradual closing of upstream vminsert connections during graceful shutdown. Bigger duration reduces spikes in CPU, RAM and disk IO load on the remaining lower-level clusters during rolling restart. Smaller duration reduces the time needed to close all the upstream vminsert connections, thus reducing the time for graceful shutdown. See https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#improving-re-routing-performance-during-restart (default 25s)
  -clusternativeListenAddr string
     TCP address to listen for data from other vminsert nodes in multi-level cluster setup. See https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#multi-level-cluster-setup . Usually :8400 should be set to match default vmstorage port for vminsert. Disabled work if empty
  -csvTrimTimestamp duration
     Trim timestamps when importing csv data to this duration. Minimum practical duration is 1ms. Higher duration (i.e. 1s) may be used for reducing disk space usage for timestamp data (default 1ms)
  -datadog.maxInsertRequestSize size
-     The maximum size in bytes of a single DataDog POST request to /api/v1/series
+     The maximum size in bytes of a single DataDog POST request to /api/v1/series, /api/v2/series, /api/beta/sketches
     Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 67108864)
  -datadog.sanitizeMetricName
     Sanitize metric names for the ingested DataDog data to comply with DataDog behaviour described at https://docs.datadoghq.com/metrics/custom_metrics/#naming-custom-metrics (default true)
@@ -841,7 +902,9 @@ Below is the output for `/path/to/vminsert -help`:
  -envflag.prefix string
     Prefix for environment variables if -envflag.enable is set
  -eula
-     By specifying this flag, you confirm that you have an enterprise license and accept the EULA https://victoriametrics.com/assets/VM_EULA.pdf . This flag is available only in VictoriaMetrics enterprise. See https://docs.victoriametrics.com/enterprise.html
+     Deprecated, please use -license or -licenseFile flags instead. By specifying this flag, you confirm that you have an enterprise license and accept the ESA https://victoriametrics.com/legal/esa/ . This flag is available only in Enterprise binaries. See https://docs.victoriametrics.com/enterprise.html
+  -filestream.disableFadvise
+     Whether to disable fadvise() syscall when reading large data files. The fadvise() syscall prevents eviction of recently accessed data from OS page cache during background merges and backups. In some rare cases it is better to disable the syscall if it uses too much CPU
  -flagsAuthKey string
     Auth key for /flags endpoint. It must be passed via authKey query arg. It overrides httpAuth.* settings
  -fs.disableMmap
@@ -856,6 +919,12 @@ Below is the output for `/path/to/vminsert -help`:
     Incoming http connections are closed after the configured timeout. This may help to spread the incoming load among a cluster of services behind a load balancer. Please note that the real timeout may be bigger by up to 10% as a protection against the thundering herd problem (default 2m0s)
  -http.disableResponseCompression
     Disable compression of HTTP responses to save CPU resources. By default, compression is enabled to save network bandwidth
+  -http.header.csp string
+     Value for 'Content-Security-Policy' header
+  -http.header.frameOptions string
+     Value for 'X-Frame-Options' header
+  -http.header.hsts string
+     Value for 'Strict-Transport-Security' header
  -http.idleConnTimeout duration
     Timeout for incoming idle http connections (default 1m0s)
  -http.maxGracefulShutdownDuration duration
@@ -874,7 +943,7 @@ Below is the output for `/path/to/vminsert -help`:
     Whether to use proxy protocol for connections accepted at -httpListenAddr . See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt . With enabled proxy protocol http server cannot serve regular /metrics endpoint. Use -pushmetrics.url for metrics pushing
  -import.maxLineLen size
     The maximum length in bytes of a single line accepted by /api/v1/import; the line length can be limited with 'max_rows_per_line' query arg passed to /api/v1/export
-     Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 104857600)
+     Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 10485760)
  -influx.databaseNames array
     Comma-separated list of database names to return from /query and /influx/query API. This can be needed for accepting data from Telegraf plugins such as https://github.com/fangli/fluent-plugin-influxdb
     Supports an array of values separated by comma or specified via multiple flags.
@@ -903,6 +972,12 @@ Below is the output for `/path/to/vminsert -help`:
     Whether to disable caches for interned strings. This may reduce memory usage at the cost of higher CPU usage. See https://en.wikipedia.org/wiki/String_interning . See also -internStringCacheExpireDuration and -internStringMaxLen
  -internStringMaxLen int
     The maximum length for strings to intern. A lower limit may save memory at the cost of higher CPU usage. See https://en.wikipedia.org/wiki/String_interning . See also -internStringDisableCache and -internStringCacheExpireDuration (default 500)
+  -license string
+     License key for VictoriaMetrics Enterprise. See https://victoriametrics.com/products/enterprise/ . Trial Enterprise license can be obtained from https://victoriametrics.com/products/enterprise/trial/ . This flag is available only in Enterprise binaries. The license key can be also passed via file specified by -licenseFile command-line flag
+  -license.forceOffline
+     Whether to enable offline verification for VictoriaMetrics Enterprise license key, which has been passed either via -license or via -licenseFile command-line flag. The issued license key must support offline verification feature. Contact info@victoriametrics.com if you need offline license verification. This flag is available only in Enterprise binaries
+  -licenseFile string
+     Path to file with license key for VictoriaMetrics Enterprise. See https://victoriametrics.com/products/enterprise/ . Trial Enterprise license can be obtained from https://victoriametrics.com/products/enterprise/trial/ . This flag is available only in Enterprise binaries. The license key can be also passed inline via -license command-line flag
  -loggerDisableTimestamps
     Whether to disable writing timestamps in logs
  -loggerErrorsPerSecondLimit int
@@ -913,6 +988,8 @@ Below is the output for `/path/to/vminsert -help`:
     Allows renaming fields in JSON formatted logs. Example: "ts:timestamp,msg:message" renames "ts" to "timestamp" and "msg" to "message". Supported fields: ts, level, caller, msg
  -loggerLevel string
     Minimum level of errors to log. Possible values: INFO, WARN, ERROR, FATAL, PANIC (default "INFO")
+  -loggerMaxArgLen int
+     The maximum length of a single logged argument. Longer arguments are replaced with 'arg_start..arg_end', where 'arg_start' and 'arg_end' are the prefix and suffix of the arg with the length not exceeding -loggerMaxArgLen / 2 (default 500)
  -loggerOutput string
     Output for the logs. Supported values: stderr, stdout (default "stderr")
  -loggerTimezone string
@@ -920,7 +997,7 @@ Below is the output for `/path/to/vminsert -help`:
  -loggerWarnsPerSecondLimit int
     Per-second limit on the number of WARN messages. If more than the given number of warns are emitted per second, then the remaining warns are suppressed. Zero values disable the rate limit
  -maxConcurrentInserts int
-     The maximum number of concurrent insert requests. The default value should work for most cases, since it minimizes memory usage. The default value can be increased when clients send data over slow networks. See also -insert.maxQueueDuration (default 8)
+     The maximum number of concurrent insert requests. Default value should work for most cases, since it minimizes the memory usage. The default value can be increased when clients send data over slow networks. See also -insert.maxQueueDuration (default 8)
  -maxInsertRequestSize size
     The maximum size in bytes of a single Prometheus remote_write API request
     Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 33554432)
@@ -935,6 +1012,9 @@ Below is the output for `/path/to/vminsert -help`:
     Allowed percent of system memory VictoriaMetrics caches may occupy. See also -memory.allowedBytes. Too low a value may increase cache miss rate usually resulting in higher CPU and disk IO usage. Too high a value may evict too much data from the OS page cache which will result in higher disk IO usage (default 60)
  -metricsAuthKey string
     Auth key for /metrics endpoint. It must be passed via authKey query arg. It overrides httpAuth.* settings
+  -newrelic.maxInsertRequestSize size
+     The maximum size in bytes of a single NewRelic request to /newrelic/infra/v2/metrics/events/bulk
+     Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 67108864)
  -opentsdbHTTPListenAddr string
     TCP address to listen for OpenTSDB HTTP put requests. Usually :4242 must be set. Doesn't work if empty. See also -opentsdbHTTPListenAddr.useProxyProtocol
  -opentsdbHTTPListenAddr.useProxyProtocol
@@ -993,7 +1073,9 @@ Below is the output for `/path/to/vminsert -help`:
  -version
     Show VictoriaMetrics version
  -vmstorageDialTimeout duration
-     Timeout for establishing RPC connections from vminsert to vmstorage (default 5s)
+     Timeout for establishing RPC connections from vminsert to vmstorage. See also -vmstorageUserTimeout (default 3s)
+  -vmstorageUserTimeout duration
+     Network timeout for RPC connections from vminsert to vmstorage (Linux only). Lower values speed up re-routing recovery when some of vmstorage nodes become unavailable because of networking issues. Read more about TCP_USER_TIMEOUT at https://blog.cloudflare.com/when-tcp-sockets-refuse-to-die/ . See also -vmstorageDialTimeout (default 3s)
 ```

 ### List of command-line flags for vmselect
@@ -1006,15 +1088,15 @@ Below is the output for `/path/to/vmselect -help`:
  -cacheExpireDuration duration
     Items are removed from in-memory caches after they aren't accessed for this duration. Lower values may reduce memory usage at the cost of higher CPU usage. See also -prevCacheRemovalPercent (default 30m0s)
  -cluster.tls
-     Whether to use TLS for connections to -storageNode. See https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#mtls-protection . This flag is available only in enterprise version of VictoriaMetrics
+     Whether to use TLS for connections to -storageNode. See https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#mtls-protection . This flag is available only in VictoriaMetrics enterprise. See https://docs.victoriametrics.com/enterprise.html
  -cluster.tlsCAFile string
-     Path to TLS CA file to use for verifying certificates provided by -storageNode if -cluster.tls flag is set. By default system CA is used. See https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#mtls-protection . This flag is available only in enterprise version of VictoriaMetrics
+     Path to TLS CA file to use for verifying certificates provided by -storageNode if -cluster.tls flag is set. By default system CA is used. See https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#mtls-protection . This flag is available only in VictoriaMetrics enterprise. See https://docs.victoriametrics.com/enterprise.html
  -cluster.tlsCertFile string
-     Path to client-side TLS certificate file to use when connecting to -storageNode if -cluster.tls flag is set. See https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#mtls-protection . This flag is available only in enterprise version of VictoriaMetrics
+     Path to client-side TLS certificate file to use when connecting to -storageNode if -cluster.tls flag is set. See https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#mtls-protection . This flag is available only in VictoriaMetrics enterprise. See https://docs.victoriametrics.com/enterprise.html
  -cluster.tlsInsecureSkipVerify
-     Whether to skip verification of TLS certificates provided by -storageNode nodes if -cluster.tls flag is set. Note that disabled TLS certificate verification breaks security. This flag is available only in enterprise version of VictoriaMetrics
+     Whether to skip verification of TLS certificates provided by -storageNode nodes if -cluster.tls flag is set. Note that disabled TLS certificate verification breaks security. This flag is available only in VictoriaMetrics enterprise. See https://docs.victoriametrics.com/enterprise.html
  -cluster.tlsKeyFile string
-     Path to client-side TLS key file to use when connecting to -storageNode if -cluster.tls flag is set. See https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#mtls-protection . This flag is available only in enterprise version of VictoriaMetrics
+     Path to client-side TLS key file to use when connecting to -storageNode if -cluster.tls flag is set. See https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#mtls-protection . This flag is available only in VictoriaMetrics enterprise. See https://docs.victoriametrics.com/enterprise.html
  -clusternative.disableCompression
     Whether to disable compression of the data sent to vmselect via -clusternativeListenAddr. This reduces CPU usage at the cost of higher network bandwidth usage
  -clusternative.maxConcurrentRequests int
@@ -1028,18 +1110,18 @@ Below is the output for `/path/to/vmselect -help`:
  -clusternative.maxTagValues int
     The maximum number of tag values returned per search at -clusternativeListenAddr (default 100000)
  -clusternative.tls
-     Whether to use TLS when accepting connections at -clusternativeListenAddr. See https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#mtls-protection
+     Whether to use TLS when accepting connections at -clusternativeListenAddr. See https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#mtls-protection . This flag is available only in VictoriaMetrics enterprise. See https://docs.victoriametrics.com/enterprise.html
  -clusternative.tlsCAFile string
-     Path to TLS CA file to use for verifying certificates provided by vmselect, which connects at -clusternativeListenAddr if -clusternative.tls flag is set. By default system CA is used. See https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#mtls-protection
+     Path to TLS CA file to use for verifying certificates provided by vmselect, which connects at -clusternativeListenAddr if -clusternative.tls flag is set. By default system CA is used. See https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#mtls-protection . This flag is available only in VictoriaMetrics enterprise. See https://docs.victoriametrics.com/enterprise.html
  -clusternative.tlsCertFile string
-     Path to server-side TLS certificate file to use when accepting connections at -clusternativeListenAddr if -clusternative.tls flag is set. See https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#mtls-protection
+     Path to server-side TLS certificate file to use when accepting connections at -clusternativeListenAddr if -clusternative.tls flag is set. See https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#mtls-protection . This flag is available only in VictoriaMetrics enterprise. See https://docs.victoriametrics.com/enterprise.html
  -clusternative.tlsCipherSuites array
-     Optional list of TLS cipher suites used for connections at -clusternativeListenAddr if -clusternative.tls flag is set. See the list of supported cipher suites at https://pkg.go.dev/crypto/tls#pkg-constants
+     Optional list of TLS cipher suites used for connections at -clusternativeListenAddr if -clusternative.tls flag is set. See the list of supported cipher suites at https://pkg.go.dev/crypto/tls#pkg-constants . This flag is available only in VictoriaMetrics enterprise. See https://docs.victoriametrics.com/enterprise.html
     Supports an array of values separated by comma or specified via multiple flags.
  -clusternative.tlsInsecureSkipVerify
-     Whether to skip verification of TLS certificates provided by vmselect, which connects to -clusternativeListenAddr if -clusternative.tls flag is set. Note that disabled TLS certificate verification breaks security
+     Whether to skip verification of TLS certificates provided by vmselect, which connects to -clusternativeListenAddr if -clusternative.tls flag is set. Note that disabled TLS certificate verification breaks security. This flag is available only in VictoriaMetrics enterprise. See https://docs.victoriametrics.com/enterprise.html
  -clusternative.tlsKeyFile string
-     Path to server-side TLS key file to use when accepting connections at -clusternativeListenAddr if -clusternative.tls flag is set. See https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#mtls-protection
+     Path to server-side TLS key file to use when accepting connections at -clusternativeListenAddr if -clusternative.tls flag is set. See https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#mtls-protection . This flag is available only in VictoriaMetrics enterprise. See https://docs.victoriametrics.com/enterprise.html
  -clusternativeListenAddr string
     TCP address to listen for requests from other vmselect nodes in multi-level cluster setup. See https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#multi-level-cluster-setup . Usually :8401 should be set to match default vmstorage port for vmselect. Disabled work if empty
  -dedup.minScrapeInterval duration
@@ -1056,7 +1138,9 @@ Below is the output for `/path/to/vmselect -help`:
  -envflag.prefix string
     Prefix for environment variables if -envflag.enable is set
  -eula
-     By specifying this flag, you confirm that you have an enterprise license and accept the EULA https://victoriametrics.com/assets/VM_EULA.pdf . This flag is available only in VictoriaMetrics enterprise. See https://docs.victoriametrics.com/enterprise.html
+     Deprecated, please use -license or -licenseFile flags instead. By specifying this flag, you confirm that you have an enterprise license and accept the ESA https://victoriametrics.com/legal/esa/ . This flag is available only in Enterprise binaries. See https://docs.victoriametrics.com/enterprise.html
+  -filestream.disableFadvise
+     Whether to disable fadvise() syscall when reading large data files. The fadvise() syscall prevents eviction of recently accessed data from OS page cache during background merges and backups. In some rare cases it is better to disable the syscall if it uses too much CPU
  -flagsAuthKey string
     Auth key for /flags endpoint. It must be passed via authKey query arg. It overrides httpAuth.* settings
  -fs.disableMmap
@@ -1065,6 +1149,12 @@ Below is the output for `/path/to/vmselect -help`:
     Incoming http connections are closed after the configured timeout. This may help to spread the incoming load among a cluster of services behind a load balancer. Please note that the real timeout may be bigger by up to 10% as a protection against the thundering herd problem (default 2m0s)
  -http.disableResponseCompression
     Disable compression of HTTP responses to save CPU resources. By default, compression is enabled to save network bandwidth
+  -http.header.csp string
+     Value for 'Content-Security-Policy' header
+  -http.header.frameOptions string
+     Value for 'X-Frame-Options' header
+  -http.header.hsts string
+     Value for 'Strict-Transport-Security' header
  -http.idleConnTimeout duration
     Timeout for incoming idle http connections (default 1m0s)
  -http.maxGracefulShutdownDuration duration
@@ -1087,6 +1177,12 @@ Below is the output for `/path/to/vmselect -help`:
     Whether to disable caches for interned strings. This may reduce memory usage at the cost of higher CPU usage. See https://en.wikipedia.org/wiki/String_interning . See also -internStringCacheExpireDuration and -internStringMaxLen
  -internStringMaxLen int
     The maximum length for strings to intern. A lower limit may save memory at the cost of higher CPU usage. See https://en.wikipedia.org/wiki/String_interning . See also -internStringDisableCache and -internStringCacheExpireDuration (default 500)
+  -license string
+     License key for VictoriaMetrics Enterprise. See https://victoriametrics.com/products/enterprise/ . Trial Enterprise license can be obtained from https://victoriametrics.com/products/enterprise/trial/ . This flag is available only in Enterprise binaries. The license key can be also passed via file specified by -licenseFile command-line flag
+  -license.forceOffline
+     Whether to enable offline verification for VictoriaMetrics Enterprise license key, which has been passed either via -license or via -licenseFile command-line flag. The issued license key must support offline verification feature. Contact info@victoriametrics.com if you need offline license verification. This flag is available only in Enterprise binaries
+  -licenseFile string
+     Path to file with license key for VictoriaMetrics Enterprise. See https://victoriametrics.com/products/enterprise/ . Trial Enterprise license can be obtained from https://victoriametrics.com/products/enterprise/trial/ . This flag is available only in Enterprise binaries. The license key can be also passed inline via -license command-line flag
  -loggerDisableTimestamps
     Whether to disable writing timestamps in logs
  -loggerErrorsPerSecondLimit int
@@ -1097,6 +1193,8 @@ Below is the output for `/path/to/vmselect -help`:
     Allows renaming fields in JSON formatted logs. Example: "ts:timestamp,msg:message" renames "ts" to "timestamp" and "msg" to "message". Supported fields: ts, level, caller, msg
  -loggerLevel string
     Minimum level of errors to log. Possible values: INFO, WARN, ERROR, FATAL, PANIC (default "INFO")
+  -loggerMaxArgLen int
+     The maximum length of a single logged argument. Longer arguments are replaced with 'arg_start..arg_end', where 'arg_start' and 'arg_end' are the prefix and suffix of the arg with the length not exceeding -loggerMaxArgLen / 2 (default 500)
  -loggerOutput string
     Output for the logs. Supported values: stderr, stdout (default "stderr")
  -loggerTimezone string
@@ -1137,7 +1235,7 @@ Below is the output for `/path/to/vmselect -help`:
  -search.latencyOffset duration
     The time when data points become visible in query results after the collection. It can be overridden on per-query basis via latency_offset arg. Too small value can result in incomplete last points for query results (default 30s)
  -search.logQueryMemoryUsage size
-     Log queries, which require more memory than specified by this flag. This may help detecting and optimizing heavy queries. Query logging is disabled by default. See also -search.logSlowQueryDuration and -search.maxMemoryPerQuery
+     Log query and increment vm_memory_intensive_queries_total metric each time the query requires more memory than specified by this flag. This may help detecting and optimizing heavy queries. Query logging is disabled by default. See also -search.logSlowQueryDuration and -search.maxMemoryPerQuery
     Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 0)
  -search.logSlowQueryDuration duration
     Log queries with execution time exceeding this value. Zero disables slow query logging. See also -search.logQueryMemoryUsage (default 5s)
@@ -1191,8 +1289,13 @@ Below is the output for `/path/to/vmselect -help`:
     The maximum number of tag value suffixes returned from /metrics/find (default 100000)
  -search.maxUniqueTimeseries int
     The maximum number of unique time series, which can be selected during /api/v1/query and /api/v1/query_range queries. This option allows limiting memory usage (default 300000)
+  -search.maxWorkersPerQuery int
+     The maximum number of CPU cores a single query can use. The default value should work well for most cases. The flag can be set to lower values for improving performance of big number of concurrently executed queries. The flag can be set to bigger values for improving performance of heavy queries, which scan big number of time series (>10K) and/or big number of samples (>100M). There is no sense in setting this flag to values bigger than the number of CPU cores available on the system (default 4)
  -search.minStalenessInterval duration
     The minimum interval for staleness calculations. This flag could be useful for removing gaps on graphs generated from time series with irregular intervals between samples. See also '-search.maxStalenessInterval'
+  -search.minWindowForInstantRollupOptimization value
+     Enable cache-based optimization for repeated queries to /api/v1/query (aka instant queries), which contain rollup functions with lookbehind window exceeding the given value
+     The following optional suffixes are supported: s (second), m (minute), h (hour), d (day), w (week), y (year). If suffix isn't set, then the duration is counted in months (default 6h)
  -search.noStaleMarkers
     Set this flag to true if the database doesn't contain Prometheus stale markers, so there is no need in spending additional CPU time on its handling. Staleness markers may exist only in data obtained from Prometheus scrape targets
  -search.queryStats.lastQueriesCount int
@@ -1233,7 +1336,9 @@ Below is the output for `/path/to/vmselect -help`:
  -vmalert.proxyURL string
     Optional URL for proxying requests to vmalert. For example, if -vmalert.proxyURL=http://vmalert:8880 , then alerting API requests such as /api/v1/rules from Grafana will be proxied to http://vmalert:8880/api/v1/rules
  -vmstorageDialTimeout duration
-     Timeout for establishing RPC connections from vmselect to vmstorage (default 5s)
+     Timeout for establishing RPC connections from vmselect to vmstorage. See also -vmstorageUserTimeout (default 3s)
+  -vmstorageUserTimeout duration
+     Network timeout for RPC connections from vmselect to vmstorage (Linux only). Lower values reduce the maximum query durations when some vmstorage nodes become unavailable because of networking issues. Read more about TCP_USER_TIMEOUT at https://blog.cloudflare.com/when-tcp-sockets-refuse-to-die/ . See also -vmstorageDialTimeout (default 3s)
  -vmui.customDashboardsPath string
     Optional path to vmui dashboards. See https://github.com/VictoriaMetrics/VictoriaMetrics/tree/master/app/vmui/packages/vmui/public/dashboards
 ```
@@ -1245,6 +1350,8 @@ Below is the output for `/path/to/vmstorage -help`:
 ```
  -bigMergeConcurrency int
     Deprecated: this flag does nothing. Please use -smallMergeConcurrency for controlling the concurrency of background merges. See https://docs.victoriametrics.com/#storage
+  -blockcache.missesBeforeCaching int
+     The number of cache misses before putting the block into cache. Higher values may reduce indexdb/dataBlocks cache size at the cost of higher CPU and disk read usage (default 2)
  -cacheExpireDuration duration
     Items are removed from in-memory caches after they aren't accessed for this duration. Lower values may reduce memory usage at the cost of higher CPU usage. See also -prevCacheRemovalPercent (default 30m0s)
  -cluster.tls
@@ -1276,7 +1383,9 @@ Below is the output for `/path/to/vmstorage -help`:
  -envflag.prefix string
     Prefix for environment variables if -envflag.enable is set
  -eula
-     By specifying this flag, you confirm that you have an enterprise license and accept the EULA https://victoriametrics.com/assets/VM_EULA.pdf . This flag is available only in VictoriaMetrics enterprise. See https://docs.victoriametrics.com/enterprise.html
+     Deprecated, please use -license or -licenseFile flags instead. By specifying this flag, you confirm that you have an enterprise license and accept the ESA https://victoriametrics.com/legal/esa/ . This flag is available only in Enterprise binaries. See https://docs.victoriametrics.com/enterprise.html
+  -filestream.disableFadvise
+     Whether to disable fadvise() syscall when reading large data files. The fadvise() syscall prevents eviction of recently accessed data from OS page cache during background merges and backups. In some rare cases it is better to disable the syscall if it uses too much CPU
  -finalMergeDelay duration
     The delay before starting final merge for per-month partition after no new data is ingested into it. Final merge may require additional disk IO and CPU resources. Final merge may increase query speed and reduce disk space usage in some cases. Zero value disables final merge
  -flagsAuthKey string
@@ -1291,6 +1400,12 @@ Below is the output for `/path/to/vmstorage -help`:
     Incoming http connections are closed after the configured timeout. This may help to spread the incoming load among a cluster of services behind a load balancer. Please note that the real timeout may be bigger by up to 10% as a protection against the thundering herd problem (default 2m0s)
  -http.disableResponseCompression
     Disable compression of HTTP responses to save CPU resources. By default, compression is enabled to save network bandwidth
+  -http.header.csp string
+     Value for 'Content-Security-Policy' header
+  -http.header.frameOptions string
+     Value for 'X-Frame-Options' header
+  -http.header.hsts string
+     Value for 'Strict-Transport-Security' header
  -http.idleConnTimeout duration
     Timeout for incoming idle http connections (default 1m0s)
  -http.maxGracefulShutdownDuration duration
@@ -1315,6 +1430,12 @@ Below is the output for `/path/to/vmstorage -help`:
     Whether to disable caches for interned strings. This may reduce memory usage at the cost of higher CPU usage. See https://en.wikipedia.org/wiki/String_interning . See also -internStringCacheExpireDuration and -internStringMaxLen
  -internStringMaxLen int
     The maximum length for strings to intern. A lower limit may save memory at the cost of higher CPU usage. See https://en.wikipedia.org/wiki/String_interning . See also -internStringDisableCache and -internStringCacheExpireDuration (default 500)
+  -license string
+     License key for VictoriaMetrics Enterprise. See https://victoriametrics.com/products/enterprise/ . Trial Enterprise license can be obtained from https://victoriametrics.com/products/enterprise/trial/ . This flag is available only in Enterprise binaries. The license key can be also passed via file specified by -licenseFile command-line flag
+  -license.forceOffline
+     Whether to enable offline verification for VictoriaMetrics Enterprise license key, which has been passed either via -license or via -licenseFile command-line flag. The issued license key must support offline verification feature. Contact info@victoriametrics.com if you need offline license verification. This flag is available only in Enterprise binaries
+  -licenseFile string
+     Path to file with license key for VictoriaMetrics Enterprise. See https://victoriametrics.com/products/enterprise/ . Trial Enterprise license can be obtained from https://victoriametrics.com/products/enterprise/trial/ . This flag is available only in Enterprise binaries. The license key can be also passed inline via -license command-line flag
  -logNewSeries
     Whether to log new series. This option is for debug purposes only. It can lead to performance issues when big number of new series are ingested into VictoriaMetrics
  -loggerDisableTimestamps
@@ -1327,6 +1448,8 @@ Below is the output for `/path/to/vmstorage -help`:
     Allows renaming fields in JSON formatted logs. Example: "ts:timestamp,msg:message" renames "ts" to "timestamp" and "msg" to "message". Supported fields: ts, level, caller, msg
  -loggerLevel string
     Minimum level of errors to log. Possible values: INFO, WARN, ERROR, FATAL, PANIC (default "INFO")
+  -loggerMaxArgLen int
+     The maximum length of a single logged argument. Longer arguments are replaced with 'arg_start..arg_end', where 'arg_start' and 'arg_end' are the prefix and suffix of the arg with the length not exceeding -loggerMaxArgLen / 2 (default 500)
  -loggerOutput string
     Output for the logs. Supported values: stderr, stdout (default "stderr")
  -loggerTimezone string
@@ -1334,7 +1457,7 @@ Below is the output for `/path/to/vmstorage -help`:
  -loggerWarnsPerSecondLimit int
     Per-second limit on the number of WARN messages. If more than the given number of warns are emitted per second, then the remaining warns are suppressed. Zero values disable the rate limit
  -maxConcurrentInserts int
-     The maximum number of concurrent insert requests. The default value should work for most cases, since it minimizes memory usage. The default value can be increased when clients send data over slow networks. See also -insert.maxQueueDuration (default 8)
+     The maximum number of concurrent insert requests. Default value should work for most cases, since it minimizes the memory usage. The default value can be increased when clients send data over slow networks. See also -insert.maxQueueDuration (default 8)
  -memory.allowedBytes size
     Allowed size of system memory VictoriaMetrics caches may occupy. This option overrides -memory.allowedPercent if set to a non-zero value. Too low a value may increase the cache miss rate usually resulting in higher CPU and disk IO usage. Too high a value may evict too much data from the OS page cache resulting in higher disk IO usage
     Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 0)
@@ -1361,7 +1484,7 @@ Below is the output for `/path/to/vmstorage -help`:
     Supports an array of values separated by comma or specified via multiple flags.
  -retentionPeriod value
     Data with timestamps outside the retentionPeriod is automatically deleted. The minimum retentionPeriod is 24h or 1d. See also -retentionFilter
-     The following optional suffixes are supported: h (hour), d (day), w (week), y (year). If suffix isn't set, then the duration is counted in months (default 1)
+     The following optional suffixes are supported: s (second), m (minute), h (hour), d (day), w (week), y (year). If suffix isn't set, then the duration is counted in months (default 1)
  -retentionTimezoneOffset duration
     The offset for performing indexdb rotation. If set to 0, then the indexdb rotation is performed at 4am UTC time per each -retentionPeriod. If set to 2h, then the indexdb rotation is performed at 4am EET time (the timezone with +2h offset)
  -rpc.disableCompression
@@ -1386,7 +1509,7 @@ Below is the output for `/path/to/vmstorage -help`:
     The timeout for creating new snapshot. If set, make sure that timeout is lower than backup period
  -snapshotsMaxAge value
     Automatically delete snapshots older than -snapshotsMaxAge if it is set to non-zero duration. Make sure that backup process has enough time to finish the backup before the corresponding snapshot is automatically deleted
-     The following optional suffixes are supported: h (hour), d (day), w (week), y (year). If suffix isn't set, then the duration is counted in months (default 0)
+     The following optional suffixes are supported: s (second), m (minute), h (hour), d (day), w (week), y (year). If suffix isn't set, then the duration is counted in months (default 0)
  -storage.cacheSizeIndexDBDataBlocks size
     Overrides max size for indexdb/dataBlocks cache. See https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#cache-tuning
     Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 0)
@@ -1406,6 +1529,8 @@ Below is the output for `/path/to/vmstorage -help`:
  -storage.minFreeDiskSpaceBytes size
     The minimum free disk space at -storageDataPath after which the storage stops accepting new data
     Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 10000000)
+  -storage.vminsertConnsShutdownDuration duration
+     The time needed for gradual closing of vminsert connections during graceful shutdown. Bigger duration reduces spikes in CPU, RAM and disk IO load on the remaining vmstorage nodes during rolling restart. Smaller duration reduces the time needed to close all the vminsert connections, thus reducing the time for graceful shutdown. See https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#improving-re-routing-performance-during-restart (default 25s)
  -storageDataPath string
     Path to storage data (default "vmstorage-data")
  -tls
--- a/SECURITY.md
+++ b/SECURITY.md
@@ -5,8 +5,8 @@
 | Version | Supported          |
 |---------|--------------------|
 | [latest release](https://docs.victoriametrics.com/CHANGELOG.html) | :white_check_mark: |
+| v1.93.x LTS release | :white_check_mark: |
 | v1.87.x LTS release | :white_check_mark: |
-| v1.79.x LTS release | :white_check_mark: |
 | other releases  | :x:                |

 ## Reporting a Vulnerability
--- a/app/vlinsert/elasticsearch/elasticsearch.go
+++ b/app/vlinsert/elasticsearch/elasticsearch.go
@@ -12,6 +12,8 @@ import (
 	"strings"
 	"time"

+	"github.com/VictoriaMetrics/metrics"
+
 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vlinsert/insertutils"
 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vlstorage"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/bufferedwriter"
@@ -22,7 +24,6 @@ import (
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
-	"github.com/VictoriaMetrics/metrics"
 )

 var (
@@ -93,22 +94,32 @@ func RequestHandler(path string, w http.ResponseWriter, r *http.Request) bool {
 			httpserver.Errorf(w, r, "%s", err)
 			return true
 		}
+		if err := vlstorage.CanWriteData(); err != nil {
+			httpserver.Errorf(w, r, "%s", err)
+			return true
+		}
 		lr := logstorage.GetLogRows(cp.StreamFields, cp.IgnoreFields)
 		processLogMessage := cp.GetProcessLogMessageFunc(lr)
 		isGzip := r.Header.Get("Content-Encoding") == "gzip"
 		n, err := readBulkRequest(r.Body, isGzip, cp.TimeField, cp.MsgField, processLogMessage)
-		if err != nil {
-			logger.Warnf("cannot decode log message #%d in /_bulk request: %s", n, err)
-			return true
-		}
 		vlstorage.MustAddRows(lr)
 		logstorage.PutLogRows(lr)
+		if err != nil {
+			logger.Warnf("cannot decode log message #%d in /_bulk request: %s, stream fields: %s", n, err, cp.StreamFields)
+			return true
+		}

 		tookMs := time.Since(startTime).Milliseconds()
 		bw := bufferedwriter.Get(w)
 		defer bufferedwriter.Put(bw)
 		WriteBulkResponse(bw, n, tookMs)
 		_ = bw.Flush()
+
+		// update bulkRequestDuration only for successfully parsed requests
+		// There is no need in updating bulkRequestDuration for request errors,
+		// since their timings are usually much smaller than the timing for successful request parsing.
+		bulkRequestDuration.UpdateDuration(startTime)
+
 		return true
 	default:
 		return false
@@ -116,7 +127,9 @@ func RequestHandler(path string, w http.ResponseWriter, r *http.Request) bool {
 }

 var (
-	bulkRequestsTotal = metrics.NewCounter(`vl_http_requests_total{path="/insert/elasticsearch/_bulk"}`)
+	bulkRequestsTotal   = metrics.NewCounter(`vl_http_requests_total{path="/insert/elasticsearch/_bulk"}`)
+	rowsIngestedTotal   = metrics.NewCounter(`vl_rows_ingested_total{type="elasticsearch_bulk"}`)
+	bulkRequestDuration = metrics.NewHistogram(`vl_http_request_duration_seconds{path="/insert/elasticsearch/_bulk"}`)
 )

 func readBulkRequest(r io.Reader, isGzip bool, timeField, msgField string,
@@ -162,8 +175,6 @@ func readBulkRequest(r io.Reader, isGzip bool, timeField, msgField string,

 var lineBufferPool bytesutil.ByteBufferPool

-var rowsIngestedTotal = metrics.NewCounter(`vl_rows_ingested_total{type="elasticsearch_bulk"}`)
-
 func readBulkLine(sc *bufio.Scanner, timeField, msgField string,
 	processLogMessage func(timestamp int64, fields []logstorage.Field),
 ) (bool, error) {
@@ -214,6 +225,7 @@ func readBulkLine(sc *bufio.Scanner, timeField, msgField string,
 	p.RenameField(msgField, "_msg")
 	processLogMessage(ts, p.Fields)
 	logjson.PutParser(p)
+
 	return true, nil
 }

--- a/app/vlinsert/elasticsearch/elasticsearch_test.go
+++ b/app/vlinsert/elasticsearch/elasticsearch_test.go
@@ -120,10 +120,10 @@ func compressData(s string) string {
 	var bb bytes.Buffer
 	zw := gzip.NewWriter(&bb)
 	if _, err := zw.Write([]byte(s)); err != nil {
-		panic(fmt.Errorf("unexpected error when compressing data: %s", err))
+		panic(fmt.Errorf("unexpected error when compressing data: %w", err))
 	}
 	if err := zw.Close(); err != nil {
-		panic(fmt.Errorf("unexpected error when closing gzip writer: %s", err))
+		panic(fmt.Errorf("unexpected error when closing gzip writer: %w", err))
 	}
 	return bb.String()
 }
--- a/app/vlinsert/elasticsearch/elasticsearch_timing_test.go
+++ b/app/vlinsert/elasticsearch/elasticsearch_timing_test.go
@@ -43,7 +43,7 @@ func benchmarkReadBulkRequest(b *testing.B, isGzip bool) {
 			r.Reset(dataBytes)
 			_, err := readBulkRequest(r, isGzip, timeField, msgField, processLogMessage)
 			if err != nil {
-				panic(fmt.Errorf("unexpected error: %s", err))
+				panic(fmt.Errorf("unexpected error: %w", err))
 			}
 		}
 	})
--- a/app/vlinsert/insertutils/common_params.go
+++ b/app/vlinsert/insertutils/common_params.go
@@ -3,12 +3,13 @@ package insertutils
 import (
 	"net/http"

+	"github.com/VictoriaMetrics/metrics"
+
 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vlstorage"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/httputils"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage"
-	"github.com/VictoriaMetrics/metrics"
 )

 // CommonParams contains common HTTP parameters used by log ingestion APIs.
@@ -73,12 +74,19 @@ func GetCommonParams(r *http.Request) (*CommonParams, error) {
 // GetProcessLogMessageFunc returns a function, which adds parsed log messages to lr.
 func (cp *CommonParams) GetProcessLogMessageFunc(lr *logstorage.LogRows) func(timestamp int64, fields []logstorage.Field) {
 	return func(timestamp int64, fields []logstorage.Field) {
+		if len(fields) > *MaxFieldsPerLine {
+			rf := logstorage.RowFormatter(fields)
+			logger.Warnf("dropping log line with %d fields; it exceeds -insert.maxFieldsPerLine=%d; %s", len(fields), *MaxFieldsPerLine, rf)
+			rowsDroppedTotalTooManyFields.Inc()
+			return
+		}
+
 		lr.MustAdd(cp.TenantID, timestamp, fields)
 		if cp.Debug {
 			s := lr.GetRowString(0)
 			lr.ResetKeepSettings()
 			logger.Infof("remoteAddr=%s; requestURI=%s; ignoring log entry because of `debug` query arg: %s", cp.DebugRemoteAddr, cp.DebugRequestURI, s)
-			rowsDroppedTotal.Inc()
+			rowsDroppedTotalDebug.Inc()
 			return
 		}
 		if lr.NeedFlush() {
@@ -88,4 +96,5 @@ func (cp *CommonParams) GetProcessLogMessageFunc(lr *logstorage.LogRows) func(ti
 	}
 }

-var rowsDroppedTotal = metrics.NewCounter(`vl_rows_dropped_total{reason="debug"}`)
+var rowsDroppedTotalDebug = metrics.NewCounter(`vl_rows_dropped_total{reason="debug"}`)
+var rowsDroppedTotalTooManyFields = metrics.NewCounter(`vl_rows_dropped_total{reason="too_many_fields"}`)
--- a/app/vlinsert/insertutils/flags.go
+++ b/app/vlinsert/insertutils/flags.go
@@ -1,10 +1,15 @@
 package insertutils

 import (
+	"flag"
+
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
 )

 var (
 	// MaxLineSizeBytes is the maximum length of a single line for /insert/* handlers
 	MaxLineSizeBytes = flagutil.NewBytes("insert.maxLineSizeBytes", 256*1024, "The maximum size of a single line, which can be read by /insert/* handlers")
+
+	// MaxFieldsPerLine is the maximum number of fields per line for /insert/* handlers
+	MaxFieldsPerLine = flag.Int("insert.maxFieldsPerLine", 1000, "The maximum number of log fields per line, which can be read by /insert/* handlers")
 )
--- a/app/vlinsert/jsonline/jsonline.go
+++ b/app/vlinsert/jsonline/jsonline.go
@@ -21,6 +21,7 @@ import (

 // RequestHandler processes jsonline insert requests
 func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
+	startTime := time.Now()
 	w.Header().Add("Content-Type", "application/json")

 	if r.Method != "POST" {
@@ -35,6 +36,10 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
 		httpserver.Errorf(w, r, "%s", err)
 		return true
 	}
+	if err := vlstorage.CanWriteData(); err != nil {
+		httpserver.Errorf(w, r, "%s", err)
+		return true
+	}
 	lr := logstorage.GetLogRows(cp.StreamFields, cp.IgnoreFields)
 	processLogMessage := cp.GetProcessLogMessageFunc(lr)

@@ -77,6 +82,11 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
 	vlstorage.MustAddRows(lr)
 	logstorage.PutLogRows(lr)

+	// update jsonlineRequestDuration only for successfully parsed requests.
+	// There is no need in updating jsonlineRequestDuration for request errors,
+	// since their timings are usually much smaller than the timing for successful request parsing.
+	jsonlineRequestDuration.UpdateDuration(startTime)
+
 	return true
 }

@@ -109,6 +119,7 @@ func readLine(sc *bufio.Scanner, timeField, msgField string, processLogMessage f
 	p.RenameField(msgField, "_msg")
 	processLogMessage(ts, p.Fields)
 	logjson.PutParser(p)
+
 	return true, nil
 }

@@ -144,6 +155,7 @@ func parseISO8601Timestamp(s string) (int64, error) {
 var lineBufferPool bytesutil.ByteBufferPool

 var (
-	requestsTotal     = metrics.NewCounter(`vl_http_requests_total{path="/insert/jsonline"}`)
-	rowsIngestedTotal = metrics.NewCounter(`vl_rows_ingested_total{type="jsonline"}`)
+	requestsTotal           = metrics.NewCounter(`vl_http_requests_total{path="/insert/jsonline"}`)
+	rowsIngestedTotal       = metrics.NewCounter(`vl_rows_ingested_total{type="jsonline"}`)
+	jsonlineRequestDuration = metrics.NewHistogram(`vl_http_request_duration_seconds{path="/insert/jsonline"}`)
 )
--- a/app/vlinsert/loki/loki.go
+++ b/app/vlinsert/loki/loki.go
@@ -5,29 +5,31 @@ import (

 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vlinsert/insertutils"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage"
-	"github.com/VictoriaMetrics/metrics"
-)
-
-var (
-	lokiRequestsJSONTotal     = metrics.NewCounter(`vl_http_requests_total{path="/insert/loki/api/v1/push",format="json"}`)
-	lokiRequestsProtobufTotal = metrics.NewCounter(`vl_http_requests_total{path="/insert/loki/api/v1/push",format="protobuf"}`)
 )

 // RequestHandler processes Loki insert requests
-//
-// See https://grafana.com/docs/loki/latest/api/#push-log-entries-to-loki
 func RequestHandler(path string, w http.ResponseWriter, r *http.Request) bool {
-	if path != "/api/v1/push" {
+	switch path {
+	case "/api/v1/push":
+		return handleInsert(r, w)
+	case "/ready":
+		// See https://grafana.com/docs/loki/latest/api/#identify-ready-loki-instance
+		w.WriteHeader(http.StatusOK)
+		w.Write([]byte("ready"))
+		return true
+	default:
 		return false
 	}
+}
+
+// See https://grafana.com/docs/loki/latest/api/#push-log-entries-to-loki
+func handleInsert(r *http.Request, w http.ResponseWriter) bool {
 	contentType := r.Header.Get("Content-Type")
 	switch contentType {
 	case "application/json":
-		lokiRequestsJSONTotal.Inc()
 		return handleJSON(r, w)
 	default:
-		// Protobuf request body should be handled by default accoring to https://grafana.com/docs/loki/latest/api/#push-log-entries-to-loki
-		lokiRequestsProtobufTotal.Inc()
+		// Protobuf request body should be handled by default according to https://grafana.com/docs/loki/latest/api/#push-log-entries-to-loki
 		return handleProtobuf(r, w)
 	}
 }
--- a/app/vlinsert/loki/loki_json.go
+++ b/app/vlinsert/loki/loki_json.go
@@ -18,12 +18,11 @@ import (
 	"github.com/valyala/fastjson"
 )

-var (
-	rowsIngestedJSONTotal = metrics.NewCounter(`vl_rows_ingested_total{type="loki",format="json"}`)
-	parserPool            fastjson.ParserPool
-)
+var parserPool fastjson.ParserPool

 func handleJSON(r *http.Request, w http.ResponseWriter) bool {
+	startTime := time.Now()
+	lokiRequestsJSONTotal.Inc()
 	reader := r.Body
 	if r.Header.Get("Content-Encoding") == "gzip" {
 		zr, err := common.GetGzipReader(reader)
@@ -48,19 +47,36 @@ func handleJSON(r *http.Request, w http.ResponseWriter) bool {
 		httpserver.Errorf(w, r, "cannot parse common params from request: %s", err)
 		return true
 	}
+	if err := vlstorage.CanWriteData(); err != nil {
+		httpserver.Errorf(w, r, "%s", err)
+		return true
+	}
 	lr := logstorage.GetLogRows(cp.StreamFields, cp.IgnoreFields)
 	processLogMessage := cp.GetProcessLogMessageFunc(lr)
 	n, err := parseJSONRequest(data, processLogMessage)
 	vlstorage.MustAddRows(lr)
 	logstorage.PutLogRows(lr)
 	if err != nil {
-		httpserver.Errorf(w, r, "cannot parse Loki request: %s", err)
+		httpserver.Errorf(w, r, "cannot parse Loki json request: %s", err)
 		return true
 	}
+
 	rowsIngestedJSONTotal.Add(n)
+
+	// update lokiRequestJSONDuration only for successfully parsed requests
+	// There is no need in updating lokiRequestJSONDuration for request errors,
+	// since their timings are usually much smaller than the timing for successful request parsing.
+	lokiRequestJSONDuration.UpdateDuration(startTime)
+
 	return true
 }

+var (
+	lokiRequestsJSONTotal   = metrics.NewCounter(`vl_http_requests_total{path="/insert/loki/api/v1/push",format="json"}`)
+	rowsIngestedJSONTotal   = metrics.NewCounter(`vl_rows_ingested_total{type="loki",format="json"}`)
+	lokiRequestJSONDuration = metrics.NewHistogram(`vl_http_request_duration_seconds{path="/insert/loki/api/v1/push",format="json"}`)
+)
+
 func parseJSONRequest(data []byte, processLogMessage func(timestamp int64, fields []logstorage.Field)) (int, error) {
 	p := parserPool.Get()
 	defer parserPool.Put(p)
@@ -155,7 +171,6 @@ func parseJSONRequest(data []byte, processLogMessage func(timestamp int64, field
 				Value: bytesutil.ToUnsafeString(msg),
 			})
 			processLogMessage(ts, fields)
-
 		}
 		rowsIngested += len(lines)
 	}
--- a/app/vlinsert/loki/loki_json_timing_test.go
+++ b/app/vlinsert/loki/loki_json_timing_test.go
@@ -29,7 +29,7 @@ func benchmarkParseJSONRequest(b *testing.B, streams, rows, labels int) {
 		for pb.Next() {
 			_, err := parseJSONRequest(data, func(timestamp int64, fields []logstorage.Field) {})
 			if err != nil {
-				panic(fmt.Errorf("unexpected error: %s", err))
+				panic(fmt.Errorf("unexpected error: %w", err))
 			}
 		}
 	})
--- a/app/vlinsert/loki/loki_protobuf.go
+++ b/app/vlinsert/loki/loki_protobuf.go
@@ -19,12 +19,13 @@ import (
 )

 var (
-	rowsIngestedProtobufTotal = metrics.NewCounter(`vl_rows_ingested_total{type="loki",format="protobuf"}`)
-	bytesBufPool              bytesutil.ByteBufferPool
-	pushReqsPool              sync.Pool
+	bytesBufPool bytesutil.ByteBufferPool
+	pushReqsPool sync.Pool
 )

 func handleProtobuf(r *http.Request, w http.ResponseWriter) bool {
+	startTime := time.Now()
+	lokiRequestsProtobufTotal.Inc()
 	wcr := writeconcurrencylimiter.GetReader(r.Body)
 	data, err := io.ReadAll(wcr)
 	writeconcurrencylimiter.PutReader(wcr)
@@ -38,19 +39,36 @@ func handleProtobuf(r *http.Request, w http.ResponseWriter) bool {
 		httpserver.Errorf(w, r, "cannot parse common params from request: %s", err)
 		return true
 	}
+	if err := vlstorage.CanWriteData(); err != nil {
+		httpserver.Errorf(w, r, "%s", err)
+		return true
+	}
 	lr := logstorage.GetLogRows(cp.StreamFields, cp.IgnoreFields)
 	processLogMessage := cp.GetProcessLogMessageFunc(lr)
 	n, err := parseProtobufRequest(data, processLogMessage)
 	vlstorage.MustAddRows(lr)
 	logstorage.PutLogRows(lr)
 	if err != nil {
-		httpserver.Errorf(w, r, "cannot parse loki request: %s", err)
+		httpserver.Errorf(w, r, "cannot parse Loki protobuf request: %s", err)
 		return true
 	}
+
 	rowsIngestedProtobufTotal.Add(n)
+
+	// update lokiRequestProtobufDuration only for successfully parsed requests
+	// There is no need in updating lokiRequestProtobufDuration for request errors,
+	// since their timings are usually much smaller than the timing for successful request parsing.
+	lokiRequestProtobufDuration.UpdateDuration(startTime)
+
 	return true
 }

+var (
+	lokiRequestsProtobufTotal   = metrics.NewCounter(`vl_http_requests_total{path="/insert/loki/api/v1/push",format="protobuf"}`)
+	rowsIngestedProtobufTotal   = metrics.NewCounter(`vl_rows_ingested_total{type="loki",format="protobuf"}`)
+	lokiRequestProtobufDuration = metrics.NewHistogram(`vl_http_request_duration_seconds{path="/insert/loki/api/v1/push",format="protobuf"}`)
+)
+
 func parseProtobufRequest(data []byte, processLogMessage func(timestamp int64, fields []logstorage.Field)) (int, error) {
 	bb := bytesBufPool.Get()
 	defer bytesBufPool.Put(bb)
@@ -66,7 +84,7 @@ func parseProtobufRequest(data []byte, processLogMessage func(timestamp int64, f

 	err = req.Unmarshal(bb.B)
 	if err != nil {
-		return 0, fmt.Errorf("cannot parse request body: %s", err)
+		return 0, fmt.Errorf("cannot parse request body: %w", err)
 	}

 	var commonFields []logstorage.Field
@@ -79,7 +97,7 @@ func parseProtobufRequest(data []byte, processLogMessage func(timestamp int64, f
 		// Labels are same for all entries in the stream.
 		commonFields, err = parsePromLabels(commonFields[:0], stream.Labels)
 		if err != nil {
-			return rowsIngested, fmt.Errorf("cannot parse stream labels %q: %s", stream.Labels, err)
+			return rowsIngested, fmt.Errorf("cannot parse stream labels %q: %w", stream.Labels, err)
 		}
 		fields := commonFields

--- a/app/vlinsert/loki/loki_protobuf_timing_test.go
+++ b/app/vlinsert/loki/loki_protobuf_timing_test.go
@@ -6,8 +6,9 @@ import (
 	"testing"
 	"time"

-	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage"
 	"github.com/golang/snappy"
+
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage"
 )

 func BenchmarkParseProtobufRequest(b *testing.B) {
@@ -30,7 +31,7 @@ func benchmarkParseProtobufRequest(b *testing.B, streams, rows, labels int) {
 		for pb.Next() {
 			_, err := parseProtobufRequest(body, func(timestamp int64, fields []logstorage.Field) {})
 			if err != nil {
-				panic(fmt.Errorf("unexpected error: %s", err))
+				panic(fmt.Errorf("unexpected error: %w", err))
 			}
 		}
 	})
--- a/app/vlselect/main.go
+++ b/app/vlselect/main.go
@@ -88,6 +88,12 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
 		return true
 	}
 	if strings.HasPrefix(path, "/vmui/") {
+		if strings.HasPrefix(path, "/vmui/static/") {
+			// Allow clients caching static contents for long period of time, since it shouldn't change over time.
+			// Path to static contents (such as js and css) must be changed whenever its contents is changed.
+			// See https://developer.chrome.com/docs/lighthouse/performance/uses-long-cache-ttl/
+			w.Header().Set("Cache-Control", "max-age=31536000")
+		}
 		r.URL.Path = path
 		vmuiFileServer.ServeHTTP(w, r)
 		return true
--- a/app/vlselect/vmui/asset-manifest.json
+++ b/app/vlselect/vmui/asset-manifest.json
@@ -1,14 +1,13 @@
 {
  "files": {
-    "main.css": "./static/css/main.5f91b1c5.css",
-    "main.js": "./static/js/main.7226aaff.js",
-    "static/js/522.b5ae4365.chunk.js": "./static/js/522.b5ae4365.chunk.js",
-    "static/media/Lato-Regular.ttf": "./static/media/Lato-Regular.d714fec1633b69a9c2e9.ttf",
-    "static/media/Lato-Bold.ttf": "./static/media/Lato-Bold.32360ba4b57802daa4d6.ttf",
+    "main.css": "./static/css/main.d1313636.css",
+    "main.js": "./static/js/main.1919fefe.js",
+    "static/js/522.da77e7b3.chunk.js": "./static/js/522.da77e7b3.chunk.js",
+    "static/media/MetricsQL.md": "./static/media/MetricsQL.8644fd7c964802dd34a9.md",
    "index.html": "./index.html"
  },
  "entrypoints": [
-    "static/css/main.5f91b1c5.css",
-    "static/js/main.7226aaff.js"
+    "static/css/main.d1313636.css",
+    "static/js/main.1919fefe.js"
  ]
 }
--- a/app/vlselect/vmui/index.html
+++ b/app/vlselect/vmui/index.html
@@ -1 +1 @@
-<!doctype html><html lang="en"><head><meta charset="utf-8"/><link rel="icon" href="./favicon.ico"/><meta name="viewport" content="width=device-width,initial-scale=1,maximum-scale=1,user-scalable=no"/><meta name="theme-color" content="#000000"/><meta name="description" content="UI for VictoriaMetrics"/><link rel="apple-touch-icon" href="./apple-touch-icon.png"/><link rel="icon" type="image/png" sizes="32x32" href="./favicon-32x32.png"><link rel="manifest" href="./manifest.json"/><title>VM UI</title><script src="./dashboards/index.js" type="module"></script><meta name="twitter:card" content="summary_large_image"><meta name="twitter:image" content="./preview.jpg"><meta name="twitter:title" content="UI for VictoriaMetrics"><meta name="twitter:description" content="Explore and troubleshoot your VictoriaMetrics data"><meta name="twitter:site" content="@VictoriaMetrics"><meta property="og:title" content="Metric explorer for VictoriaMetrics"><meta property="og:description" content="Explore and troubleshoot your VictoriaMetrics data"><meta property="og:image" content="./preview.jpg"><meta property="og:type" content="website"><script defer="defer" src="./static/js/main.7226aaff.js"></script><link href="./static/css/main.5f91b1c5.css" rel="stylesheet"></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>
+<!doctype html><html lang="en"><head><meta charset="utf-8"/><link rel="icon" href="./favicon.ico"/><meta name="viewport" content="width=device-width,initial-scale=1,maximum-scale=5"/><meta name="theme-color" content="#000000"/><meta name="description" content="UI for VictoriaMetrics"/><link rel="apple-touch-icon" href="./apple-touch-icon.png"/><link rel="icon" type="image/png" sizes="32x32" href="./favicon-32x32.png"><link rel="manifest" href="./manifest.json"/><title>VM UI</title><script src="./dashboards/index.js" type="module"></script><meta name="twitter:card" content="summary_large_image"><meta name="twitter:image" content="./preview.jpg"><meta name="twitter:title" content="UI for VictoriaMetrics"><meta name="twitter:description" content="Explore and troubleshoot your VictoriaMetrics data"><meta name="twitter:site" content="@VictoriaMetrics"><meta property="og:title" content="Metric explorer for VictoriaMetrics"><meta property="og:description" content="Explore and troubleshoot your VictoriaMetrics data"><meta property="og:image" content="./preview.jpg"><meta property="og:type" content="website"><script defer="defer" src="./static/js/main.1919fefe.js"></script><link href="./static/css/main.d1313636.css" rel="stylesheet"></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>
--- a/app/vlselect/vmui/static/css/main.5f91b1c5.css
+++ b/app/vlselect/vmui/static/css/main.5f91b1c5.css
--- a/app/vlselect/vmui/static/css/main.d1313636.css
+++ b/app/vlselect/vmui/static/css/main.d1313636.css
--- a/app/vlselect/vmui/static/js/522.da77e7b3.chunk.js
+++ b/app/vlselect/vmui/static/js/522.da77e7b3.chunk.js
--- a/app/vlselect/vmui/static/js/main.1919fefe.js
+++ b/app/vlselect/vmui/static/js/main.1919fefe.js
--- a/app/vlselect/vmui/static/js/main.1919fefe.js.LICENSE.txt
+++ b/app/vlselect/vmui/static/js/main.1919fefe.js.LICENSE.txt
@@ -7,7 +7,7 @@
 /*! regenerator-runtime -- Copyright (c) 2014-present, Facebook, Inc. -- license (MIT): https://github.com/facebook/regenerator/blob/main/LICENSE */

 /**
- * @remix-run/router v1.7.2
+ * @remix-run/router v1.10.0
 *
 * Copyright (c) Remix Software Inc.
 *
@@ -18,7 +18,7 @@
 */

 /**
- * React Router DOM v6.14.2
+ * React Router DOM v6.17.0
 *
 * Copyright (c) Remix Software Inc.
 *
@@ -29,7 +29,7 @@
 */

 /**
- * React Router v6.14.2
+ * React Router v6.17.0
 *
 * Copyright (c) Remix Software Inc.
 *
--- a/app/vlselect/vmui/static/js/main.7226aaff.js
+++ b/app/vlselect/vmui/static/js/main.7226aaff.js
--- a/app/vlselect/vmui/static/media/Lato-Bold.32360ba4b57802daa4d6.ttf
+++ b/app/vlselect/vmui/static/media/Lato-Bold.32360ba4b57802daa4d6.ttf
--- a/app/vlselect/vmui/static/media/Lato-Regular.d714fec1633b69a9c2e9.ttf
+++ b/app/vlselect/vmui/static/media/Lato-Regular.d714fec1633b69a9c2e9.ttf
--- a/app/vlselect/vmui/static/media/MetricsQL.8644fd7c964802dd34a9.md
+++ b/app/vlselect/vmui/static/media/MetricsQL.8644fd7c964802dd34a9.md
--- a/app/vlstorage/main.go
+++ b/app/vlstorage/main.go
@@ -3,14 +3,17 @@ package vlstorage
 import (
 	"flag"
 	"fmt"
+	"net/http"
 	"sync"
 	"time"

+	"github.com/VictoriaMetrics/metrics"
+
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage"
-	"github.com/VictoriaMetrics/metrics"
 )

 var (
@@ -29,6 +32,8 @@ var (
 		"see https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#stream-fields ; see also -logIngestedRows")
 	logIngestedRows = flag.Bool("logIngestedRows", false, "Whether to log all the ingested log entries; this can be useful for debugging of data ingestion; "+
 		"see https://docs.victoriametrics.com/VictoriaLogs/data-ingestion/ ; see also -logNewStreams")
+	minFreeDiskSpaceBytes = flagutil.NewBytes("storage.minFreeDiskSpaceBytes", 10e6, "The minimum free disk space at -storageDataPath after which "+
+		"the storage stops accepting new data")
 )

 // Init initializes vlstorage.
@@ -39,15 +44,16 @@ func Init() {
 		logger.Panicf("BUG: Init() has been already called")
 	}

-	if retentionPeriod.Msecs < 24*3600*1000 {
+	if retentionPeriod.Duration() < 24*time.Hour {
 		logger.Fatalf("-retentionPeriod cannot be smaller than a day; got %s", retentionPeriod)
 	}
 	cfg := &logstorage.StorageConfig{
-		Retention:       time.Millisecond * time.Duration(retentionPeriod.Msecs),
-		FlushInterval:   *inmemoryDataFlushInterval,
-		FutureRetention: time.Millisecond * time.Duration(futureRetention.Msecs),
-		LogNewStreams:   *logNewStreams,
-		LogIngestedRows: *logIngestedRows,
+		Retention:             retentionPeriod.Duration(),
+		FlushInterval:         *inmemoryDataFlushInterval,
+		FutureRetention:       futureRetention.Duration(),
+		LogNewStreams:         *logNewStreams,
+		LogIngestedRows:       *logIngestedRows,
+		MinFreeDiskSpaceBytes: minFreeDiskSpaceBytes.N,
 	}
 	logger.Infof("opening storage at -storageDataPath=%s", *storageDataPath)
 	startTime := time.Now()
@@ -74,7 +80,21 @@ func Stop() {
 var strg *logstorage.Storage
 var storageMetrics *metrics.Set

+// CanWriteData returns nil if vlstorage accepts writes and non-nil error if it is in read-only mode.
+func CanWriteData() error {
+	if !strg.IsReadOnly() {
+		return nil
+	}
+	return &httpserver.ErrorWithStatusCode{
+		Err: fmt.Errorf("cannot add rows into storage in read-only mode; the storage can be in read-only mode "+
+			"because of lack of free disk space at -storageDataPath=%s", *storageDataPath),
+		StatusCode: http.StatusTooManyRequests,
+	}
+}
+
 // MustAddRows adds lr to vlstorage
+//
+// It is advised to call CanWriteData() before calling MustAddRows()
 func MustAddRows(lr *logstorage.LogRows) {
 	strg.MustAddRows(lr)
 }
@@ -107,6 +127,12 @@ func initStorageMetrics(strg *logstorage.Storage) *metrics.Set {
 	ms.NewGauge(fmt.Sprintf(`vl_free_disk_space_bytes{path=%q}`, *storageDataPath), func() float64 {
 		return float64(fs.MustGetFreeSpace(*storageDataPath))
 	})
+	ms.NewGauge(fmt.Sprintf(`vl_storage_is_read_only{path=%q}`, *storageDataPath), func() float64 {
+		if m().IsReadOnly {
+			return 1
+		}
+		return 0
+	})

 	ms.NewGauge(`vl_active_merges{type="inmemory"}`, func() float64 {
 		return float64(m().InmemoryActiveMerges)
--- a/app/vmagent/README.md
+++ b/app/vmagent/README.md
--- a/app/vmagent/csvimport/request_handler.go
+++ b/app/vmagent/csvimport/request_handler.go
@@ -65,7 +65,9 @@ func insertRows(at *auth.Token, rows []parser.Row, extraLabels []prompbmarshal.L
 	ctx.WriteRequest.Timeseries = tssDst
 	ctx.Labels = labels
 	ctx.Samples = samples
-	remotewrite.Push(at, &ctx.WriteRequest)
+	if !remotewrite.TryPush(at, &ctx.WriteRequest) {
+		return remotewrite.ErrQueueFullHTTPRetry
+	}
 	rowsInserted.Add(len(rows))
 	if at != nil {
 		rowsTenantInserted.Get(at).Add(len(rows))
--- a/app/vmagent/datadog/request_handler.go
+++ b/app/vmagent/datadog/request_handler.go
@@ -8,7 +8,7 @@ import (
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
 	parserCommon "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
-	parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/datadog"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/datadog"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/datadog/stream"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
 	"github.com/VictoriaMetrics/metrics"
@@ -29,12 +29,12 @@ func InsertHandlerForHTTP(at *auth.Token, req *http.Request) error {
 		return err
 	}
 	ce := req.Header.Get("Content-Encoding")
-	return stream.Parse(req.Body, ce, func(series []parser.Series) error {
+	return stream.Parse(req.Body, ce, func(series []datadog.Series) error {
 		return insertRows(at, series, extraLabels)
 	})
 }

-func insertRows(at *auth.Token, series []parser.Series, extraLabels []prompbmarshal.Label) error {
+func insertRows(at *auth.Token, series []datadog.Series, extraLabels []prompbmarshal.Label) error {
 	ctx := common.GetPushCtx()
 	defer common.PutPushCtx(ctx)

@@ -63,7 +63,7 @@ func insertRows(at *auth.Token, series []parser.Series, extraLabels []prompbmars
 			})
 		}
 		for _, tag := range ss.Tags {
-			name, value := parser.SplitTag(tag)
+			name, value := datadog.SplitTag(tag)
 			if name == "host" {
 				name = "exported_host"
 			}
@@ -88,7 +88,9 @@ func insertRows(at *auth.Token, series []parser.Series, extraLabels []prompbmars
 	ctx.WriteRequest.Timeseries = tssDst
 	ctx.Labels = labels
 	ctx.Samples = samples
-	remotewrite.Push(at, &ctx.WriteRequest)
+	if !remotewrite.TryPush(at, &ctx.WriteRequest) {
+		return remotewrite.ErrQueueFullHTTPRetry
+	}
 	rowsInserted.Add(rowsTotal)
 	if at != nil {
 		rowsTenantInserted.Get(at).Add(rowsTotal)
--- a/app/vmagent/graphite/request_handler.go
+++ b/app/vmagent/graphite/request_handler.go
@@ -5,6 +5,7 @@ import (

 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
 	parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/graphite"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/graphite/stream"
@@ -20,10 +21,21 @@ var (
 //
 // See https://graphite.readthedocs.io/en/latest/feeding-carbon.html#the-plaintext-protocol
 func InsertHandler(r io.Reader) error {
-	return stream.Parse(r, insertRows)
+	return stream.Parse(r, false, func(rows []parser.Row) error {
+		return insertRows(nil, rows)
+	})
 }

-func insertRows(rows []parser.Row) error {
+// InsertHandlerForReader processes remote write for graphite plaintext protocol.
+//
+// See https://graphite.readthedocs.io/en/latest/feeding-carbon.html#the-plaintext-protocol
+func InsertHandlerForReader(at *auth.Token, r io.Reader, isGzipped bool) error {
+	return stream.Parse(r, isGzipped, func(rows []parser.Row) error {
+		return insertRows(at, rows)
+	})
+}
+
+func insertRows(at *auth.Token, rows []parser.Row) error {
 	ctx := common.GetPushCtx()
 	defer common.PutPushCtx(ctx)

@@ -56,7 +68,9 @@ func insertRows(rows []parser.Row) error {
 	ctx.WriteRequest.Timeseries = tssDst
 	ctx.Labels = labels
 	ctx.Samples = samples
-	remotewrite.Push(nil, &ctx.WriteRequest)
+	if !remotewrite.TryPush(at, &ctx.WriteRequest) {
+		return remotewrite.ErrQueueFullHTTPRetry
+	}
 	rowsInserted.Add(len(rows))
 	rowsPerInsert.Update(float64(len(rows)))
 	return nil
--- a/app/vmagent/influx/request_handler.go
+++ b/app/vmagent/influx/request_handler.go
@@ -36,9 +36,9 @@ var (
 // InsertHandlerForReader processes remote write for influx line protocol.
 //
 // See https://github.com/influxdata/telegraf/tree/master/plugins/inputs/socket_listener/
-func InsertHandlerForReader(r io.Reader, isGzipped bool) error {
+func InsertHandlerForReader(at *auth.Token, r io.Reader, isGzipped bool) error {
 	return stream.Parse(r, isGzipped, "", "", func(db string, rows []parser.Row) error {
-		return insertRows(nil, db, rows, nil)
+		return insertRows(at, db, rows, nil)
 	})
 }

@@ -130,7 +130,9 @@ func insertRows(at *auth.Token, db string, rows []parser.Row, extraLabels []prom
 	ctx.ctx.Labels = labels
 	ctx.ctx.Samples = samples
 	ctx.commonLabels = commonLabels
-	remotewrite.Push(at, &ctx.ctx.WriteRequest)
+	if !remotewrite.TryPush(at, &ctx.ctx.WriteRequest) {
+		return remotewrite.ErrQueueFullHTTPRetry
+	}
 	rowsInserted.Add(rowsTotal)
 	if at != nil {
 		rowsTenantInserted.Get(at).Add(rowsTotal)
--- a/app/vmagent/main.go
+++ b/app/vmagent/main.go
@@ -16,6 +16,7 @@ import (
 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/graphite"
 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/influx"
 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/native"
+	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/newrelic"
 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/opentelemetry"
 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/opentsdb"
 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/opentsdbhttp"
@@ -123,7 +124,7 @@ func main() {
 	common.StartUnmarshalWorkers()
 	if len(*influxListenAddr) > 0 {
 		influxServer = influxserver.MustStart(*influxListenAddr, *influxUseProxyProtocol, func(r io.Reader) error {
-			return influx.InsertHandlerForReader(r, false)
+			return influx.InsertHandlerForReader(nil, r, false)
 		})
 	}
 	if len(*graphiteListenAddr) > 0 {
@@ -138,7 +139,7 @@ func main() {
 		opentsdbhttpServer = opentsdbhttpserver.MustStart(*opentsdbHTTPListenAddr, *opentsdbHTTPUseProxyProtocol, httpInsertHandler)
 	}

-	promscrape.Init(remotewrite.Push)
+	promscrape.Init(remotewrite.PushDropSamplesOnFailure)

 	if len(*httpListenAddr) > 0 {
 		go httpserver.Serve(*httpListenAddr, *useProxyProtocol, requestHandler)
@@ -208,7 +209,7 @@ func getAuthTokenFromPath(path string) (*auth.Token, error) {
 	if p.Suffix != "opentsdb/api/put" {
 		return nil, fmt.Errorf("unsupported path requested: %q; expecting 'opentsdb/api/put'", p.Suffix)
 	}
-	return auth.NewToken(p.AuthToken)
+	return auth.NewTokenPossibleMultitenant(p.AuthToken)
 }

 func requestHandler(w http.ResponseWriter, r *http.Request) bool {
@@ -251,7 +252,7 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
 		w.WriteHeader(statusCode)
 		return true
 	}
-	if strings.HasPrefix(path, "datadog/") {
+	if strings.HasPrefix(path, "/datadog/") {
 		// Trim suffix from paths starting from /datadog/ in order to support legacy DataDog agent.
 		// See https://github.com/VictoriaMetrics/VictoriaMetrics/pull/2670
 		path = strings.TrimSuffix(path, "/")
@@ -318,6 +319,29 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
 		}
 		w.WriteHeader(http.StatusOK)
 		return true
+	case "/newrelic":
+		newrelicCheckRequest.Inc()
+		w.Header().Set("Content-Type", "application/json")
+		w.WriteHeader(202)
+		fmt.Fprintf(w, `{"status":"ok"}`)
+		return true
+	case "/newrelic/inventory/deltas":
+		newrelicInventoryRequests.Inc()
+		w.Header().Set("Content-Type", "application/json")
+		w.WriteHeader(202)
+		fmt.Fprintf(w, `{"payload":{"version": 1, "state": {}, "reset": "false"}}`)
+		return true
+	case "/newrelic/infra/v2/metrics/events/bulk":
+		newrelicWriteRequests.Inc()
+		if err := newrelic.InsertHandlerForHTTP(nil, r); err != nil {
+			newrelicWriteErrors.Inc()
+			httpserver.Errorf(w, r, "%s", err)
+			return true
+		}
+		w.Header().Set("Content-Type", "application/json")
+		w.WriteHeader(202)
+		fmt.Fprintf(w, `{"status":"ok"}`)
+		return true
 	case "/datadog/api/v1/series":
 		datadogWriteRequests.Inc()
 		if err := datadog.InsertHandlerForHTTP(nil, r); err != nil {
@@ -518,6 +542,29 @@ func processMultitenantRequest(w http.ResponseWriter, r *http.Request, path stri
 		}
 		w.WriteHeader(http.StatusOK)
 		return true
+	case "newrelic":
+		newrelicCheckRequest.Inc()
+		w.Header().Set("Content-Type", "application/json")
+		w.WriteHeader(202)
+		fmt.Fprintf(w, `{"status":"ok"}`)
+		return true
+	case "newrelic/inventory/deltas":
+		newrelicInventoryRequests.Inc()
+		w.Header().Set("Content-Type", "application/json")
+		w.WriteHeader(202)
+		fmt.Fprintf(w, `{"payload":{"version": 1, "state": {}, "reset": "false"}}`)
+		return true
+	case "newrelic/infra/v2/metrics/events/bulk":
+		newrelicWriteRequests.Inc()
+		if err := newrelic.InsertHandlerForHTTP(at, r); err != nil {
+			newrelicWriteErrors.Inc()
+			httpserver.Errorf(w, r, "%s", err)
+			return true
+		}
+		w.Header().Set("Content-Type", "application/json")
+		w.WriteHeader(202)
+		fmt.Fprintf(w, `{"status":"ok"}`)
+		return true
 	case "datadog/api/v1/series":
 		datadogWriteRequests.Inc()
 		if err := datadog.InsertHandlerForHTTP(at, r); err != nil {
@@ -590,6 +637,13 @@ var (
 	opentelemetryPushRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/opentelemetry/api/v1/push", protocol="opentelemetry"}`)
 	opentelemetryPushErrors   = metrics.NewCounter(`vmagent_http_request_errors_total{path="/opentelemetry/api/v1/push", protocol="opentelemetry"}`)

+	// NewRelic request counters use the same vmagent_ prefix as the other vmagent HTTP request counters.
+	newrelicWriteRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/newrelic/infra/v2/metrics/events/bulk", protocol="newrelic"}`)
+	newrelicWriteErrors   = metrics.NewCounter(`vmagent_http_request_errors_total{path="/newrelic/infra/v2/metrics/events/bulk", protocol="newrelic"}`)
+
+	newrelicInventoryRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/newrelic/inventory/deltas", protocol="newrelic"}`)
+	newrelicCheckRequest      = metrics.NewCounter(`vmagent_http_requests_total{path="/newrelic", protocol="newrelic"}`)
+
 	promscrapeTargetsRequests          = metrics.NewCounter(`vmagent_http_requests_total{path="/targets"}`)
 	promscrapeServiceDiscoveryRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/service-discovery"}`)

--- a/app/vmagent/native/request_handler.go
+++ b/app/vmagent/native/request_handler.go
@@ -84,6 +84,8 @@ func insertRows(at *auth.Token, block *stream.Block, extraLabels []prompbmarshal
 	ctx.WriteRequest.Timeseries = tssDst
 	ctx.Labels = labels
 	ctx.Samples = samples
-	remotewrite.Push(at, &ctx.WriteRequest)
+	if !remotewrite.TryPush(at, &ctx.WriteRequest) {
+		return remotewrite.ErrQueueFullHTTPRetry
+	}
 	return nil
 }
--- a/app/vmagent/newrelic/request_handler.go
+++ b/app/vmagent/newrelic/request_handler.go
@@ -0,0 +1,97 @@
+package newrelic
+
+import (
+	"net/http"
+
+	"github.com/VictoriaMetrics/metrics"
+
+	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
+	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
+	parserCommon "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/newrelic"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/newrelic/stream"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
+)
+
+var (
+	rowsInserted       = metrics.NewCounter(`vmagent_rows_inserted_total{type="newrelic"}`)
+	rowsTenantInserted = tenantmetrics.NewCounterMap(`vmagent_tenant_inserted_rows_total{type="newrelic"}`)
+	rowsPerInsert      = metrics.NewHistogram(`vmagent_rows_per_insert{type="newrelic"}`)
+)
+
+// InsertHandlerForHTTP processes remote write for NewRelic POST /infra/v2/metrics/events/bulk request.
+//
+// Labels returned by parserCommon.GetExtraLabels(req) are added to every pushed series.
+func InsertHandlerForHTTP(at *auth.Token, req *http.Request) error {
+	extraLabels, err := parserCommon.GetExtraLabels(req)
+	if err != nil {
+		return err
+	}
+	ce := req.Header.Get("Content-Encoding")
+	isGzip := ce == "gzip"
+	return stream.Parse(req.Body, isGzip, func(rows []newrelic.Row) error {
+		return insertRows(at, rows, extraLabels)
+	})
+}
+
+// insertRows converts every sample of rows into a separate single-sample time series
+// and tries pushing the result via remotewrite.TryPush.
+//
+// It returns remotewrite.ErrQueueFullHTTPRetry when remotewrite.TryPush reports failure.
+func insertRows(at *auth.Token, rows []newrelic.Row, extraLabels []prompbmarshal.Label) error {
+	ctx := common.GetPushCtx()
+	defer common.PutPushCtx(ctx)
+
+	samplesCount := 0
+	tssDst := ctx.WriteRequest.Timeseries[:0]
+	labels := ctx.Labels[:0]
+	samples := ctx.Samples[:0]
+	for i := range rows {
+		r := &rows[i]
+		tags := r.Tags
+		srcSamples := r.Samples
+		for j := range srcSamples {
+			s := &srcSamples[j]
+			labelsLen := len(labels)
+			labels = append(labels, prompbmarshal.Label{
+				Name:  "__name__",
+				Value: bytesutil.ToUnsafeString(s.Name),
+			})
+			for k := range tags {
+				t := &tags[k]
+				labels = append(labels, prompbmarshal.Label{
+					Name:  bytesutil.ToUnsafeString(t.Key),
+					Value: bytesutil.ToUnsafeString(t.Value),
+				})
+			}
+			// Extra labels must be appended before the series is built,
+			// since labels[labelsLen:] covers only the labels appended so far.
+			labels = append(labels, extraLabels...)
+			samples = append(samples, prompbmarshal.Sample{
+				Value:     s.Value,
+				Timestamp: r.Timestamp,
+			})
+			tssDst = append(tssDst, prompbmarshal.TimeSeries{
+				Labels:  labels[labelsLen:],
+				Samples: samples[len(samples)-1:],
+			})
+		}
+		samplesCount += len(srcSamples)
+	}
+	ctx.WriteRequest.Timeseries = tssDst
+	ctx.Labels = labels
+	ctx.Samples = samples
+	if !remotewrite.TryPush(at, &ctx.WriteRequest) {
+		return remotewrite.ErrQueueFullHTTPRetry
+	}
+	// Count samples rather than rows, consistently with the tenant counter and the histogram below.
+	rowsInserted.Add(samplesCount)
+	if at != nil {
+		rowsTenantInserted.Get(at).Add(samplesCount)
+	}
+	rowsPerInsert.Update(float64(samplesCount))
+	return nil
+}
--- a/app/vmagent/opentelemetry/request_handler.go
+++ b/app/vmagent/opentelemetry/request_handler.go
@@ -59,7 +59,9 @@ func insertRows(at *auth.Token, tss []prompbmarshal.TimeSeries, extraLabels []pr
 	ctx.WriteRequest.Timeseries = tssDst
 	ctx.Labels = labels
 	ctx.Samples = samples
-	remotewrite.Push(at, &ctx.WriteRequest)
+	if !remotewrite.TryPush(at, &ctx.WriteRequest) {
+		return remotewrite.ErrQueueFullHTTPRetry
+	}
 	rowsInserted.Add(rowsTotal)
 	if at != nil {
 		rowsTenantInserted.Get(at).Add(rowsTotal)
--- a/app/vmagent/opentsdb/request_handler.go
+++ b/app/vmagent/opentsdb/request_handler.go
@@ -56,7 +56,9 @@ func insertRows(rows []parser.Row) error {
 	ctx.WriteRequest.Timeseries = tssDst
 	ctx.Labels = labels
 	ctx.Samples = samples
-	remotewrite.Push(nil, &ctx.WriteRequest)
+	if !remotewrite.TryPush(nil, &ctx.WriteRequest) {
+		return remotewrite.ErrQueueFullHTTPRetry
+	}
 	rowsInserted.Add(len(rows))
 	rowsPerInsert.Update(float64(len(rows)))
 	return nil
--- a/app/vmagent/opentsdbhttp/request_handler.go
+++ b/app/vmagent/opentsdbhttp/request_handler.go
@@ -64,7 +64,9 @@ func insertRows(at *auth.Token, rows []parser.Row, extraLabels []prompbmarshal.L
 	ctx.WriteRequest.Timeseries = tssDst
 	ctx.Labels = labels
 	ctx.Samples = samples
-	remotewrite.Push(at, &ctx.WriteRequest)
+	if !remotewrite.TryPush(at, &ctx.WriteRequest) {
+		return remotewrite.ErrQueueFullHTTPRetry
+	}
 	rowsInserted.Add(len(rows))
 	rowsPerInsert.Update(float64(len(rows)))
 	return nil
--- a/app/vmagent/prometheusimport/request_handler.go
+++ b/app/vmagent/prometheusimport/request_handler.go
@@ -32,7 +32,7 @@ func InsertHandler(at *auth.Token, req *http.Request) error {
 		return err
 	}
 	isGzipped := req.Header.Get("Content-Encoding") == "gzip"
-	return stream.Parse(req.Body, defaultTimestamp, isGzipped, func(rows []parser.Row) error {
+	return stream.Parse(req.Body, defaultTimestamp, isGzipped, true, func(rows []parser.Row) error {
 		return insertRows(at, rows, extraLabels)
 	}, func(s string) {
 		httpserver.LogError(req, s)
@@ -73,7 +73,9 @@ func insertRows(at *auth.Token, rows []parser.Row, extraLabels []prompbmarshal.L
 	ctx.WriteRequest.Timeseries = tssDst
 	ctx.Labels = labels
 	ctx.Samples = samples
-	remotewrite.Push(at, &ctx.WriteRequest)
+	if !remotewrite.TryPush(at, &ctx.WriteRequest) {
+		return remotewrite.ErrQueueFullHTTPRetry
+	}
 	rowsInserted.Add(len(rows))
 	if at != nil {
 		rowsTenantInserted.Get(at).Add(len(rows))
--- a/app/vmagent/promremotewrite/request_handler.go
+++ b/app/vmagent/promremotewrite/request_handler.go
@@ -69,7 +69,9 @@ func insertRows(at *auth.Token, timeseries []prompb.TimeSeries, extraLabels []pr
 	ctx.WriteRequest.Timeseries = tssDst
 	ctx.Labels = labels
 	ctx.Samples = samples
-	remotewrite.Push(at, &ctx.WriteRequest)
+	if !remotewrite.TryPush(at, &ctx.WriteRequest) {
+		return remotewrite.ErrQueueFullHTTPRetry
+	}
 	rowsInserted.Add(rowsTotal)
 	if at != nil {
 		rowsTenantInserted.Get(at).Add(rowsTotal)
--- a/app/vmagent/remotewrite/client.go
+++ b/app/vmagent/remotewrite/client.go
@@ -2,6 +2,7 @@ package remotewrite

 import (
 	"bytes"
+	"errors"
 	"fmt"
 	"io"
 	"net/http"
@@ -105,12 +106,15 @@ type client struct {
 func newHTTPClient(argIdx int, remoteWriteURL, sanitizedURL string, fq *persistentqueue.FastQueue, concurrency int) *client {
 	authCfg, err := getAuthConfig(argIdx)
 	if err != nil {
-		logger.Panicf("FATAL: cannot initialize auth config for remoteWrite.url=%q: %s", remoteWriteURL, err)
+		logger.Fatalf("cannot initialize auth config for -remoteWrite.url=%q: %s", remoteWriteURL, err)
+	}
+	tlsCfg, err := authCfg.NewTLSConfig()
+	if err != nil {
+		logger.Fatalf("cannot initialize tls config for -remoteWrite.url=%q: %s", remoteWriteURL, err)
 	}
-	tlsCfg := authCfg.NewTLSConfig()
 	awsCfg, err := getAWSAPIConfig(argIdx)
 	if err != nil {
-		logger.Fatalf("FATAL: cannot initialize AWS Config for remoteWrite.url=%q: %s", remoteWriteURL, err)
+		logger.Fatalf("cannot initialize AWS Config for -remoteWrite.url=%q: %s", remoteWriteURL, err)
 	}
 	tr := &http.Transport{
 		DialContext:         statDial,
@@ -301,7 +305,7 @@ func (c *client) runWorker() {
 				continue
 			}
 			// Return unsent block to the queue.
-			c.fq.MustWriteBlock(block)
+			c.fq.MustWriteBlockIgnoreDisabledPQ(block)
 			return
 		case <-c.stopCh:
 			// c must be stopped. Wait for a while in the hope the block will be sent.
@@ -310,11 +314,11 @@ func (c *client) runWorker() {
 			case ok := <-ch:
 				if !ok {
 					// Return unsent block to the queue.
-					c.fq.MustWriteBlock(block)
+					c.fq.MustWriteBlockIgnoreDisabledPQ(block)
 				}
 			case <-time.After(graceDuration):
 				// Return unsent block to the queue.
-				c.fq.MustWriteBlock(block)
+				c.fq.MustWriteBlockIgnoreDisabledPQ(block)
 			}
 			return
 		}
@@ -322,12 +326,46 @@ func (c *client) runWorker() {
 }

+// doRequest builds a request for the given url and body and sends it via c.hc.
+//
+// The request is re-created and sent one more time if the first attempt fails
+// with io.EOF or io.ErrUnexpectedEOF; any other error is returned as is.
 func (c *client) doRequest(url string, body []byte) (*http.Response, error) {
+	req, err := c.newRequest(url, body)
+	if err != nil {
+		return nil, err
+	}
+	resp, err := c.hc.Do(req)
+	switch {
+	case err == nil:
+		return resp, nil
+	case !errors.Is(err, io.EOF) && !errors.Is(err, io.ErrUnexpectedEOF):
+		return nil, err
+	}
+	// The first request failed with EOF, so the connection has likely become stale or timed out.
+	// Retry once in the hope the request succeeds on a fresh attempt.
+	// If it fails again, the caller handles the error as usual.
+	// This should help with https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4139
+	retryReq, err := c.newRequest(url, body)
+	if err != nil {
+		return nil, fmt.Errorf("second attempt: %w", err)
+	}
+	resp, err = c.hc.Do(retryReq)
+	if err != nil {
+		return nil, fmt.Errorf("second attempt: %w", err)
+	}
+	return resp, nil
+}
+
+func (c *client) newRequest(url string, body []byte) (*http.Request, error) {
 	reqBody := bytes.NewBuffer(body)
 	req, err := http.NewRequest(http.MethodPost, url, reqBody)
 	if err != nil {
 		logger.Panicf("BUG: unexpected error from http.NewRequest(%q): %s", url, err)
 	}
-	c.authCfg.SetHeaders(req, true)
+	err = c.authCfg.SetHeaders(req, true)
+	if err != nil {
+		return nil, err
+	}
 	h := req.Header
 	h.Set("User-Agent", "vmagent")
 	h.Set("Content-Type", "application/x-protobuf")
@@ -341,11 +375,10 @@ func (c *client) doRequest(url string, body []byte) (*http.Response, error) {
 	if c.awsCfg != nil {
 		sigv4Hash := awsapi.HashHex(body)
 		if err := c.awsCfg.SignRequest(req, sigv4Hash); err != nil {
-			// there is no need in retry, request will be rejected by client.Do and retried by code below
-			logger.Warnf("cannot sign remoteWrite request with AWS sigv4: %s", err)
+			return nil, fmt.Errorf("cannot sign remoteWrite request with AWS sigv4: %w", err)
 		}
 	}
-	return c.hc.Do(req)
+	return req, nil
 }

 // sendBlockHTTP sends the given block to c.remoteWriteURL.
--- a/app/vmagent/remotewrite/pendingseries.go
+++ b/app/vmagent/remotewrite/pendingseries.go
@@ -37,9 +37,9 @@ type pendingSeries struct {
 	periodicFlusherWG sync.WaitGroup
 }

-func newPendingSeries(pushBlock func(block []byte), isVMRemoteWrite bool, significantFigures, roundDigits int) *pendingSeries {
+func newPendingSeries(fq *persistentqueue.FastQueue, isVMRemoteWrite bool, significantFigures, roundDigits int) *pendingSeries {
 	var ps pendingSeries
-	ps.wr.pushBlock = pushBlock
+	ps.wr.fq = fq
 	ps.wr.isVMRemoteWrite = isVMRemoteWrite
 	ps.wr.significantFigures = significantFigures
 	ps.wr.roundDigits = roundDigits
@@ -57,10 +57,11 @@ func (ps *pendingSeries) MustStop() {
 	ps.periodicFlusherWG.Wait()
 }

-func (ps *pendingSeries) Push(tss []prompbmarshal.TimeSeries) {
+func (ps *pendingSeries) TryPush(tss []prompbmarshal.TimeSeries) bool {
 	ps.mu.Lock()
-	ps.wr.push(tss)
+	ok := ps.wr.tryPush(tss)
 	ps.mu.Unlock()
+	return ok
 }

 func (ps *pendingSeries) periodicFlusher() {
@@ -70,18 +71,20 @@ func (ps *pendingSeries) periodicFlusher() {
 	}
 	ticker := time.NewTicker(*flushInterval)
 	defer ticker.Stop()
-	mustStop := false
-	for !mustStop {
+	for {
 		select {
 		case <-ps.stopCh:
-			mustStop = true
+			ps.mu.Lock()
+			ps.wr.mustFlushOnStop()
+			ps.mu.Unlock()
+			return
 		case <-ticker.C:
 			if fasttime.UnixTimestamp()-atomic.LoadUint64(&ps.wr.lastFlushTime) < uint64(flushSeconds) {
 				continue
 			}
 		}
 		ps.mu.Lock()
-		ps.wr.flush()
+		_ = ps.wr.tryFlush()
 		ps.mu.Unlock()
 	}
 }
@@ -90,16 +93,16 @@ type writeRequest struct {
 	// Move lastFlushTime to the top of the struct in order to guarantee atomic access on 32-bit architectures.
 	lastFlushTime uint64

-	// pushBlock is called when whe write request is ready to be sent.
-	pushBlock func(block []byte)
+	// The queue to send blocks to.
+	fq *persistentqueue.FastQueue

 	// Whether to encode the write request with VictoriaMetrics remote write protocol.
 	isVMRemoteWrite bool

-	// How many significant figures must be left before sending the writeRequest to pushBlock.
+	// How many significant figures must be left before sending the writeRequest to fq.
 	significantFigures int

-	// How many decimal digits after point must be left before sending the writeRequest to pushBlock.
+	// How many decimal digits after point must be left before sending the writeRequest to fq.
 	roundDigits int

 	wr prompbmarshal.WriteRequest
@@ -112,7 +115,7 @@ type writeRequest struct {
 }

 func (wr *writeRequest) reset() {
-	// Do not reset lastFlushTime, pushBlock, isVMRemoteWrite, significantFigures and roundDigits, since they are re-used.
+	// Do not reset lastFlushTime, fq, isVMRemoteWrite, significantFigures and roundDigits, since they are re-used.

 	wr.wr.Timeseries = nil

@@ -130,23 +133,40 @@ func (wr *writeRequest) reset() {
 	wr.buf = wr.buf[:0]
 }

-func (wr *writeRequest) flush() {
+// mustFlushOnStop force pushes wr data into wr.fq
+//
+// This is needed in order to properly save in-memory data to persistent queue on graceful shutdown.
+func (wr *writeRequest) mustFlushOnStop() {
 	wr.wr.Timeseries = wr.tss
-	wr.adjustSampleValues()
-	atomic.StoreUint64(&wr.lastFlushTime, fasttime.UnixTimestamp())
-	pushWriteRequest(&wr.wr, wr.pushBlock, wr.isVMRemoteWrite)
+	if !tryPushWriteRequest(&wr.wr, wr.mustWriteBlock, wr.isVMRemoteWrite) {
+		logger.Panicf("BUG: final flush must always return true")
+	}
 	wr.reset()
 }

-func (wr *writeRequest) adjustSampleValues() {
-	samples := wr.samples
-	if n := wr.significantFigures; n > 0 {
+func (wr *writeRequest) mustWriteBlock(block []byte) bool {
+	wr.fq.MustWriteBlockIgnoreDisabledPQ(block)
+	return true
+}
+
+func (wr *writeRequest) tryFlush() bool {
+	wr.wr.Timeseries = wr.tss
+	atomic.StoreUint64(&wr.lastFlushTime, fasttime.UnixTimestamp())
+	if !tryPushWriteRequest(&wr.wr, wr.fq.TryWriteBlock, wr.isVMRemoteWrite) {
+		return false
+	}
+	wr.reset()
+	return true
+}
+
+func adjustSampleValues(samples []prompbmarshal.Sample, significantFigures, roundDigits int) {
+	if n := significantFigures; n > 0 {
 		for i := range samples {
 			s := &samples[i]
 			s.Value = decimal.RoundToSignificantFigures(s.Value, n)
 		}
 	}
-	if n := wr.roundDigits; n < 100 {
+	if n := roundDigits; n < 100 {
 		for i := range samples {
 			s := &samples[i]
 			s.Value = decimal.RoundToDecimalDigits(s.Value, n)
@@ -154,21 +174,27 @@ func (wr *writeRequest) adjustSampleValues() {
 	}
 }

-func (wr *writeRequest) push(src []prompbmarshal.TimeSeries) {
+func (wr *writeRequest) tryPush(src []prompbmarshal.TimeSeries) bool {
 	tssDst := wr.tss
 	maxSamplesPerBlock := *maxRowsPerBlock
 	// Allow up to 10x of labels per each block on average.
 	maxLabelsPerBlock := 10 * maxSamplesPerBlock
 	for i := range src {
-		tssDst = append(tssDst, prompbmarshal.TimeSeries{})
-		wr.copyTimeSeries(&tssDst[len(tssDst)-1], &src[i])
 		if len(wr.samples) >= maxSamplesPerBlock || len(wr.labels) >= maxLabelsPerBlock {
 			wr.tss = tssDst
-			wr.flush()
+			if !wr.tryFlush() {
+				return false
+			}
 			tssDst = wr.tss
 		}
+		tsSrc := &src[i]
+		adjustSampleValues(tsSrc.Samples, wr.significantFigures, wr.roundDigits)
+		tssDst = append(tssDst, prompbmarshal.TimeSeries{})
+		wr.copyTimeSeries(&tssDst[len(tssDst)-1], tsSrc)
 	}
+
 	wr.tss = tssDst
+	return true
 }

 func (wr *writeRequest) copyTimeSeries(dst, src *prompbmarshal.TimeSeries) {
@@ -196,10 +222,10 @@ func (wr *writeRequest) copyTimeSeries(dst, src *prompbmarshal.TimeSeries) {
 	wr.buf = buf
 }

-func pushWriteRequest(wr *prompbmarshal.WriteRequest, pushBlock func(block []byte), isVMRemoteWrite bool) {
+func tryPushWriteRequest(wr *prompbmarshal.WriteRequest, tryPushBlock func(block []byte) bool, isVMRemoteWrite bool) bool {
 	if len(wr.Timeseries) == 0 {
 		// Nothing to push
-		return
+		return true
 	}
 	bb := writeRequestBufPool.Get()
 	bb.B = prompbmarshal.MarshalWriteRequest(bb.B[:0], wr)
@@ -212,11 +238,13 @@ func pushWriteRequest(wr *prompbmarshal.WriteRequest, pushBlock func(block []byt
 		}
 		writeRequestBufPool.Put(bb)
 		if len(zb.B) <= persistentqueue.MaxBlockSize {
-			pushBlock(zb.B)
+			if !tryPushBlock(zb.B) {
+				return false
+			}
 			blockSizeRows.Update(float64(len(wr.Timeseries)))
 			blockSizeBytes.Update(float64(len(zb.B)))
 			snappyBufPool.Put(zb)
-			return
+			return true
 		}
 		snappyBufPool.Put(zb)
 	} else {
@@ -229,23 +257,36 @@ func pushWriteRequest(wr *prompbmarshal.WriteRequest, pushBlock func(block []byt
 		samples := wr.Timeseries[0].Samples
 		if len(samples) == 1 {
 			logger.Warnf("dropping a sample for metric with too long labels exceeding -remoteWrite.maxBlockSize=%d bytes", maxUnpackedBlockSize.N)
-			return
+			return true
 		}
 		n := len(samples) / 2
 		wr.Timeseries[0].Samples = samples[:n]
-		pushWriteRequest(wr, pushBlock, isVMRemoteWrite)
+		if !tryPushWriteRequest(wr, tryPushBlock, isVMRemoteWrite) {
+			wr.Timeseries[0].Samples = samples
+			return false
+		}
 		wr.Timeseries[0].Samples = samples[n:]
-		pushWriteRequest(wr, pushBlock, isVMRemoteWrite)
+		if !tryPushWriteRequest(wr, tryPushBlock, isVMRemoteWrite) {
+			wr.Timeseries[0].Samples = samples
+			return false
+		}
 		wr.Timeseries[0].Samples = samples
-		return
+		return true
 	}
 	timeseries := wr.Timeseries
 	n := len(timeseries) / 2
 	wr.Timeseries = timeseries[:n]
-	pushWriteRequest(wr, pushBlock, isVMRemoteWrite)
+	if !tryPushWriteRequest(wr, tryPushBlock, isVMRemoteWrite) {
+		wr.Timeseries = timeseries
+		return false
+	}
 	wr.Timeseries = timeseries[n:]
-	pushWriteRequest(wr, pushBlock, isVMRemoteWrite)
+	if !tryPushWriteRequest(wr, tryPushBlock, isVMRemoteWrite) {
+		wr.Timeseries = timeseries
+		return false
+	}
 	wr.Timeseries = timeseries
+	return true
 }

 var (
--- a/app/vmagent/remotewrite/pendingseries_test.go
+++ b/app/vmagent/remotewrite/pendingseries_test.go
@@ -26,13 +26,16 @@ func testPushWriteRequest(t *testing.T, rowsCount, expectedBlockLenProm, expecte
 		t.Helper()
 		wr := newTestWriteRequest(rowsCount, 20)
 		pushBlockLen := 0
-		pushBlock := func(block []byte) {
+		pushBlock := func(block []byte) bool {
 			if pushBlockLen > 0 {
 				panic(fmt.Errorf("BUG: pushBlock called multiple times; pushBlockLen=%d at first call, len(block)=%d at second call", pushBlockLen, len(block)))
 			}
 			pushBlockLen = len(block)
+			return true
+		}
+		if !tryPushWriteRequest(wr, pushBlock, isVMRemoteWrite) {
+			t.Fatalf("cannot push data to to remote storage")
 		}
-		pushWriteRequest(wr, pushBlock, isVMRemoteWrite)
 		if math.Abs(float64(pushBlockLen-expectedBlockLen)/float64(expectedBlockLen)*100) > tolerancePrc {
 			t.Fatalf("unexpected block len for rowsCount=%d, isVMRemoteWrite=%v; got %d bytes; expecting %d bytes +- %.0f%%",
 				rowsCount, isVMRemoteWrite, pushBlockLen, expectedBlockLen, tolerancePrc)
--- a/app/vmagent/remotewrite/relabel.go
+++ b/app/vmagent/remotewrite/relabel.go
@@ -3,6 +3,7 @@ package remotewrite
 import (
 	"flag"
 	"fmt"
+	"strconv"
 	"strings"
 	"sync"

@@ -92,6 +93,7 @@ func (rctx *relabelCtx) applyRelabeling(tss []prompbmarshal.TimeSeries, pcs *pro
 		// Nothing to change.
 		return tss
 	}
+	rctx.reset()
 	tssDst := tss[:0]
 	labels := rctx.labels[:0]
 	for i := range tss {
@@ -120,6 +122,7 @@ func (rctx *relabelCtx) appendExtraLabels(tss []prompbmarshal.TimeSeries, extraL
 	if len(extraLabels) == 0 {
 		return
 	}
+	rctx.reset()
 	labels := rctx.labels[:0]
 	for i := range tss {
 		ts := &tss[i]
@@ -127,9 +130,6 @@ func (rctx *relabelCtx) appendExtraLabels(tss []prompbmarshal.TimeSeries, extraL
 		labels = append(labels, ts.Labels...)
 		for j := range extraLabels {
 			extraLabel := extraLabels[j]
-			if *usePromCompatibleNaming {
-				extraLabel.Name = promrelabel.SanitizeLabelName(extraLabel.Name)
-			}
 			tmp := promrelabel.GetLabelByName(labels[labelsLen:], extraLabel.Name)
 			if tmp != nil {
 				tmp.Value = extraLabel.Value
@@ -142,6 +142,34 @@ func (rctx *relabelCtx) appendExtraLabels(tss []prompbmarshal.TimeSeries, extraL
 	rctx.labels = labels
 }

+func (rctx *relabelCtx) tenantToLabels(tss []prompbmarshal.TimeSeries, accountID, projectID uint32) {
+	rctx.reset()
+	accountIDStr := strconv.FormatUint(uint64(accountID), 10)
+	projectIDStr := strconv.FormatUint(uint64(projectID), 10)
+	labels := rctx.labels[:0]
+	for i := range tss {
+		ts := &tss[i]
+		labelsLen := len(labels)
+		for _, label := range ts.Labels {
+			labelName := label.Name
+			if labelName == "vm_account_id" || labelName == "vm_project_id" {
+				continue
+			}
+			labels = append(labels, label)
+		}
+		labels = append(labels, prompbmarshal.Label{
+			Name:  "vm_account_id",
+			Value: accountIDStr,
+		})
+		labels = append(labels, prompbmarshal.Label{
+			Name:  "vm_project_id",
+			Value: projectIDStr,
+		})
+		ts.Labels = labels[labelsLen:]
+	}
+	rctx.labels = labels
+}
+
 type relabelCtx struct {
 	// pool for labels, which are used during the relabeling.
 	labels []prompbmarshal.Label
@@ -163,7 +191,7 @@ func getRelabelCtx() *relabelCtx {
 }

 func putRelabelCtx(rctx *relabelCtx) {
-	rctx.labels = rctx.labels[:0]
+	rctx.reset()
 	relabelCtxPool.Put(rctx)
 }

--- a/app/vmagent/remotewrite/relabel_test.go
+++ b/app/vmagent/remotewrite/relabel_test.go
@@ -40,6 +40,7 @@ func TestApplyRelabeling(t *testing.T) {

 func TestAppendExtraLabels(t *testing.T) {
 	f := func(extraLabels []prompbmarshal.Label, sTss, sExpTss string) {
+		t.Helper()
 		rctx := &relabelCtx{}
 		tss, expTss := parseSeries(sTss), parseSeries(sExpTss)
 		rctx.appendExtraLabels(tss, extraLabels)
@@ -55,7 +56,7 @@ func TestAppendExtraLabels(t *testing.T) {

 	oldVal := *usePromCompatibleNaming
 	*usePromCompatibleNaming = true
-	f([]prompbmarshal.Label{{Name: "foo.bar", Value: "baz"}}, "up", `up{foo_bar="baz"}`)
+	f([]prompbmarshal.Label{{Name: "foo.bar", Value: "baz"}}, "up", `up{foo.bar="baz"}`)
 	*usePromCompatibleNaming = oldVal
 }

--- a/app/vmagent/remotewrite/remotewrite.go
+++ b/app/vmagent/remotewrite/remotewrite.go
@@ -3,6 +3,7 @@ package remotewrite
 import (
 	"flag"
 	"fmt"
+	"net/http"
 	"net/url"
 	"path/filepath"
 	"strconv"
@@ -10,6 +11,8 @@ import (
 	"sync/atomic"
 	"time"

+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
+
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/bloomfilter"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
@@ -23,6 +26,7 @@ import (
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutils"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/streamaggr"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
 	"github.com/VictoriaMetrics/metrics"
@@ -33,15 +37,22 @@ var (
 	remoteWriteURLs = flagutil.NewArrayString("remoteWrite.url", "Remote storage URL to write data to. It must support either VictoriaMetrics remote write protocol "+
 		"or Prometheus remote_write protocol. Example url: http://<victoriametrics-host>:8428/api/v1/write . "+
 		"Pass multiple -remoteWrite.url options in order to replicate the collected data to multiple remote storage systems. "+
-		"The data can be sharded among the configured remote storage systems if -remoteWrite.shardByURL flag is set. "+
-		"See also -remoteWrite.multitenantURL")
+		"The data can be sharded among the configured remote storage systems if -remoteWrite.shardByURL flag is set")
 	remoteWriteMultitenantURLs = flagutil.NewArrayString("remoteWrite.multitenantURL", "Base path for multitenant remote storage URL to write data to. "+
 		"See https://docs.victoriametrics.com/vmagent.html#multitenancy for details. Example url: http://<vminsert>:8480 . "+
-		"Pass multiple -remoteWrite.multitenantURL flags in order to replicate data to multiple remote storage systems. See also -remoteWrite.url")
+		"Pass multiple -remoteWrite.multitenantURL flags in order to replicate data to multiple remote storage systems. "+
+		"This flag is deprecated in favor of -enableMultitenantHandlers . See https://docs.victoriametrics.com/vmagent.html#multitenancy")
+	enableMultitenantHandlers = flag.Bool("enableMultitenantHandlers", false, "Whether to process incoming data via multitenant insert handlers according to "+
+		"https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#url-format . By default incoming data is processed via single-node insert handlers "+
+		"according to https://docs.victoriametrics.com/#how-to-import-time-series-data ."+
+		"See https://docs.victoriametrics.com/vmagent.html#multitenancy for details")
 	shardByURL = flag.Bool("remoteWrite.shardByURL", false, "Whether to shard outgoing series across all the remote storage systems enumerated via -remoteWrite.url . "+
 		"By default the data is replicated across all the -remoteWrite.url . See https://docs.victoriametrics.com/vmagent.html#sharding-among-remote-storages")
-	tmpDataPath = flag.String("remoteWrite.tmpDataPath", "vmagent-remotewrite-data", "Path to directory where temporary data for remote write component is stored. "+
-		"See also -remoteWrite.maxDiskUsagePerURL")
+	shardByURLLabels = flagutil.NewArrayString("remoteWrite.shardByURL.labels", "Optional list of labels, which must be used for sharding outgoing samples "+
+		"among remote storage systems if -remoteWrite.shardByURL command-line flag is set. By default all the labels are used for sharding in order to gain "+
+		"even distribution of series over the specified -remoteWrite.url systems")
+	tmpDataPath = flag.String("remoteWrite.tmpDataPath", "vmagent-remotewrite-data", "Path to directory for storing pending data, which isn't sent to the configured -remoteWrite.url . "+
+		"See also -remoteWrite.maxDiskUsagePerURL and -remoteWrite.disableOnDiskQueue")
 	keepDanglingQueues = flag.Bool("remoteWrite.keepDanglingQueues", false, "Keep persistent queues contents at -remoteWrite.tmpDataPath in case there are no matching -remoteWrite.url. "+
 		"Useful when -remoteWrite.url is changed temporarily and persistent queue files will be needed later on.")
 	queues = flag.Int("remoteWrite.queues", cgroup.AvailableCPUs()*2, "The number of concurrent queues to each -remoteWrite.url. Set more queues if default number of queues "+
@@ -80,6 +91,11 @@ var (
 		"are written to the corresponding -remoteWrite.url . See also -remoteWrite.streamAggr.keepInput and https://docs.victoriametrics.com/stream-aggregation.html")
 	streamAggrDedupInterval = flagutil.NewArrayDuration("remoteWrite.streamAggr.dedupInterval", 0, "Input samples are de-duplicated with this interval before being aggregated. "+
 		"Only the last sample per each time series per each interval is aggregated if the interval is greater than zero")
+	disableOnDiskQueue = flag.Bool("remoteWrite.disableOnDiskQueue", false, "Whether to disable storing pending data to -remoteWrite.tmpDataPath "+
+		"when the configured remote storage systems cannot keep up with the data ingestion rate. See https://docs.victoriametrics.com/vmagent.html#disabling-on-disk-persistence ."+
+		"See also -remoteWrite.dropSamplesOnOverload")
+	dropSamplesOnOverload = flag.Bool("remoteWrite.dropSamplesOnOverload", false, "Whether to drop samples when -remoteWrite.disableOnDiskQueue is set and if the samples "+
+		"cannot be pushed into the configured remote storage systems in a timely manner. See https://docs.victoriametrics.com/vmagent.html#disabling-on-disk-persistence")
 )

 var (
@@ -92,11 +108,19 @@ var (

 	// Data without tenant id is written to defaultAuthToken if -remoteWrite.multitenantURL is specified.
 	defaultAuthToken = &auth.Token{}
+
+	// ErrQueueFullHTTPRetry must be returned when TryPush() returns false.
+	ErrQueueFullHTTPRetry = &httpserver.ErrorWithStatusCode{
+		Err: fmt.Errorf("remote storage systems cannot keep up with the data ingestion rate; retry the request later " +
+			"or remove -remoteWrite.disableOnDiskQueue from vmagent command-line flags, so it could save pending data to -remoteWrite.tmpDataPath; " +
+			"see https://docs.victoriametrics.com/vmagent.html#disabling-on-disk-persistence"),
+		StatusCode: http.StatusTooManyRequests,
+	}
 )

-// MultitenancyEnabled returns true if -remoteWrite.multitenantURL is specified.
+// MultitenancyEnabled returns true if -enableMultitenantHandlers or -remoteWrite.multitenantURL is specified.
 func MultitenancyEnabled() bool {
-	return len(*remoteWriteMultitenantURLs) > 0
+	return *enableMultitenantHandlers || len(*remoteWriteMultitenantURLs) > 0
 }

 // Contains the current relabelConfigs.
@@ -116,6 +140,8 @@ func InitSecretFlags() {
 	}
 }

+var shardByURLLabelsMap map[string]struct{}
+
 // Init initializes remotewrite.
 //
 // It must be called after flag.Parse().
@@ -152,6 +178,13 @@ func Init() {
 	if *queues <= 0 {
 		*queues = 1
 	}
+	if len(*shardByURLLabels) > 0 {
+		m := make(map[string]struct{}, len(*shardByURLLabels))
+		for _, label := range *shardByURLLabels {
+			m[label] = struct{}{}
+		}
+		shardByURLLabelsMap = m
+	}
 	initLabelsGlobal()

 	// Register SIGHUP handler for config reload before loadRelabelConfigs.
@@ -170,6 +203,7 @@ func Init() {
 	if len(*remoteWriteURLs) > 0 {
 		rwctxsDefault = newRemoteWriteCtxs(nil, *remoteWriteURLs)
 	}
+	dropDanglingQueues()

 	// Start config reloader.
 	configReloaderWG.Add(1)
@@ -187,6 +221,42 @@ func Init() {
 	}()
 }

+func dropDanglingQueues() {
+	if *keepDanglingQueues {
+		return
+	}
+	if len(*remoteWriteMultitenantURLs) > 0 {
+		// Do not drop dangling queues for *remoteWriteMultitenantURLs, since it is impossible to determine
+		// unused queues for multitenant urls - they are created on demand when new sample for the given
+		// tenant is pushed to remote storage.
+		return
+	}
+	// Remove dangling persistent queues, if any.
+	// This is required for the case when the number of queues has been changed or URLs have been changed.
+	// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4014
+	//
+	existingQueues := make(map[string]struct{}, len(rwctxsDefault))
+	for _, rwctx := range rwctxsDefault {
+		existingQueues[rwctx.fq.Dirname()] = struct{}{}
+	}
+
+	queuesDir := filepath.Join(*tmpDataPath, persistentQueueDirname)
+	files := fs.MustReadDir(queuesDir)
+	removed := 0
+	for _, f := range files {
+		dirname := f.Name()
+		if _, ok := existingQueues[dirname]; !ok {
+			logger.Infof("removing dangling queue %q", dirname)
+			fullPath := filepath.Join(queuesDir, dirname)
+			fs.MustRemoveAll(fullPath)
+			removed++
+		}
+	}
+	if removed > 0 {
+		logger.Infof("removed %d dangling queues from %q, active queues: %d", removed, *tmpDataPath, len(rwctxsDefault))
+	}
+}
+
 func reloadRelabelConfigs() {
 	relabelConfigReloads.Inc()
 	logger.Infof("reloading relabel configs pointed by -remoteWrite.relabelConfig and -remoteWrite.urlRelabelConfig")
@@ -258,35 +328,8 @@ func newRemoteWriteCtxs(at *auth.Token, urls []string) []*remoteWriteCtx {
 		if *showRemoteWriteURL {
 			sanitizedURL = fmt.Sprintf("%d:%s", i+1, remoteWriteURL)
 		}
-		rwctxs[i] = newRemoteWriteCtx(i, at, remoteWriteURL, maxInmemoryBlocks, sanitizedURL)
+		rwctxs[i] = newRemoteWriteCtx(i, remoteWriteURL, maxInmemoryBlocks, sanitizedURL)
 	}
-
-	if !*keepDanglingQueues {
-		// Remove dangling queues, if any.
-		// This is required for the case when the number of queues has been changed or URL have been changed.
-		// See: https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4014
-		existingQueues := make(map[string]struct{}, len(rwctxs))
-		for _, rwctx := range rwctxs {
-			existingQueues[rwctx.fq.Dirname()] = struct{}{}
-		}
-
-		queuesDir := filepath.Join(*tmpDataPath, persistentQueueDirname)
-		files := fs.MustReadDir(queuesDir)
-		removed := 0
-		for _, f := range files {
-			dirname := f.Name()
-			if _, ok := existingQueues[dirname]; !ok {
-				logger.Infof("removing dangling queue %q", dirname)
-				fullPath := filepath.Join(queuesDir, dirname)
-				fs.MustRemoveAll(fullPath)
-				removed++
-			}
-		}
-		if removed > 0 {
-			logger.Infof("removed %d dangling queues from %q, active queues: %d", removed, *tmpDataPath, len(rwctxs))
-		}
-	}
-
 	return rwctxs
 }

@@ -295,7 +338,7 @@ var configReloaderWG sync.WaitGroup

 // Stop stops remotewrite.
 //
-// It is expected that nobody calls Push during and after the call to this func.
+// It is expected that nobody calls TryPush during and after the call to this func.
 func Stop() {
 	close(configReloaderStopCh)
 	configReloaderWG.Wait()
@@ -305,7 +348,7 @@ func Stop() {
 	}
 	rwctxsDefault = nil

-	// There is no need in locking rwctxsMapLock here, since nobody should call Push during the Stop call.
+	// There is no need to lock rwctxsMapLock here, since nobody should call TryPush during the Stop call.
 	for _, rwctxs := range rwctxsMap {
 		for _, rwctx := range rwctxs {
 			rwctx.MustStop()
@@ -321,24 +364,47 @@ func Stop() {
 	}
 }

-// Push sends wr to remote storage systems set via `-remoteWrite.url`.
+// PushDropSamplesOnFailure pushes wr to the configured remote storage systems set via -remoteWrite.url and -remoteWrite.multitenantURL
 //
-// If at is nil, then the data is pushed to the configured `-remoteWrite.url`.
-// If at isn't nil, the data is pushed to the configured `-remoteWrite.multitenantURL`.
+// If at is nil, then the data is pushed to the configured -remoteWrite.url.
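+// If at isn't nil, the data is pushed to the configured -remoteWrite.multitenantURL; if that flag isn't set, the tenant is converted to vm_account_id and vm_project_id labels and the data is pushed to -remoteWrite.url.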
 //
-// Note that wr may be modified by Push because of relabeling and rounding.
-func Push(at *auth.Token, wr *prompbmarshal.WriteRequest) {
-	if at == nil && len(*remoteWriteMultitenantURLs) > 0 {
-		// Write data to default tenant if at isn't set while -remoteWrite.multitenantURL is set.
+// PushDropSamplesOnFailure can modify wr contents.
+func PushDropSamplesOnFailure(at *auth.Token, wr *prompbmarshal.WriteRequest) {
+	_ = tryPush(at, wr, true)
+}
+
+// TryPush tries sending wr to the configured remote storage systems set via -remoteWrite.url and -remoteWrite.multitenantURL
+//
+// If at is nil, then the data is pushed to the configured -remoteWrite.url.
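+// If at isn't nil, the data is pushed to the configured -remoteWrite.multitenantURL; if that flag isn't set, the tenant is converted to vm_account_id and vm_project_id labels and the data is pushed to -remoteWrite.url.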
+//
+// TryPush can modify wr contents, so the caller must re-initialize wr before calling TryPush() after unsuccessful attempt.
+// TryPush may send partial data from wr on unsuccessful attempt, so repeated call for the same wr may send the data multiple times.
+//
+// The caller must return ErrQueueFullHTTPRetry to the client, which sends wr, if TryPush returns false.
+func TryPush(at *auth.Token, wr *prompbmarshal.WriteRequest) bool {
+	return tryPush(at, wr, *dropSamplesOnOverload)
+}
+
+func tryPush(at *auth.Token, wr *prompbmarshal.WriteRequest, dropSamplesOnFailure bool) bool {
+	tss := wr.Timeseries
+
+	if at == nil && MultitenancyEnabled() {
+		// Write data to default tenant if at isn't set when multitenancy is enabled.
 		at = defaultAuthToken
 	}
+
+	var tenantRctx *relabelCtx
 	var rwctxs []*remoteWriteCtx
 	if at == nil {
 		rwctxs = rwctxsDefault
+	} else if len(*remoteWriteMultitenantURLs) == 0 {
+		// Convert at to (vm_account_id, vm_project_id) labels.
+		tenantRctx = getRelabelCtx()
+		defer putRelabelCtx(tenantRctx)
+		rwctxs = rwctxsDefault
 	} else {
-		if len(*remoteWriteMultitenantURLs) == 0 {
-			logger.Panicf("BUG: -remoteWrite.multitenantURL command-line flag must be set when __tenant_id__=%q label is set", at)
-		}
 		rwctxsMapLock.Lock()
 		tenantID := tenantmetrics.TenantID{
 			AccountID: at.AccountID,
@@ -352,18 +418,37 @@ func Push(at *auth.Token, wr *prompbmarshal.WriteRequest) {
 		rwctxsMapLock.Unlock()
 	}

+	rowsCount := getRowsCount(tss)
+
+	if *disableOnDiskQueue {
+		// Quick check whether writes to configured remote storage systems are blocked.
+		// This allows saving CPU time spent on relabeling and block compression
+		// if some of remote storage systems cannot keep up with the data ingestion rate.
+		for _, rwctx := range rwctxs {
+			if rwctx.fq.IsWriteBlocked() {
+				pushFailures.Inc()
+				if dropSamplesOnFailure {
+					// Just drop samples
+					samplesDropped.Add(rowsCount)
+					return true
+				}
+				return false
+			}
+		}
+	}
+
 	var rctx *relabelCtx
 	rcs := allRelabelConfigs.Load()
 	pcsGlobal := rcs.global
 	if pcsGlobal.Len() > 0 {
 		rctx = getRelabelCtx()
+		defer putRelabelCtx(rctx)
 	}
-	tss := wr.Timeseries
-	rowsCount := getRowsCount(tss)
 	globalRowsPushedBeforeRelabel.Add(rowsCount)
 	maxSamplesPerBlock := *maxRowsPerBlock
 	// Allow up to 10x of labels per each block on average.
 	maxLabelsPerBlock := 10 * maxSamplesPerBlock
+
 	for len(tss) > 0 {
 		// Process big tss in smaller blocks in order to reduce the maximum memory usage
 		samplesCount := 0
@@ -371,7 +456,7 @@ func Push(at *auth.Token, wr *prompbmarshal.WriteRequest) {
 		i := 0
 		for i < len(tss) {
 			samplesCount += len(tss[i].Samples)
-			labelsCount += len(tss[i].Labels)
+			labelsCount += len(tss[i].Samples) * len(tss[i].Labels)
 			i++
 			if samplesCount >= maxSamplesPerBlock || labelsCount >= maxLabelsPerBlock {
 				break
@@ -384,6 +469,9 @@ func Push(at *auth.Token, wr *prompbmarshal.WriteRequest) {
 		} else {
 			tss = nil
 		}
+		if tenantRctx != nil {
+			tenantRctx.tenantToLabels(tssBlock, at.AccountID, at.ProjectID)
+		}
 		if rctx != nil {
 			rowsCountBeforeRelabel := getRowsCount(tssBlock)
 			tssBlock = rctx.applyRelabeling(tssBlock, pcsGlobal)
@@ -392,25 +480,35 @@ func Push(at *auth.Token, wr *prompbmarshal.WriteRequest) {
 		}
 		sortLabelsIfNeeded(tssBlock)
 		tssBlock = limitSeriesCardinality(tssBlock)
-		pushBlockToRemoteStorages(rwctxs, tssBlock)
-		if rctx != nil {
-			rctx.reset()
+		if !tryPushBlockToRemoteStorages(rwctxs, tssBlock) {
+			if !*disableOnDiskQueue {
+				logger.Panicf("BUG: tryPushBlockToRemoteStorages must return true if -remoteWrite.disableOnDiskQueue isn't set")
+			}
+			pushFailures.Inc()
+			if dropSamplesOnFailure {
+				samplesDropped.Add(rowsCount)
+				return true
+			}
+			return false
 		}
 	}
-	if rctx != nil {
-		putRelabelCtx(rctx)
-	}
+	return true
 }

-func pushBlockToRemoteStorages(rwctxs []*remoteWriteCtx, tssBlock []prompbmarshal.TimeSeries) {
+var (
+	samplesDropped = metrics.NewCounter(`vmagent_remotewrite_samples_dropped_total`)
+	pushFailures   = metrics.NewCounter(`vmagent_remotewrite_push_failures_total`)
+)
+
+func tryPushBlockToRemoteStorages(rwctxs []*remoteWriteCtx, tssBlock []prompbmarshal.TimeSeries) bool {
 	if len(tssBlock) == 0 {
 		// Nothing to push
-		return
+		return true
 	}
+
 	if len(rwctxs) == 1 {
 		// Fast path - just push data to the configured single remote storage
-		rwctxs[0].Push(tssBlock)
-		return
+		return rwctxs[0].TryPush(tssBlock)
 	}

 	// We need to push tssBlock to multiple remote storages.
@@ -418,15 +516,28 @@ func pushBlockToRemoteStorages(rwctxs []*remoteWriteCtx, tssBlock []prompbmarsha
 	if *shardByURL {
 		// Shard the data among rwctxs
 		tssByURL := make([][]prompbmarshal.TimeSeries, len(rwctxs))
+		tmpLabels := promutils.GetLabels()
 		for _, ts := range tssBlock {
-			h := getLabelsHash(ts.Labels)
+			hashLabels := ts.Labels
+			if len(shardByURLLabelsMap) > 0 {
+				hashLabels = tmpLabels.Labels[:0]
+				for _, label := range ts.Labels {
+					if _, ok := shardByURLLabelsMap[label.Name]; ok {
+						hashLabels = append(hashLabels, label)
+					}
+				}
+			}
+			h := getLabelsHash(hashLabels)
 			idx := h % uint64(len(tssByURL))
 			tssByURL[idx] = append(tssByURL[idx], ts)
 		}
+		promutils.PutLabels(tmpLabels)
+
 		// Push sharded data to remote storages in parallel in order to reduce
 		// the time needed for sending the data to multiple remote storage systems.
 		var wg sync.WaitGroup
 		wg.Add(len(rwctxs))
+		var anyPushFailed uint64
 		for i, rwctx := range rwctxs {
 			tssShard := tssByURL[i]
 			if len(tssShard) == 0 {
@@ -434,11 +545,13 @@ func pushBlockToRemoteStorages(rwctxs []*remoteWriteCtx, tssBlock []prompbmarsha
 			}
 			go func(rwctx *remoteWriteCtx, tss []prompbmarshal.TimeSeries) {
 				defer wg.Done()
-				rwctx.Push(tss)
+				if !rwctx.TryPush(tss) {
+					atomic.StoreUint64(&anyPushFailed, 1)
+				}
 			}(rwctx, tssShard)
 		}
 		wg.Wait()
-		return
+		return atomic.LoadUint64(&anyPushFailed) == 0
 	}

 	// Replicate data among rwctxs.
@@ -446,13 +559,17 @@ func pushBlockToRemoteStorages(rwctxs []*remoteWriteCtx, tssBlock []prompbmarsha
 	// the time needed for sending the data to multiple remote storage systems.
 	var wg sync.WaitGroup
 	wg.Add(len(rwctxs))
+	var anyPushFailed uint64
 	for _, rwctx := range rwctxs {
 		go func(rwctx *remoteWriteCtx) {
 			defer wg.Done()
-			rwctx.Push(tssBlock)
+			if !rwctx.TryPush(tssBlock) {
+				atomic.StoreUint64(&anyPushFailed, 1)
+			}
 		}(rwctx)
 	}
 	wg.Wait()
+	return atomic.LoadUint64(&anyPushFailed) == 0
 }

 // sortLabelsIfNeeded sorts labels if -sortLabels command-line flag is set.
@@ -559,7 +676,7 @@ type remoteWriteCtx struct {
 	rowsDroppedByRelabel   *metrics.Counter
 }

-func newRemoteWriteCtx(argIdx int, at *auth.Token, remoteWriteURL *url.URL, maxInmemoryBlocks int, sanitizedURL string) *remoteWriteCtx {
+func newRemoteWriteCtx(argIdx int, remoteWriteURL *url.URL, maxInmemoryBlocks int, sanitizedURL string) *remoteWriteCtx {
 	// strip query params, otherwise changing params resets pq
 	pqURL := *remoteWriteURL
 	pqURL.RawQuery = ""
@@ -572,13 +689,19 @@ func newRemoteWriteCtx(argIdx int, at *auth.Token, remoteWriteURL *url.URL, maxI
 		logger.Warnf("rounding the -remoteWrite.maxDiskUsagePerURL=%d to the minimum supported value: %d", maxPendingBytes, persistentqueue.DefaultChunkFileSize)
 		maxPendingBytes = persistentqueue.DefaultChunkFileSize
 	}
-	fq := persistentqueue.MustOpenFastQueue(queuePath, sanitizedURL, maxInmemoryBlocks, maxPendingBytes)
+	fq := persistentqueue.MustOpenFastQueue(queuePath, sanitizedURL, maxInmemoryBlocks, maxPendingBytes, *disableOnDiskQueue)
 	_ = metrics.GetOrCreateGauge(fmt.Sprintf(`vmagent_remotewrite_pending_data_bytes{path=%q, url=%q}`, queuePath, sanitizedURL), func() float64 {
 		return float64(fq.GetPendingBytes())
 	})
 	_ = metrics.GetOrCreateGauge(fmt.Sprintf(`vmagent_remotewrite_pending_inmemory_blocks{path=%q, url=%q}`, queuePath, sanitizedURL), func() float64 {
 		return float64(fq.GetInmemoryQueueLen())
 	})
+	_ = metrics.GetOrCreateGauge(fmt.Sprintf(`vmagent_remotewrite_queue_blocked{path=%q, url=%q}`, queuePath, sanitizedURL), func() float64 {
+		if fq.IsWriteBlocked() {
+			return 1
+		}
+		return 0
+	})

 	var c *client
 	switch remoteWriteURL.Scheme {
@@ -600,7 +723,7 @@ func newRemoteWriteCtx(argIdx int, at *auth.Token, remoteWriteURL *url.URL, maxI
 	}
 	pss := make([]*pendingSeries, pssLen)
 	for i := range pss {
-		pss[i] = newPendingSeries(fq.MustWriteBlock, c.useVMProto, sf, rd)
+		pss[i] = newPendingSeries(fq, c.useVMProto, sf, rd)
 	}

 	rwctx := &remoteWriteCtx{
@@ -617,7 +740,7 @@ func newRemoteWriteCtx(argIdx int, at *auth.Token, remoteWriteURL *url.URL, maxI
 	sasFile := streamAggrConfig.GetOptionalArg(argIdx)
 	if sasFile != "" {
 		dedupInterval := streamAggrDedupInterval.GetOptionalArg(argIdx)
-		sas, err := streamaggr.LoadFromFile(sasFile, rwctx.pushInternal, dedupInterval)
+		sas, err := streamaggr.LoadFromFile(sasFile, rwctx.pushInternalTrackDropped, dedupInterval)
 		if err != nil {
 			logger.Fatalf("cannot initialize stream aggregators from -remoteWrite.streamAggr.config=%q: %s", sasFile, err)
 		}
@@ -653,7 +776,7 @@ func (rwctx *remoteWriteCtx) MustStop() {
 	rwctx.rowsDroppedByRelabel = nil
 }

-func (rwctx *remoteWriteCtx) Push(tss []prompbmarshal.TimeSeries) {
+func (rwctx *remoteWriteCtx) TryPush(tss []prompbmarshal.TimeSeries) bool {
 	// Apply relabeling
 	var rctx *relabelCtx
 	var v *[]prompbmarshal.TimeSeries
@@ -691,7 +814,9 @@ func (rwctx *remoteWriteCtx) Push(tss []prompbmarshal.TimeSeries) {
 		}
 		matchIdxsPool.Put(matchIdxs)
 	}
-	rwctx.pushInternal(tss)
+
+	// Try pushing the data to remote storage
+	ok := rwctx.tryPushInternal(tss)

 	// Return back relabeling contexts to the pool
 	if rctx != nil {
@@ -699,6 +824,8 @@ func (rwctx *remoteWriteCtx) Push(tss []prompbmarshal.TimeSeries) {
 		tssPool.Put(v)
 		putRelabelCtx(rctx)
 	}
+
+	return ok
 }

 var matchIdxsPool bytesutil.ByteBufferPool
@@ -718,36 +845,64 @@ func dropAggregatedSeries(src []prompbmarshal.TimeSeries, matchIdxs []byte, drop
 	return dst
 }

-func (rwctx *remoteWriteCtx) pushInternal(tss []prompbmarshal.TimeSeries) {
+func (rwctx *remoteWriteCtx) pushInternalTrackDropped(tss []prompbmarshal.TimeSeries) {
+	if rwctx.tryPushInternal(tss) {
+		return
+	}
+	if !*disableOnDiskQueue {
+		logger.Panicf("BUG: tryPushInternal must return true if -remoteWrite.disableOnDiskQueue isn't set")
+	}
+	pushFailures.Inc()
+	if *dropSamplesOnOverload {
+		rowsCount := getRowsCount(tss)
+		samplesDropped.Add(rowsCount)
+	}
+}
+
+func (rwctx *remoteWriteCtx) tryPushInternal(tss []prompbmarshal.TimeSeries) bool {
+	var rctx *relabelCtx
+	var v *[]prompbmarshal.TimeSeries
 	if len(labelsGlobal) > 0 {
-		rctx := getRelabelCtx()
-		defer putRelabelCtx(rctx)
+		// Make a copy of tss before adding extra labels, so the added labels
+		// do not affect time series pushed to other -remoteWrite.url configs.
+		rctx = getRelabelCtx()
+		v = tssPool.Get().(*[]prompbmarshal.TimeSeries)
+		tss = append(*v, tss...)
 		rctx.appendExtraLabels(tss, labelsGlobal)
 	}

 	pss := rwctx.pss
 	idx := atomic.AddUint64(&rwctx.pssNextIdx, 1) % uint64(len(pss))
-	pss[idx].Push(tss)
+
+	ok := pss[idx].TryPush(tss)
+
+	if rctx != nil {
+		*v = prompbmarshal.ResetTimeSeries(tss)
+		tssPool.Put(v)
+		putRelabelCtx(rctx)
+	}
+
+	return ok
 }

 func (rwctx *remoteWriteCtx) reinitStreamAggr() {
-	sas := rwctx.sas.Load()
-	if sas == nil {
+	sasFile := streamAggrConfig.GetOptionalArg(rwctx.idx)
+	if sasFile == "" {
 		// There is no stream aggregation for rwctx
 		return
 	}

-	sasFile := streamAggrConfig.GetOptionalArg(rwctx.idx)
 	logger.Infof("reloading stream aggregation configs pointed by -remoteWrite.streamAggr.config=%q", sasFile)
 	metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_streamaggr_config_reloads_total{path=%q}`, sasFile)).Inc()
 	dedupInterval := streamAggrDedupInterval.GetOptionalArg(rwctx.idx)
-	sasNew, err := streamaggr.LoadFromFile(sasFile, rwctx.pushInternal, dedupInterval)
+	sasNew, err := streamaggr.LoadFromFile(sasFile, rwctx.pushInternalTrackDropped, dedupInterval)
 	if err != nil {
 		metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_streamaggr_config_reloads_errors_total{path=%q}`, sasFile)).Inc()
 		metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_streamaggr_config_reload_successful{path=%q}`, sasFile)).Set(0)
 		logger.Errorf("cannot reload stream aggregation config from -remoteWrite.streamAggr.config=%q; continue using the previously loaded config; error: %s", sasFile, err)
 		return
 	}
+	sas := rwctx.sas.Load()
 	if !sasNew.Equal(sas) {
 		sasOld := rwctx.sas.Swap(sasNew)
 		sasOld.MustStop()
--- a/app/vmagent/remotewrite/statconn.go
+++ b/app/vmagent/remotewrite/statconn.go
@@ -27,7 +27,7 @@ var (
 	stdDialerOnce sync.Once
 )

-func statDial(ctx context.Context, networkUnused, addr string) (conn net.Conn, err error) {
+func statDial(ctx context.Context, _, addr string) (conn net.Conn, err error) {
 	network := netutil.GetTCPNetwork()
 	d := getStdDialer()
 	conn, err = d.DialContext(ctx, network, addr)
--- a/app/vmagent/vmimport/request_handler.go
+++ b/app/vmagent/vmimport/request_handler.go
@@ -76,7 +76,9 @@ func insertRows(at *auth.Token, rows []parser.Row, extraLabels []prompbmarshal.L
 	ctx.WriteRequest.Timeseries = tssDst
 	ctx.Labels = labels
 	ctx.Samples = samples
-	remotewrite.Push(at, &ctx.WriteRequest)
+	if !remotewrite.TryPush(at, &ctx.WriteRequest) {
+		return remotewrite.ErrQueueFullHTTPRetry
+	}
 	rowsInserted.Add(rowsTotal)
 	if at != nil {
 		rowsTenantInserted.Get(at).Add(rowsTotal)
--- a/app/vmalert-tool/README.md
+++ b/app/vmalert-tool/README.md
@@ -0,0 +1,3 @@
+See vmalert-tool docs [here](https://docs.victoriametrics.com/vmalert-tool.html).
+
+vmalert-tool docs can be edited at [docs/vmalert-tool.md](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/docs/vmalert-tool.md).
--- a/app/vmalert-tool/multiarch/Dockerfile
+++ b/app/vmalert-tool/multiarch/Dockerfile
@@ -0,0 +1,12 @@
+# See https://medium.com/on-docker/use-multi-stage-builds-to-inject-ca-certs-ad1e8f01de1b
+ARG certs_image
+ARG root_image
+FROM $certs_image as certs
+RUN apk update && apk upgrade && apk --update --no-cache add ca-certificates
+
+FROM $root_image
+COPY --from=certs /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ca-certificates.crt
+EXPOSE 8429
+ENTRYPOINT ["/vmalert-tool-prod"]
+ARG TARGETARCH
+COPY vmalert-tool-linux-${TARGETARCH}-prod ./vmalert-tool-prod
--- a/app/vmalert/README.md
+++ b/app/vmalert/README.md
--- a/app/vmalert/config/config.go
+++ b/app/vmalert/config/config.go
@@ -19,10 +19,14 @@ import (
 // Group contains list of Rules grouped into
 // entity with one name and evaluation interval
 type Group struct {
-	Type        Type `yaml:"type,omitempty"`
-	File        string
-	Name        string              `yaml:"name"`
-	Interval    *promutils.Duration `yaml:"interval,omitempty"`
+	Type       Type `yaml:"type,omitempty"`
+	File       string
+	Name       string              `yaml:"name"`
+	Interval   *promutils.Duration `yaml:"interval,omitempty"`
+	EvalOffset *promutils.Duration `yaml:"eval_offset,omitempty"`
+	// EvalDelay will adjust the `time` parameter of rule evaluation requests to compensate for intentional query delay from the datasource.
+	// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5155
+	EvalDelay   *promutils.Duration `yaml:"eval_delay,omitempty"`
 	Limit       int                 `yaml:"limit,omitempty"`
 	Rules       []Rule              `yaml:"rules"`
 	Concurrency int                 `yaml:"concurrency"`
@@ -38,6 +42,8 @@ type Group struct {
 	Headers []Header `yaml:"headers,omitempty"`
 	// NotifierHeaders contains optional HTTP headers sent to notifiers for generated notifications
 	NotifierHeaders []Header `yaml:"notifier_headers,omitempty"`
+	// EvalAlignment will make the timestamp of group query requests be aligned with interval
+	EvalAlignment *bool `yaml:"eval_alignment,omitempty"`
 	// Catches all undefined fields and must be empty after parsing.
 	XXX map[string]interface{} `yaml:",inline"`
 }
@@ -63,11 +69,27 @@ func (g *Group) UnmarshalYAML(unmarshal func(interface{}) error) error {
 	return nil
 }

-// Validate check for internal Group or Rule configuration errors
+// Validate checks configuration errors for group and internal rules
 func (g *Group) Validate(validateTplFn ValidateTplFn, validateExpressions bool) error {
 	if g.Name == "" {
 		return fmt.Errorf("group name must be set")
 	}
+	if g.Interval.Duration() < 0 {
+		return fmt.Errorf("interval shouldn't be lower than 0")
+	}
+	if g.EvalOffset.Duration() < 0 {
+		return fmt.Errorf("eval_offset shouldn't be lower than 0")
+	}
+	// if `eval_offset` is set, interval won't use global evaluationInterval flag and must be bigger than offset.
+	if g.EvalOffset.Duration() > g.Interval.Duration() {
+		return fmt.Errorf("eval_offset should be smaller than interval; now eval_offset: %v, interval: %v", g.EvalOffset.Duration(), g.Interval.Duration())
+	}
+	if g.Limit < 0 {
+		return fmt.Errorf("invalid limit %d, shouldn't be less than 0", g.Limit)
+	}
+	if g.Concurrency < 0 {
+		return fmt.Errorf("invalid concurrency %d, shouldn't be less than 0", g.Concurrency)
+	}

 	uniqueRules := map[uint64]struct{}{}
 	for _, r := range g.Rules {
@@ -76,26 +98,26 @@ func (g *Group) Validate(validateTplFn ValidateTplFn, validateExpressions bool)
 			ruleName = r.Alert
 		}
 		if _, ok := uniqueRules[r.ID]; ok {
-			return fmt.Errorf("%q is a duplicate within the group %q", r.String(), g.Name)
+			return fmt.Errorf("%q is a duplicate in group", r.String())
 		}
 		uniqueRules[r.ID] = struct{}{}
 		if err := r.Validate(); err != nil {
-			return fmt.Errorf("invalid rule %q.%q: %w", g.Name, ruleName, err)
+			return fmt.Errorf("invalid rule %q: %w", ruleName, err)
 		}
 		if validateExpressions {
 			// its needed only for tests.
 			// because correct types must be inherited after unmarshalling.
 			exprValidator := g.Type.ValidateExpr
 			if err := exprValidator(r.Expr); err != nil {
-				return fmt.Errorf("invalid expression for rule %q.%q: %w", g.Name, ruleName, err)
+				return fmt.Errorf("invalid expression for rule  %q: %w", ruleName, err)
 			}
 		}
 		if validateTplFn != nil {
 			if err := validateTplFn(r.Annotations); err != nil {
-				return fmt.Errorf("invalid annotations for rule %q.%q: %w", g.Name, ruleName, err)
+				return fmt.Errorf("invalid annotations for rule  %q: %w", ruleName, err)
 			}
 			if err := validateTplFn(r.Labels); err != nil {
-				return fmt.Errorf("invalid labels for rule %q.%q: %w", g.Name, ruleName, err)
+				return fmt.Errorf("invalid labels for rule  %q: %w", ruleName, err)
 			}
 		}
 	}
@@ -214,7 +236,7 @@ func ParseSilent(pathPatterns []string, validateTplFn ValidateTplFn, validateExp

 	files, err := readFromFS(pathPatterns)
 	if err != nil {
-		return nil, fmt.Errorf("failed to read from the config: %s", err)
+		return nil, fmt.Errorf("failed to read from the config: %w", err)
 	}
 	return parse(files, validateTplFn, validateExpressions)
 }
@@ -223,11 +245,11 @@ func ParseSilent(pathPatterns []string, validateTplFn ValidateTplFn, validateExp
 func Parse(pathPatterns []string, validateTplFn ValidateTplFn, validateExpressions bool) ([]Group, error) {
 	files, err := readFromFS(pathPatterns)
 	if err != nil {
-		return nil, fmt.Errorf("failed to read from the config: %s", err)
+		return nil, fmt.Errorf("failed to read from the config: %w", err)
 	}
 	groups, err := parse(files, validateTplFn, validateExpressions)
 	if err != nil {
-		return nil, fmt.Errorf("failed to parse %s: %s", pathPatterns, err)
+		return nil, fmt.Errorf("failed to parse %s: %w", pathPatterns, err)
 	}
 	if len(groups) < 1 {
 		cLogger.Warnf("no groups found in %s", strings.Join(pathPatterns, ";"))
--- a/app/vmalert/config/config_test.go
+++ b/app/vmalert/config/config_test.go
@@ -68,6 +68,10 @@ func TestParseBad(t *testing.T) {
 		path   []string
 		expErr string
 	}{
+		{
+			[]string{"testdata/rules/rules_interval_bad.rules"},
+			"eval_offset should be smaller than interval",
+		},
 		{
 			[]string{"testdata/rules/rules0-bad.rules"},
 			"unexpected token",
@@ -102,7 +106,7 @@ func TestParseBad(t *testing.T) {
 		},
 		{
 			[]string{"http://unreachable-url"},
-			"failed to read",
+			"failed to",
 		},
 	}
 	for _, tc := range testCases {
@@ -141,6 +145,35 @@ func TestGroup_Validate(t *testing.T) {
 			group:  &Group{},
 			expErr: "group name must be set",
 		},
+		{
+			group: &Group{
+				Name:     "negative interval",
+				Interval: promutils.NewDuration(-1),
+			},
+			expErr: "interval shouldn't be lower than 0",
+		},
+		{
+			group: &Group{
+				Name:       "wrong eval_offset",
+				Interval:   promutils.NewDuration(time.Minute),
+				EvalOffset: promutils.NewDuration(2 * time.Minute),
+			},
+			expErr: "eval_offset should be smaller than interval",
+		},
+		{
+			group: &Group{
+				Name:  "wrong limit",
+				Limit: -1,
+			},
+			expErr: "invalid limit",
+		},
+		{
+			group: &Group{
+				Name:        "wrong concurrency",
+				Concurrency: -1,
+			},
+			expErr: "invalid concurrency",
+		},
 		{
 			group: &Group{
 				Name: "test",
--- a/app/vmalert/config/fsurl/url.go
+++ b/app/vmalert/config/fsurl/url.go
@@ -49,7 +49,7 @@ func (fs *FS) Read(files []string) (map[string][]byte, error) {
 				path, resp.StatusCode, http.StatusOK, data)
 		}
 		if err != nil {
-			return nil, fmt.Errorf("cannot read %q: %s", path, err)
+			return nil, fmt.Errorf("cannot read %q: %w", path, err)
 		}
 		result[path] = data
 	}
--- a/app/vmalert/config/testdata/rules/rules3-good.rules
+++ b/app/vmalert/config/testdata/rules/rules3-good.rules
@@ -15,6 +15,7 @@ groups:
    interval: 2s
    concurrency: 2
    type: prometheus
+    eval_delay: 30s
    rules:
      - alert: Conns
        expr: sum(vm_tcplistener_conns) by (instance) > 1
--- a/app/vmalert/config/testdata/rules/rules_interval_bad.rules
+++ b/app/vmalert/config/testdata/rules/rules_interval_bad.rules
@@ -0,0 +1,13 @@
+groups:
+  - name: groupTest
+    ## default interval is 1min, eval_offset shouldn't be greater than interval
+    eval_offset: 2m
+    rules:
+      - alert: VMRows
+        for: 2s
+        expr: sum(rate(vm_http_request_errors_total[2s])) > 0
+        labels:
+          label: bar
+          host: "{{ $labels.instance }}"
+        annotations:
+          summary: "{{ $value }}"
--- a/app/vmalert/datasource/faker.go
+++ b/app/vmalert/datasource/faker.go
@@ -0,0 +1,131 @@
+package datasource
+
+import (
+	"context"
+	"net/http"
+	"sync"
+	"time"
+)
+
+// FakeQuerier is a mock querier that returns predefined results and error message
+type FakeQuerier struct {
+	sync.Mutex
+	metrics []Metric
+	err     error
+}
+
+// SetErr sets query error message
+func (fq *FakeQuerier) SetErr(err error) {
+	fq.Lock()
+	fq.err = err
+	fq.Unlock()
+}
+
+// Reset resets querier's error message and results
+func (fq *FakeQuerier) Reset() {
+	fq.Lock()
+	fq.err = nil
+	fq.metrics = fq.metrics[:0]
+	fq.Unlock()
+}
+
+// Add appends metrics to querier result metrics
+func (fq *FakeQuerier) Add(metrics ...Metric) {
+	fq.Lock()
+	fq.metrics = append(fq.metrics, metrics...)
+	fq.Unlock()
+}
+
+// BuildWithParams returns FakeQuerier itself
+func (fq *FakeQuerier) BuildWithParams(_ QuerierParams) Querier {
+	return fq
+}
+
+// QueryRange performs query
+func (fq *FakeQuerier) QueryRange(ctx context.Context, q string, _, _ time.Time) (Result, error) {
+	req, _, err := fq.Query(ctx, q, time.Now())
+	return req, err
+}
+
+// Query returns metrics stored in querier
+func (fq *FakeQuerier) Query(_ context.Context, _ string, _ time.Time) (Result, *http.Request, error) {
+	fq.Lock()
+	defer fq.Unlock()
+	if fq.err != nil {
+		return Result{}, nil, fq.err
+	}
+	cp := make([]Metric, len(fq.metrics))
+	copy(cp, fq.metrics)
+	req, _ := http.NewRequest(http.MethodPost, "foo.com", nil)
+	return Result{Data: cp}, req, nil
+}
+
+// FakeQuerierWithRegistry can store different results for different query expr
+type FakeQuerierWithRegistry struct {
+	sync.Mutex
+	registry map[string][]Metric
+}
+
+// Set stores query result for given key
+func (fqr *FakeQuerierWithRegistry) Set(key string, metrics ...Metric) {
+	fqr.Lock()
+	if fqr.registry == nil {
+		fqr.registry = make(map[string][]Metric)
+	}
+	fqr.registry[key] = metrics
+	fqr.Unlock()
+}
+
+// Reset cleans querier's results registry
+func (fqr *FakeQuerierWithRegistry) Reset() {
+	fqr.Lock()
+	fqr.registry = nil
+	fqr.Unlock()
+}
+
+// BuildWithParams returns itself
+func (fqr *FakeQuerierWithRegistry) BuildWithParams(_ QuerierParams) Querier {
+	return fqr
+}
+
+// QueryRange performs query
+func (fqr *FakeQuerierWithRegistry) QueryRange(ctx context.Context, q string, _, _ time.Time) (Result, error) {
+	req, _, err := fqr.Query(ctx, q, time.Now())
+	return req, err
+}
+
+// Query returns metrics stored in querier registry
+func (fqr *FakeQuerierWithRegistry) Query(_ context.Context, expr string, _ time.Time) (Result, *http.Request, error) {
+	fqr.Lock()
+	defer fqr.Unlock()
+
+	req, _ := http.NewRequest(http.MethodPost, "foo.com", nil)
+	metrics, ok := fqr.registry[expr]
+	if !ok {
+		return Result{}, req, nil
+	}
+	cp := make([]Metric, len(metrics))
+	copy(cp, metrics)
+	return Result{Data: cp}, req, nil
+}
+
+// FakeQuerierWithDelay is a mock querier with given delay duration
+type FakeQuerierWithDelay struct {
+	FakeQuerier
+	Delay time.Duration
+}
+
+// Query returns query result after delay duration
+func (fqd *FakeQuerierWithDelay) Query(ctx context.Context, expr string, ts time.Time) (Result, *http.Request, error) {
+	timer := time.NewTimer(fqd.Delay)
+	select {
+	case <-ctx.Done():
+	case <-timer.C:
+	}
+	return fqd.FakeQuerier.Query(ctx, expr, ts)
+}
+
+// BuildWithParams returns itself
+func (fqd *FakeQuerierWithDelay) BuildWithParams(_ QuerierParams) Querier {
+	return fqd
+}
--- a/app/vmalert/datasource/init.go
+++ b/app/vmalert/datasource/init.go
@@ -10,13 +10,14 @@ import (

 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/utils"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
 )

 var (
 	addr = flag.String("datasource.url", "", "Datasource compatible with Prometheus HTTP API. It can be single node VictoriaMetrics or vmselect URL. Required parameter. "+
 		"E.g. http://127.0.0.1:8428 . See also -remoteRead.disablePathAppend and -datasource.showURL")
 	appendTypePrefix  = flag.Bool("datasource.appendTypePrefix", false, "Whether to add type prefix to -datasource.url based on the query type. Set to true if sending different query types to the vmselect URL.")
-	showDatasourceURL = flag.Bool("datasource.showURL", false, "Whether to show -datasource.url in the exported metrics. "+
+	showDatasourceURL = flag.Bool("datasource.showURL", false, "Whether to avoid stripping sensitive information such as auth headers or passwords from URLs in log messages or UI and exported metrics. "+
 		"It is hidden by default, since it can contain sensitive info such as auth key")

 	headers = flag.String("datasource.headers", "", "Optional HTTP extraHeaders to send with each request to the corresponding -datasource.url. "+
@@ -42,12 +43,16 @@ var (
 	oauth2TokenURL         = flag.String("datasource.oauth2.tokenUrl", "", "Optional OAuth2 tokenURL to use for -datasource.url.")
 	oauth2Scopes           = flag.String("datasource.oauth2.scopes", "", "Optional OAuth2 scopes to use for -datasource.url. Scopes must be delimited by ';'")

-	lookBack  = flag.Duration("datasource.lookback", 0, `Lookback defines how far into the past to look when evaluating queries. For example, if the datasource.lookback=5m then param "time" with value now()-5m will be added to every query.`)
+	lookBack = flag.Duration("datasource.lookback", 0, `Will be deprecated soon, please adjust "-search.latencyOffset"  at datasource side `+
+		`or specify "latency_offset" in rule group's params. Lookback defines how far into the past to look when evaluating queries. `+
+		`For example, if the datasource.lookback=5m then param "time" with value now()-5m will be added to every query.`)
 	queryStep = flag.Duration("datasource.queryStep", 5*time.Minute, "How far a value can fallback to when evaluating queries. "+
 		"For example, if -datasource.queryStep=15s then param \"step\" with value \"15s\" will be added to every query. "+
 		"If set to 0, rule's evaluation interval will be used instead.")
-	queryTimeAlignment = flag.Bool("datasource.queryTimeAlignment", true, `Whether to align "time" parameter with evaluation interval.`+
-		"Alignment supposed to produce deterministic results despite number of vmalert replicas or time they were started. See more details here https://github.com/VictoriaMetrics/VictoriaMetrics/pull/1257")
+	queryTimeAlignment = flag.Bool("datasource.queryTimeAlignment", true, `Deprecated: please use "eval_alignment" in rule group instead. `+
+		`Whether to align "time" parameter with evaluation interval. `+
+		"Alignment supposed to produce deterministic results despite number of vmalert replicas or time they were started. "+
+		"See more details at https://github.com/VictoriaMetrics/VictoriaMetrics/pull/1257")
 	maxIdleConnections = flag.Int("datasource.maxIdleConnections", 100, `Defines the number of idle (keep-alive connections) to each configured datasource. Consider setting this value equal to the value: groups_total * group.concurrency. Too low a value may result in a high number of sockets in TIME_WAIT state.`)
 	disableKeepAlive   = flag.Bool("datasource.disableKeepAlive", false, `Whether to disable long-lived connections to the datasource. `+
 		`If true, disables HTTP keep-alives and will only use the connection to the server for a single HTTP request.`)
@@ -62,6 +67,11 @@ func InitSecretFlags() {
 	}
 }

+// ShowDatasourceURL returns whether to show -datasource.url with sensitive information
+func ShowDatasourceURL() bool {
+	return *showDatasourceURL
+}
+
 // Param represents an HTTP GET param
 type Param struct {
 	Key, Value string
@@ -74,6 +84,12 @@ func Init(extraParams url.Values) (QuerierBuilder, error) {
 	if *addr == "" {
 		return nil, fmt.Errorf("datasource.url is empty")
 	}
+	if !*queryTimeAlignment {
+		logger.Warnf("flag `-datasource.queryTimeAlignment` is deprecated and will be removed in next releases. Please use `eval_alignment` in rule group instead.")
+	}
+	if *lookBack != 0 {
+		logger.Warnf("flag `-datasource.lookback` will be deprecated soon. Please use `-rule.evalDelay` command-line flag instead. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5155 for details.")
+	}

 	tr, err := utils.Transport(*addr, *tlsCertFile, *tlsKeyFile, *tlsCAFile, *tlsServerName, *tlsInsecureSkipVerify)
 	if err != nil {
@@ -100,6 +116,10 @@ func Init(extraParams url.Values) (QuerierBuilder, error) {
 	if err != nil {
 		return nil, fmt.Errorf("failed to configure auth: %w", err)
 	}
+	_, err = authCfg.GetAuthHeader()
+	if err != nil {
+		return nil, fmt.Errorf("failed to set request auth header to datasource %q: %w", *addr, err)
+	}

 	return &VMStorage{
 		c:                &http.Client{Transport: tr},
--- a/app/vmalert/datasource/vm.go
+++ b/app/vmalert/datasource/vm.go
@@ -37,11 +37,14 @@ type VMStorage struct {
 	appendTypePrefix bool
 	lookBack         time.Duration
 	queryStep        time.Duration
+	dataSourceType   datasourceType

-	dataSourceType     datasourceType
+	// evaluationInterval will help setting request's `step` param.
 	evaluationInterval time.Duration
-	extraParams        url.Values
-	extraHeaders       []keyValue
+	// extraParams contains params to be attached to each HTTP request
+	extraParams url.Values
+	// extraHeaders are headers to be attached to each HTTP request
+	extraHeaders []keyValue

 	// whether to print additional log messages
 	// for each sent request
@@ -91,8 +94,15 @@ func (s *VMStorage) ApplyParams(params QuerierParams) *VMStorage {
 			s.extraParams = url.Values{}
 		}
 		for k, vl := range params.QueryParams {
-			for _, v := range vl { // custom query params are prior to default ones
-				s.extraParams.Set(k, v)
+			// custom query params are prior to default ones
+			if s.extraParams.Has(k) {
+				s.extraParams.Del(k)
+			}
+			for _, v := range vl {
+				// don't use .Set() instead of Del/Add since it is allowed
+				// for GET params to be duplicated
+				// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4908
+				s.extraParams.Add(k, v)
 			}
 		}
 	}
@@ -127,33 +137,35 @@ func NewVMStorage(baseURL string, authCfg *promauth.Config, lookBack time.Durati

 // Query executes the given query and returns parsed response
 func (s *VMStorage) Query(ctx context.Context, query string, ts time.Time) (Result, *http.Request, error) {
-	req, err := s.newRequestPOST()
+	req, err := s.newQueryRequest(query, ts)
 	if err != nil {
 		return Result{}, nil, err
 	}
-
-	switch s.dataSourceType {
-	case "", datasourcePrometheus:
-		s.setPrometheusInstantReqParams(req, query, ts)
-	case datasourceGraphite:
-		s.setGraphiteReqParams(req, query, ts)
-	default:
-		return Result{}, nil, fmt.Errorf("engine not found: %q", s.dataSourceType)
-	}
-
 	resp, err := s.do(ctx, req)
 	if err != nil {
-		return Result{}, req, err
+		if !errors.Is(err, io.EOF) && !errors.Is(err, io.ErrUnexpectedEOF) {
+			// Return unexpected error to the caller.
+			return Result{}, nil, err
+		}
+		// Something in the middle between client and datasource might be closing
+		// the connection. So we make one more attempt in the hope that the request will succeed.
+		req, err = s.newQueryRequest(query, ts)
+		if err != nil {
+			return Result{}, nil, fmt.Errorf("second attempt: %w", err)
+		}
+		resp, err = s.do(ctx, req)
+		if err != nil {
+			return Result{}, nil, fmt.Errorf("second attempt: %w", err)
+		}
 	}
-	defer func() {
-		_ = resp.Body.Close()
-	}()

+	// Process the received response.
 	parseFn := parsePrometheusResponse
 	if s.dataSourceType != datasourcePrometheus {
 		parseFn = parseGraphiteResponse
 	}
 	result, err := parseFn(req, resp)
+	_ = resp.Body.Close()
 	return result, req, err
 }

@@ -164,56 +176,96 @@ func (s *VMStorage) QueryRange(ctx context.Context, query string, start, end tim
 	if s.dataSourceType != datasourcePrometheus {
 		return res, fmt.Errorf("%q is not supported for QueryRange", s.dataSourceType)
 	}
-	req, err := s.newRequestPOST()
-	if err != nil {
-		return res, err
-	}
 	if start.IsZero() {
 		return res, fmt.Errorf("start param is missing")
 	}
 	if end.IsZero() {
 		return res, fmt.Errorf("end param is missing")
 	}
-	s.setPrometheusRangeReqParams(req, query, start, end)
-	resp, err := s.do(ctx, req)
+	req, err := s.newQueryRangeRequest(query, start, end)
 	if err != nil {
 		return res, err
 	}
-	defer func() {
-		_ = resp.Body.Close()
-	}()
-	return parsePrometheusResponse(req, resp)
+	resp, err := s.do(ctx, req)
+	if err != nil {
+		if !errors.Is(err, io.EOF) && !errors.Is(err, io.ErrUnexpectedEOF) {
+			// Return unexpected error to the caller.
+			return res, err
+		}
+		// Something in the middle between client and datasource might be closing
+		// the connection. So we make one more attempt in the hope that the request will succeed.
+		req, err = s.newQueryRangeRequest(query, start, end)
+		if err != nil {
+			return res, fmt.Errorf("second attempt: %w", err)
+		}
+		resp, err = s.do(ctx, req)
+		if err != nil {
+			return res, fmt.Errorf("second attempt: %w", err)
+		}
+	}
+
+	// Process the received response.
+	res, err = parsePrometheusResponse(req, resp)
+	_ = resp.Body.Close()
+	return res, err
 }

 func (s *VMStorage) do(ctx context.Context, req *http.Request) (*http.Response, error) {
+	ru := req.URL.Redacted()
+	if *showDatasourceURL {
+		ru = req.URL.String()
+	}
 	if s.debug {
-		logger.Infof("DEBUG datasource request: executing %s request with params %q", req.Method, req.URL.RawQuery)
+		logger.Infof("DEBUG datasource request: executing %s request with params %q", req.Method, ru)
 	}
 	resp, err := s.c.Do(req.WithContext(ctx))
-	if errors.Is(err, io.EOF) || errors.Is(err, io.ErrUnexpectedEOF) {
-		// something in the middle between client and datasource might be closing
-		// the connection. So we do a one more attempt in hope request will succeed.
-		resp, err = s.c.Do(req.WithContext(ctx))
-	}
 	if err != nil {
-		return nil, fmt.Errorf("error getting response from %s: %w", req.URL.Redacted(), err)
+		return nil, fmt.Errorf("error getting response from %s: %w", ru, err)
 	}
 	if resp.StatusCode != http.StatusOK {
 		body, _ := io.ReadAll(resp.Body)
 		_ = resp.Body.Close()
-		return nil, fmt.Errorf("unexpected response code %d for %s. Response body %s", resp.StatusCode, req.URL.Redacted(), body)
+		return nil, fmt.Errorf("unexpected response code %d for %s. Response body %s", resp.StatusCode, ru, body)
 	}
 	return resp, nil
 }

-func (s *VMStorage) newRequestPOST() (*http.Request, error) {
+func (s *VMStorage) newQueryRangeRequest(query string, start, end time.Time) (*http.Request, error) {
+	req, err := s.newRequest()
+	if err != nil {
+		return nil, fmt.Errorf("cannot create query_range request to datasource %q: %w", s.datasourceURL, err)
+	}
+	s.setPrometheusRangeReqParams(req, query, start, end)
+	return req, nil
+}
+
+func (s *VMStorage) newQueryRequest(query string, ts time.Time) (*http.Request, error) {
+	req, err := s.newRequest()
+	if err != nil {
+		return nil, fmt.Errorf("cannot create query request to datasource %q: %w", s.datasourceURL, err)
+	}
+	switch s.dataSourceType {
+	case "", datasourcePrometheus:
+		s.setPrometheusInstantReqParams(req, query, ts)
+	case datasourceGraphite:
+		s.setGraphiteReqParams(req, query, ts)
+	default:
+		logger.Panicf("BUG: engine not found: %q", s.dataSourceType)
+	}
+	return req, nil
+}
+
+func (s *VMStorage) newRequest() (*http.Request, error) {
 	req, err := http.NewRequest(http.MethodPost, s.datasourceURL, nil)
 	if err != nil {
-		return nil, err
+		logger.Panicf("BUG: unexpected error from http.NewRequest(%q): %s", s.datasourceURL, err)
 	}
 	req.Header.Set("Content-Type", "application/json")
 	if s.authCfg != nil {
-		s.authCfg.SetHeaders(req, true)
+		err = s.authCfg.SetHeaders(req, true)
+		if err != nil {
+			return nil, err
+		}
 	}
 	for _, h := range s.extraHeaders {
 		req.Header.Set(h.key, h.value)
--- a/app/vmalert/datasource/vm_prom_api.go
+++ b/app/vmalert/datasource/vm_prom_api.go
@@ -112,14 +112,14 @@ func parsePrometheusResponse(req *http.Request, resp *http.Response) (res Result
 		return res, fmt.Errorf("response error, query: %s, errorType: %s, error: %s", req.URL.Redacted(), r.ErrorType, r.Error)
 	}
 	if r.Status != statusSuccess {
-		return res, fmt.Errorf("unknown status: %s, Expected success or error ", r.Status)
+		return res, fmt.Errorf("unknown status: %s, Expected success or error", r.Status)
 	}
 	var parseFn func() ([]Metric, error)
 	switch r.Data.ResultType {
 	case rtVector:
 		var pi promInstant
 		if err := json.Unmarshal(r.Data.Result, &pi.Result); err != nil {
-			return res, fmt.Errorf("umarshal err %s; \n %#v", err, string(r.Data.Result))
+			return res, fmt.Errorf("unmarshal err %w; \n %#v", err, string(r.Data.Result))
 		}
 		parseFn = pi.metrics
 	case rtMatrix:
@@ -164,10 +164,6 @@ func (s *VMStorage) setPrometheusInstantReqParams(r *http.Request, query string,
 	if s.lookBack > 0 {
 		timestamp = timestamp.Add(-s.lookBack)
 	}
-	if *queryTimeAlignment && s.evaluationInterval > 0 {
-		// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1232
-		timestamp = timestamp.Truncate(s.evaluationInterval)
-	}
 	q.Set("time", timestamp.Format(time.RFC3339))
 	if !*disableStepParam && s.evaluationInterval > 0 { // set step as evaluationInterval by default
 		// always convert to seconds to keep compatibility with older
--- a/app/vmalert/datasource/vm_test.go
+++ b/app/vmalert/datasource/vm_test.go
@@ -506,8 +506,7 @@ func TestRequestParams(t *testing.T) {
 			},
 			func(t *testing.T, r *http.Request) {
 				evalInterval := 15 * time.Second
-				tt := timestamp.Truncate(evalInterval)
-				exp := url.Values{"query": {query}, "step": {evalInterval.String()}, "time": {tt.Format(time.RFC3339)}}
+				exp := url.Values{"query": {query}, "step": {evalInterval.String()}, "time": {timestamp.Format(time.RFC3339)}}
 				checkEqualString(t, exp.Encode(), r.URL.RawQuery)
 			},
 		},
@@ -521,7 +520,6 @@ func TestRequestParams(t *testing.T) {
 			func(t *testing.T, r *http.Request) {
 				evalInterval := 15 * time.Second
 				tt := timestamp.Add(-time.Minute)
-				tt = tt.Truncate(evalInterval)
 				exp := url.Values{"query": {query}, "step": {evalInterval.String()}, "time": {tt.Format(time.RFC3339)}}
 				checkEqualString(t, exp.Encode(), r.URL.RawQuery)
 			},
@@ -549,8 +547,7 @@ func TestRequestParams(t *testing.T) {
 			},
 			func(t *testing.T, r *http.Request) {
 				evalInterval := 3 * time.Hour
-				tt := timestamp.Truncate(evalInterval)
-				exp := url.Values{"query": {query}, "step": {fmt.Sprintf("%ds", int(evalInterval.Seconds()))}, "time": {tt.Format(time.RFC3339)}}
+				exp := url.Values{"query": {query}, "step": {fmt.Sprintf("%ds", int(evalInterval.Seconds()))}, "time": {timestamp.Format(time.RFC3339)}}
 				checkEqualString(t, exp.Encode(), r.URL.RawQuery)
 			},
 		},
@@ -596,6 +593,17 @@ func TestRequestParams(t *testing.T) {
 				checkEqualString(t, exp.Encode(), r.URL.RawQuery)
 			},
 		},
+		{
+			"allow duplicates in query params",
+			false,
+			storage.Clone().ApplyParams(QuerierParams{
+				QueryParams: url.Values{"extra_labels": {"env=dev", "foo=bar"}},
+			}),
+			func(t *testing.T, r *http.Request) {
+				exp := url.Values{"query": {query}, "round_digits": {"10"}, "extra_labels": {"env=dev", "foo=bar"}, "time": {timestamp.Format(time.RFC3339)}}
+				checkEqualString(t, exp.Encode(), r.URL.RawQuery)
+			},
+		},
 		{
 			"graphite extra params",
 			false,
@@ -629,9 +637,9 @@ func TestRequestParams(t *testing.T) {

 	for _, tc := range testCases {
 		t.Run(tc.name, func(t *testing.T) {
-			req, err := tc.vm.newRequestPOST()
+			req, err := tc.vm.newRequest()
 			if err != nil {
-				t.Fatalf("unexpected error: %s", err)
+				t.Fatal(err)
 			}
 			switch tc.vm.dataSourceType {
 			case "", datasourcePrometheus:
@@ -727,9 +735,9 @@ func TestHeaders(t *testing.T) {
 	for _, tt := range testCases {
 		t.Run(tt.name, func(t *testing.T) {
 			vm := tt.vmFn()
-			req, err := vm.newRequestPOST()
+			req, err := vm.newQueryRequest("foo", time.Now())
 			if err != nil {
-				t.Fatalf("unexpected error: %s", err)
+				t.Fatal(err)
 			}
 			tt.checkFn(t, req)
 		})
--- a/app/vmalert/main.go
+++ b/app/vmalert/main.go
@@ -18,6 +18,7 @@ import (
 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/remoteread"
 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/remotewrite"
+	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/rule"
 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/templates"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/envflag"
@@ -46,8 +47,8 @@ all files with prefix rule_ in folder dir.
 See https://docs.victoriametrics.com/vmalert.html#reading-rules-from-object-storage
 `)

-	ruleTemplatesPath = flagutil.NewArrayString("rule.templates", `Path or glob pattern to location with go template definitions
-	for rules annotations templating. Flag can be specified multiple times.
+	ruleTemplatesPath = flagutil.NewArrayString("rule.templates", `Path or glob pattern to location with go template definitions `+
+		`for rules annotations templating. Flag can be specified multiple times.
 Examples:
 -rule.templates="/path/to/file". Path to a single file with go templates
 -rule.templates="dir/*.tpl" -rule.templates="/*.tpl". Relative path to all .tpl files in "dir" folder,
@@ -66,11 +67,6 @@ absolute path to all .tpl files in root.

 	validateTemplates   = flag.Bool("rule.validateTemplates", true, "Whether to validate annotation and label templates")
 	validateExpressions = flag.Bool("rule.validateExpressions", true, "Whether to validate rules expressions via MetricsQL engine")
-	maxResolveDuration  = flag.Duration("rule.maxResolveDuration", 0, "Limits the maximum duration for automatic alert expiration, "+
-		"which by default is 4 times evaluationInterval of the parent group.")
-	resendDelay            = flag.Duration("rule.resendDelay", 0, "Minimum amount of time to wait before resending an alert to notifier")
-	ruleUpdateEntriesLimit = flag.Int("rule.updateEntriesLimit", 20, "Defines the max number of rule's state updates stored in-memory. "+
-		"Rule's updates are available on rule's Details page and are used for debugging purposes. The number of stored updates can be overridden per rule via update_entries_limit param.")

 	externalURL         = flag.String("external.url", "", "External URL is used as alert's source for sent alerts to the notifier. By default, hostname is used as address.")
 	externalAlertSource = flag.String("external.alert.source", "", `External Alert Source allows to override the Source link for alerts sent to AlertManager `+
@@ -82,12 +78,8 @@ absolute path to all .tpl files in root.
 	externalLabels = flagutil.NewArrayString("external.label", "Optional label in the form 'Name=value' to add to all generated recording rules and alerts. "+
 		"Pass multiple -label flags in order to add multiple label sets.")

-	remoteReadLookBack = flag.Duration("remoteRead.lookback", time.Hour, "Lookback defines how far to look into past for alerts timeseries."+
-		" For example, if lookback=1h then range from now() to now()-1h will be scanned.")
 	remoteReadIgnoreRestoreErrors = flag.Bool("remoteRead.ignoreRestoreErrors", true, "Whether to ignore errors from remote storage when restoring alerts state on startup. DEPRECATED - this flag has no effect and will be removed in the next releases.")

-	disableAlertGroupLabel = flag.Bool("disableAlertgroupLabel", false, "Whether to disable adding group's Name as label to generated alerts and time series.")
-
 	dryRun = flag.Bool("dryRun", false, "Whether to check only config files without running vmalert. The rules file are validated. The -rule flag must be specified.")
 )

@@ -101,6 +93,7 @@ func main() {
 	remoteread.InitSecretFlags()
 	remotewrite.InitSecretFlags()
 	datasource.InitSecretFlags()
+	notifier.InitSecretFlags()
 	buildinfo.Init()
 	logger.Init()
 	pushmetrics.Init()
@@ -228,7 +221,7 @@ func newManager(ctx context.Context) (*manager, error) {
 		return nil, fmt.Errorf("failed to init notifier: %w", err)
 	}
 	manager := &manager{
-		groups:         make(map[uint64]*Group),
+		groups:         make(map[uint64]*rule.Group),
 		querierBuilder: q,
 		notifiers:      nts,
 		labels:         labels,
@@ -237,7 +230,9 @@ func newManager(ctx context.Context) (*manager, error) {
 	if err != nil {
 		return nil, fmt.Errorf("failed to init remoteWrite: %w", err)
 	}
-	manager.rw = rw
+	if rw != nil {
+		manager.rw = rw
+	}

 	rr, err := remoteread.Init()
 	if err != nil {
--- a/app/vmalert/main_test.go
+++ b/app/vmalert/main_test.go
@@ -8,11 +8,19 @@ import (
 	"testing"
 	"time"

+	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/remotewrite"
+	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/rule"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
 )

+func init() {
+	// Disable rand sleep on group start during tests in order to speed up test execution.
+	// Rand sleep is needed only in prod code.
+	rule.SkipRandSleepOnGroupStart = true
+}
+
 func TestGetExternalURL(t *testing.T) {
 	expURL := "https://vicotriametrics.com/path"
 	u, err := getExternalURL(expURL, "", false)
@@ -98,10 +106,10 @@ groups:
 	ctx, cancel := context.WithCancel(context.Background())

 	m := &manager{
-		querierBuilder: &fakeQuerier{},
-		groups:         make(map[uint64]*Group),
+		querierBuilder: &datasource.FakeQuerier{},
+		groups:         make(map[uint64]*rule.Group),
 		labels:         map[string]string{},
-		notifiers:      func() []notifier.Notifier { return []notifier.Notifier{&fakeNotifier{}} },
+		notifiers:      func() []notifier.Notifier { return []notifier.Notifier{&notifier.FakeNotifier{}} },
 		rw:             &remotewrite.Client{},
 	}

--- a/app/vmalert/manager.go
+++ b/app/vmalert/manager.go
@@ -3,14 +3,13 @@ package main
 import (
 	"context"
 	"fmt"
-	"net/url"
-	"sort"
 	"sync"

 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config"
 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/remotewrite"
+	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/rule"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
 )

@@ -19,7 +18,7 @@ type manager struct {
 	querierBuilder datasource.QuerierBuilder
 	notifiers      func() []notifier.Notifier

-	rw *remotewrite.Client
+	rw remotewrite.RWClient
 	// remote read builder.
 	rr datasource.QuerierBuilder

@@ -27,28 +26,28 @@ type manager struct {
 	labels map[string]string

 	groupsMu sync.RWMutex
-	groups   map[uint64]*Group
+	groups   map[uint64]*rule.Group
 }

-// RuleAPI generates APIRule object from alert by its ID(hash)
-func (m *manager) RuleAPI(gID, rID uint64) (APIRule, error) {
+// ruleAPI generates apiRule object from alert by its ID(hash)
+func (m *manager) ruleAPI(gID, rID uint64) (apiRule, error) {
 	m.groupsMu.RLock()
 	defer m.groupsMu.RUnlock()

 	g, ok := m.groups[gID]
 	if !ok {
-		return APIRule{}, fmt.Errorf("can't find group with id %d", gID)
+		return apiRule{}, fmt.Errorf("can't find group with id %d", gID)
 	}
 	for _, rule := range g.Rules {
 		if rule.ID() == rID {
-			return rule.ToAPI(), nil
+			return ruleToAPI(rule), nil
 		}
 	}
-	return APIRule{}, fmt.Errorf("can't find rule with id %d in group %q", rID, g.Name)
+	return apiRule{}, fmt.Errorf("can't find rule with id %d in group %q", rID, g.Name)
 }

-// AlertAPI generates APIAlert object from alert by its ID(hash)
-func (m *manager) AlertAPI(gID, aID uint64) (*APIAlert, error) {
+// alertAPI generates apiAlert object from alert by its ID(hash)
+func (m *manager) alertAPI(gID, aID uint64) (*apiAlert, error) {
 	m.groupsMu.RLock()
 	defer m.groupsMu.RUnlock()

@@ -56,12 +55,12 @@ func (m *manager) AlertAPI(gID, aID uint64) (*APIAlert, error) {
 	if !ok {
 		return nil, fmt.Errorf("can't find group with id %d", gID)
 	}
-	for _, rule := range g.Rules {
-		ar, ok := rule.(*AlertingRule)
+	for _, r := range g.Rules {
+		ar, ok := r.(*rule.AlertingRule)
 		if !ok {
 			continue
 		}
-		if apiAlert := ar.AlertAPI(aID); apiAlert != nil {
+		if apiAlert := alertToAPI(ar, aID); apiAlert != nil {
 			return apiAlert, nil
 		}
 	}
@@ -82,15 +81,15 @@ func (m *manager) close() {
 	m.wg.Wait()
 }

-func (m *manager) startGroup(ctx context.Context, g *Group, restore bool) error {
+func (m *manager) startGroup(ctx context.Context, g *rule.Group, restore bool) error {
 	m.wg.Add(1)
 	id := g.ID()
 	go func() {
 		defer m.wg.Done()
 		if restore {
-			g.start(ctx, m.notifiers, m.rw, m.rr)
+			g.Start(ctx, m.notifiers, m.rw, m.rr)
 		} else {
-			g.start(ctx, m.notifiers, m.rw, nil)
+			g.Start(ctx, m.notifiers, m.rw, nil)
 		}
 	}()
 	m.groups[id] = g
@@ -99,7 +98,7 @@ func (m *manager) startGroup(ctx context.Context, g *Group, restore bool) error

 func (m *manager) update(ctx context.Context, groupsCfg []config.Group, restore bool) error {
 	var rrPresent, arPresent bool
-	groupsRegistry := make(map[uint64]*Group)
+	groupsRegistry := make(map[uint64]*rule.Group)
 	for _, cfg := range groupsCfg {
 		for _, r := range cfg.Rules {
 			if rrPresent && arPresent {
@@ -112,7 +111,7 @@ func (m *manager) update(ctx context.Context, groupsCfg []config.Group, restore
 				arPresent = true
 			}
 		}
-		ng := newGroup(cfg, m.querierBuilder, *evaluationInterval, m.labels)
+		ng := rule.NewGroup(cfg, m.querierBuilder, *evaluationInterval, m.labels)
 		groupsRegistry[ng.ID()] = ng
 	}

@@ -124,8 +123,8 @@ func (m *manager) update(ctx context.Context, groupsCfg []config.Group, restore
 	}

 	type updateItem struct {
-		old *Group
-		new *Group
+		old *rule.Group
+		new *rule.Group
 	}
 	var toUpdate []updateItem

@@ -135,7 +134,7 @@ func (m *manager) update(ctx context.Context, groupsCfg []config.Group, restore
 		if !ok {
 			// old group is not present in new list,
 			// so must be stopped and deleted
-			og.close()
+			og.Close()
 			delete(m.groups, og.ID())
 			og = nil
 			continue
@@ -157,81 +156,13 @@ func (m *manager) update(ctx context.Context, groupsCfg []config.Group, restore
 		var wg sync.WaitGroup
 		for _, item := range toUpdate {
 			wg.Add(1)
-			go func(old *Group, new *Group) {
-				old.updateCh <- new
+			go func(old *rule.Group, new *rule.Group) {
+				old.UpdateWith(new)
 				wg.Done()
 			}(item.old, item.new)
-			item.old.interruptEval()
+			item.old.InterruptEval()
 		}
 		wg.Wait()
 	}
 	return nil
 }
-
-func (g *Group) toAPI() APIGroup {
-	g.mu.RLock()
-	defer g.mu.RUnlock()
-
-	ag := APIGroup{
-		// encode as string to avoid rounding
-		ID: fmt.Sprintf("%d", g.ID()),
-
-		Name:            g.Name,
-		Type:            g.Type.String(),
-		File:            g.File,
-		Interval:        g.Interval.Seconds(),
-		LastEvaluation:  g.LastEvaluation,
-		Concurrency:     g.Concurrency,
-		Params:          urlValuesToStrings(g.Params),
-		Headers:         headersToStrings(g.Headers),
-		NotifierHeaders: headersToStrings(g.NotifierHeaders),
-
-		Labels: g.Labels,
-	}
-	ag.Rules = make([]APIRule, 0)
-	for _, r := range g.Rules {
-		ag.Rules = append(ag.Rules, r.ToAPI())
-	}
-	return ag
-}
-
-func urlValuesToStrings(values url.Values) []string {
-	if len(values) < 1 {
-		return nil
-	}
-
-	keys := make([]string, 0, len(values))
-	for k := range values {
-		keys = append(keys, k)
-	}
-	sort.Strings(keys)
-
-	var res []string
-	for _, k := range keys {
-		params := values[k]
-		for _, v := range params {
-			res = append(res, fmt.Sprintf("%s=%s", k, v))
-		}
-	}
-	return res
-}
-
-func headersToStrings(headers map[string]string) []string {
-	if len(headers) < 1 {
-		return nil
-	}
-
-	keys := make([]string, 0, len(headers))
-	for k := range headers {
-		keys = append(keys, k)
-	}
-	sort.Strings(keys)
-
-	var res []string
-	for _, k := range keys {
-		v := headers[k]
-		res = append(res, fmt.Sprintf("%s: %s", k, v))
-	}
-
-	return res
-}
--- a/app/vmalert/manager_test.go
+++ b/app/vmalert/manager_test.go
@@ -10,8 +10,10 @@ import (
 	"time"

 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config"
+	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/remotewrite"
+	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/rule"
 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/templates"
 )

@@ -26,7 +28,7 @@ func TestMain(m *testing.M) {
 // successful cases of
 // starting with empty rules folder
 func TestManagerEmptyRulesDir(t *testing.T) {
-	m := &manager{groups: make(map[uint64]*Group)}
+	m := &manager{groups: make(map[uint64]*rule.Group)}
 	cfg := loadCfg(t, []string{"foo/bar"}, true, true)
 	if err := m.update(context.Background(), cfg, false); err != nil {
 		t.Fatalf("expected to load successfully with empty rules dir; got err instead: %v", err)
@@ -38,9 +40,9 @@ func TestManagerEmptyRulesDir(t *testing.T) {
 // Should be executed with -race flag
 func TestManagerUpdateConcurrent(t *testing.T) {
 	m := &manager{
-		groups:         make(map[uint64]*Group),
-		querierBuilder: &fakeQuerier{},
-		notifiers:      func() []notifier.Notifier { return []notifier.Notifier{&fakeNotifier{}} },
+		groups:         make(map[uint64]*rule.Group),
+		querierBuilder: &datasource.FakeQuerier{},
+		notifiers:      func() []notifier.Notifier { return []notifier.Notifier{&notifier.FakeNotifier{}} },
 	}
 	paths := []string{
 		"config/testdata/dir/rules0-good.rules",
@@ -91,7 +93,7 @@ func TestManagerUpdate(t *testing.T) {
 	}()

 	var (
-		VMRows = &AlertingRule{
+		VMRows = &rule.AlertingRule{
 			Name: "VMRows",
 			Expr: "vm_rows > 0",
 			For:  10 * time.Second,
@@ -104,7 +106,7 @@ func TestManagerUpdate(t *testing.T) {
 				"description": "{{$labels}}",
 			},
 		}
-		Conns = &AlertingRule{
+		Conns = &rule.AlertingRule{
 			Name: "Conns",
 			Expr: "sum(vm_tcplistener_conns) by(instance) > 1",
 			Annotations: map[string]string{
@@ -112,7 +114,7 @@ func TestManagerUpdate(t *testing.T) {
 				"description": "It is {{ $value }} connections for {{$labels.instance}}",
 			},
 		}
-		ExampleAlertAlwaysFiring = &AlertingRule{
+		ExampleAlertAlwaysFiring = &rule.AlertingRule{
 			Name: "ExampleAlertAlwaysFiring",
 			Expr: "sum by(job) (up == 1)",
 		}
@@ -122,20 +124,20 @@ func TestManagerUpdate(t *testing.T) {
 		name       string
 		initPath   string
 		updatePath string
-		want       []*Group
+		want       []*rule.Group
 	}{
 		{
 			name:       "update good rules",
 			initPath:   "config/testdata/rules/rules0-good.rules",
 			updatePath: "config/testdata/dir/rules1-good.rules",
-			want: []*Group{
+			want: []*rule.Group{
 				{
 					File:     "config/testdata/dir/rules1-good.rules",
 					Name:     "duplicatedGroupDiffFiles",
 					Type:     config.NewPrometheusType(),
 					Interval: defaultEvalInterval,
-					Rules: []Rule{
-						&AlertingRule{
+					Rules: []rule.Rule{
+						&rule.AlertingRule{
 							Name:   "VMRows",
 							Expr:   "vm_rows > 0",
 							For:    5 * time.Minute,
@@ -153,64 +155,68 @@ func TestManagerUpdate(t *testing.T) {
 			name:       "update good rules from 1 to 2 groups",
 			initPath:   "config/testdata/dir/rules/rules1-good.rules",
 			updatePath: "config/testdata/rules/rules0-good.rules",
-			want: []*Group{
+			want: []*rule.Group{
 				{
 					File:     "config/testdata/rules/rules0-good.rules",
 					Name:     "groupGorSingleAlert",
 					Type:     config.NewPrometheusType(),
-					Rules:    []Rule{VMRows},
 					Interval: defaultEvalInterval,
+					Rules:    []rule.Rule{VMRows},
 				},
 				{
 					File:     "config/testdata/rules/rules0-good.rules",
 					Interval: defaultEvalInterval,
 					Type:     config.NewPrometheusType(),
-					Name:     "TestGroup", Rules: []Rule{
+					Name:     "TestGroup",
+					Rules: []rule.Rule{
 						Conns,
 						ExampleAlertAlwaysFiring,
-					}},
+					},
+				},
 			},
 		},
 		{
 			name:       "update with one bad rule file",
 			initPath:   "config/testdata/rules/rules0-good.rules",
 			updatePath: "config/testdata/dir/rules2-bad.rules",
-			want: []*Group{
+			want: []*rule.Group{
 				{
 					File:     "config/testdata/rules/rules0-good.rules",
 					Name:     "groupGorSingleAlert",
 					Type:     config.NewPrometheusType(),
 					Interval: defaultEvalInterval,
-					Rules:    []Rule{VMRows},
+					Rules:    []rule.Rule{VMRows},
 				},
 				{
 					File:     "config/testdata/rules/rules0-good.rules",
 					Interval: defaultEvalInterval,
 					Name:     "TestGroup",
 					Type:     config.NewPrometheusType(),
-					Rules: []Rule{
+					Rules: []rule.Rule{
 						Conns,
 						ExampleAlertAlwaysFiring,
-					}},
+					},
+				},
 			},
 		},
 		{
 			name:       "update empty dir rules from 0 to 2 groups",
 			initPath:   "config/testdata/empty/*",
 			updatePath: "config/testdata/rules/rules0-good.rules",
-			want: []*Group{
+			want: []*rule.Group{
 				{
 					File:     "config/testdata/rules/rules0-good.rules",
 					Name:     "groupGorSingleAlert",
 					Type:     config.NewPrometheusType(),
 					Interval: defaultEvalInterval,
-					Rules:    []Rule{VMRows},
+					Rules:    []rule.Rule{VMRows},
 				},
 				{
 					File:     "config/testdata/rules/rules0-good.rules",
 					Interval: defaultEvalInterval,
 					Type:     config.NewPrometheusType(),
-					Name:     "TestGroup", Rules: []Rule{
+					Name:     "TestGroup",
+					Rules: []rule.Rule{
 						Conns,
 						ExampleAlertAlwaysFiring,
 					},
@@ -222,9 +228,9 @@ func TestManagerUpdate(t *testing.T) {
 		t.Run(tc.name, func(t *testing.T) {
 			ctx, cancel := context.WithCancel(context.TODO())
 			m := &manager{
-				groups:         make(map[uint64]*Group),
-				querierBuilder: &fakeQuerier{},
-				notifiers:      func() []notifier.Notifier { return []notifier.Notifier{&fakeNotifier{}} },
+				groups:         make(map[uint64]*rule.Group),
+				querierBuilder: &datasource.FakeQuerier{},
+				notifiers:      func() []notifier.Notifier { return []notifier.Notifier{&notifier.FakeNotifier{}} },
 			}

 			cfgInit := loadCfg(t, []string{tc.initPath}, true, true)
@@ -253,18 +259,44 @@ func TestManagerUpdate(t *testing.T) {
 		})
 	}
 }
+func compareGroups(t *testing.T, a, b *rule.Group) { // a is the expected group, b is the one under test (see the failure messages below)
+	t.Helper()
+	if a.Name != b.Name {
+		t.Fatalf("expected group name %q; got %q", a.Name, b.Name)
+	}
+	if a.File != b.File {
+		t.Fatalf("expected group %q file name %q; got %q", a.Name, a.File, b.File)
+	}
+	if a.Interval != b.Interval {
+		t.Fatalf("expected group %q interval %v; got %v", a.Name, a.Interval, b.Interval)
+	}
+	if len(a.Rules) != len(b.Rules) {
+		t.Fatalf("expected group %s to have %d rules; got: %d",
+			a.Name, len(a.Rules), len(b.Rules))
+	}
+	for i, r := range a.Rules { // rules are compared pairwise by position
+		got, want := r, b.Rules[i] // NOTE(review): r comes from a (the expected group), so got/want look swapped - confirm
+		if a.ID() != b.ID() { // NOTE(review): compares group IDs although the message reports rule IDs - confirm intent
+			t.Fatalf("expected to have rule %q; got %q", want.ID(), got.ID())
+		}
+		if err := rule.CompareRules(t, want, got); err != nil {
+			t.Fatalf("comparison error: %s", err)
+		}
+	}
+}

 func TestManagerUpdateNegative(t *testing.T) {
 	testCases := []struct {
 		notifiers []notifier.Notifier
-		rw        *remotewrite.Client
+		rw        remotewrite.RWClient
 		cfg       config.Group
 		expErr    string
 	}{
 		{
 			nil,
 			nil,
-			config.Group{Name: "Recording rule only",
+			config.Group{
+				Name: "Recording rule only",
 				Rules: []config.Rule{
 					{Record: "record", Expr: "max(up)"},
 				},
@@ -274,7 +306,8 @@ func TestManagerUpdateNegative(t *testing.T) {
 		{
 			nil,
 			nil,
-			config.Group{Name: "Alerting rule only",
+			config.Group{
+				Name: "Alerting rule only",
 				Rules: []config.Rule{
 					{Alert: "alert", Expr: "up > 0"},
 				},
@@ -282,9 +315,10 @@ func TestManagerUpdateNegative(t *testing.T) {
 			"contains alerting rules",
 		},
 		{
-			[]notifier.Notifier{&fakeNotifier{}},
+			[]notifier.Notifier{&notifier.FakeNotifier{}},
 			nil,
-			config.Group{Name: "Recording and alerting rules",
+			config.Group{
+				Name: "Recording and alerting rules",
 				Rules: []config.Rule{
 					{Alert: "alert1", Expr: "up > 0"},
 					{Alert: "alert2", Expr: "up > 0"},
@@ -296,7 +330,8 @@ func TestManagerUpdateNegative(t *testing.T) {
 		{
 			nil,
 			&remotewrite.Client{},
-			config.Group{Name: "Recording and alerting rules",
+			config.Group{
+				Name: "Recording and alerting rules",
 				Rules: []config.Rule{
 					{Record: "record1", Expr: "max(up)"},
 					{Record: "record2", Expr: "max(up)"},
@@ -310,8 +345,8 @@ func TestManagerUpdateNegative(t *testing.T) {
 	for _, tc := range testCases {
 		t.Run(tc.cfg.Name, func(t *testing.T) {
 			m := &manager{
-				groups:         make(map[uint64]*Group),
-				querierBuilder: &fakeQuerier{},
+				groups:         make(map[uint64]*rule.Group),
+				querierBuilder: &datasource.FakeQuerier{},
 				rw:             tc.rw,
 			}
 			if tc.notifiers != nil {
@@ -340,21 +375,3 @@ func loadCfg(t *testing.T, path []string, validateAnnotations, validateExpressio
 	}
 	return cfg
 }
-
-func TestUrlValuesToStrings(t *testing.T) {
-	mapQueryParams := map[string][]string{
-		"param1": {"param1"},
-		"param2": {"anotherparam"},
-	}
-	expectedRes := []string{"param1=param1", "param2=anotherparam"}
-	res := urlValuesToStrings(mapQueryParams)
-
-	if len(res) != len(expectedRes) {
-		t.Errorf("Expected length %d, but got %d", len(expectedRes), len(res))
-	}
-	for ind, val := range expectedRes {
-		if val != res[ind] {
-			t.Errorf("Expected %v; but got %v", val, res[ind])
-		}
-	}
-}
--- a/app/vmalert/notifier/alertmanager.go
+++ b/app/vmalert/notifier/alertmanager.go
@@ -6,6 +6,7 @@ import (
 	"fmt"
 	"io"
 	"net/http"
+	"net/url"
 	"strings"
 	"time"

@@ -17,7 +18,7 @@ import (
 // AlertManager represents integration provider with Prometheus alert manager
 // https://github.com/prometheus/alertmanager
 type AlertManager struct {
-	addr    string
+	addr    *url.URL
 	argFunc AlertURLGenerator
 	client  *http.Client
 	timeout time.Duration
@@ -48,7 +49,12 @@ func (am *AlertManager) Close() {
 }

 // Addr returns address where alerts are sent.
-func (am AlertManager) Addr() string { return am.addr }
+func (am AlertManager) Addr() string {
+	if *showNotifierURL {
+		return am.addr.String()
+	}
+	return am.addr.Redacted()
+}

 // Send an alert or resolve message
 func (am *AlertManager) Send(ctx context.Context, alerts []Alert, headers map[string]string) error {
@@ -64,7 +70,7 @@ func (am *AlertManager) send(ctx context.Context, alerts []Alert, headers map[st
 	b := &bytes.Buffer{}
 	writeamRequest(b, alerts, am.argFunc, am.relabelConfigs)

-	req, err := http.NewRequest(http.MethodPost, am.addr, b)
+	req, err := http.NewRequest(http.MethodPost, am.addr.String(), b)
 	if err != nil {
 		return err
 	}
@@ -82,7 +88,10 @@ func (am *AlertManager) send(ctx context.Context, alerts []Alert, headers map[st
 	req = req.WithContext(ctx)

 	if am.authCfg != nil {
-		am.authCfg.SetHeaders(req, true)
+		err = am.authCfg.SetHeaders(req, true)
+		if err != nil {
+			return err
+		}
 	}
 	resp, err := am.client.Do(req)
 	if err != nil {
@@ -91,12 +100,16 @@ func (am *AlertManager) send(ctx context.Context, alerts []Alert, headers map[st

 	defer func() { _ = resp.Body.Close() }()

+	amURL := am.addr.Redacted()
+	if *showNotifierURL {
+		amURL = am.addr.String()
+	}
 	if resp.StatusCode != http.StatusOK {
 		body, err := io.ReadAll(resp.Body)
 		if err != nil {
-			return fmt.Errorf("failed to read response from %q: %w", am.addr, err)
+			return fmt.Errorf("failed to read response from %q: %w", amURL, err)
 		}
-		return fmt.Errorf("invalid SC %d from %q; response body: %s", resp.StatusCode, am.addr, string(body))
+		return fmt.Errorf("invalid SC %d from %q; response body: %s", resp.StatusCode, amURL, string(body))
 	}
 	return nil
 }
@@ -136,8 +149,15 @@ func NewAlertManager(alertManagerURL string, fn AlertURLGenerator, authCfg proma
 		return nil, fmt.Errorf("failed to configure auth: %w", err)
 	}

+	amURL, err := url.Parse(alertManagerURL)
+	if err != nil {
+		return nil, fmt.Errorf("provided incorrect notifier url: %w", err)
+	}
+	if !*showNotifierURL {
+		alertManagerURL = amURL.Redacted()
+	}
 	return &AlertManager{
-		addr:           alertManagerURL,
+		addr:           amURL,
 		argFunc:        fn,
 		authCfg:        aCfg,
 		relabelConfigs: relabelCfg,
--- a/app/vmalert/notifier/config.go
+++ b/app/vmalert/notifier/config.go
@@ -3,7 +3,6 @@ package notifier
 import (
 	"crypto/md5"
 	"fmt"
-	"gopkg.in/yaml.v2"
 	"net/url"
 	"os"
 	"path"
@@ -11,6 +10,8 @@ import (
 	"strings"
 	"time"

+	"gopkg.in/yaml.v2"
+
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discovery/consul"
@@ -142,26 +143,23 @@ func parseLabels(target string, metaLabels *promutils.Labels, cfg *Config) (stri
 	if labels.Len() == 0 {
 		return "", nil, nil
 	}
-	schemeRelabeled := labels.Get("__scheme__")
-	if len(schemeRelabeled) == 0 {
-		schemeRelabeled = "http"
+	scheme := labels.Get("__scheme__")
+	if len(scheme) == 0 {
+		scheme = "http"
 	}
-	addressRelabeled := labels.Get("__address__")
-	if len(addressRelabeled) == 0 {
+	alertsPath := labels.Get("__alerts_path__")
+	if !strings.HasPrefix(alertsPath, "/") {
+		alertsPath = "/" + alertsPath
+	}
+	address := labels.Get("__address__")
+	if len(address) == 0 {
 		return "", nil, nil
 	}
-	if strings.Contains(addressRelabeled, "/") {
-		return "", nil, nil
-	}
-	addressRelabeled = addMissingPort(schemeRelabeled, addressRelabeled)
-	alertsPathRelabeled := labels.Get("__alerts_path__")
-	if !strings.HasPrefix(alertsPathRelabeled, "/") {
-		alertsPathRelabeled = "/" + alertsPathRelabeled
-	}
-	u := fmt.Sprintf("%s://%s%s", schemeRelabeled, addressRelabeled, alertsPathRelabeled)
+	address = addMissingPort(scheme, address)
+	u := fmt.Sprintf("%s://%s%s", scheme, address, alertsPath)
 	if _, err := url.Parse(u); err != nil {
 		return "", nil, fmt.Errorf("invalid url %q for scheme=%q (%q), target=%q, metrics_path=%q (%q): %w",
-			u, cfg.Scheme, schemeRelabeled, target, addressRelabeled, alertsPathRelabeled, err)
+			u, cfg.Scheme, scheme, target, address, alertsPath, err)
 	}
 	return u, labels, nil
 }
@@ -181,9 +179,24 @@ func addMissingPort(scheme, target string) string {
 func mergeLabels(target string, metaLabels *promutils.Labels, cfg *Config) *promutils.Labels {
 	// See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config
 	m := promutils.NewLabels(3 + metaLabels.Len())
-	m.Add("__address__", target)
-	m.Add("__scheme__", cfg.Scheme)
-	m.Add("__alerts_path__", path.Join("/", cfg.PathPrefix, alertManagerPath))
+	address := target
+	scheme := cfg.Scheme
+	alertsPath := path.Join("/", cfg.PathPrefix, alertManagerPath)
+	// A scheme prefix or a path embedded in target overrides the cfg.Scheme and cfg.PathPrefix based defaults above.
+	if strings.HasPrefix(address, "http://") {
+		scheme = "http"
+		address = address[len("http://"):]
+	} else if strings.HasPrefix(address, "https://") {
+		scheme = "https"
+		address = address[len("https://"):]
+	}
+	if n := strings.IndexByte(address, '/'); n >= 0 {
+		alertsPath = address[n:]
+		address = address[:n]
+	}
+	m.Add("__address__", address)
+	m.Add("__scheme__", scheme)
+	m.Add("__alerts_path__", alertsPath)
 	m.AddFrom(metaLabels)
 	return m
 }
--- a/app/vmalert/notifier/config_watcher.go
+++ b/app/vmalert/notifier/config_watcher.go
@@ -87,7 +87,7 @@ func (cw *configWatcher) reload(path string) error {
 func (cw *configWatcher) add(typeK TargetType, interval time.Duration, labelsFn getLabels) error {
 	targets, errors := targetsFromLabels(labelsFn, cw.cfg, cw.genFn)
 	for _, err := range errors {
-		return fmt.Errorf("failed to init notifier for %q: %s", typeK, err)
+		return fmt.Errorf("failed to init notifier for %q: %w", typeK, err)
 	}

 	cw.setTargets(typeK, targets)
@@ -107,7 +107,7 @@ func (cw *configWatcher) add(typeK TargetType, interval time.Duration, labelsFn
 			}
 			updateTargets, errors := targetsFromLabels(labelsFn, cw.cfg, cw.genFn)
 			for _, err := range errors {
-				logger.Errorf("failed to init notifier for %q: %s", typeK, err)
+				logger.Errorf("failed to init notifier for %q: %s", typeK, err) // %w is only understood by fmt.Errorf
 			}
 			cw.setTargets(typeK, updateTargets)
 		}
@@ -118,7 +118,7 @@ func (cw *configWatcher) add(typeK TargetType, interval time.Duration, labelsFn
 func targetsFromLabels(labelsFn getLabels, cfg *Config, genFn AlertURLGenerator) ([]Target, []error) {
 	metaLabels, err := labelsFn()
 	if err != nil {
-		return nil, []error{fmt.Errorf("failed to get labels: %s", err)}
+		return nil, []error{fmt.Errorf("failed to get labels: %w", err)}
 	}
 	var targets []Target
 	var errors []error
@@ -167,11 +167,11 @@ func (cw *configWatcher) start() error {
 			for _, target := range cfg.Targets {
 				address, labels, err := parseLabels(target, nil, cw.cfg)
 				if err != nil {
-					return fmt.Errorf("failed to parse labels for target %q: %s", target, err)
+					return fmt.Errorf("failed to parse labels for target %q: %w", target, err)
 				}
 				notifier, err := NewAlertManager(address, cw.genFn, httpCfg, cw.cfg.parsedAlertRelabelConfigs, cw.cfg.Timeout.Duration())
 				if err != nil {
-					return fmt.Errorf("failed to init alertmanager for addr %q: %s", address, err)
+					return fmt.Errorf("failed to init alertmanager for addr %q: %w", address, err)
 				}
 				targets = append(targets, Target{
 					Notifier: notifier,
@@ -189,14 +189,14 @@ func (cw *configWatcher) start() error {
 				sdc := &cw.cfg.ConsulSDConfigs[i]
 				targetLabels, err := sdc.GetLabels(cw.cfg.baseDir)
 				if err != nil {
-					return nil, fmt.Errorf("got labels err: %s", err)
+					return nil, fmt.Errorf("got labels err: %w", err)
 				}
 				labels = append(labels, targetLabels...)
 			}
 			return labels, nil
 		})
 		if err != nil {
-			return fmt.Errorf("failed to start consulSD discovery: %s", err)
+			return fmt.Errorf("failed to start consulSD discovery: %w", err)
 		}
 	}

@@ -207,14 +207,14 @@ func (cw *configWatcher) start() error {
 				sdc := &cw.cfg.DNSSDConfigs[i]
 				targetLabels, err := sdc.GetLabels(cw.cfg.baseDir)
 				if err != nil {
-					return nil, fmt.Errorf("got labels err: %s", err)
+					return nil, fmt.Errorf("got labels err: %w", err)
 				}
 				labels = append(labels, targetLabels...)
 			}
 			return labels, nil
 		})
 		if err != nil {
-			return fmt.Errorf("failed to start DNSSD discovery: %s", err)
+			return fmt.Errorf("failed to start DNSSD discovery: %w", err)
 		}
 	}
 	return nil
--- a/app/vmalert/notifier/config_watcher_test.go
+++ b/app/vmalert/notifier/config_watcher_test.go
@@ -318,3 +318,47 @@ func TestMergeHTTPClientConfigs(t *testing.T) {
 		t.Fatalf("expected BasicAuth tp be present")
 	}
 }
+
+func TestParseLabels(t *testing.T) {
+	testCases := []struct {
+		name            string
+		target          string
+		cfg             *Config
+		expectedAddress string
+		expectedErr     bool
+	}{
+		{
+			"invalid address",
+			"invalid:*//url",
+			&Config{},
+			"",
+			true,
+		},
+		{
+			"use some default params",
+			"alertmanager:9093",
+			&Config{PathPrefix: "test"},
+			"http://alertmanager:9093/test/api/v2/alerts",
+			false,
+		},
+		{
+			"use target address",
+			"https://alertmanager:9093/api/v1/alerts",
+			&Config{Scheme: "http", PathPrefix: "test"},
+			"https://alertmanager:9093/api/v1/alerts",
+			false,
+		},
+	}
+
+	for _, tc := range testCases {
+		t.Run(tc.name, func(t *testing.T) {
+			address, _, err := parseLabels(tc.target, nil, tc.cfg)
+			if err == nil == tc.expectedErr { // parsed as (err == nil) == tc.expectedErr: fail when error presence differs from the expectation
+				t.Fatalf("unexpected error; got %t; want %t", err != nil, tc.expectedErr)
+			}
+			if address != tc.expectedAddress {
+				t.Fatalf("unexpected address; got %q; want %q", address, tc.expectedAddress)
+			}
+		})
+	}
+}
--- a/app/vmalert/notifier/faker.go
+++ b/app/vmalert/notifier/faker.go
@@ -0,0 +1,59 @@
+package notifier
+
+import (
+	"context"
+	"fmt"
+	"sync"
+	"time"
+)
+
+// FakeNotifier is a mock notifier that records the alerts passed to Send.
+type FakeNotifier struct {
+	sync.Mutex
+	alerts []Alert // alerts passed to the most recent Send call
+	// counter is the total number of alerts received across all Send calls
+	counter int
+}
+
+// Close does nothing.
+func (*FakeNotifier) Close() {}
+
+// Addr returns an empty string.
+func (*FakeNotifier) Addr() string { return "" }
+
+// Send stores alerts, replacing the previously stored ones, and adds len(alerts) to the counter. It always returns nil.
+func (fn *FakeNotifier) Send(_ context.Context, alerts []Alert, _ map[string]string) error {
+	fn.Lock()
+	defer fn.Unlock()
+	fn.counter += len(alerts)
+	fn.alerts = alerts
+	return nil
+}
+
+// GetCounter returns the total number of alerts received via Send.
+func (fn *FakeNotifier) GetCounter() int {
+	fn.Lock()
+	defer fn.Unlock()
+	return fn.counter
+}
+
+// GetAlerts returns the alerts passed to the most recent Send call; the slice is returned without copying.
+func (fn *FakeNotifier) GetAlerts() []Alert {
+	fn.Lock()
+	defer fn.Unlock()
+	return fn.alerts
+}
+
+// FaultyNotifier is a mock notifier whose Send always returns an error.
+type FaultyNotifier struct {
+	FakeNotifier
+}
+
+// Send sleeps until the deadline of ctx, if one is set, and then returns an error. The passed alerts are ignored.
+func (fn *FaultyNotifier) Send(ctx context.Context, _ []Alert, _ map[string]string) error {
+	d, ok := ctx.Deadline()
+	if ok {
+		time.Sleep(time.Until(d)) // time.Sleep does not observe ctx cancellation; only the deadline bounds the wait
+	}
+	return fmt.Errorf("send failed")
+}
--- a/app/vmalert/notifier/init.go
+++ b/app/vmalert/notifier/init.go
@@ -19,9 +19,11 @@ var (

 	addrs = flagutil.NewArrayString("notifier.url", "Prometheus Alertmanager URL, e.g. http://127.0.0.1:9093. "+
 		"List all Alertmanager URLs if it runs in the cluster mode to ensure high availability.")
+	showNotifierURL = flag.Bool("notifier.showURL", false, "Whether to avoid stripping sensitive information such as passwords from URL in log messages or UI for -notifier.url. "+
+		"It is hidden by default, since it can contain sensitive info such as auth key")
 	blackHole = flag.Bool("notifier.blackhole", false, "Whether to blackhole alerting notifications. "+
 		"Enable this flag if you want vmalert to evaluate alerting rules without sending any notifications to external receivers (eg. alertmanager). "+
-		"`-notifier.url`, `-notifier.config` and `-notifier.blackhole` are mutually exclusive.")
+		"-notifier.url, -notifier.config and -notifier.blackhole are mutually exclusive.")

 	basicAuthUsername     = flagutil.NewArrayString("notifier.basicAuth.username", "Optional basic auth username for -notifier.url")
 	basicAuthPassword     = flagutil.NewArrayString("notifier.basicAuth.password", "Optional basic auth password for -notifier.url")
@@ -88,7 +90,7 @@ func Init(gen AlertURLGenerator, extLabels map[string]string, extURL string) (fu
 	externalLabels = extLabels
 	eu, err := url.Parse(externalURL)
 	if err != nil {
-		return nil, fmt.Errorf("failed to parse external URL: %s", err)
+		return nil, fmt.Errorf("failed to parse external URL: %w", err)
 	}

 	templates.UpdateWithFuncs(templates.FuncsWithExternalURL(eu))
@@ -114,7 +116,7 @@ func Init(gen AlertURLGenerator, extLabels map[string]string, extURL string) (fu
 	if len(*addrs) > 0 {
 		notifiers, err := notifiersFromFlags(gen)
 		if err != nil {
-			return nil, fmt.Errorf("failed to create notifier from flag values: %s", err)
+			return nil, fmt.Errorf("failed to create notifier from flag values: %w", err)
 		}
 		staticNotifiersFn = func() []Notifier {
 			return notifiers
@@ -124,11 +126,18 @@ func Init(gen AlertURLGenerator, extLabels map[string]string, extURL string) (fu

 	cw, err = newWatcher(*configPath, gen)
 	if err != nil {
-		return nil, fmt.Errorf("failed to init config watcher: %s", err)
+		return nil, fmt.Errorf("failed to init config watcher: %w", err)
 	}
 	return cw.notifiers, nil
 }

+// InitSecretFlags registers -notifier.url as a secret flag unless -notifier.showURL is set; call it after flag.Parse and before any logging
+func InitSecretFlags() {
+	if !*showNotifierURL {
+		flagutil.RegisterSecretFlag("notifier.url")
+	}
+}
+
 func notifiersFromFlags(gen AlertURLGenerator) ([]Notifier, error) {
 	var notifiers []Notifier
 	for i, addr := range *addrs {
--- a/app/vmalert/notifier/testdata/static.good.yaml
+++ b/app/vmalert/notifier/testdata/static.good.yaml
@@ -5,6 +5,7 @@ static_configs:
  - targets:
      - localhost:9093
      - localhost:9095
+      - https://localhost:9093/test/api/v2/alerts
    basic_auth:
      username: foo
      password: bar
--- a/app/vmalert/remotewrite/client.go
+++ b/app/vmalert/remotewrite/client.go
@@ -0,0 +1,345 @@
+package remotewrite
+
+import (
+	"bytes"
+	"context"
+	"errors"
+	"flag"
+	"fmt"
+	"io"
+	"net/http"
+	"path"
+	"strings"
+	"sync"
+	"time"
+
+	"github.com/golang/snappy"
+
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
+	"github.com/VictoriaMetrics/metrics"
+)
+
+const (
+	defaultConcurrency   = 4
+	defaultMaxBatchSize  = 1e3
+	defaultMaxQueueSize  = 1e5
+	defaultFlushInterval = 5 * time.Second
+	defaultWriteTimeout  = 30 * time.Second
+)
+
+var (
+	disablePathAppend = flag.Bool("remoteWrite.disablePathAppend", false, "Whether to disable automatic appending of '/api/v1/write' path to the configured -remoteWrite.url.")
+	sendTimeout       = flag.Duration("remoteWrite.sendTimeout", 30*time.Second, "Timeout for sending data to the configured -remoteWrite.url.")
+	retryMinInterval  = flag.Duration("remoteWrite.retryMinInterval", time.Second, "The minimum delay between retry attempts. Every next retry attempt will double the delay to prevent hammering of remote database. See also -remoteWrite.retryMaxTime")
+	retryMaxTime      = flag.Duration("remoteWrite.retryMaxTime", time.Second*30, "The max time spent on retry attempts for the failed remote-write request. Change this value if it is expected for remoteWrite.url to be unreachable for more than -remoteWrite.retryMaxTime. See also -remoteWrite.retryMinInterval")
+)
+
+// Client is an asynchronous HTTP client for writing
+// timeseries via remote write protocol.
+type Client struct {
+	addr          string
+	c             *http.Client
+	authCfg       *promauth.Config
+	input         chan prompbmarshal.TimeSeries
+	flushInterval time.Duration
+	maxBatchSize  int
+	maxQueueSize  int
+
+	wg     sync.WaitGroup
+	doneCh chan struct{}
+}
+
+// Config is config for remote write client.
+type Config struct {
+	// Addr of remote storage
+	Addr    string
+	AuthCfg *promauth.Config
+
+	// Concurrency defines number of readers that
+	// concurrently read from the queue and flush data
+	Concurrency int
+	// MaxBatchSize defines max number of timeseries
+	// to be flushed at once
+	MaxBatchSize int
+	// MaxQueueSize defines max length of input queue
+	// populated by Push method.
+	// Push will be rejected once queue is full.
+	MaxQueueSize int
+	// FlushInterval defines time interval for flushing batches
+	FlushInterval time.Duration
+	// Transport will be used by the underlying http.Client
+	Transport *http.Transport
+}
+
+// NewClient returns asynchronous client for
+// writing timeseries via remotewrite protocol.
+func NewClient(ctx context.Context, cfg Config) (*Client, error) {
+	if cfg.Addr == "" {
+		return nil, fmt.Errorf("config.Addr can't be empty")
+	}
+	if cfg.MaxBatchSize == 0 {
+		cfg.MaxBatchSize = defaultMaxBatchSize
+	}
+	if cfg.MaxQueueSize == 0 {
+		cfg.MaxQueueSize = defaultMaxQueueSize
+	}
+	if cfg.FlushInterval == 0 {
+		cfg.FlushInterval = defaultFlushInterval
+	}
+	if cfg.Transport == nil {
+		cfg.Transport = http.DefaultTransport.(*http.Transport).Clone()
+	}
+	cc := defaultConcurrency
+	if cfg.Concurrency > 0 {
+		cc = cfg.Concurrency
+	}
+	c := &Client{
+		c: &http.Client{
+			Timeout:   *sendTimeout,
+			Transport: cfg.Transport,
+		},
+		addr:          strings.TrimSuffix(cfg.Addr, "/"),
+		authCfg:       cfg.AuthCfg,
+		flushInterval: cfg.FlushInterval,
+		maxBatchSize:  cfg.MaxBatchSize,
+		maxQueueSize:  cfg.MaxQueueSize,
+		doneCh:        make(chan struct{}),
+		input:         make(chan prompbmarshal.TimeSeries, cfg.MaxQueueSize),
+	}
+
+	for i := 0; i < cc; i++ {
+		c.run(ctx)
+	}
+	return c, nil
+}
+
+// Push adds timeseries into queue for writing into remote storage.
+// Push returns an error if client is stopped or if queue is full.
+func (c *Client) Push(s prompbmarshal.TimeSeries) error {
+	rwTotal.Inc()
+	select {
+	case <-c.doneCh:
+		rwErrors.Inc()
+		droppedRows.Add(len(s.Samples))
+		droppedBytes.Add(s.Size())
+		return fmt.Errorf("client is closed")
+	case c.input <- s:
+		return nil
+	default:
+		rwErrors.Inc()
+		droppedRows.Add(len(s.Samples))
+		droppedBytes.Add(s.Size())
+		return fmt.Errorf("failed to push timeseries - queue is full (%d entries). "+
+			"Queue size is controlled by -remoteWrite.maxQueueSize flag",
+			c.maxQueueSize)
+	}
+}
+
+// Close stops the client and waits for all goroutines to exit.
+// NOTE(review): doneCh is not reset to nil in this file, so a repeated Close would panic in close() - confirm callers close once.
+func (c *Client) Close() error {
+	if c.doneCh == nil {
+		return fmt.Errorf("client is already closed")
+	}
+	close(c.input)
+	close(c.doneCh)
+	c.wg.Wait()
+	return nil
+}
+
+func (c *Client) run(ctx context.Context) {
+	ticker := time.NewTicker(c.flushInterval)
+	wr := &prompbmarshal.WriteRequest{}
+	shutdown := func() {
+		for ts := range c.input {
+			wr.Timeseries = append(wr.Timeseries, ts)
+		}
+		lastCtx, cancel := context.WithTimeout(context.Background(), defaultWriteTimeout)
+		logger.Infof("shutting down remote write client and flushing remained %d series", len(wr.Timeseries))
+		c.flush(lastCtx, wr)
+		cancel()
+	}
+	c.wg.Add(1)
+	go func() {
+		defer c.wg.Done()
+		defer ticker.Stop()
+		for {
+			select {
+			case <-c.doneCh:
+				shutdown()
+				return
+			case <-ctx.Done():
+				shutdown()
+				return
+			case <-ticker.C:
+				c.flush(ctx, wr)
+			case ts, ok := <-c.input:
+				if !ok {
+					continue
+				}
+				wr.Timeseries = append(wr.Timeseries, ts)
+				if len(wr.Timeseries) >= c.maxBatchSize {
+					c.flush(ctx, wr)
+				}
+			}
+		}
+	}()
+}
+
+var (
+	rwErrors = metrics.NewCounter(`vmalert_remotewrite_errors_total`)
+	rwTotal  = metrics.NewCounter(`vmalert_remotewrite_total`)
+
+	sentRows            = metrics.NewCounter(`vmalert_remotewrite_sent_rows_total`)
+	sentBytes           = metrics.NewCounter(`vmalert_remotewrite_sent_bytes_total`)
+	droppedRows         = metrics.NewCounter(`vmalert_remotewrite_dropped_rows_total`)
+	droppedBytes        = metrics.NewCounter(`vmalert_remotewrite_dropped_bytes_total`)
+	sendDuration        = metrics.NewFloatCounter(`vmalert_remotewrite_send_duration_seconds_total`)
+	bufferFlushDuration = metrics.NewHistogram(`vmalert_remotewrite_flush_duration_seconds`)
+
+	_ = metrics.NewGauge(`vmalert_remotewrite_concurrency`, func() float64 {
+		return float64(*concurrency)
+	})
+)
+
+// flush is a blocking function that marshals WriteRequest and sends it to
+// remote-write endpoint with a limited amount of retries if request fails.
+// Dropped rows are counted in samples, the same unit Push reports to droppedRows.
+func (c *Client) flush(ctx context.Context, wr *prompbmarshal.WriteRequest) {
+	if len(wr.Timeseries) < 1 {
+		return
+	}
+	defer prompbmarshal.ResetWriteRequest(wr)
+	defer bufferFlushDuration.UpdateDuration(time.Now())
+
+	data, err := wr.Marshal()
+	if err != nil {
+		logger.Errorf("failed to marshal WriteRequest: %s", err)
+		return
+	}
+
+	b := snappy.Encode(nil, data)
+
+	retryInterval, maxRetryInterval := *retryMinInterval, *retryMaxTime
+	if retryInterval > maxRetryInterval {
+		retryInterval = maxRetryInterval
+	}
+	timeStart := time.Now()
+	defer func() { sendDuration.Add(time.Since(timeStart).Seconds()) }()
+L:
+	for attempts := 0; ; attempts++ {
+		err := c.send(ctx, b)
+		if errors.Is(err, io.EOF) {
+			// Something in the middle between client and destination might be closing
+			// the connection. So we do a one more attempt in hope request will succeed.
+			err = c.send(ctx, b)
+		}
+		if err == nil {
+			sentRows.Add(len(wr.Timeseries))
+			sentBytes.Add(len(b))
+			return
+		}
+
+		_, isNotRetriable := err.(*nonRetriableError)
+		logger.Warnf("attempt %d to send request failed: %s (retriable: %v)", attempts+1, err, !isNotRetriable)
+
+		if isNotRetriable {
+			// exit fast if error isn't retriable
+			break
+		}
+
+		// check if request has been cancelled before backoff
+		select {
+		case <-ctx.Done():
+			logger.Errorf("interrupting retry attempt %d: context cancelled", attempts+1)
+			break L
+		default:
+		}
+
+		timeLeftForRetries := maxRetryInterval - time.Since(timeStart)
+		if timeLeftForRetries < 0 {
+			// the max retry time has passed, so we give up
+			break
+		}
+
+		if retryInterval > timeLeftForRetries {
+			retryInterval = timeLeftForRetries
+		}
+		// sleeping to prevent remote db hammering
+		time.Sleep(retryInterval)
+		retryInterval *= 2
+	}
+
+	rwErrors.Inc()
+	rows := 0
+	for i := range wr.Timeseries {
+		rows += len(wr.Timeseries[i].Samples)
+	}
+	droppedRows.Add(rows)
+	droppedBytes.Add(len(b))
+	logger.Errorf("attempts to send remote-write request failed - dropping %d time series", len(wr.Timeseries))
+}
+
+// send performs a single POST of data to c.addr, bound to ctx.
+// SetHeaders failures and 4xx responses other than 429 are returned as *nonRetriableError.
+func (c *Client) send(ctx context.Context, data []byte) error {
+	r := bytes.NewReader(data)
+	req, err := http.NewRequestWithContext(ctx, http.MethodPost, c.addr, r)
+	if err != nil {
+		return fmt.Errorf("failed to create new HTTP request: %w", err)
+	}
+
+	// RFC standard compliant headers
+	req.Header.Set("Content-Encoding", "snappy")
+	req.Header.Set("Content-Type", "application/x-protobuf")
+
+	// Prometheus compliant headers
+	req.Header.Set("X-Prometheus-Remote-Write-Version", "0.1.0")
+
+	if c.authCfg != nil {
+		err = c.authCfg.SetHeaders(req, true)
+		if err != nil {
+			return &nonRetriableError{err: err}
+		}
+	}
+	if !*disablePathAppend {
+		req.URL.Path = path.Join(req.URL.Path, "/api/v1/write")
+	}
+	resp, err := c.c.Do(req)
+	if err != nil {
+		return fmt.Errorf("error while sending request to %s: %w; Data len %d(%d)",
+			req.URL.Redacted(), err, len(data), r.Size())
+	}
+	defer func() { _ = resp.Body.Close() }()
+
+	body, _ := io.ReadAll(resp.Body)
+
+	// according to https://prometheus.io/docs/concepts/remote_write_spec/
+	// Prometheus remote Write compatible receivers MUST
+	switch resp.StatusCode / 100 {
+	case 2:
+		// respond with HTTP 2xx status code when write is successful.
+		return nil
+	case 4:
+		if resp.StatusCode != http.StatusTooManyRequests {
+			// MUST NOT retry write requests on HTTP 4xx responses other than 429
+			return &nonRetriableError{
+				err: fmt.Errorf("unexpected response code %d for %s. Response body %q", resp.StatusCode, req.URL.Redacted(), body),
+			}
+		}
+		fallthrough
+	default:
+		return fmt.Errorf("unexpected response code %d for %s. Response body %q",
+			resp.StatusCode, req.URL.Redacted(), body)
+	}
+}
+
+type nonRetriableError struct {
+	err error
+}
+
+func (e *nonRetriableError) Error() string {
+	return e.err.Error()
+}
--- a/app/vmalert/remotewrite/remotewrite_test.go
+++ b/app/vmalert/remotewrite/remotewrite_test.go
--- a/app/vmalert/remotewrite/debug_client.go
+++ b/app/vmalert/remotewrite/debug_client.go
@@ -0,0 +1,97 @@
+package remotewrite
+
+import (
+	"bytes"
+	"fmt"
+	"io"
+	"net/http"
+	"path"
+	"strings"
+	"sync"
+
+	"github.com/golang/snappy"
+
+	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/utils"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
+)
+
+// DebugClient won't push series periodically, but will write data to remote endpoint
+// immediately when Push() is called
+type DebugClient struct {
+	addr string
+	c    *http.Client
+
+	wg sync.WaitGroup
+}
+
+// NewDebugClient initiates and returns a new DebugClient
+func NewDebugClient() (*DebugClient, error) {
+	if *addr == "" {
+		return nil, nil
+	}
+
+	t, err := utils.Transport(*addr, *tlsCertFile, *tlsKeyFile, *tlsCAFile, *tlsServerName, *tlsInsecureSkipVerify)
+	if err != nil {
+		return nil, fmt.Errorf("failed to create transport: %w", err)
+	}
+	c := &DebugClient{
+		c: &http.Client{
+			Timeout:   *sendTimeout,
+			Transport: t,
+		},
+		addr: strings.TrimSuffix(*addr, "/"),
+	}
+	return c, nil
+}
+
+// Push sends the given timeseries to the remote storage.
+func (c *DebugClient) Push(s prompbmarshal.TimeSeries) error {
+	c.wg.Add(1)
+	defer c.wg.Done()
+	wr := &prompbmarshal.WriteRequest{Timeseries: []prompbmarshal.TimeSeries{s}}
+	data, err := wr.Marshal()
+	if err != nil {
+		return fmt.Errorf("failed to marshal the given time series: %w", err)
+	}
+
+	return c.send(data)
+}
+
+// Close stops the DebugClient
+func (c *DebugClient) Close() error {
+	c.wg.Wait()
+	return nil
+}
+
+func (c *DebugClient) send(data []byte) error {
+	b := snappy.Encode(nil, data)
+	r := bytes.NewReader(b)
+	req, err := http.NewRequest(http.MethodPost, c.addr, r)
+	if err != nil {
+		return fmt.Errorf("failed to create new HTTP request: %w", err)
+	}
+
+	// RFC standard compliant headers
+	req.Header.Set("Content-Encoding", "snappy")
+	req.Header.Set("Content-Type", "application/x-protobuf")
+
+	// Prometheus compliant headers
+	req.Header.Set("X-Prometheus-Remote-Write-Version", "0.1.0")
+
+	if !*disablePathAppend {
+		req.URL.Path = path.Join(req.URL.Path, "/api/v1/write")
+	}
+	resp, err := c.c.Do(req)
+	if err != nil {
+		return fmt.Errorf("error while sending request to %s: %w; Data len %d(%d)",
+			req.URL.Redacted(), err, len(data), r.Size())
+	}
+	defer func() { _ = resp.Body.Close() }()
+
+	if resp.StatusCode/100 == 2 {
+		return nil
+	}
+	body, _ := io.ReadAll(resp.Body)
+	return fmt.Errorf("unexpected response code %d for %s. Response body %q",
+		resp.StatusCode, req.URL.Redacted(), body)
+}
--- a/app/vmalert/remotewrite/debug_client_test.go
+++ b/app/vmalert/remotewrite/debug_client_test.go
@@ -0,0 +1,50 @@
+package remotewrite
+
+import (
+	"testing"
+	"time"
+
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
+)
+
+func TestDebugClient_Push(t *testing.T) {
+	testSrv := newRWServer()
+	oldAddr := *addr
+	*addr = testSrv.URL
+	defer func() {
+		*addr = oldAddr
+	}()
+
+	client, err := NewDebugClient()
+	if err != nil {
+		t.Fatalf("failed to create debug client: %s", err)
+	}
+
+	const rowsN = 100
+	var sent int
+	for i := 0; i < rowsN; i++ {
+		s := prompbmarshal.TimeSeries{
+			Samples: []prompbmarshal.Sample{{
+				Value:     float64(i),
+				Timestamp: time.Now().Unix(),
+			}},
+		}
+		err := client.Push(s)
+		if err != nil {
+			t.Fatalf("unexpected err: %s", err)
+		}
+		if err == nil {
+			sent++
+		}
+	}
+	if sent == 0 {
+		t.Fatalf("0 series sent")
+	}
+	if err := client.Close(); err != nil {
+		t.Fatalf("failed to close client: %s", err)
+	}
+	got := testSrv.accepted()
+	if got != sent {
+		t.Fatalf("expected to have %d series; got %d", sent, got)
+	}
+}
--- a/app/vmalert/remotewrite/init.go
+++ b/app/vmalert/remotewrite/init.go
@@ -30,7 +30,7 @@ var (

 	maxQueueSize  = flag.Int("remoteWrite.maxQueueSize", 1e5, "Defines the max number of pending datapoints to remote write endpoint")
 	maxBatchSize  = flag.Int("remoteWrite.maxBatchSize", 1e3, "Defines max number of timeseries to be flushed at once")
-	concurrency   = flag.Int("remoteWrite.concurrency", 1, "Defines number of writers for concurrent writing into remote querier")
+	concurrency   = flag.Int("remoteWrite.concurrency", 1, "Defines number of writers for concurrent writing into remote write endpoint")
 	flushInterval = flag.Duration("remoteWrite.flushInterval", 5*time.Second, "Defines interval of flushes to remote write endpoint")

 	tlsInsecureSkipVerify = flag.Bool("remoteWrite.tlsInsecureSkipVerify", false, "Whether to skip tls verification when connecting to -remoteWrite.url")
--- a/app/vmalert/remotewrite/remotewrite.go
+++ b/app/vmalert/remotewrite/remotewrite.go
@@ -1,322 +1,13 @@
 package remotewrite

 import (
-	"bytes"
-	"context"
-	"flag"
-	"fmt"
-	"io"
-	"net/http"
-	"path"
-	"strings"
-	"sync"
-	"time"
-
-	"github.com/golang/snappy"
-
-	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
-	"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
-	"github.com/VictoriaMetrics/metrics"
 )

-var (
-	disablePathAppend = flag.Bool("remoteWrite.disablePathAppend", false, "Whether to disable automatic appending of '/api/v1/write' path to the configured -remoteWrite.url.")
-	sendTimeout       = flag.Duration("remoteWrite.sendTimeout", 30*time.Second, "Timeout for sending data to the configured -remoteWrite.url.")
-	retryMinInterval  = flag.Duration("remoteWrite.retryMinInterval", time.Second, "The minimum delay between retry attempts. Every next retry attempt will double the delay to prevent hammering of remote database. See also -remoteWrite.retryMaxInterval")
-	retryMaxTime      = flag.Duration("remoteWrite.retryMaxTime", time.Second*30, "The max time spent on retry attempts for the failed remote-write request. Change this value if it is expected for remoteWrite.url to be unreachable for more than -remoteWrite.retryMaxTime. See also -remoteWrite.retryMinInterval")
-)
-
-// Client is an asynchronous HTTP client for writing
-// timeseries via remote write protocol.
-type Client struct {
-	addr          string
-	c             *http.Client
-	authCfg       *promauth.Config
-	input         chan prompbmarshal.TimeSeries
-	flushInterval time.Duration
-	maxBatchSize  int
-	maxQueueSize  int
-
-	wg     sync.WaitGroup
-	doneCh chan struct{}
-}
-
-// Config is config for remote write.
-type Config struct {
-	// Addr of remote storage
-	Addr    string
-	AuthCfg *promauth.Config
-
-	// Concurrency defines number of readers that
-	// concurrently read from the queue and flush data
-	Concurrency int
-	// MaxBatchSize defines max number of timeseries
-	// to be flushed at once
-	MaxBatchSize int
-	// MaxQueueSize defines max length of input queue
-	// populated by Push method.
-	// Push will be rejected once queue is full.
-	MaxQueueSize int
-	// FlushInterval defines time interval for flushing batches
-	FlushInterval time.Duration
-	// Transport will be used by the underlying http.Client
-	Transport *http.Transport
-}
-
-const (
-	defaultConcurrency   = 4
-	defaultMaxBatchSize  = 1e3
-	defaultMaxQueueSize  = 1e5
-	defaultFlushInterval = 5 * time.Second
-	defaultWriteTimeout  = 30 * time.Second
-)
-
-// NewClient returns asynchronous client for
-// writing timeseries via remotewrite protocol.
-func NewClient(ctx context.Context, cfg Config) (*Client, error) {
-	if cfg.Addr == "" {
-		return nil, fmt.Errorf("config.Addr can't be empty")
-	}
-	if cfg.MaxBatchSize == 0 {
-		cfg.MaxBatchSize = defaultMaxBatchSize
-	}
-	if cfg.MaxQueueSize == 0 {
-		cfg.MaxQueueSize = defaultMaxQueueSize
-	}
-	if cfg.FlushInterval == 0 {
-		cfg.FlushInterval = defaultFlushInterval
-	}
-	if cfg.Transport == nil {
-		cfg.Transport = http.DefaultTransport.(*http.Transport).Clone()
-	}
-	cc := defaultConcurrency
-	if cfg.Concurrency > 0 {
-		cc = cfg.Concurrency
-	}
-	c := &Client{
-		c: &http.Client{
-			Timeout:   *sendTimeout,
-			Transport: cfg.Transport,
-		},
-		addr:          strings.TrimSuffix(cfg.Addr, "/"),
-		authCfg:       cfg.AuthCfg,
-		flushInterval: cfg.FlushInterval,
-		maxBatchSize:  cfg.MaxBatchSize,
-		maxQueueSize:  cfg.MaxQueueSize,
-		doneCh:        make(chan struct{}),
-		input:         make(chan prompbmarshal.TimeSeries, cfg.MaxQueueSize),
-	}
-
-	for i := 0; i < cc; i++ {
-		c.run(ctx)
-	}
-	return c, nil
-}
-
-// Push adds timeseries into queue for writing into remote storage.
-// Push returns and error if client is stopped or if queue is full.
-func (c *Client) Push(s prompbmarshal.TimeSeries) error {
-	select {
-	case <-c.doneCh:
-		return fmt.Errorf("client is closed")
-	case c.input <- s:
-		return nil
-	default:
-		return fmt.Errorf("failed to push timeseries - queue is full (%d entries). "+
-			"Queue size is controlled by -remoteWrite.maxQueueSize flag",
-			c.maxQueueSize)
-	}
-}
-
-// Close stops the client and waits for all goroutines
-// to exit.
-func (c *Client) Close() error {
-	if c.doneCh == nil {
-		return fmt.Errorf("client is already closed")
-	}
-	close(c.input)
-	close(c.doneCh)
-	c.wg.Wait()
-	return nil
-}
-
-func (c *Client) run(ctx context.Context) {
-	ticker := time.NewTicker(c.flushInterval)
-	wr := &prompbmarshal.WriteRequest{}
-	shutdown := func() {
-		for ts := range c.input {
-			wr.Timeseries = append(wr.Timeseries, ts)
-		}
-		lastCtx, cancel := context.WithTimeout(context.Background(), defaultWriteTimeout)
-		logger.Infof("shutting down remote write client and flushing remained %d series", len(wr.Timeseries))
-		c.flush(lastCtx, wr)
-		cancel()
-	}
-	c.wg.Add(1)
-	go func() {
-		defer c.wg.Done()
-		defer ticker.Stop()
-		for {
-			select {
-			case <-c.doneCh:
-				shutdown()
-				return
-			case <-ctx.Done():
-				shutdown()
-				return
-			case <-ticker.C:
-				c.flush(ctx, wr)
-			case ts, ok := <-c.input:
-				if !ok {
-					continue
-				}
-				wr.Timeseries = append(wr.Timeseries, ts)
-				if len(wr.Timeseries) >= c.maxBatchSize {
-					c.flush(ctx, wr)
-				}
-			}
-		}
-	}()
-}
-
-var (
-	sentRows            = metrics.NewCounter(`vmalert_remotewrite_sent_rows_total`)
-	sentBytes           = metrics.NewCounter(`vmalert_remotewrite_sent_bytes_total`)
-	sendDuration        = metrics.NewFloatCounter(`vmalert_remotewrite_send_duration_seconds_total`)
-	droppedRows         = metrics.NewCounter(`vmalert_remotewrite_dropped_rows_total`)
-	droppedBytes        = metrics.NewCounter(`vmalert_remotewrite_dropped_bytes_total`)
-	bufferFlushDuration = metrics.NewHistogram(`vmalert_remotewrite_flush_duration_seconds`)
-
-	_ = metrics.NewGauge(`vmalert_remotewrite_concurrency`, func() float64 {
-		return float64(*concurrency)
-	})
-)
-
-// flush is a blocking function that marshals WriteRequest and sends
-// it to remote-write endpoint. Flush performs limited amount of retries
-// if request fails.
-func (c *Client) flush(ctx context.Context, wr *prompbmarshal.WriteRequest) {
-	if len(wr.Timeseries) < 1 {
-		return
-	}
-	defer prompbmarshal.ResetWriteRequest(wr)
-	defer bufferFlushDuration.UpdateDuration(time.Now())
-
-	data, err := wr.Marshal()
-	if err != nil {
-		logger.Errorf("failed to marshal WriteRequest: %s", err)
-		return
-	}
-
-	b := snappy.Encode(nil, data)
-
-	retryInterval, maxRetryInterval := *retryMinInterval, *retryMaxTime
-	if retryInterval > maxRetryInterval {
-		retryInterval = maxRetryInterval
-	}
-	timeStart := time.Now()
-	defer func() {
-		sendDuration.Add(time.Since(timeStart).Seconds())
-	}()
-L:
-	for attempts := 0; ; attempts++ {
-		err := c.send(ctx, b)
-		if err == nil {
-			sentRows.Add(len(wr.Timeseries))
-			sentBytes.Add(len(b))
-			return
-		}
-
-		_, isNotRetriable := err.(*nonRetriableError)
-		logger.Warnf("attempt %d to send request failed: %s (retriable: %v)", attempts+1, err, !isNotRetriable)
-
-		if isNotRetriable {
-			// exit fast if error isn't retriable
-			break
-		}
-
-		// check if request has been cancelled before backoff
-		select {
-		case <-ctx.Done():
-			logger.Errorf("interrupting retry attempt %d: context cancelled", attempts+1)
-			break L
-		default:
-		}
-
-		timeLeftForRetries := maxRetryInterval - time.Since(timeStart)
-		if timeLeftForRetries < 0 {
-			// the max retry time has passed, so we give up
-			break
-		}
-
-		if retryInterval > timeLeftForRetries {
-			retryInterval = timeLeftForRetries
-		}
-		// sleeping to prevent remote db hammering
-		time.Sleep(retryInterval)
-		retryInterval *= 2
-
-	}
-
-	droppedRows.Add(len(wr.Timeseries))
-	droppedBytes.Add(len(b))
-	logger.Errorf("attempts to send remote-write request failed - dropping %d time series",
-		len(wr.Timeseries))
-}
-
-func (c *Client) send(ctx context.Context, data []byte) error {
-	r := bytes.NewReader(data)
-	req, err := http.NewRequest(http.MethodPost, c.addr, r)
-	if err != nil {
-		return fmt.Errorf("failed to create new HTTP request: %w", err)
-	}
-
-	// RFC standard compliant headers
-	req.Header.Set("Content-Encoding", "snappy")
-	req.Header.Set("Content-Type", "application/x-protobuf")
-
-	// Prometheus compliant headers
-	req.Header.Set("X-Prometheus-Remote-Write-Version", "0.1.0")
-
-	if c.authCfg != nil {
-		c.authCfg.SetHeaders(req, true)
-	}
-	if !*disablePathAppend {
-		req.URL.Path = path.Join(req.URL.Path, "/api/v1/write")
-	}
-	resp, err := c.c.Do(req.WithContext(ctx))
-	if err != nil {
-		return fmt.Errorf("error while sending request to %s: %w; Data len %d(%d)",
-			req.URL.Redacted(), err, len(data), r.Size())
-	}
-	defer func() { _ = resp.Body.Close() }()
-
-	body, _ := io.ReadAll(resp.Body)
-
-	// according to https://prometheus.io/docs/concepts/remote_write_spec/
-	// Prometheus remote Write compatible receivers MUST
-	switch resp.StatusCode / 100 {
-	case 2:
-		// respond with a HTTP 2xx status code when the write is successful.
-		return nil
-	case 4:
-		if resp.StatusCode != http.StatusTooManyRequests {
-			// MUST NOT retry write requests on HTTP 4xx responses other than 429
-			return &nonRetriableError{fmt.Errorf("unexpected response code %d for %s. Response body %q",
-				resp.StatusCode, req.URL.Redacted(), body)}
-		}
-		fallthrough
-	default:
-		return fmt.Errorf("unexpected response code %d for %s. Response body %q",
-			resp.StatusCode, req.URL.Redacted(), body)
-	}
-}
-
-type nonRetriableError struct {
-	err error
-}
-
-func (e *nonRetriableError) Error() string {
-	return e.err.Error()
+// RWClient represents an HTTP client for pushing data via remote write protocol
+type RWClient interface {
+	// Push pushes the given time series to remote storage
+	Push(s prompbmarshal.TimeSeries) error
+	// Close stops the client. Client can't be reused after Close call.
+	Close() error
 }
--- a/app/vmalert/replay.go
+++ b/app/vmalert/replay.go
@@ -1,19 +1,16 @@
 package main

 import (
-	"context"
 	"flag"
 	"fmt"
 	"strings"
 	"time"

-	"github.com/cheggaaa/pb/v3"
-
 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config"
 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/remotewrite"
+	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/rule"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
-	"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
 )

 var (
@@ -33,17 +30,17 @@ var (
 		"Progress bar rendering might be verbose or break the logs parsing, so it is recommended to be disabled when not used in interactive mode.")
 )

-func replay(groupsCfg []config.Group, qb datasource.QuerierBuilder, rw *remotewrite.Client) error {
+func replay(groupsCfg []config.Group, qb datasource.QuerierBuilder, rw remotewrite.RWClient) error {
 	if *replayMaxDatapoints < 1 {
 		return fmt.Errorf("replay.maxDatapointsPerQuery can't be lower than 1")
 	}
 	tFrom, err := time.Parse(time.RFC3339, *replayFrom)
 	if err != nil {
-		return fmt.Errorf("failed to parse %q: %s", *replayFrom, err)
+		return fmt.Errorf("failed to parse %q: %w", *replayFrom, err)
 	}
 	tTo, err := time.Parse(time.RFC3339, *replayTo)
 	if err != nil {
-		return fmt.Errorf("failed to parse %q: %s", *replayTo, err)
+		return fmt.Errorf("failed to parse %q: %w", *replayTo, err)
 	}
 	if !tTo.After(tFrom) {
 		return fmt.Errorf("replay.timeTo must be bigger than replay.timeFrom")
@@ -68,8 +65,8 @@ func replay(groupsCfg []config.Group, qb datasource.QuerierBuilder, rw *remotewr

 	var total int
 	for _, cfg := range groupsCfg {
-		ng := newGroup(cfg, qb, *evaluationInterval, labels)
-		total += ng.replay(tFrom, tTo, rw)
+		ng := rule.NewGroup(cfg, qb, *evaluationInterval, labels)
+		total += ng.Replay(tFrom, tTo, rw, *replayMaxDatapoints, *replayRuleRetryAttempts, *replayRulesDelay, *disableProgressBar)
 	}
 	logger.Infof("replay finished! Imported %d samples", total)
 	if rw != nil {
@@ -77,97 +74,3 @@ func replay(groupsCfg []config.Group, qb datasource.QuerierBuilder, rw *remotewr
 	}
 	return nil
 }
-
-func (g *Group) replay(start, end time.Time, rw *remotewrite.Client) int {
-	var total int
-	step := g.Interval * time.Duration(*replayMaxDatapoints)
-	ri := rangeIterator{start: start, end: end, step: step}
-	iterations := int(end.Sub(start)/step) + 1
-	fmt.Printf("\nGroup %q"+
-		"\ninterval: \t%v"+
-		"\nrequests to make: \t%d"+
-		"\nmax range per request: \t%v\n",
-		g.Name, g.Interval, iterations, step)
-	if g.Limit > 0 {
-		fmt.Printf("\nPlease note, `limit: %d` param has no effect during replay.\n",
-			g.Limit)
-	}
-	for _, rule := range g.Rules {
-		fmt.Printf("> Rule %q (ID: %d)\n", rule, rule.ID())
-		var bar *pb.ProgressBar
-		if !*disableProgressBar {
-			bar = pb.StartNew(iterations)
-		}
-		ri.reset()
-		for ri.next() {
-			n, err := replayRule(rule, ri.s, ri.e, rw)
-			if err != nil {
-				logger.Fatalf("rule %q: %s", rule, err)
-			}
-			total += n
-			if bar != nil {
-				bar.Increment()
-			}
-		}
-		if bar != nil {
-			bar.Finish()
-		}
-		// sleep to let remote storage to flush data on-disk
-		// so chained rules could be calculated correctly
-		time.Sleep(*replayRulesDelay)
-	}
-	return total
-}
-
-func replayRule(rule Rule, start, end time.Time, rw *remotewrite.Client) (int, error) {
-	var err error
-	var tss []prompbmarshal.TimeSeries
-	for i := 0; i < *replayRuleRetryAttempts; i++ {
-		tss, err = rule.ExecRange(context.Background(), start, end)
-		if err == nil {
-			break
-		}
-		logger.Errorf("attempt %d to execute rule %q failed: %s", i+1, rule, err)
-		time.Sleep(time.Second)
-	}
-	if err != nil { // means all attempts failed
-		return 0, err
-	}
-	if len(tss) < 1 {
-		return 0, nil
-	}
-	var n int
-	for _, ts := range tss {
-		if err := rw.Push(ts); err != nil {
-			return n, fmt.Errorf("remote write failure: %s", err)
-		}
-		n += len(ts.Samples)
-	}
-	return n, nil
-}
-
-type rangeIterator struct {
-	step       time.Duration
-	start, end time.Time
-
-	iter int
-	s, e time.Time
-}
-
-func (ri *rangeIterator) reset() {
-	ri.iter = 0
-	ri.s, ri.e = time.Time{}, time.Time{}
-}
-
-func (ri *rangeIterator) next() bool {
-	ri.s = ri.start.Add(ri.step * time.Duration(ri.iter))
-	if !ri.end.After(ri.s) {
-		return false
-	}
-	ri.e = ri.s.Add(ri.step)
-	if ri.e.After(ri.end) {
-		ri.e = ri.end
-	}
-	ri.iter++
-	return true
-}
--- a/app/vmalert/replay_test.go
+++ b/app/vmalert/replay_test.go
@@ -12,7 +12,7 @@ import (
 )

 type fakeReplayQuerier struct {
-	fakeQuerier
+	datasource.FakeQuerier
 	registry map[string]map[string]struct{}
 }

@@ -170,81 +170,3 @@ func TestReplay(t *testing.T) {
 		})
 	}
 }
-
-func TestRangeIterator(t *testing.T) {
-	testCases := []struct {
-		ri     rangeIterator
-		result [][2]time.Time
-	}{
-		{
-			ri: rangeIterator{
-				start: parseTime(t, "2021-01-01T12:00:00.000Z"),
-				end:   parseTime(t, "2021-01-01T12:30:00.000Z"),
-				step:  5 * time.Minute,
-			},
-			result: [][2]time.Time{
-				{parseTime(t, "2021-01-01T12:00:00.000Z"), parseTime(t, "2021-01-01T12:05:00.000Z")},
-				{parseTime(t, "2021-01-01T12:05:00.000Z"), parseTime(t, "2021-01-01T12:10:00.000Z")},
-				{parseTime(t, "2021-01-01T12:10:00.000Z"), parseTime(t, "2021-01-01T12:15:00.000Z")},
-				{parseTime(t, "2021-01-01T12:15:00.000Z"), parseTime(t, "2021-01-01T12:20:00.000Z")},
-				{parseTime(t, "2021-01-01T12:20:00.000Z"), parseTime(t, "2021-01-01T12:25:00.000Z")},
-				{parseTime(t, "2021-01-01T12:25:00.000Z"), parseTime(t, "2021-01-01T12:30:00.000Z")},
-			},
-		},
-		{
-			ri: rangeIterator{
-				start: parseTime(t, "2021-01-01T12:00:00.000Z"),
-				end:   parseTime(t, "2021-01-01T12:30:00.000Z"),
-				step:  45 * time.Minute,
-			},
-			result: [][2]time.Time{
-				{parseTime(t, "2021-01-01T12:00:00.000Z"), parseTime(t, "2021-01-01T12:30:00.000Z")},
-				{parseTime(t, "2021-01-01T12:30:00.000Z"), parseTime(t, "2021-01-01T12:30:00.000Z")},
-			},
-		},
-		{
-			ri: rangeIterator{
-				start: parseTime(t, "2021-01-01T12:00:12.000Z"),
-				end:   parseTime(t, "2021-01-01T12:00:17.000Z"),
-				step:  time.Second,
-			},
-			result: [][2]time.Time{
-				{parseTime(t, "2021-01-01T12:00:12.000Z"), parseTime(t, "2021-01-01T12:00:13.000Z")},
-				{parseTime(t, "2021-01-01T12:00:13.000Z"), parseTime(t, "2021-01-01T12:00:14.000Z")},
-				{parseTime(t, "2021-01-01T12:00:14.000Z"), parseTime(t, "2021-01-01T12:00:15.000Z")},
-				{parseTime(t, "2021-01-01T12:00:15.000Z"), parseTime(t, "2021-01-01T12:00:16.000Z")},
-				{parseTime(t, "2021-01-01T12:00:16.000Z"), parseTime(t, "2021-01-01T12:00:17.000Z")},
-			},
-		},
-	}
-
-	for i, tc := range testCases {
-		t.Run(fmt.Sprintf("case %d", i), func(t *testing.T) {
-			var j int
-			for tc.ri.next() {
-				if len(tc.result) < j+1 {
-					t.Fatalf("unexpected result for iterator on step %d: %v - %v",
-						j, tc.ri.s, tc.ri.e)
-				}
-				s, e := tc.ri.s, tc.ri.e
-				expS, expE := tc.result[j][0], tc.result[j][1]
-				if s != expS {
-					t.Fatalf("expected to get start=%v; got %v", expS, s)
-				}
-				if e != expE {
-					t.Fatalf("expected to get end=%v; got %v", expE, e)
-				}
-				j++
-			}
-		})
-	}
-}
-
-func parseTime(t *testing.T, s string) time.Time {
-	t.Helper()
-	tt, err := time.Parse("2006-01-02T15:04:05.000Z", s)
-	if err != nil {
-		t.Fatal(err)
-	}
-	return tt
-}
--- a/app/vmalert/rule.go
+++ b/app/vmalert/rule.go
@@ -1,118 +0,0 @@
-package main
-
-import (
-	"context"
-	"errors"
-	"sync"
-	"time"
-
-	"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
-)
-
-// Rule represents alerting or recording rule
-// that has unique ID, can be Executed and
-// updated with other Rule.
-type Rule interface {
-	// ID returns unique ID that may be used for
-	// identifying this Rule among others.
-	ID() uint64
-	// Exec executes the rule with given context at the given timestamp and limit.
-	// returns an err if number of resulting time series exceeds the limit.
-	Exec(ctx context.Context, ts time.Time, limit int) ([]prompbmarshal.TimeSeries, error)
-	// ExecRange executes the rule on the given time range.
-	ExecRange(ctx context.Context, start, end time.Time) ([]prompbmarshal.TimeSeries, error)
-	// UpdateWith performs modification of current Rule
-	// with fields of the given Rule.
-	UpdateWith(Rule) error
-	// ToAPI converts Rule into APIRule
-	ToAPI() APIRule
-	// Close performs the shutdown procedures for rule
-	// such as metrics unregister
-	Close()
-}
-
-var errDuplicate = errors.New("result contains metrics with the same labelset after applying rule labels. See https://docs.victoriametrics.com/vmalert.html#series-with-the-same-labelset for details")
-
-type ruleState struct {
-	sync.RWMutex
-	entries []ruleStateEntry
-	cur     int
-}
-
-type ruleStateEntry struct {
-	// stores last moment of time rule.Exec was called
-	time time.Time
-	// stores the timesteamp with which rule.Exec was called
-	at time.Time
-	// stores the duration of the last rule.Exec call
-	duration time.Duration
-	// stores last error that happened in Exec func
-	// resets on every successful Exec
-	// may be used as Health ruleState
-	err error
-	// stores the number of samples returned during
-	// the last evaluation
-	samples int
-	// stores the number of time series fetched during
-	// the last evaluation.
-	// Is supported by VictoriaMetrics only, starting from v1.90.0
-	// If seriesFetched == nil, then this attribute was missing in
-	// datasource response (unsupported).
-	seriesFetched *int
-	// stores the curl command reflecting the HTTP request used during rule.Exec
-	curl string
-}
-
-func newRuleState(size int) *ruleState {
-	if size < 1 {
-		size = 1
-	}
-	return &ruleState{
-		entries: make([]ruleStateEntry, size),
-	}
-}
-
-func (s *ruleState) getLast() ruleStateEntry {
-	s.RLock()
-	defer s.RUnlock()
-	return s.entries[s.cur]
-}
-
-func (s *ruleState) size() int {
-	s.RLock()
-	defer s.RUnlock()
-	return len(s.entries)
-}
-
-func (s *ruleState) getAll() []ruleStateEntry {
-	entries := make([]ruleStateEntry, 0)
-
-	s.RLock()
-	defer s.RUnlock()
-
-	cur := s.cur
-	for {
-		e := s.entries[cur]
-		if !e.time.IsZero() || !e.at.IsZero() {
-			entries = append(entries, e)
-		}
-		cur--
-		if cur < 0 {
-			cur = cap(s.entries) - 1
-		}
-		if cur == s.cur {
-			return entries
-		}
-	}
-}
-
-func (s *ruleState) add(e ruleStateEntry) {
-	s.Lock()
-	defer s.Unlock()
-
-	s.cur++
-	if s.cur > cap(s.entries)-1 {
-		s.cur = 0
-	}
-	s.entries[s.cur] = e
-}
--- a/app/vmalert/rule/alerting.go
+++ b/app/vmalert/rule/alerting.go
@@ -1,11 +1,10 @@
-package main
+package rule

 import (
 	"context"
 	"fmt"
 	"hash/fnv"
 	"sort"
-	"strconv"
 	"strings"
 	"sync"
 	"time"
@@ -31,6 +30,7 @@ type AlertingRule struct {
 	Annotations   map[string]string
 	GroupID       uint64
 	GroupName     string
+	File          string
 	EvalInterval  time.Duration
 	Debug         bool

@@ -55,7 +55,8 @@ type alertingRuleMetrics struct {
 	seriesFetched *utils.Gauge
 }

-func newAlertingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rule) *AlertingRule {
+// NewAlertingRule creates a new AlertingRule
+func NewAlertingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rule) *AlertingRule {
 	ar := &AlertingRule{
 		Type:          group.Type,
 		RuleID:        cfg.ID,
@@ -67,6 +68,7 @@ func newAlertingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rule
 		Annotations:   cfg.Annotations,
 		GroupID:       group.ID(),
 		GroupName:     group.Name,
+		File:          group.File,
 		EvalInterval:  group.Interval,
 		Debug:         cfg.Debug,
 		q: qb.BuildWithParams(datasource.QuerierParams{
@@ -80,13 +82,18 @@ func newAlertingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rule
 		metrics: &alertingRuleMetrics{},
 	}

+	entrySize := *ruleUpdateEntriesLimit
 	if cfg.UpdateEntriesLimit != nil {
-		ar.state = newRuleState(*cfg.UpdateEntriesLimit)
-	} else {
-		ar.state = newRuleState(*ruleUpdateEntriesLimit)
+		entrySize = *cfg.UpdateEntriesLimit
+	}
+	if entrySize < 1 {
+		entrySize = 1
+	}
+	ar.state = &ruleState{
+		entries: make([]StateEntry, entrySize),
 	}

-	labels := fmt.Sprintf(`alertname=%q, group=%q, id="%d"`, ar.Name, group.Name, ar.ID())
+	labels := fmt.Sprintf(`alertname=%q, group=%q, file=%q, id="%d"`, ar.Name, group.Name, group.File, ar.ID())
 	ar.metrics.pending = utils.GetOrCreateGauge(fmt.Sprintf(`vmalert_alerts_pending{%s}`, labels),
 		func() float64 {
 			ar.alertsMu.RLock()
@@ -114,7 +121,7 @@ func newAlertingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rule
 	ar.metrics.errors = utils.GetOrCreateGauge(fmt.Sprintf(`vmalert_alerting_rules_error{%s}`, labels),
 		func() float64 {
 			e := ar.state.getLast()
-			if e.err == nil {
+			if e.Err == nil {
 				return 0
 			}
 			return 1
@@ -122,28 +129,28 @@ func newAlertingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rule
 	ar.metrics.samples = utils.GetOrCreateGauge(fmt.Sprintf(`vmalert_alerting_rules_last_evaluation_samples{%s}`, labels),
 		func() float64 {
 			e := ar.state.getLast()
-			return float64(e.samples)
+			return float64(e.Samples)
 		})
 	ar.metrics.seriesFetched = utils.GetOrCreateGauge(fmt.Sprintf(`vmalert_alerting_rules_last_evaluation_series_fetched{%s}`, labels),
 		func() float64 {
 			e := ar.state.getLast()
-			if e.seriesFetched == nil {
+			if e.SeriesFetched == nil {
 				// means seriesFetched is unsupported
 				return -1
 			}
-			seriesFetched := float64(*e.seriesFetched)
-			if seriesFetched == 0 && e.samples > 0 {
+			seriesFetched := float64(*e.SeriesFetched)
+			if seriesFetched == 0 && e.Samples > 0 {
 				// `alert: 0.95` will fetch no series
 				// but will get one time series in response.
-				seriesFetched = float64(e.samples)
+				seriesFetched = float64(e.Samples)
 			}
 			return seriesFetched
 		})
 	return ar
 }

-// Close unregisters rule metrics
-func (ar *AlertingRule) Close() {
+// close unregisters rule metrics
+func (ar *AlertingRule) close() {
 	ar.metrics.active.Unregister()
 	ar.metrics.pending.Unregister()
 	ar.metrics.errors.Unregister()
@@ -162,6 +169,27 @@ func (ar *AlertingRule) ID() uint64 {
 	return ar.RuleID
 }

+// GetAlerts returns all alerts currently held by the rule, without filtering by alert state
+func (ar *AlertingRule) GetAlerts() []*notifier.Alert {
+	ar.alertsMu.RLock()
+	defer ar.alertsMu.RUnlock()
+	var alerts []*notifier.Alert
+	for _, a := range ar.alerts {
+		alerts = append(alerts, a)
+	}
+	return alerts
+}
+
+// GetAlert returns the alert stored under the given id, or nil if the rule holds no such alert
+func (ar *AlertingRule) GetAlert(id uint64) *notifier.Alert {
+	ar.alertsMu.RLock()
+	defer ar.alertsMu.RUnlock()
+	if ar.alerts == nil {
+		return nil
+	}
+	return ar.alerts[id]
+}
+
 func (ar *AlertingRule) logDebugf(at time.Time, a *notifier.Alert, format string, args ...interface{}) {
 	if !ar.Debug {
 		return
@@ -188,6 +216,26 @@ func (ar *AlertingRule) logDebugf(at time.Time, a *notifier.Alert, format string
 	logger.Infof("%s", prefix+msg)
 }

+// updateWith copies Expr, For, KeepFiringFor, Labels, Annotations, EvalInterval, Debug, q and state from r.
+// It returns an error if r is not an *AlertingRule. The alerts map isn't copied since
+// it should be updated in next 2 Execs
+func (ar *AlertingRule) updateWith(r Rule) error {
+	nr, ok := r.(*AlertingRule)
+	if !ok {
+		return fmt.Errorf("BUG: attempt to update alerting rule with wrong type %#v", r)
+	}
+	ar.Expr = nr.Expr
+	ar.For = nr.For
+	ar.KeepFiringFor = nr.KeepFiringFor
+	ar.Labels = nr.Labels
+	ar.Annotations = nr.Annotations
+	ar.EvalInterval = nr.EvalInterval
+	ar.Debug = nr.Debug
+	ar.q = nr.q
+	ar.state = nr.state
+	return nil
+}
+
 type labelSet struct {
 	// origin labels extracted from received time series
 	// plus extra labels (group labels, service labels like alertNameLabel).
@@ -223,7 +271,7 @@ func (ar *AlertingRule) toLabels(m datasource.Metric, qFn templates.QueryFn) (*l
 		Expr:   ar.Expr,
 	})
 	if err != nil {
-		return nil, fmt.Errorf("failed to expand labels: %s", err)
+		return nil, fmt.Errorf("failed to expand labels: %w", err)
 	}
 	for k, v := range extraLabels {
 		ls.processed[k] = v
@@ -248,25 +296,34 @@ func (ar *AlertingRule) toLabels(m datasource.Metric, qFn templates.QueryFn) (*l
 	return ls, nil
 }

-// ExecRange executes alerting rule on the given time range similarly to Exec.
-// It doesn't update internal states of the Rule and meant to be used just
-// to get time series for backfilling.
-// It returns ALERT and ALERT_FOR_STATE time series as result.
-func (ar *AlertingRule) ExecRange(ctx context.Context, start, end time.Time) ([]prompbmarshal.TimeSeries, error) {
+// execRange executes alerting rule on the given time range similarly to exec.
+// When making consecutive calls make sure to respect time linearity for start and end params,
+// as this function modifies AlertingRule alerts state.
+// It is not thread safe.
+// It returns ALERT and ALERT_FOR_STATE time series as a result.
+func (ar *AlertingRule) execRange(ctx context.Context, start, end time.Time) ([]prompbmarshal.TimeSeries, error) {
 	res, err := ar.q.QueryRange(ctx, ar.Expr, start, end)
 	if err != nil {
 		return nil, err
 	}
 	var result []prompbmarshal.TimeSeries
+	holdAlertState := make(map[uint64]*notifier.Alert)
 	qFn := func(query string) ([]datasource.Metric, error) {
 		return nil, fmt.Errorf("`query` template isn't supported in replay mode")
 	}
 	for _, s := range res.Data {
+		ls, err := ar.toLabels(s, qFn)
+		if err != nil {
+			return nil, fmt.Errorf("failed to expand labels: %w", err) // %w keeps the cause reachable via errors.Is/As
+		}
+		h := hash(ls.processed) // key under which this series' alert state is kept in ar.alerts
 		a, err := ar.newAlert(s, nil, time.Time{}, qFn) // initial alert
 		if err != nil {
-			return nil, fmt.Errorf("failed to create alert: %s", err)
+			return nil, fmt.Errorf("failed to create alert: %w", err)
 		}
-		if ar.For == 0 { // if alert is instant
+
+		// if alert is instant, For: 0
+		if ar.For == 0 {
 			a.State = notifier.StateFiring
 			for i := range s.Values {
 				result = append(result, ar.alertToTimeSeries(a, s.Timestamps[i])...)
@@ -278,18 +335,32 @@ func (ar *AlertingRule) ExecRange(ctx context.Context, start, end time.Time) ([]
 		prevT := time.Time{}
 		for i := range s.Values {
 			at := time.Unix(s.Timestamps[i], 0)
+			// try to restore alert's state on the first iteration
+			if at.Equal(start) {
+				if prev, ok := ar.alerts[h]; ok {
+					a = prev
+					prevT = at // so the gap check below sees no gap for the restored alert
+				}
+			}
 			if at.Sub(prevT) > ar.EvalInterval {
 				// reset to Pending if there are gaps > EvalInterval between DPs
 				a.State = notifier.StatePending
 				a.ActiveAt = at
-			} else if at.Sub(a.ActiveAt) >= ar.For {
+				a.Start = time.Time{}
+			} else if at.Sub(a.ActiveAt) >= ar.For && a.State != notifier.StateFiring {
 				a.State = notifier.StateFiring
 				a.Start = at
 			}
 			prevT = at
 			result = append(result, ar.alertToTimeSeries(a, s.Timestamps[i])...)
+
+			// save alert's state on last iteration, so it can be used on the next execRange call
+			if at.Equal(end) {
+				holdAlertState[h] = a
+			}
 		}
 	}
+	ar.alerts = holdAlertState
 	return result, nil
 }

@@ -297,19 +368,19 @@ func (ar *AlertingRule) ExecRange(ctx context.Context, start, end time.Time) ([]
 // is kept in memory state and consequently repeatedly sent to the AlertManager.
 const resolvedRetention = 15 * time.Minute

-// Exec executes AlertingRule expression via the given Querier.
+// exec executes AlertingRule expression via the given Querier.
 // Based on the Querier results AlertingRule maintains notifier.Alerts
-func (ar *AlertingRule) Exec(ctx context.Context, ts time.Time, limit int) ([]prompbmarshal.TimeSeries, error) {
+func (ar *AlertingRule) exec(ctx context.Context, ts time.Time, limit int) ([]prompbmarshal.TimeSeries, error) {
 	start := time.Now()
 	res, req, err := ar.q.Query(ctx, ar.Expr, ts)
-	curState := ruleStateEntry{
-		time:          start,
-		at:            ts,
-		duration:      time.Since(start),
-		samples:       len(res.Data),
-		seriesFetched: res.SeriesFetched,
-		err:           err,
-		curl:          requestToCurl(req),
+	curState := StateEntry{
+		Time:          start,
+		At:            ts,
+		Duration:      time.Since(start),
+		Samples:       len(res.Data),
+		SeriesFetched: res.SeriesFetched,
+		Err:           err,
+		Curl:          requestToCurl(req),
 	}

 	defer func() {
@@ -323,7 +394,7 @@ func (ar *AlertingRule) Exec(ctx context.Context, ts time.Time, limit int) ([]pr
 		return nil, fmt.Errorf("failed to execute query %q: %w", ar.Expr, err)
 	}

-	ar.logDebugf(ts, nil, "query returned %d samples (elapsed: %s)", curState.samples, curState.duration)
+	ar.logDebugf(ts, nil, "query returned %d samples (elapsed: %s)", curState.Samples, curState.Duration)

 	for h, a := range ar.alerts {
 		// cleanup inactive alerts from previous Exec
@@ -342,15 +413,15 @@ func (ar *AlertingRule) Exec(ctx context.Context, ts time.Time, limit int) ([]pr
 	for _, m := range res.Data {
 		ls, err := ar.toLabels(m, qFn)
 		if err != nil {
-			curState.err = fmt.Errorf("failed to expand labels: %s", err)
-			return nil, curState.err
+			curState.Err = fmt.Errorf("failed to expand labels: %w", err)
+			return nil, curState.Err
 		}
 		h := hash(ls.processed)
 		if _, ok := updated[h]; ok {
 			// duplicate may be caused by extra labels
 			// conflicting with the metric labels
-			curState.err = fmt.Errorf("labels %v: %w", ls.processed, errDuplicate)
-			return nil, curState.err
+			curState.Err = fmt.Errorf("labels %v: %w", ls.processed, errDuplicate)
+			return nil, curState.Err
 		}
 		updated[h] = struct{}{}
 		if a, ok := ar.alerts[h]; ok {
@@ -373,8 +444,8 @@ func (ar *AlertingRule) Exec(ctx context.Context, ts time.Time, limit int) ([]pr
 		}
 		a, err := ar.newAlert(m, ls, start, qFn)
 		if err != nil {
-			curState.err = fmt.Errorf("failed to create alert: %w", err)
-			return nil, curState.err
+			curState.Err = fmt.Errorf("failed to create alert: %w", err)
+			return nil, curState.Err
 		}
 		a.ID = h
 		a.State = notifier.StatePending
@@ -423,8 +494,8 @@ func (ar *AlertingRule) Exec(ctx context.Context, ts time.Time, limit int) ([]pr
 	}
 	if limit > 0 && numActivePending > limit {
 		ar.alerts = map[uint64]*notifier.Alert{}
-		curState.err = fmt.Errorf("exec exceeded limit of %d with %d alerts", limit, numActivePending)
-		return nil, curState.err
+		curState.Err = fmt.Errorf("exec exceeded limit of %d with %d alerts", limit, numActivePending)
+		return nil, curState.Err
 	}
 	return ar.toTimeSeries(ts.Unix()), nil
 }
@@ -441,26 +512,6 @@ func (ar *AlertingRule) toTimeSeries(timestamp int64) []prompbmarshal.TimeSeries
 	return tss
 }

-// UpdateWith copies all significant fields.
-// alerts state isn't copied since
-// it should be updated in next 2 Execs
-func (ar *AlertingRule) UpdateWith(r Rule) error {
-	nr, ok := r.(*AlertingRule)
-	if !ok {
-		return fmt.Errorf("BUG: attempt to update alerting rule with wrong type %#v", r)
-	}
-	ar.Expr = nr.Expr
-	ar.For = nr.For
-	ar.KeepFiringFor = nr.KeepFiringFor
-	ar.Labels = nr.Labels
-	ar.Annotations = nr.Annotations
-	ar.EvalInterval = nr.EvalInterval
-	ar.Debug = nr.Debug
-	ar.q = nr.q
-	ar.state = nr.state
-	return nil
-}
-
 // TODO: consider hashing algorithm in VM
 func hash(labels map[string]string) uint64 {
 	hash := fnv.New64a()
@@ -487,7 +538,7 @@ func (ar *AlertingRule) newAlert(m datasource.Metric, ls *labelSet, start time.T
 	if ls == nil {
 		ls, err = ar.toLabels(m, qFn)
 		if err != nil {
-			return nil, fmt.Errorf("failed to expand labels: %s", err)
+			return nil, fmt.Errorf("failed to expand labels: %w", err)
 		}
 	}
 	a := &notifier.Alert{
@@ -503,102 +554,6 @@ func (ar *AlertingRule) newAlert(m datasource.Metric, ls *labelSet, start time.T
 	return a, err
 }

-// AlertAPI generates APIAlert object from alert by its id(hash)
-func (ar *AlertingRule) AlertAPI(id uint64) *APIAlert {
-	ar.alertsMu.RLock()
-	defer ar.alertsMu.RUnlock()
-	a, ok := ar.alerts[id]
-	if !ok {
-		return nil
-	}
-	return ar.newAlertAPI(*a)
-}
-
-// ToAPI returns Rule representation in form of APIRule
-// Isn't thread-safe. Call must be protected by AlertingRule mutex.
-func (ar *AlertingRule) ToAPI() APIRule {
-	lastState := ar.state.getLast()
-	r := APIRule{
-		Type:              "alerting",
-		DatasourceType:    ar.Type.String(),
-		Name:              ar.Name,
-		Query:             ar.Expr,
-		Duration:          ar.For.Seconds(),
-		KeepFiringFor:     ar.KeepFiringFor.Seconds(),
-		Labels:            ar.Labels,
-		Annotations:       ar.Annotations,
-		LastEvaluation:    lastState.time,
-		EvaluationTime:    lastState.duration.Seconds(),
-		Health:            "ok",
-		State:             "inactive",
-		Alerts:            ar.AlertsToAPI(),
-		LastSamples:       lastState.samples,
-		LastSeriesFetched: lastState.seriesFetched,
-		MaxUpdates:        ar.state.size(),
-		Updates:           ar.state.getAll(),
-		Debug:             ar.Debug,
-
-		// encode as strings to avoid rounding in JSON
-		ID:      fmt.Sprintf("%d", ar.ID()),
-		GroupID: fmt.Sprintf("%d", ar.GroupID),
-	}
-	if lastState.err != nil {
-		r.LastError = lastState.err.Error()
-		r.Health = "err"
-	}
-	// satisfy APIRule.State logic
-	if len(r.Alerts) > 0 {
-		r.State = notifier.StatePending.String()
-		stateFiring := notifier.StateFiring.String()
-		for _, a := range r.Alerts {
-			if a.State == stateFiring {
-				r.State = stateFiring
-				break
-			}
-		}
-	}
-	return r
-}
-
-// AlertsToAPI generates list of APIAlert objects from existing alerts
-func (ar *AlertingRule) AlertsToAPI() []*APIAlert {
-	var alerts []*APIAlert
-	ar.alertsMu.RLock()
-	for _, a := range ar.alerts {
-		if a.State == notifier.StateInactive {
-			continue
-		}
-		alerts = append(alerts, ar.newAlertAPI(*a))
-	}
-	ar.alertsMu.RUnlock()
-	return alerts
-}
-
-func (ar *AlertingRule) newAlertAPI(a notifier.Alert) *APIAlert {
-	aa := &APIAlert{
-		// encode as strings to avoid rounding
-		ID:      fmt.Sprintf("%d", a.ID),
-		GroupID: fmt.Sprintf("%d", a.GroupID),
-		RuleID:  fmt.Sprintf("%d", ar.RuleID),
-
-		Name:        a.Name,
-		Expression:  ar.Expr,
-		Labels:      a.Labels,
-		Annotations: a.Annotations,
-		State:       a.State.String(),
-		ActiveAt:    a.ActiveAt,
-		Restored:    a.Restored,
-		Value:       strconv.FormatFloat(a.Value, 'f', -1, 32),
-	}
-	if alertURLGeneratorFn != nil {
-		aa.SourceLink = alertURLGeneratorFn(a)
-	}
-	if a.State == notifier.StateFiring && !a.KeepFiringSince.IsZero() {
-		aa.Stabilizing = true
-	}
-	return aa
-}
-
 const (
 	// alertMetricName is the metric name for synthetic alert timeseries.
 	alertMetricName = "ALERTS"
@@ -646,10 +601,10 @@ func alertForToTimeSeries(a *notifier.Alert, timestamp int64) prompbmarshal.Time
 	return newTimeSeries([]float64{float64(a.ActiveAt.Unix())}, []int64{timestamp}, labels)
 }

-// Restore restores the value of ActiveAt field for active alerts,
+// restore restores the value of ActiveAt field for active alerts,
 // based on previously written time series `alertForStateMetricName`.
 // Only rules with For > 0 can be restored.
-func (ar *AlertingRule) Restore(ctx context.Context, q datasource.Querier, ts time.Time, lookback time.Duration) error {
+func (ar *AlertingRule) restore(ctx context.Context, q datasource.Querier, ts time.Time, lookback time.Duration) error {
 	if ar.For < 1 {
 		return nil
 	}
@@ -661,44 +616,41 @@ func (ar *AlertingRule) Restore(ctx context.Context, q datasource.Querier, ts ti
 		return nil
 	}

-	for _, a := range ar.alerts {
+	nameStr := fmt.Sprintf("%s=%q", alertNameLabel, ar.Name)
+	if !*disableAlertGroupLabel {
+		nameStr = fmt.Sprintf("%s=%q,%s=%q", alertGroupNameLabel, ar.GroupName, alertNameLabel, ar.Name)
+	}
+	var labelsFilter string
+	for k, v := range ar.Labels {
+		labelsFilter += fmt.Sprintf(",%s=%q", k, v)
+	}
+	expr := fmt.Sprintf("last_over_time(%s{%s%s}[%ds])",
+		alertForStateMetricName, nameStr, labelsFilter, int(lookback.Seconds()))
+
+	res, _, err := q.Query(ctx, expr, ts)
+	if err != nil {
+		return fmt.Errorf("failed to execute restore query %q: %w", expr, err) // no trailing space in the message
+	}
+
+	if len(res.Data) < 1 {
+		ar.logDebugf(ts, nil, "no response was received from restore query")
+		return nil
+	}
+	for _, series := range res.Data {
+		series.DelLabel("__name__")
+		labelSet := make(map[string]string, len(series.Labels))
+		for _, v := range series.Labels {
+			labelSet[v.Name] = v.Value
+		}
+		id := hash(labelSet)
+		a, ok := ar.alerts[id]
+		if !ok {
+			continue
+		}
 		if a.Restored || a.State != notifier.StatePending {
 			continue
 		}
-
-		var labelsFilter []string
-		for k, v := range a.Labels {
-			labelsFilter = append(labelsFilter, fmt.Sprintf("%s=%q", k, v))
-		}
-		sort.Strings(labelsFilter)
-		expr := fmt.Sprintf("last_over_time(%s{%s}[%ds])",
-			alertForStateMetricName, strings.Join(labelsFilter, ","), int(lookback.Seconds()))
-
-		ar.logDebugf(ts, nil, "restoring alert state via query %q", expr)
-
-		res, _, err := q.Query(ctx, expr, ts)
-		if err != nil {
-			return err
-		}
-
-		qMetrics := res.Data
-		if len(qMetrics) < 1 {
-			ar.logDebugf(ts, nil, "no response was received from restore query")
-			continue
-		}
-
-		// only one series expected in response
-		m := qMetrics[0]
-		// __name__ supposed to be alertForStateMetricName
-		m.DelLabel("__name__")
-
-		// we assume that restore query contains all label matchers,
-		// so all received labels will match anyway if their number is equal.
-		if len(m.Labels) != len(a.Labels) {
-			ar.logDebugf(ts, nil, "state restore query returned not expected label-set %v", m.Labels)
-			continue
-		}
-		a.ActiveAt = time.Unix(int64(m.Values[0]), 0)
+		a.ActiveAt = time.Unix(int64(series.Values[0]), 0)
 		a.Restored = true
 		logger.Infof("alert %q (%d) restored to state at %v", a.Name, a.ID, a.ActiveAt)
 	}
--- a/app/vmalert/rule/alerting_test.go
+++ b/app/vmalert/rule/alerting_test.go
@@ -1,4 +1,4 @@
-package main
+package rule

 import (
 	"context"
@@ -303,13 +303,13 @@ func TestAlertingRule_Exec(t *testing.T) {
 	fakeGroup := Group{Name: "TestRule_Exec"}
 	for _, tc := range testCases {
 		t.Run(tc.rule.Name, func(t *testing.T) {
-			fq := &fakeQuerier{}
+			fq := &datasource.FakeQuerier{}
 			tc.rule.q = fq
 			tc.rule.GroupID = fakeGroup.ID()
 			for i, step := range tc.steps {
-				fq.reset()
-				fq.add(step...)
-				if _, err := tc.rule.Exec(context.TODO(), time.Now(), 0); err != nil {
+				fq.Reset()
+				fq.Add(step...)
+				if _, err := tc.rule.exec(context.TODO(), time.Now(), 0); err != nil {
 					t.Fatalf("unexpected err: %s", err)
 				}
 				// artificial delay between applying steps
@@ -346,15 +346,18 @@ func TestAlertingRule_Exec(t *testing.T) {
 }

 func TestAlertingRule_ExecRange(t *testing.T) {
+	fakeGroup := Group{Name: "TestRule_ExecRange"}
 	testCases := []struct {
-		rule      *AlertingRule
-		data      []datasource.Metric
-		expAlerts []*notifier.Alert
+		rule                    *AlertingRule
+		data                    []datasource.Metric
+		expAlerts               []*notifier.Alert
+		expHoldAlertStateAlerts map[uint64]*notifier.Alert
 	}{
 		{
 			newTestAlertingRule("empty", 0),
 			[]datasource.Metric{},
 			nil,
+			nil,
 		},
 		{
 			newTestAlertingRule("empty labels", 0),
@@ -364,6 +367,7 @@ func TestAlertingRule_ExecRange(t *testing.T) {
 			[]*notifier.Alert{
 				{State: notifier.StateFiring},
 			},
+			nil,
 		},
 		{
 			newTestAlertingRule("single-firing", 0),
@@ -376,6 +380,7 @@ func TestAlertingRule_ExecRange(t *testing.T) {
 					State:  notifier.StateFiring,
 				},
 			},
+			nil,
 		},
 		{
 			newTestAlertingRule("single-firing-on-range", 0),
@@ -387,6 +392,7 @@ func TestAlertingRule_ExecRange(t *testing.T) {
 				{State: notifier.StateFiring},
 				{State: notifier.StateFiring},
 			},
+			nil,
 		},
 		{
 			newTestAlertingRule("for-pending", time.Second),
@@ -398,6 +404,16 @@ func TestAlertingRule_ExecRange(t *testing.T) {
 				{State: notifier.StatePending, ActiveAt: time.Unix(3, 0)},
 				{State: notifier.StatePending, ActiveAt: time.Unix(5, 0)},
 			},
+			map[uint64]*notifier.Alert{hash(map[string]string{"alertname": "for-pending"}): {
+				GroupID:     fakeGroup.ID(),
+				Name:        "for-pending",
+				Labels:      map[string]string{"alertname": "for-pending"},
+				Annotations: map[string]string{},
+				State:       notifier.StatePending,
+				ActiveAt:    time.Unix(5, 0),
+				Value:       1,
+				For:         time.Second,
+			}},
 		},
 		{
 			newTestAlertingRule("for-firing", 3*time.Second),
@@ -409,6 +425,38 @@ func TestAlertingRule_ExecRange(t *testing.T) {
 				{State: notifier.StatePending, ActiveAt: time.Unix(1, 0)},
 				{State: notifier.StateFiring, ActiveAt: time.Unix(1, 0)},
 			},
+			map[uint64]*notifier.Alert{hash(map[string]string{"alertname": "for-firing"}): {
+				GroupID:     fakeGroup.ID(),
+				Name:        "for-firing",
+				Labels:      map[string]string{"alertname": "for-firing"},
+				Annotations: map[string]string{},
+				State:       notifier.StateFiring,
+				ActiveAt:    time.Unix(1, 0),
+				Start:       time.Unix(5, 0),
+				Value:       1,
+				For:         3 * time.Second,
+			}},
+		},
+		{
+			newTestAlertingRule("for-hold-pending", time.Second),
+			[]datasource.Metric{
+				{Values: []float64{1, 1, 1}, Timestamps: []int64{1, 2, 5}},
+			},
+			[]*notifier.Alert{
+				{State: notifier.StatePending, ActiveAt: time.Unix(1, 0)},
+				{State: notifier.StateFiring, ActiveAt: time.Unix(1, 0)},
+				{State: notifier.StatePending, ActiveAt: time.Unix(5, 0)},
+			},
+			map[uint64]*notifier.Alert{hash(map[string]string{"alertname": "for-hold-pending"}): {
+				GroupID:     fakeGroup.ID(),
+				Name:        "for-hold-pending",
+				Labels:      map[string]string{"alertname": "for-hold-pending"},
+				Annotations: map[string]string{},
+				State:       notifier.StatePending,
+				ActiveAt:    time.Unix(5, 0),
+				Value:       1,
+				For:         time.Second,
+			}},
 		},
 		{
 			newTestAlertingRule("for=>pending=>firing=>pending=>firing=>pending", time.Second),
@@ -422,12 +470,14 @@ func TestAlertingRule_ExecRange(t *testing.T) {
 				{State: notifier.StateFiring, ActiveAt: time.Unix(5, 0)},
 				{State: notifier.StatePending, ActiveAt: time.Unix(20, 0)},
 			},
+			nil,
 		},
 		{
-			newTestAlertingRule("multi-series-for=>pending=>pending=>firing", 3*time.Second),
+			newTestAlertingRule("multi-series", 3*time.Second),
 			[]datasource.Metric{
 				{Values: []float64{1, 1, 1}, Timestamps: []int64{1, 3, 5}},
-				{Values: []float64{1, 1}, Timestamps: []int64{1, 5},
+				{
+					Values: []float64{1, 1}, Timestamps: []int64{1, 5},
 					Labels: []datasource.Label{{Name: "foo", Value: "bar"}},
 				},
 			},
@@ -435,22 +485,49 @@ func TestAlertingRule_ExecRange(t *testing.T) {
 				{State: notifier.StatePending, ActiveAt: time.Unix(1, 0)},
 				{State: notifier.StatePending, ActiveAt: time.Unix(1, 0)},
 				{State: notifier.StateFiring, ActiveAt: time.Unix(1, 0)},
-				//
-				{State: notifier.StatePending, ActiveAt: time.Unix(1, 0),
+				{
+					State: notifier.StatePending, ActiveAt: time.Unix(1, 0),
 					Labels: map[string]string{
 						"foo": "bar",
-					}},
-				{State: notifier.StatePending, ActiveAt: time.Unix(5, 0),
+					},
+				},
+				{
+					State: notifier.StatePending, ActiveAt: time.Unix(5, 0),
 					Labels: map[string]string{
 						"foo": "bar",
-					}},
+					},
+				},
+			},
+			map[uint64]*notifier.Alert{
+				hash(map[string]string{"alertname": "multi-series"}): {
+					GroupID:     fakeGroup.ID(),
+					Name:        "multi-series",
+					Labels:      map[string]string{"alertname": "multi-series"},
+					Annotations: map[string]string{},
+					State:       notifier.StateFiring,
+					ActiveAt:    time.Unix(1, 0),
+					Start:       time.Unix(5, 0),
+					Value:       1,
+					For:         3 * time.Second,
+				},
+				hash(map[string]string{"alertname": "multi-series", "foo": "bar"}): {
+					GroupID:     fakeGroup.ID(),
+					Name:        "multi-series",
+					Labels:      map[string]string{"alertname": "multi-series", "foo": "bar"},
+					Annotations: map[string]string{},
+					State:       notifier.StatePending,
+					ActiveAt:    time.Unix(5, 0),
+					Value:       1,
+					For:         3 * time.Second,
+				},
 			},
 		},
 		{
 			newTestRuleWithLabels("multi-series-firing", "source", "vm"),
 			[]datasource.Metric{
 				{Values: []float64{1, 1}, Timestamps: []int64{1, 100}},
-				{Values: []float64{1, 1}, Timestamps: []int64{1, 5},
+				{
+					Values: []float64{1, 1}, Timestamps: []int64{1, 5},
 					Labels: []datasource.Label{{Name: "foo", Value: "bar"}},
 				},
 			},
@@ -471,16 +548,16 @@ func TestAlertingRule_ExecRange(t *testing.T) {
 					"source": "vm",
 				}},
 			},
+			nil,
 		},
 	}
-	fakeGroup := Group{Name: "TestRule_ExecRange"}
 	for _, tc := range testCases {
 		t.Run(tc.rule.Name, func(t *testing.T) {
-			fq := &fakeQuerier{}
+			fq := &datasource.FakeQuerier{}
 			tc.rule.q = fq
 			tc.rule.GroupID = fakeGroup.ID()
-			fq.add(tc.data...)
-			gotTS, err := tc.rule.ExecRange(context.TODO(), time.Now(), time.Now())
+			fq.Add(tc.data...)
+			gotTS, err := tc.rule.execRange(context.TODO(), time.Unix(1, 0), time.Unix(5, 0))
 			if err != nil {
 				t.Fatalf("unexpected err: %s", err)
 			}
@@ -506,30 +583,35 @@ func TestAlertingRule_ExecRange(t *testing.T) {
 					t.Fatalf("%d: expected \n%v but got \n%v", i, exp, got)
 				}
 			}
+			if tc.expHoldAlertStateAlerts != nil {
+				if !reflect.DeepEqual(tc.expHoldAlertStateAlerts, tc.rule.alerts) {
+					t.Fatalf("expected hold alerts state: \n%v but got \n%v", tc.expHoldAlertStateAlerts, tc.rule.alerts)
+				}
+			}
 		})
 	}
 }

 func TestGroup_Restore(t *testing.T) {
 	defaultTS := time.Now()
-	fqr := &fakeQuerierWithRegistry{}
+	fqr := &datasource.FakeQuerierWithRegistry{}
 	fn := func(rules []config.Rule, expAlerts map[uint64]*notifier.Alert) {
 		t.Helper()
-		defer fqr.reset()
+		defer fqr.Reset()

 		for _, r := range rules {
-			fqr.set(r.Expr, metricWithValueAndLabels(t, 0, "__name__", r.Alert))
+			fqr.Set(r.Expr, metricWithValueAndLabels(t, 0, "__name__", r.Alert))
 		}

-		fg := newGroup(config.Group{Name: "TestRestore", Rules: rules}, fqr, time.Second, nil)
+		fg := NewGroup(config.Group{Name: "TestRestore", Rules: rules}, fqr, time.Second, nil)
 		wg := sync.WaitGroup{}
 		wg.Add(1)
 		go func() {
-			nts := func() []notifier.Notifier { return []notifier.Notifier{&fakeNotifier{}} }
-			fg.start(context.Background(), nts, nil, fqr)
+			nts := func() []notifier.Notifier { return []notifier.Notifier{&notifier.FakeNotifier{}} }
+			fg.Start(context.Background(), nts, nil, fqr)
 			wg.Done()
 		}()
-		fg.close()
+		fg.Close()
 		wg.Wait()

 		gotAlerts := make(map[uint64]*notifier.Alert)
@@ -558,6 +640,9 @@ func TestGroup_Restore(t *testing.T) {
 			if got.ActiveAt != exp.ActiveAt {
 				t.Fatalf("expected ActiveAt %v; got %v", exp.ActiveAt, got.ActiveAt)
 			}
+			if got.Name != exp.Name {
+				t.Fatalf("expected alertname %q; got %q", exp.Name, got.Name)
+			}
 		}
 	}

@@ -573,44 +658,28 @@ func TestGroup_Restore(t *testing.T) {
 		[]config.Rule{{Alert: "foo", Expr: "foo", For: promutils.NewDuration(time.Second)}},
 		map[uint64]*notifier.Alert{
 			hash(map[string]string{alertNameLabel: "foo", alertGroupNameLabel: "TestRestore"}): {
+				Name:     "foo",
 				ActiveAt: defaultTS,
 			},
 		})
-	fqr.reset()
+	fqr.Reset()

 	// one active alert with state restore
 	ts := time.Now().Truncate(time.Hour)
-	fqr.set(`last_over_time(ALERTS_FOR_STATE{alertgroup="TestRestore",alertname="foo"}[3600s])`,
+	fqr.Set(`last_over_time(ALERTS_FOR_STATE{alertgroup="TestRestore",alertname="foo"}[3600s])`,
 		stateMetric("foo", ts))
 	fn(
 		[]config.Rule{{Alert: "foo", Expr: "foo", For: promutils.NewDuration(time.Second)}},
 		map[uint64]*notifier.Alert{
 			hash(map[string]string{alertNameLabel: "foo", alertGroupNameLabel: "TestRestore"}): {
-				ActiveAt: ts},
+				Name:     "foo",
+				ActiveAt: ts,
+			},
 		})

 	// two rules, two active alerts, one with state restored
 	ts = time.Now().Truncate(time.Hour)
-	fqr.set(`last_over_time(ALERTS_FOR_STATE{alertgroup="TestRestore",alertname="bar"}[3600s])`,
-		stateMetric("foo", ts))
-	fn(
-		[]config.Rule{
-			{Alert: "foo", Expr: "foo", For: promutils.NewDuration(time.Second)},
-			{Alert: "bar", Expr: "bar", For: promutils.NewDuration(time.Second)},
-		},
-		map[uint64]*notifier.Alert{
-			hash(map[string]string{alertNameLabel: "foo", alertGroupNameLabel: "TestRestore"}): {
-				ActiveAt: defaultTS,
-			},
-			hash(map[string]string{alertNameLabel: "bar", alertGroupNameLabel: "TestRestore"}): {
-				ActiveAt: ts},
-		})
-
-	// two rules, two active alerts, two with state restored
-	ts = time.Now().Truncate(time.Hour)
-	fqr.set(`last_over_time(ALERTS_FOR_STATE{alertgroup="TestRestore",alertname="foo"}[3600s])`,
-		stateMetric("foo", ts))
-	fqr.set(`last_over_time(ALERTS_FOR_STATE{alertgroup="TestRestore",alertname="bar"}[3600s])`,
+	fqr.Set(`last_over_time(ALERTS_FOR_STATE{alertgroup="TestRestore",alertname="bar"}[3600s])`,
 		stateMetric("bar", ts))
 	fn(
 		[]config.Rule{
@@ -619,74 +688,102 @@ func TestGroup_Restore(t *testing.T) {
 		},
 		map[uint64]*notifier.Alert{
 			hash(map[string]string{alertNameLabel: "foo", alertGroupNameLabel: "TestRestore"}): {
+				Name:     "foo",
+				ActiveAt: defaultTS,
+			},
+			hash(map[string]string{alertNameLabel: "bar", alertGroupNameLabel: "TestRestore"}): {
+				Name:     "bar",
+				ActiveAt: ts,
+			},
+		})
+
+	// two rules, two active alerts, two with state restored
+	ts = time.Now().Truncate(time.Hour)
+	fqr.Set(`last_over_time(ALERTS_FOR_STATE{alertgroup="TestRestore",alertname="foo"}[3600s])`,
+		stateMetric("foo", ts))
+	fqr.Set(`last_over_time(ALERTS_FOR_STATE{alertgroup="TestRestore",alertname="bar"}[3600s])`,
+		stateMetric("bar", ts))
+	fn(
+		[]config.Rule{
+			{Alert: "foo", Expr: "foo", For: promutils.NewDuration(time.Second)},
+			{Alert: "bar", Expr: "bar", For: promutils.NewDuration(time.Second)},
+		},
+		map[uint64]*notifier.Alert{
+			hash(map[string]string{alertNameLabel: "foo", alertGroupNameLabel: "TestRestore"}): {
+				Name:     "foo",
 				ActiveAt: ts,
 			},
 			hash(map[string]string{alertNameLabel: "bar", alertGroupNameLabel: "TestRestore"}): {
-				ActiveAt: ts},
+				Name:     "bar",
+				ActiveAt: ts,
+			},
 		})

 	// one active alert but wrong state restore
 	ts = time.Now().Truncate(time.Hour)
-	fqr.set(`last_over_time(ALERTS_FOR_STATE{alertname="bar",alertgroup="TestRestore"}[3600s])`,
+	fqr.Set(`last_over_time(ALERTS_FOR_STATE{alertname="bar",alertgroup="TestRestore"}[3600s])`,
 		stateMetric("wrong alert", ts))
 	fn(
 		[]config.Rule{{Alert: "foo", Expr: "foo", For: promutils.NewDuration(time.Second)}},
 		map[uint64]*notifier.Alert{
 			hash(map[string]string{alertNameLabel: "foo", alertGroupNameLabel: "TestRestore"}): {
+				Name:     "foo",
 				ActiveAt: defaultTS,
 			},
 		})

 	// one active alert with labels
 	ts = time.Now().Truncate(time.Hour)
-	fqr.set(`last_over_time(ALERTS_FOR_STATE{alertgroup="TestRestore",alertname="foo",env="dev"}[3600s])`,
+	fqr.Set(`last_over_time(ALERTS_FOR_STATE{alertgroup="TestRestore",alertname="foo",env="dev"}[3600s])`,
 		stateMetric("foo", ts, "env", "dev"))
 	fn(
 		[]config.Rule{{Alert: "foo", Expr: "foo", Labels: map[string]string{"env": "dev"}, For: promutils.NewDuration(time.Second)}},
 		map[uint64]*notifier.Alert{
 			hash(map[string]string{alertNameLabel: "foo", alertGroupNameLabel: "TestRestore", "env": "dev"}): {
+				Name:     "foo",
 				ActiveAt: ts,
 			},
 		})

 	// one active alert with restore labels missmatch
 	ts = time.Now().Truncate(time.Hour)
-	fqr.set(`last_over_time(ALERTS_FOR_STATE{alertgroup="TestRestore",alertname="foo",env="dev"}[3600s])`,
+	fqr.Set(`last_over_time(ALERTS_FOR_STATE{alertgroup="TestRestore",alertname="foo",env="dev"}[3600s])`,
 		stateMetric("foo", ts, "env", "dev", "team", "foo"))
 	fn(
 		[]config.Rule{{Alert: "foo", Expr: "foo", Labels: map[string]string{"env": "dev"}, For: promutils.NewDuration(time.Second)}},
 		map[uint64]*notifier.Alert{
 			hash(map[string]string{alertNameLabel: "foo", alertGroupNameLabel: "TestRestore", "env": "dev"}): {
+				Name:     "foo",
 				ActiveAt: defaultTS,
 			},
 		})
 }

 func TestAlertingRule_Exec_Negative(t *testing.T) {
-	fq := &fakeQuerier{}
+	fq := &datasource.FakeQuerier{}
 	ar := newTestAlertingRule("test", 0)
 	ar.Labels = map[string]string{"job": "test"}
 	ar.q = fq

 	// successful attempt
-	fq.add(metricWithValueAndLabels(t, 1, "__name__", "foo", "job", "bar"))
-	_, err := ar.Exec(context.TODO(), time.Now(), 0)
+	fq.Add(metricWithValueAndLabels(t, 1, "__name__", "foo", "job", "bar"))
+	_, err := ar.exec(context.TODO(), time.Now(), 0)
 	if err != nil {
 		t.Fatal(err)
 	}

 	// label `job` will collide with rule extra label and will make both time series equal
-	fq.add(metricWithValueAndLabels(t, 1, "__name__", "foo", "job", "baz"))
-	_, err = ar.Exec(context.TODO(), time.Now(), 0)
+	fq.Add(metricWithValueAndLabels(t, 1, "__name__", "foo", "job", "baz"))
+	_, err = ar.exec(context.TODO(), time.Now(), 0)
 	if !errors.Is(err, errDuplicate) {
 		t.Fatalf("expected to have %s error; got %s", errDuplicate, err)
 	}

-	fq.reset()
+	fq.Reset()

 	expErr := "connection reset by peer"
-	fq.setErr(errors.New(expErr))
-	_, err = ar.Exec(context.TODO(), time.Now(), 0)
+	fq.SetErr(errors.New(expErr))
+	_, err = ar.exec(context.TODO(), time.Now(), 0)
 	if err == nil {
 		t.Fatalf("expected to get err; got nil")
 	}
@@ -696,7 +793,7 @@ func TestAlertingRule_Exec_Negative(t *testing.T) {
 }

 func TestAlertingRuleLimit(t *testing.T) {
-	fq := &fakeQuerier{}
+	fq := &datasource.FakeQuerier{}
 	ar := newTestAlertingRule("test", 0)
 	ar.Labels = map[string]string{"job": "test"}
 	ar.q = fq
@@ -728,15 +825,15 @@ func TestAlertingRuleLimit(t *testing.T) {
 		err       error
 		timestamp = time.Now()
 	)
-	fq.add(metricWithValueAndLabels(t, 1, "__name__", "foo", "job", "bar"))
-	fq.add(metricWithValueAndLabels(t, 1, "__name__", "foo", "bar", "job"))
+	fq.Add(metricWithValueAndLabels(t, 1, "__name__", "foo", "job", "bar"))
+	fq.Add(metricWithValueAndLabels(t, 1, "__name__", "foo", "bar", "job"))
 	for _, testCase := range testCases {
-		_, err = ar.Exec(context.TODO(), timestamp, testCase.limit)
+		_, err = ar.exec(context.TODO(), timestamp, testCase.limit)
 		if err != nil && !strings.EqualFold(err.Error(), testCase.err) {
 			t.Fatal(err)
 		}
 	}
-	fq.reset()
+	fq.Reset()
 }

 func TestAlertingRule_Template(t *testing.T) {
@@ -844,7 +941,8 @@ func TestAlertingRule_Template(t *testing.T) {
 				hash(map[string]string{
 					alertNameLabel:      "OriginLabels",
 					alertGroupNameLabel: "Testing",
-					"instance":          "foo"}): {
+					"instance":          "foo",
+				}): {
 					Labels: map[string]string{
 						alertNameLabel:      "OriginLabels",
 						alertGroupNameLabel: "Testing",
@@ -860,19 +958,18 @@ func TestAlertingRule_Template(t *testing.T) {
 	fakeGroup := Group{Name: "TestRule_Exec"}
 	for _, tc := range testCases {
 		t.Run(tc.rule.Name, func(t *testing.T) {
-			fq := &fakeQuerier{}
+			fq := &datasource.FakeQuerier{}
 			tc.rule.GroupID = fakeGroup.ID()
 			tc.rule.q = fq
-			tc.rule.state = newRuleState(10)
-			fq.add(tc.metrics...)
-			if _, err := tc.rule.Exec(context.TODO(), time.Now(), 0); err != nil {
+			tc.rule.state = &ruleState{entries: make([]StateEntry, 10)}
+			fq.Add(tc.metrics...)
+			if _, err := tc.rule.exec(context.TODO(), time.Now(), 0); err != nil {
 				t.Fatalf("unexpected err: %s", err)
 			}
 			for hash, expAlert := range tc.expAlerts {
 				gotAlert := tc.rule.alerts[hash]
 				if gotAlert == nil {
 					t.Fatalf("alert %d is missing; labels: %v; annotations: %v", hash, expAlert.Labels, expAlert.Annotations)
-					break
 				}
 				if !reflect.DeepEqual(expAlert.Annotations, gotAlert.Annotations) {
 					t.Fatalf("expected to have annotations %#v; got %#v", expAlert.Annotations, gotAlert.Annotations)
@@ -980,7 +1077,7 @@ func newTestAlertingRule(name string, waitFor time.Duration) *AlertingRule {
 		For:          waitFor,
 		EvalInterval: waitFor,
 		alerts:       make(map[uint64]*notifier.Alert),
-		state:        newRuleState(10),
+		state:        &ruleState{entries: make([]StateEntry, 10)},
 	}
 	return &rule
 }
--- a/app/vmalert/rule/group.go
+++ b/app/vmalert/rule/group.go
@@ -1,8 +1,10 @@
-package main
+package rule

 import (
 	"context"
+	"encoding/json"
 	"errors"
+	"flag"
 	"fmt"
 	"hash/fnv"
 	"net/url"
@@ -11,7 +13,7 @@ import (
 	"sync"
 	"time"

-	"github.com/VictoriaMetrics/metrics"
+	"github.com/cheggaaa/pb/v3"

 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config"
 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
@@ -21,16 +23,34 @@ import (
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/decimal"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
+	"github.com/VictoriaMetrics/metrics"
+)
+
+var (
+	ruleUpdateEntriesLimit = flag.Int("rule.updateEntriesLimit", 20, "Defines the max number of rule's state updates stored in-memory. "+
+		"Rule's updates are available on rule's Details page and are used for debugging purposes. The number of stored updates can be overridden per rule via update_entries_limit param.")
+	resendDelay        = flag.Duration("rule.resendDelay", 0, "Minimum amount of time to wait before resending an alert to notifier")
+	maxResolveDuration = flag.Duration("rule.maxResolveDuration", 0, "Limits the maximum duration for automatic alert expiration, "+
+		"which by default is 4 times evaluationInterval of the parent group")
+	evalDelay = flag.Duration("rule.evalDelay", 30*time.Second, "Adjustment of the `time` parameter for rule evaluation requests to compensate intentional data delay from the datasource. "+
+		"Normally, should be equal to `-search.latencyOffset` (cmd-line flag configured for VictoriaMetrics single-node or vmselect).")
+	disableAlertGroupLabel = flag.Bool("disableAlertgroupLabel", false, "Whether to disable adding group's Name as label to generated alerts and time series.")
+	remoteReadLookBack     = flag.Duration("remoteRead.lookback", time.Hour, "Lookback defines how far to look into past for alerts timeseries."+
+		" For example, if lookback=1h then range from now() to now()-1h will be scanned.")
 )

 // Group is an entity for grouping rules
 type Group struct {
-	mu             sync.RWMutex
-	Name           string
-	File           string
-	Rules          []Rule
-	Type           config.Type
-	Interval       time.Duration
+	mu         sync.RWMutex
+	Name       string
+	File       string
+	Rules      []Rule
+	Type       config.Type
+	Interval   time.Duration
+	EvalOffset *time.Duration
+	// EvalDelay will adjust timestamp for rule evaluation requests to compensate intentional query delay from datasource.
+	// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5155
+	EvalDelay      *time.Duration
 	Limit          int
 	Concurrency    int
 	Checksum       string
@@ -51,6 +71,9 @@ type Group struct {
 	evalCancel context.CancelFunc

 	metrics *groupMetrics
+	// evalAlignment will make the timestamp of group query
+	// requests be aligned with interval
+	evalAlignment *bool
 }

 type groupMetrics struct {
@@ -92,7 +115,8 @@ func mergeLabels(groupName, ruleName string, set1, set2 map[string]string) map[s
 	return r
 }

-func newGroup(cfg config.Group, qb datasource.QuerierBuilder, defaultInterval time.Duration, labels map[string]string) *Group {
+// NewGroup returns a new group
+func NewGroup(cfg config.Group, qb datasource.QuerierBuilder, defaultInterval time.Duration, labels map[string]string) *Group {
 	g := &Group{
 		Type:            cfg.Type,
 		Name:            cfg.Name,
@@ -105,6 +129,7 @@ func newGroup(cfg config.Group, qb datasource.QuerierBuilder, defaultInterval ti
 		Headers:         make(map[string]string),
 		NotifierHeaders: make(map[string]string),
 		Labels:          cfg.Labels,
+		evalAlignment:   cfg.EvalAlignment,

 		doneCh:     make(chan struct{}),
 		finishedCh: make(chan struct{}),
@@ -116,6 +141,12 @@ func newGroup(cfg config.Group, qb datasource.QuerierBuilder, defaultInterval ti
 	if g.Concurrency < 1 {
 		g.Concurrency = 1
 	}
+	if cfg.EvalOffset != nil {
+		g.EvalOffset = &cfg.EvalOffset.D
+	}
+	if cfg.EvalDelay != nil {
+		g.EvalDelay = &cfg.EvalDelay.D
+	}
 	for _, h := range cfg.Headers {
 		g.Headers[h.Key] = h.Value
 	}
@@ -145,11 +176,11 @@ func newGroup(cfg config.Group, qb datasource.QuerierBuilder, defaultInterval ti
 	return g
 }

-func (g *Group) newRule(qb datasource.QuerierBuilder, rule config.Rule) Rule {
-	if rule.Alert != "" {
-		return newAlertingRule(qb, g, rule)
+func (g *Group) newRule(qb datasource.QuerierBuilder, r config.Rule) Rule {
+	if r.Alert != "" {
+		return NewAlertingRule(qb, g, r)
 	}
-	return newRecordingRule(qb, g, rule)
+	return NewRecordingRule(qb, g, r)
 }

 // ID return unique group ID that consists of
@@ -163,11 +194,15 @@ func (g *Group) ID() uint64 {
 	hash.Write([]byte("\xff"))
 	hash.Write([]byte(g.Name))
 	hash.Write([]byte(g.Type.Get()))
+	hash.Write([]byte(g.Interval.String()))
+	if g.EvalOffset != nil {
+		hash.Write([]byte(g.EvalOffset.String()))
+	}
 	return hash.Sum64()
 }

-// Restore restores alerts state for group rules
-func (g *Group) Restore(ctx context.Context, qb datasource.QuerierBuilder, ts time.Time, lookback time.Duration) error {
+// restore restores alerts state for group rules
+func (g *Group) restore(ctx context.Context, qb datasource.QuerierBuilder, ts time.Time, lookback time.Duration) error {
 	for _, rule := range g.Rules {
 		ar, ok := rule.(*AlertingRule)
 		if !ok {
@@ -183,7 +218,7 @@ func (g *Group) Restore(ctx context.Context, qb datasource.QuerierBuilder, ts ti
 			Headers:            g.Headers,
 			Debug:              ar.Debug,
 		})
-		if err := ar.Restore(ctx, q, ts, lookback); err != nil {
+		if err := ar.restore(ctx, q, ts, lookback); err != nil {
 			return fmt.Errorf("error while restoring rule %q: %w", rule, err)
 		}
 	}
@@ -193,7 +228,7 @@ func (g *Group) Restore(ctx context.Context, qb datasource.QuerierBuilder, ts ti
 // updateWith updates existing group with
 // passed group object. This function ignores group
 // evaluation interval change. It supposed to be updated
-// in group.start function.
+// in group.Start function.
 // Not thread-safe.
 func (g *Group) updateWith(newGroup *Group) error {
 	rulesRegistry := make(map[uint64]Rule)
@@ -206,11 +241,11 @@ func (g *Group) updateWith(newGroup *Group) error {
 		if !ok {
 			// old rule is not present in the new list
 			// so we mark it for removing
-			g.Rules[i].Close()
+			g.Rules[i].close()
 			g.Rules[i] = nil
 			continue
 		}
-		if err := or.UpdateWith(nr); err != nil {
+		if err := or.updateWith(nr); err != nil {
 			return err
 		}
 		delete(rulesRegistry, nr.ID())
@@ -243,10 +278,10 @@ func (g *Group) updateWith(newGroup *Group) error {
 	return nil
 }

-// interruptEval interrupts in-flight rules evaluations
+// InterruptEval interrupts in-flight rules evaluations
 // within the group. It is expected that g.evalCancel
 // will be repopulated after the call.
-func (g *Group) interruptEval() {
+func (g *Group) InterruptEval() {
 	g.mu.RLock()
 	defer g.mu.RUnlock()

@@ -255,12 +290,13 @@ func (g *Group) interruptEval() {
 	}
 }

-func (g *Group) close() {
+// Close stops the group and its rules, unregisters group metrics
+func (g *Group) Close() {
 	if g.doneCh == nil {
 		return
 	}
 	close(g.doneCh)
-	g.interruptEval()
+	g.InterruptEval()
 	<-g.finishedCh

 	g.metrics.iterationDuration.Unregister()
@@ -268,24 +304,25 @@ func (g *Group) close() {
 	g.metrics.iterationMissed.Unregister()
 	g.metrics.iterationInterval.Unregister()
 	for _, rule := range g.Rules {
-		rule.Close()
+		rule.close()
 	}
 }

-var skipRandSleepOnGroupStart bool
+// SkipRandSleepOnGroupStart will skip random sleep delay in group first evaluation
+var SkipRandSleepOnGroupStart bool

-func (g *Group) start(ctx context.Context, nts func() []notifier.Notifier, rw *remotewrite.Client, rr datasource.QuerierBuilder) {
+// Start starts group's evaluation
+func (g *Group) Start(ctx context.Context, nts func() []notifier.Notifier, rw remotewrite.RWClient, rr datasource.QuerierBuilder) {
 	defer func() { close(g.finishedCh) }()

-	// Spread group rules evaluation over time in order to reduce load on VictoriaMetrics.
-	if !skipRandSleepOnGroupStart {
-		randSleep := uint64(float64(g.Interval) * (float64(g.ID()) / (1 << 64)))
-		sleepOffset := uint64(time.Now().UnixNano()) % uint64(g.Interval)
-		if randSleep < sleepOffset {
-			randSleep += uint64(g.Interval)
-		}
-		randSleep -= sleepOffset
-		sleepTimer := time.NewTimer(time.Duration(randSleep))
+	evalTS := time.Now()
+	// sleep random duration to spread group rules evaluation
+	// over time in order to reduce load on datasource.
+	if !SkipRandSleepOnGroupStart {
+		sleepBeforeStart := delayBeforeStart(evalTS, g.ID(), g.Interval, g.EvalOffset)
+		g.infof("will start in %v", sleepBeforeStart)
+
+		sleepTimer := time.NewTimer(sleepBeforeStart)
 		select {
 		case <-ctx.Done():
 			sleepTimer.Stop()
@@ -295,18 +332,17 @@ func (g *Group) start(ctx context.Context, nts func() []notifier.Notifier, rw *r
 			return
 		case <-sleepTimer.C:
 		}
+		evalTS = evalTS.Add(sleepBeforeStart)
 	}

 	e := &executor{
-		rw:                       rw,
-		notifiers:                nts,
+		Rw:                       rw,
+		Notifiers:                nts,
 		notifierHeaders:          g.NotifierHeaders,
 		previouslySentSeriesToRW: make(map[uint64]map[string][]prompbmarshal.Label),
 	}

-	evalTS := time.Now()
-
-	logger.Infof("group %q started; interval=%v; concurrency=%d", g.Name, g.Interval, g.Concurrency)
+	g.infof("started")

 	eval := func(ctx context.Context, ts time.Time) {
 		g.metrics.iterationTotal.Inc()
@@ -320,6 +356,7 @@ func (g *Group) start(ctx context.Context, nts func() []notifier.Notifier, rw *r
 		}

 		resolveDuration := getResolveDuration(g.Interval, *resendDelay, *maxResolveDuration)
+		ts = g.adjustReqTimestamp(ts)
 		errs := e.execConcurrently(ctx, g.Rules, ts, g.Concurrency, resolveDuration, g.Limit)
 		for err := range errs {
 			if err != nil {
@@ -344,7 +381,7 @@ func (g *Group) start(ctx context.Context, nts func() []notifier.Notifier, rw *r
 	// restore the rules state after the first evaluation
 	// so only active alerts can be restored.
 	if rr != nil {
-		err := g.Restore(ctx, rr, evalTS, *remoteReadLookBack)
+		err := g.restore(ctx, rr, evalTS, *remoteReadLookBack)
 		if err != nil {
 			logger.Errorf("error while restoring ruleState for group %q: %s", g.Name, err)
 		}
@@ -375,19 +412,12 @@ func (g *Group) start(ctx context.Context, nts func() []notifier.Notifier, rw *r
 				continue
 			}

-			// ensure that staleness is tracked or existing rules only
+			// ensure that staleness is tracked for existing rules only
 			e.purgeStaleSeries(g.Rules)
-
 			e.notifierHeaders = g.NotifierHeaders
-
-			if g.Interval != ng.Interval {
-				g.Interval = ng.Interval
-				t.Stop()
-				t = time.NewTicker(g.Interval)
-				evalTS = time.Now()
-			}
 			g.mu.Unlock()
-			logger.Infof("group %q re-started; interval=%v; concurrency=%d", g.Name, g.Interval, g.Concurrency)
+
+			g.infof("re-started")
 		case <-t.C:
 			missed := (time.Since(evalTS) / g.Interval) - 1
 			if missed < 0 {
@@ -405,6 +435,134 @@ func (g *Group) start(ctx context.Context, nts func() []notifier.Notifier, rw *r
 	}
 }

+// UpdateWith inserts new group to updateCh
+func (g *Group) UpdateWith(new *Group) {
+	g.updateCh <- new
+}
+
+// DeepCopy returns a deep copy of group
+func (g *Group) DeepCopy() *Group {
+	g.mu.RLock()
+	data, _ := json.Marshal(g)
+	g.mu.RUnlock()
+	newG := Group{}
+	_ = json.Unmarshal(data, &newG)
+	newG.Rules = g.Rules
+	return &newG
+}
+
+// delayBeforeStart returns the delay to wait after ts so that the first evaluation
+// lands on a key-dependent position within the interval. When `offset` is set and that
+// position falls before `offset` within its interval, the delay is extended by `offset`.
+func delayBeforeStart(ts time.Time, key uint64, interval time.Duration, offset *time.Duration) time.Duration {
+	var randSleep time.Duration
+	randSleep = time.Duration(float64(interval) * (float64(key) / (1 << 64)))
+	sleepOffset := time.Duration(ts.UnixNano() % interval.Nanoseconds())
+	if randSleep < sleepOffset {
+		randSleep += interval
+	}
+	randSleep -= sleepOffset
+	// check if `ts` after randSleep is before `offset`,
+	// if it is, add extra eval_offset to randSleep.
+	// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3409.
+	if offset != nil {
+		tmpEvalTS := ts.Add(randSleep)
+		if tmpEvalTS.Before(tmpEvalTS.Truncate(interval).Add(*offset)) {
+			randSleep += *offset
+		}
+	}
+	return randSleep
+}
+
+func (g *Group) infof(format string, args ...interface{}) {
+	msg := fmt.Sprintf(format, args...)
+	logger.Infof("group %q %s; interval=%v; eval_offset=%v; concurrency=%d",
+		g.Name, msg, g.Interval, g.EvalOffset, g.Concurrency)
+}
+
+// Replay performs group replay
+func (g *Group) Replay(start, end time.Time, rw remotewrite.RWClient, maxDataPoint, replayRuleRetryAttempts int, replayDelay time.Duration, disableProgressBar bool) int {
+	var total int
+	step := g.Interval * time.Duration(maxDataPoint)
+	ri := rangeIterator{start: start, end: end, step: step}
+	iterations := int(end.Sub(start)/step) + 1
+	fmt.Printf("\nGroup %q"+
+		"\ninterval: \t%v"+
+		"\nrequests to make: \t%d"+
+		"\nmax range per request: \t%v\n",
+		g.Name, g.Interval, iterations, step)
+	if g.Limit > 0 {
+		fmt.Printf("\nPlease note, `limit: %d` param has no effect during replay.\n",
+			g.Limit)
+	}
+	for _, rule := range g.Rules {
+		fmt.Printf("> Rule %q (ID: %d)\n", rule, rule.ID())
+		var bar *pb.ProgressBar
+		if !disableProgressBar {
+			bar = pb.StartNew(iterations)
+		}
+		ri.reset()
+		for ri.next() {
+			n, err := replayRule(rule, ri.s, ri.e, rw, replayRuleRetryAttempts)
+			if err != nil {
+				logger.Fatalf("rule %q: %s", rule, err)
+			}
+			total += n
+			if bar != nil {
+				bar.Increment()
+			}
+		}
+		if bar != nil {
+			bar.Finish()
+		}
+		// sleep to let remote storage to flush data on-disk
+		// so chained rules could be calculated correctly
+		time.Sleep(replayDelay)
+	}
+	return total
+}
+
+// ExecOnce evaluates all the rules of the group once with the given timestamp.
+func (g *Group) ExecOnce(ctx context.Context, nts func() []notifier.Notifier, rw remotewrite.RWClient, evalTS time.Time) chan error {
+	e := &executor{
+		Rw:                       rw,
+		Notifiers:                nts,
+		notifierHeaders:          g.NotifierHeaders,
+		previouslySentSeriesToRW: make(map[uint64]map[string][]prompbmarshal.Label),
+	}
+	if len(g.Rules) < 1 {
+		return nil
+	}
+	resolveDuration := getResolveDuration(g.Interval, *resendDelay, *maxResolveDuration)
+	return e.execConcurrently(ctx, g.Rules, evalTS, g.Concurrency, resolveDuration, g.Limit)
+}
+
+type rangeIterator struct {
+	step       time.Duration
+	start, end time.Time
+
+	iter int
+	s, e time.Time
+}
+
+func (ri *rangeIterator) reset() {
+	ri.iter = 0
+	ri.s, ri.e = time.Time{}, time.Time{}
+}
+
+func (ri *rangeIterator) next() bool {
+	ri.s = ri.start.Add(ri.step * time.Duration(ri.iter))
+	if !ri.end.After(ri.s) {
+		return false
+	}
+	ri.e = ri.s.Add(ri.step)
+	if ri.e.After(ri.end) {
+		ri.e = ri.end
+	}
+	ri.iter++
+	return true
+}
+
 // getResolveDuration returns the duration after which firing alert
 // can be considered as resolved.
 func getResolveDuration(groupInterval, delta, maxDuration time.Duration) time.Duration {
@@ -418,11 +576,49 @@ func getResolveDuration(groupInterval, delta, maxDuration time.Duration) time.Du
 	return resolveDuration
 }

+func (g *Group) adjustReqTimestamp(timestamp time.Time) time.Time {
+	if g.EvalOffset != nil {
+		// calculate the min timestamp on the evaluationInterval
+		intervalStart := timestamp.Truncate(g.Interval)
+		ts := intervalStart.Add(*g.EvalOffset)
+		if timestamp.Before(ts) {
+			// if passed timestamp is before the expected evaluation offset,
+			// then we should adjust it to the previous evaluation round.
+			// E.g. request with evaluationInterval=1h and evaluationOffset=30m
+			// was evaluated at 11:20. Then the timestamp should be adjusted
+			// to 10:30, to the previous evaluationInterval.
+			return ts.Add(-g.Interval)
+		}
+		// when `eval_offset` is used, ts shouldn't be affected by `eval_alignment` and `eval_delay`
+		// since it should always be aligned.
+		return ts
+	}
+
+	timestamp = timestamp.Add(-g.getEvalDelay())
+
+	// always apply the alignment as a last step
+	if g.evalAlignment == nil || *g.evalAlignment {
+		// align query time with interval to get similar result with grafana when plotting time series.
+		// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5049
+		// and https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1232
+		return timestamp.Truncate(g.Interval)
+	}
+	return timestamp
+}
+
+func (g *Group) getEvalDelay() time.Duration {
+	if g.EvalDelay != nil {
+		return *g.EvalDelay
+	}
+	return *evalDelay
+}
+
+// executor contains group's notify and rw configs
 type executor struct {
-	notifiers       func() []notifier.Notifier
+	Notifiers       func() []notifier.Notifier
 	notifierHeaders map[string]string

-	rw *remotewrite.Client
+	Rw remotewrite.RWClient

 	previouslySentSeriesToRWMu sync.Mutex
 	// previouslySentSeriesToRW stores series sent to RW on previous iteration
@@ -432,6 +628,7 @@ type executor struct {
 	previouslySentSeriesToRW map[uint64]map[string][]prompbmarshal.Label
 }

+// execConcurrently executes rules concurrently if concurrency>1
 func (e *executor) execConcurrently(ctx context.Context, rules []Rule, ts time.Time, concurrency int, resolveDuration time.Duration, limit int) chan error {
 	res := make(chan error, len(rules))
 	if concurrency == 1 {
@@ -446,14 +643,14 @@ func (e *executor) execConcurrently(ctx context.Context, rules []Rule, ts time.T
 	sem := make(chan struct{}, concurrency)
 	go func() {
 		wg := sync.WaitGroup{}
-		for _, rule := range rules {
+		for _, r := range rules {
 			sem <- struct{}{}
 			wg.Add(1)
 			go func(r Rule) {
 				res <- e.exec(ctx, r, ts, resolveDuration, limit)
 				<-sem
 				wg.Done()
-			}(rule)
+			}(r)
 		}
 		wg.Wait()
 		close(res)
@@ -466,15 +663,12 @@ var (

 	execTotal  = metrics.NewCounter(`vmalert_execution_total`)
 	execErrors = metrics.NewCounter(`vmalert_execution_errors_total`)
-
-	remoteWriteErrors = metrics.NewCounter(`vmalert_remotewrite_errors_total`)
-	remoteWriteTotal  = metrics.NewCounter(`vmalert_remotewrite_total`)
 )

-func (e *executor) exec(ctx context.Context, rule Rule, ts time.Time, resolveDuration time.Duration, limit int) error {
+func (e *executor) exec(ctx context.Context, r Rule, ts time.Time, resolveDuration time.Duration, limit int) error {
 	execTotal.Inc()

-	tss, err := rule.Exec(ctx, ts, limit)
+	tss, err := r.exec(ctx, ts, limit)
 	if err != nil {
 		if errors.Is(err, context.Canceled) {
 			// the context can be cancelled on graceful shutdown
@@ -482,17 +676,15 @@ func (e *executor) exec(ctx context.Context, rule Rule, ts time.Time, resolveDur
 			return nil
 		}
 		execErrors.Inc()
-		return fmt.Errorf("rule %q: failed to execute: %w", rule, err)
+		return fmt.Errorf("rule %q: failed to execute: %w", r, err)
 	}

-	if e.rw != nil {
+	if e.Rw != nil {
 		pushToRW := func(tss []prompbmarshal.TimeSeries) error {
 			var lastErr error
 			for _, ts := range tss {
-				remoteWriteTotal.Inc()
-				if err := e.rw.Push(ts); err != nil {
-					remoteWriteErrors.Inc()
-					lastErr = fmt.Errorf("rule %q: remote write failure: %w", rule, err)
+				if err := e.Rw.Push(ts); err != nil {
+					lastErr = fmt.Errorf("rule %q: remote write failure: %w", r, err)
 				}
 			}
 			return lastErr
@@ -501,13 +693,13 @@ func (e *executor) exec(ctx context.Context, rule Rule, ts time.Time, resolveDur
 			return err
 		}

-		staleSeries := e.getStaleSeries(rule, tss, ts)
+		staleSeries := e.getStaleSeries(r, tss, ts)
 		if err := pushToRW(staleSeries); err != nil {
 			return err
 		}
 	}

-	ar, ok := rule.(*AlertingRule)
+	ar, ok := r.(*AlertingRule)
 	if !ok {
 		return nil
 	}
@@ -519,11 +711,11 @@ func (e *executor) exec(ctx context.Context, rule Rule, ts time.Time, resolveDur

 	wg := sync.WaitGroup{}
 	errGr := new(utils.ErrGroup)
-	for _, nt := range e.notifiers() {
+	for _, nt := range e.Notifiers() {
 		wg.Add(1)
 		go func(nt notifier.Notifier) {
 			if err := nt.Send(ctx, alerts, e.notifierHeaders); err != nil {
-				errGr.Add(fmt.Errorf("rule %q: failed to send alerts to addr %q: %w", rule, nt.Addr(), err))
+				errGr.Add(fmt.Errorf("rule %q: failed to send alerts to addr %q: %w", r, nt.Addr(), err))
 			}
 			wg.Done()
 		}(nt)
@@ -533,7 +725,7 @@ func (e *executor) exec(ctx context.Context, rule Rule, ts time.Time, resolveDur
 }

 // getStaledSeries checks whether there are stale series from previously sent ones.
-func (e *executor) getStaleSeries(rule Rule, tss []prompbmarshal.TimeSeries, timestamp time.Time) []prompbmarshal.TimeSeries {
+func (e *executor) getStaleSeries(r Rule, tss []prompbmarshal.TimeSeries, timestamp time.Time) []prompbmarshal.TimeSeries {
 	ruleLabels := make(map[string][]prompbmarshal.Label, len(tss))
 	for _, ts := range tss {
 		// convert labels to strings so we can compare with previously sent series
@@ -541,7 +733,7 @@ func (e *executor) getStaleSeries(rule Rule, tss []prompbmarshal.TimeSeries, tim
 		ruleLabels[key] = ts.Labels
 	}

-	rID := rule.ID()
+	rID := r.ID()
 	var staleS []prompbmarshal.TimeSeries
 	// check whether there are series which disappeared and need to be marked as stale
 	e.previouslySentSeriesToRWMu.Lock()
--- a/app/vmalert/rule/group_test.go
+++ b/app/vmalert/rule/group_test.go
@@ -1,16 +1,22 @@
-package main
+package rule

 import (
 	"context"
 	"fmt"
+	"math"
+	"os"
 	"reflect"
 	"sort"
 	"testing"
 	"time"

+	"gopkg.in/yaml.v2"
+
 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config"
+	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/remotewrite"
+	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/templates"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/decimal"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutils"
@@ -19,7 +25,15 @@ import (
 func init() {
 	// Disable rand sleep on group start during tests in order to speed up test execution.
 	// Rand sleep is needed only in prod code.
-	skipRandSleepOnGroupStart = true
+	SkipRandSleepOnGroupStart = true
+}
+
+func TestMain(m *testing.M) {
+	if err := templates.Load([]string{}, true); err != nil {
+		fmt.Fprintf(os.Stderr, "failed to load templates for test: %s\n", err)
+		os.Exit(1)
+	}
+	os.Exit(m.Run())
 }

 func TestUpdateWith(t *testing.T) {
@@ -35,18 +49,19 @@ func TestUpdateWith(t *testing.T) {
 		},
 		{
 			"update alerting rule",
-			[]config.Rule{{
-				Alert: "foo",
-				Expr:  "up > 0",
-				For:   promutils.NewDuration(time.Second),
-				Labels: map[string]string{
-					"bar": "baz",
+			[]config.Rule{
+				{
+					Alert: "foo",
+					Expr:  "up > 0",
+					For:   promutils.NewDuration(time.Second),
+					Labels: map[string]string{
+						"bar": "baz",
+					},
+					Annotations: map[string]string{
+						"summary":     "{{ $value|humanize }}",
+						"description": "{{$labels}}",
+					},
 				},
-				Annotations: map[string]string{
-					"summary":     "{{ $value|humanize }}",
-					"description": "{{$labels}}",
-				},
-			},
 				{
 					Alert: "bar",
 					Expr:  "up > 0",
@@ -54,7 +69,8 @@ func TestUpdateWith(t *testing.T) {
 					Labels: map[string]string{
 						"bar": "baz",
 					},
-				}},
+				},
+			},
 			[]config.Rule{
 				{
 					Alert: "foo",
@@ -75,7 +91,8 @@ func TestUpdateWith(t *testing.T) {
 					Labels: map[string]string{
 						"bar": "baz",
 					},
-				}},
+				},
+			},
 		},
 		{
 			"update recording rule",
@@ -134,7 +151,7 @@ func TestUpdateWith(t *testing.T) {
 	for _, tc := range testCases {
 		t.Run(tc.name, func(t *testing.T) {
 			g := &Group{Name: "test"}
-			qb := &fakeQuerier{}
+			qb := &datasource.FakeQuerier{}
 			for _, r := range tc.currentRules {
 				r.ID = config.HashRule(r)
 				g.Rules = append(g.Rules, g.newRule(qb, r))
@@ -166,7 +183,7 @@ func TestUpdateWith(t *testing.T) {
 				if got.ID() != want.ID() {
 					t.Fatalf("expected to have rule %q; got %q", want, got)
 				}
-				if err := compareRules(t, got, want); err != nil {
+				if err := CompareRules(t, got, want); err != nil {
 					t.Fatalf("comparison error: %s", err)
 				}
 			}
@@ -175,17 +192,31 @@ func TestUpdateWith(t *testing.T) {
 }

 func TestGroupStart(t *testing.T) {
-	// TODO: make parsing from string instead of file
-	groups, err := config.Parse([]string{"config/testdata/rules/rules1-good.rules"}, notifier.ValidateTemplates, true)
+	const (
+		rules = `
+  - name: groupTest
+    rules:
+      - alert: VMRows
+        for: 1ms
+        expr: vm_rows > 0
+        labels:
+          label: bar
+          host: "{{ $labels.instance }}"
+        annotations:
+          summary: "{{ $value }}"
+`
+	)
+	var groups []config.Group
+	err := yaml.Unmarshal([]byte(rules), &groups)
 	if err != nil {
 		t.Fatalf("failed to parse rules: %s", err)
 	}

-	fs := &fakeQuerier{}
-	fn := &fakeNotifier{}
+	fs := &datasource.FakeQuerier{}
+	fn := &notifier.FakeNotifier{}

 	const evalInterval = time.Millisecond
-	g := newGroup(groups[0], fs, evalInterval, map[string]string{"cluster": "east-1"})
+	g := NewGroup(groups[0], fs, evalInterval, map[string]string{"cluster": "east-1"})
 	g.Concurrency = 2

 	const inst1, inst2, job = "foo", "bar", "baz"
@@ -200,7 +231,7 @@ func TestGroupStart(t *testing.T) {
 	alert1.State = notifier.StateFiring
 	// add external label
 	alert1.Labels["cluster"] = "east-1"
-	// add rule labels - see config/testdata/rules1-good.rules
+	// add rule labels
 	alert1.Labels["label"] = "bar"
 	alert1.Labels["host"] = inst1
 	// add service labels
@@ -215,7 +246,7 @@ func TestGroupStart(t *testing.T) {
 	alert2.State = notifier.StateFiring
 	// add external label
 	alert2.Labels["cluster"] = "east-1"
-	// add rule labels - see config/testdata/rules1-good.rules
+	// add rule labels
 	alert2.Labels["label"] = "bar"
 	alert2.Labels["host"] = inst2
 	// add service labels
@@ -224,40 +255,40 @@ func TestGroupStart(t *testing.T) {
 	alert2.ID = hash(alert2.Labels)

 	finished := make(chan struct{})
-	fs.add(m1)
-	fs.add(m2)
+	fs.Add(m1)
+	fs.Add(m2)
 	go func() {
-		g.start(context.Background(), func() []notifier.Notifier { return []notifier.Notifier{fn} }, nil, fs)
+		g.Start(context.Background(), func() []notifier.Notifier { return []notifier.Notifier{fn} }, nil, fs)
 		close(finished)
 	}()

 	// wait for multiple evals
 	time.Sleep(20 * evalInterval)

-	gotAlerts := fn.getAlerts()
+	gotAlerts := fn.GetAlerts()
 	expectedAlerts := []notifier.Alert{*alert1, *alert2}
 	compareAlerts(t, expectedAlerts, gotAlerts)

-	gotAlertsNum := fn.getCounter()
+	gotAlertsNum := fn.GetCounter()
 	if gotAlertsNum < len(expectedAlerts)*2 {
 		t.Fatalf("expected to receive at least %d alerts; got %d instead",
 			len(expectedAlerts)*2, gotAlertsNum)
 	}

 	// reset previous data
-	fs.reset()
+	fs.Reset()
 	// and set only one datapoint for response
-	fs.add(m1)
+	fs.Add(m1)

 	// wait for multiple evals
 	time.Sleep(20 * evalInterval)

-	gotAlerts = fn.getAlerts()
+	gotAlerts = fn.GetAlerts()
 	alert2.State = notifier.StateInactive
 	expectedAlerts = []notifier.Alert{*alert1, *alert2}
 	compareAlerts(t, expectedAlerts, gotAlerts)

-	g.close()
+	g.Close()
 	<-finished
 }

@@ -292,13 +323,13 @@ func TestGetStaleSeries(t *testing.T) {
 	e := &executor{
 		previouslySentSeriesToRW: make(map[uint64]map[string][]prompbmarshal.Label),
 	}
-	f := func(rule Rule, labels, expLabels [][]prompbmarshal.Label) {
+	f := func(r Rule, labels, expLabels [][]prompbmarshal.Label) {
 		t.Helper()
 		var tss []prompbmarshal.TimeSeries
 		for _, l := range labels {
 			tss = append(tss, newTimeSeriesPB([]float64{1}, []int64{ts.Unix()}, l))
 		}
-		staleS := e.getStaleSeries(rule, tss, ts)
+		staleS := e.getStaleSeries(r, tss, ts)
 		if staleS == nil && expLabels == nil {
 			return
 		}
@@ -434,17 +465,17 @@ func TestPurgeStaleSeries(t *testing.T) {
 }

 func TestFaultyNotifier(t *testing.T) {
-	fq := &fakeQuerier{}
-	fq.add(metricWithValueAndLabels(t, 1, "__name__", "foo", "job", "bar"))
+	fq := &datasource.FakeQuerier{}
+	fq.Add(metricWithValueAndLabels(t, 1, "__name__", "foo", "job", "bar"))

 	r := newTestAlertingRule("instant", 0)
 	r.q = fq

-	fn := &fakeNotifier{}
+	fn := &notifier.FakeNotifier{}
 	e := &executor{
-		notifiers: func() []notifier.Notifier {
+		Notifiers: func() []notifier.Notifier {
 			return []notifier.Notifier{
-				&faultyNotifier{},
+				&notifier.FaultyNotifier{},
 				fn,
 			}
 		},
@@ -460,7 +491,7 @@ func TestFaultyNotifier(t *testing.T) {
 	tn := time.Now()
 	deadline := tn.Add(delay / 2)
 	for {
-		if fn.getCounter() > 0 {
+		if fn.GetCounter() > 0 {
 			return
 		}
 		if tn.After(deadline) {
@@ -473,17 +504,17 @@ func TestFaultyNotifier(t *testing.T) {
 }

 func TestFaultyRW(t *testing.T) {
-	fq := &fakeQuerier{}
-	fq.add(metricWithValueAndLabels(t, 1, "__name__", "foo", "job", "bar"))
+	fq := &datasource.FakeQuerier{}
+	fq.Add(metricWithValueAndLabels(t, 1, "__name__", "foo", "job", "bar"))

 	r := &RecordingRule{
 		Name:  "test",
-		state: newRuleState(10),
 		q:     fq,
+		state: &ruleState{entries: make([]StateEntry, 10)},
 	}

 	e := &executor{
-		rw:                       &remotewrite.Client{},
+		Rw:                       &remotewrite.Client{},
 		previouslySentSeriesToRW: make(map[uint64]map[string][]prompbmarshal.Label),
 	}

@@ -494,23 +525,38 @@ func TestFaultyRW(t *testing.T) {
 }

 func TestCloseWithEvalInterruption(t *testing.T) {
-	groups, err := config.Parse([]string{"config/testdata/rules/rules1-good.rules"}, notifier.ValidateTemplates, true)
+	const (
+		rules = `
+  - name: groupTest
+    rules:
+      - alert: VMRows
+        for: 1ms
+        expr: vm_rows > 0
+        labels:
+          label: bar
+          host: "{{ $labels.instance }}"
+        annotations:
+          summary: "{{ $value }}"
+`
+	)
+	var groups []config.Group
+	err := yaml.Unmarshal([]byte(rules), &groups)
 	if err != nil {
 		t.Fatalf("failed to parse rules: %s", err)
 	}

 	const delay = time.Second * 2
-	fq := &fakeQuerierWithDelay{delay: delay}
+	fq := &datasource.FakeQuerierWithDelay{Delay: delay}

 	const evalInterval = time.Millisecond
-	g := newGroup(groups[0], fq, evalInterval, nil)
+	g := NewGroup(groups[0], fq, evalInterval, nil)

-	go g.start(context.Background(), nil, nil, nil)
+	go g.Start(context.Background(), nil, nil, nil)

 	time.Sleep(evalInterval * 20)

 	go func() {
-		g.close()
+		g.Close()
 	}()

 	deadline := time.Tick(delay / 2)
@@ -520,3 +566,225 @@ func TestCloseWithEvalInterruption(t *testing.T) {
 	case <-g.finishedCh:
 	}
 }
+
+func TestGroupStartDelay(t *testing.T) {
+	g := &Group{}
+	// interval of 5min and key generate a static delay of 30s
+	g.Interval = time.Minute * 5
+	key := uint64(math.MaxUint64 / 10)
+
+	f := func(atS, expS string) {
+		t.Helper()
+		at, err := time.Parse(time.RFC3339Nano, atS)
+		if err != nil {
+			t.Fatal(err)
+		}
+		expTS, err := time.Parse(time.RFC3339Nano, expS)
+		if err != nil {
+			t.Fatal(err)
+		}
+		delay := delayBeforeStart(at, key, g.Interval, g.EvalOffset)
+		gotStart := at.Add(delay)
+		if !expTS.Equal(gotStart) {
+			t.Errorf("expected to get %v; got %v instead", expTS, gotStart)
+		}
+	}
+
+	// test group without offset
+	f("2023-01-01T00:00:00.000+00:00", "2023-01-01T00:00:30.000+00:00")
+	f("2023-01-01T00:00:00.999+00:00", "2023-01-01T00:00:30.000+00:00")
+	f("2023-01-01T00:00:29.000+00:00", "2023-01-01T00:00:30.000+00:00")
+	f("2023-01-01T00:00:31.000+00:00", "2023-01-01T00:05:30.000+00:00")
+
+	// test group with offset smaller than above fixed randSleep,
+	// this way randSleep will always be enough
+	offset := 20 * time.Second
+	g.EvalOffset = &offset
+
+	f("2023-01-01T00:00:00.000+00:00", "2023-01-01T00:00:30.000+00:00")
+	f("2023-01-01T00:00:29.000+00:00", "2023-01-01T00:00:30.000+00:00")
+	f("2023-01-01T00:00:31.000+00:00", "2023-01-01T00:05:30.000+00:00")
+
+	// test group with offset bigger than above fixed randSleep,
+	// this way offset will be added to delay
+	offset = 3 * time.Minute
+	g.EvalOffset = &offset
+
+	f("2023-01-01T00:00:00.000+00:00", "2023-01-01T00:03:30.000+00:00")
+	f("2023-01-01T00:00:29.000+00:00", "2023-01-01T00:03:30.000+00:00")
+	f("2023-01-01T00:01:00.000+00:00", "2023-01-01T00:08:30.000+00:00")
+	f("2023-01-01T00:03:30.000+00:00", "2023-01-01T00:08:30.000+00:00")
+	f("2023-01-01T00:07:30.000+00:00", "2023-01-01T00:13:30.000+00:00")
+
+	offset = 10 * time.Minute
+	g.EvalOffset = &offset
+	// interval of 1h and key generate a static delay of 6m
+	g.Interval = time.Hour
+
+	f("2023-01-01T00:00:00.000+00:00", "2023-01-01T00:16:00.000+00:00")
+	f("2023-01-01T00:05:00.000+00:00", "2023-01-01T00:16:00.000+00:00")
+	f("2023-01-01T00:30:00.000+00:00", "2023-01-01T01:16:00.000+00:00")
+}
+
+func TestGetPrometheusReqTimestamp(t *testing.T) {
+	offset := 30 * time.Minute
+	evalDelay := 1 * time.Minute
+	disableAlign := false
+	testCases := []struct {
+		name            string
+		g               *Group
+		originTS, expTS string
+	}{
+		{
+			"with query align + default evalDelay",
+			&Group{
+				Interval: time.Hour,
+			},
+			"2023-08-28T11:11:00+00:00",
+			"2023-08-28T11:00:00+00:00",
+		},
+		{
+			"without query align + default evalDelay",
+			&Group{
+				Interval:      time.Hour,
+				evalAlignment: &disableAlign,
+			},
+			"2023-08-28T11:11:00+00:00",
+			"2023-08-28T11:10:30+00:00",
+		},
+		{
+			"with eval_offset, find previous offset point + default evalDelay",
+			&Group{
+				EvalOffset: &offset,
+				Interval:   time.Hour,
+			},
+			"2023-08-28T11:11:00+00:00",
+			"2023-08-28T10:30:00+00:00",
+		},
+		{
+			"with eval_offset + default evalDelay",
+			&Group{
+				EvalOffset: &offset,
+				Interval:   time.Hour,
+			},
+			"2023-08-28T11:41:00+00:00",
+			"2023-08-28T11:30:00+00:00",
+		},
+		{
+			"1h interval with eval_delay",
+			&Group{
+				EvalDelay: &evalDelay,
+				Interval:  time.Hour,
+			},
+			"2023-08-28T11:41:00+00:00",
+			"2023-08-28T11:00:00+00:00",
+		},
+		{
+			"1m interval with eval_delay",
+			&Group{
+				EvalDelay: &evalDelay,
+				Interval:  time.Minute,
+			},
+			"2023-08-28T11:41:13+00:00",
+			"2023-08-28T11:40:00+00:00",
+		},
+		{
+			"disable alignment with eval_delay",
+			&Group{
+				EvalDelay:     &evalDelay,
+				Interval:      time.Hour,
+				evalAlignment: &disableAlign,
+			},
+			"2023-08-28T11:41:00+00:00",
+			"2023-08-28T11:40:00+00:00",
+		},
+	}
+	for _, tc := range testCases {
+		t.Run(tc.name, func(t *testing.T) {
+			originT, _ := time.Parse(time.RFC3339, tc.originTS)
+			expT, _ := time.Parse(time.RFC3339, tc.expTS)
+			gotTS := tc.g.adjustReqTimestamp(originT)
+			if !gotTS.Equal(expT) {
+				t.Fatalf("get wrong prometheus request timestamp, expect %s, got %s", expT, gotTS)
+			}
+		})
+	}
+}
+
+func TestRangeIterator(t *testing.T) {
+	testCases := []struct {
+		ri     rangeIterator
+		result [][2]time.Time
+	}{
+		{
+			ri: rangeIterator{
+				start: parseTime(t, "2021-01-01T12:00:00.000Z"),
+				end:   parseTime(t, "2021-01-01T12:30:00.000Z"),
+				step:  5 * time.Minute,
+			},
+			result: [][2]time.Time{
+				{parseTime(t, "2021-01-01T12:00:00.000Z"), parseTime(t, "2021-01-01T12:05:00.000Z")},
+				{parseTime(t, "2021-01-01T12:05:00.000Z"), parseTime(t, "2021-01-01T12:10:00.000Z")},
+				{parseTime(t, "2021-01-01T12:10:00.000Z"), parseTime(t, "2021-01-01T12:15:00.000Z")},
+				{parseTime(t, "2021-01-01T12:15:00.000Z"), parseTime(t, "2021-01-01T12:20:00.000Z")},
+				{parseTime(t, "2021-01-01T12:20:00.000Z"), parseTime(t, "2021-01-01T12:25:00.000Z")},
+				{parseTime(t, "2021-01-01T12:25:00.000Z"), parseTime(t, "2021-01-01T12:30:00.000Z")},
+			},
+		},
+		{
+			ri: rangeIterator{
+				start: parseTime(t, "2021-01-01T12:00:00.000Z"),
+				end:   parseTime(t, "2021-01-01T12:30:00.000Z"),
+				step:  45 * time.Minute,
+			},
+			result: [][2]time.Time{
+				{parseTime(t, "2021-01-01T12:00:00.000Z"), parseTime(t, "2021-01-01T12:30:00.000Z")},
+				{parseTime(t, "2021-01-01T12:30:00.000Z"), parseTime(t, "2021-01-01T12:30:00.000Z")},
+			},
+		},
+		{
+			ri: rangeIterator{
+				start: parseTime(t, "2021-01-01T12:00:12.000Z"),
+				end:   parseTime(t, "2021-01-01T12:00:17.000Z"),
+				step:  time.Second,
+			},
+			result: [][2]time.Time{
+				{parseTime(t, "2021-01-01T12:00:12.000Z"), parseTime(t, "2021-01-01T12:00:13.000Z")},
+				{parseTime(t, "2021-01-01T12:00:13.000Z"), parseTime(t, "2021-01-01T12:00:14.000Z")},
+				{parseTime(t, "2021-01-01T12:00:14.000Z"), parseTime(t, "2021-01-01T12:00:15.000Z")},
+				{parseTime(t, "2021-01-01T12:00:15.000Z"), parseTime(t, "2021-01-01T12:00:16.000Z")},
+				{parseTime(t, "2021-01-01T12:00:16.000Z"), parseTime(t, "2021-01-01T12:00:17.000Z")},
+			},
+		},
+	}
+
+	for i, tc := range testCases {
+		t.Run(fmt.Sprintf("case %d", i), func(t *testing.T) {
+			var j int
+			for tc.ri.next() {
+				if len(tc.result) < j+1 {
+					t.Fatalf("unexpected result for iterator on step %d: %v - %v",
+						j, tc.ri.s, tc.ri.e)
+				}
+				s, e := tc.ri.s, tc.ri.e
+				expS, expE := tc.result[j][0], tc.result[j][1]
+				if !s.Equal(expS) {
+					t.Fatalf("expected to get start=%v; got %v", expS, s)
+				}
+				if !e.Equal(expE) {
+					t.Fatalf("expected to get end=%v; got %v", expE, e)
+				}
+				j++
+			}
+		})
+	}
+}
+
+func parseTime(t *testing.T, s string) time.Time {
+	t.Helper()
+	tt, err := time.Parse("2006-01-02T15:04:05.000Z", s)
+	if err != nil {
+		t.Fatal(err)
+	}
+	return tt
+}
--- a/app/vmalert/rule/recording.go
+++ b/app/vmalert/rule/recording.go
@@ -1,4 +1,4 @@
-package main
+package rule

 import (
 	"context"
@@ -17,12 +17,14 @@ import (
 // to evaluate configured Expression and
 // return TimeSeries as result.
 type RecordingRule struct {
-	Type    config.Type
-	RuleID  uint64
-	Name    string
-	Expr    string
-	Labels  map[string]string
-	GroupID uint64
+	Type      config.Type
+	RuleID    uint64
+	Name      string
+	Expr      string
+	Labels    map[string]string
+	GroupID   uint64
+	GroupName string
+	File      string

 	q datasource.Querier

@@ -49,15 +51,18 @@ func (rr *RecordingRule) ID() uint64 {
 	return rr.RuleID
 }

-func newRecordingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rule) *RecordingRule {
+// NewRecordingRule creates a new RecordingRule
+func NewRecordingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rule) *RecordingRule {
 	rr := &RecordingRule{
-		Type:    group.Type,
-		RuleID:  cfg.ID,
-		Name:    cfg.Record,
-		Expr:    cfg.Expr,
-		Labels:  cfg.Labels,
-		GroupID: group.ID(),
-		metrics: &recordingRuleMetrics{},
+		Type:      group.Type,
+		RuleID:    cfg.ID,
+		Name:      cfg.Record,
+		Expr:      cfg.Expr,
+		Labels:    cfg.Labels,
+		GroupID:   group.ID(),
+		GroupName: group.Name,
+		File:      group.File,
+		metrics:   &recordingRuleMetrics{},
 		q: qb.BuildWithParams(datasource.QuerierParams{
 			DataSourceType:     group.Type.String(),
 			EvaluationInterval: group.Interval,
@@ -66,17 +71,22 @@ func newRecordingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rul
 		}),
 	}

+	entrySize := *ruleUpdateEntriesLimit
 	if cfg.UpdateEntriesLimit != nil {
-		rr.state = newRuleState(*cfg.UpdateEntriesLimit)
-	} else {
-		rr.state = newRuleState(*ruleUpdateEntriesLimit)
+		entrySize = *cfg.UpdateEntriesLimit
+	}
+	if entrySize < 1 {
+		entrySize = 1
+	}
+	rr.state = &ruleState{
+		entries: make([]StateEntry, entrySize),
 	}

-	labels := fmt.Sprintf(`recording=%q, group=%q, id="%d"`, rr.Name, group.Name, rr.ID())
+	labels := fmt.Sprintf(`recording=%q, group=%q, file=%q, id="%d"`, rr.Name, group.Name, group.File, rr.ID())
 	rr.metrics.errors = utils.GetOrCreateGauge(fmt.Sprintf(`vmalert_recording_rules_error{%s}`, labels),
 		func() float64 {
 			e := rr.state.getLast()
-			if e.err == nil {
+			if e.Err == nil {
 				return 0
 			}
 			return 1
@@ -84,21 +94,21 @@ func newRecordingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rul
 	rr.metrics.samples = utils.GetOrCreateGauge(fmt.Sprintf(`vmalert_recording_rules_last_evaluation_samples{%s}`, labels),
 		func() float64 {
 			e := rr.state.getLast()
-			return float64(e.samples)
+			return float64(e.Samples)
 		})
 	return rr
 }

-// Close unregisters rule metrics
-func (rr *RecordingRule) Close() {
+// close unregisters rule metrics
+func (rr *RecordingRule) close() {
 	rr.metrics.errors.Unregister()
 	rr.metrics.samples.Unregister()
 }

-// ExecRange executes recording rule on the given time range similarly to Exec.
+// execRange executes recording rule on the given time range similarly to exec.
 // It doesn't update internal states of the Rule and meant to be used just
 // to get time series for backfilling.
-func (rr *RecordingRule) ExecRange(ctx context.Context, start, end time.Time) ([]prompbmarshal.TimeSeries, error) {
+func (rr *RecordingRule) execRange(ctx context.Context, start, end time.Time) ([]prompbmarshal.TimeSeries, error) {
 	res, err := rr.q.QueryRange(ctx, rr.Expr, start, end)
 	if err != nil {
 		return nil, err
@@ -117,17 +127,17 @@ func (rr *RecordingRule) ExecRange(ctx context.Context, start, end time.Time) ([
 	return tss, nil
 }

-// Exec executes RecordingRule expression via the given Querier.
-func (rr *RecordingRule) Exec(ctx context.Context, ts time.Time, limit int) ([]prompbmarshal.TimeSeries, error) {
+// exec executes RecordingRule expression via the given Querier.
+func (rr *RecordingRule) exec(ctx context.Context, ts time.Time, limit int) ([]prompbmarshal.TimeSeries, error) {
 	start := time.Now()
 	res, req, err := rr.q.Query(ctx, rr.Expr, ts)
-	curState := ruleStateEntry{
-		time:          start,
-		at:            ts,
-		duration:      time.Since(start),
-		samples:       len(res.Data),
-		seriesFetched: res.SeriesFetched,
-		curl:          requestToCurl(req),
+	curState := StateEntry{
+		Time:          start,
+		At:            ts,
+		Duration:      time.Since(start),
+		Samples:       len(res.Data),
+		SeriesFetched: res.SeriesFetched,
+		Curl:          requestToCurl(req),
 	}

 	defer func() {
@@ -135,15 +145,15 @@ func (rr *RecordingRule) Exec(ctx context.Context, ts time.Time, limit int) ([]p
 	}()

 	if err != nil {
-		curState.err = fmt.Errorf("failed to execute query %q: %w", rr.Expr, err)
-		return nil, curState.err
+		curState.Err = fmt.Errorf("failed to execute query %q: %w", rr.Expr, err)
+		return nil, curState.Err
 	}

 	qMetrics := res.Data
 	numSeries := len(qMetrics)
 	if limit > 0 && numSeries > limit {
-		curState.err = fmt.Errorf("exec exceeded limit of %d with %d series", limit, numSeries)
-		return nil, curState.err
+		curState.Err = fmt.Errorf("exec exceeded limit of %d with %d series", limit, numSeries)
+		return nil, curState.Err
 	}

 	duplicates := make(map[string]struct{}, len(qMetrics))
@@ -152,8 +162,8 @@ func (rr *RecordingRule) Exec(ctx context.Context, ts time.Time, limit int) ([]p
 		ts := rr.toTimeSeries(r)
 		key := stringifyLabels(ts)
 		if _, ok := duplicates[key]; ok {
-			curState.err = fmt.Errorf("original metric %v; resulting labels %q: %w", r, key, errDuplicate)
-			return nil, curState.err
+			curState.Err = fmt.Errorf("original metric %v; resulting labels %q: %w", r, key, errDuplicate)
+			return nil, curState.Err
 		}
 		duplicates[key] = struct{}{}
 		tss = append(tss, ts)
@@ -193,8 +203,8 @@ func (rr *RecordingRule) toTimeSeries(m datasource.Metric) prompbmarshal.TimeSer
 	return newTimeSeries(m.Values, m.Timestamps, labels)
 }

-// UpdateWith copies all significant fields.
-func (rr *RecordingRule) UpdateWith(r Rule) error {
+// updateWith copies all significant fields.
+func (rr *RecordingRule) updateWith(r Rule) error {
 	nr, ok := r.(*RecordingRule)
 	if !ok {
 		return fmt.Errorf("BUG: attempt to update recroding rule with wrong type %#v", r)
@@ -204,32 +214,3 @@ func (rr *RecordingRule) UpdateWith(r Rule) error {
 	rr.q = nr.q
 	return nil
 }
-
-// ToAPI returns Rule's representation in form
-// of APIRule
-func (rr *RecordingRule) ToAPI() APIRule {
-	lastState := rr.state.getLast()
-	r := APIRule{
-		Type:              "recording",
-		DatasourceType:    rr.Type.String(),
-		Name:              rr.Name,
-		Query:             rr.Expr,
-		Labels:            rr.Labels,
-		LastEvaluation:    lastState.time,
-		EvaluationTime:    lastState.duration.Seconds(),
-		Health:            "ok",
-		LastSamples:       lastState.samples,
-		LastSeriesFetched: lastState.seriesFetched,
-		MaxUpdates:        rr.state.size(),
-		Updates:           rr.state.getAll(),
-
-		// encode as strings to avoid rounding
-		ID:      fmt.Sprintf("%d", rr.ID()),
-		GroupID: fmt.Sprintf("%d", rr.GroupID),
-	}
-	if lastState.err != nil {
-		r.LastError = lastState.err.Error()
-		r.Health = "err"
-	}
-	return r
-}
--- a/app/vmalert/rule/recording_test.go
+++ b/app/vmalert/rule/recording_test.go
@@ -1,4 +1,4 @@
-package main
+package rule

 import (
 	"context"
@@ -56,10 +56,12 @@ func TestRecordingRule_Exec(t *testing.T) {
 				Name: "job:foo",
 				Labels: map[string]string{
 					"source": "test",
-				}},
+				},
+			},
 			[]datasource.Metric{
 				metricWithValueAndLabels(t, 2, "__name__", "foo", "job", "foo"),
-				metricWithValueAndLabels(t, 1, "__name__", "bar", "job", "bar")},
+				metricWithValueAndLabels(t, 1, "__name__", "bar", "job", "bar"),
+			},
 			[]prompbmarshal.TimeSeries{
 				newTimeSeries([]float64{2}, []int64{timestamp.UnixNano()}, map[string]string{
 					"__name__": "job:foo",
@@ -76,11 +78,11 @@ func TestRecordingRule_Exec(t *testing.T) {
 	}
 	for _, tc := range testCases {
 		t.Run(tc.rule.Name, func(t *testing.T) {
-			fq := &fakeQuerier{}
-			fq.add(tc.metrics...)
+			fq := &datasource.FakeQuerier{}
+			fq.Add(tc.metrics...)
 			tc.rule.q = fq
-			tc.rule.state = newRuleState(10)
-			tss, err := tc.rule.Exec(context.TODO(), time.Now(), 0)
+			tc.rule.state = &ruleState{entries: make([]StateEntry, 10)}
+			tss, err := tc.rule.exec(context.TODO(), time.Now(), 0)
 			if err != nil {
 				t.Fatalf("unexpected Exec err: %s", err)
 			}
@@ -141,7 +143,8 @@ func TestRecordingRule_ExecRange(t *testing.T) {
 			}},
 			[]datasource.Metric{
 				metricWithValueAndLabels(t, 2, "__name__", "foo", "job", "foo"),
-				metricWithValueAndLabels(t, 1, "__name__", "bar", "job", "bar")},
+				metricWithValueAndLabels(t, 1, "__name__", "bar", "job", "bar"),
+			},
 			[]prompbmarshal.TimeSeries{
 				newTimeSeries([]float64{2}, []int64{timestamp.UnixNano()}, map[string]string{
 					"__name__": "job:foo",
@@ -158,10 +161,10 @@ func TestRecordingRule_ExecRange(t *testing.T) {
 	}
 	for _, tc := range testCases {
 		t.Run(tc.rule.Name, func(t *testing.T) {
-			fq := &fakeQuerier{}
-			fq.add(tc.metrics...)
+			fq := &datasource.FakeQuerier{}
+			fq.Add(tc.metrics...)
 			tc.rule.q = fq
-			tss, err := tc.rule.ExecRange(context.TODO(), time.Now(), time.Now())
+			tss, err := tc.rule.execRange(context.TODO(), time.Now(), time.Now())
 			if err != nil {
 				t.Fatalf("unexpected Exec err: %s", err)
 			}
@@ -198,15 +201,15 @@ func TestRecordingRuleLimit(t *testing.T) {
 		metricWithValuesAndLabels(t, []float64{2, 3}, "__name__", "bar", "job", "bar"),
 		metricWithValuesAndLabels(t, []float64{4, 5, 6}, "__name__", "baz", "job", "baz"),
 	}
-	rule := &RecordingRule{Name: "job:foo", state: newRuleState(10), Labels: map[string]string{
+	rule := &RecordingRule{Name: "job:foo", state: &ruleState{entries: make([]StateEntry, 10)}, Labels: map[string]string{
 		"source": "test_limit",
 	}}
 	var err error
 	for _, testCase := range testCases {
-		fq := &fakeQuerier{}
-		fq.add(testMetrics...)
+		fq := &datasource.FakeQuerier{}
+		fq.Add(testMetrics...)
 		rule.q = fq
-		_, err = rule.Exec(context.TODO(), timestamp, testCase.limit)
+		_, err = rule.exec(context.TODO(), timestamp, testCase.limit)
 		if err != nil && !strings.EqualFold(err.Error(), testCase.err) {
 			t.Fatal(err)
 		}
@@ -215,18 +218,17 @@ func TestRecordingRuleLimit(t *testing.T) {

 func TestRecordingRule_ExecNegative(t *testing.T) {
 	rr := &RecordingRule{
-		Name:  "job:foo",
-		state: newRuleState(10),
+		Name: "job:foo",
 		Labels: map[string]string{
 			"job": "test",
 		},
+		state: &ruleState{entries: make([]StateEntry, 10)},
 	}
-
-	fq := &fakeQuerier{}
+	fq := &datasource.FakeQuerier{}
 	expErr := "connection reset by peer"
-	fq.setErr(errors.New(expErr))
+	fq.SetErr(errors.New(expErr))
 	rr.q = fq
-	_, err := rr.Exec(context.TODO(), time.Now(), 0)
+	_, err := rr.exec(context.TODO(), time.Now(), 0)
 	if err == nil {
 		t.Fatalf("expected to get err; got nil")
 	}
@@ -234,14 +236,14 @@ func TestRecordingRule_ExecNegative(t *testing.T) {
 		t.Fatalf("expected to get err %q; got %q insterad", expErr, err)
 	}

-	fq.reset()
+	fq.Reset()

 	// add metrics which differs only by `job` label
 	// which will be overridden by rule
-	fq.add(metricWithValueAndLabels(t, 1, "__name__", "foo", "job", "foo"))
-	fq.add(metricWithValueAndLabels(t, 2, "__name__", "foo", "job", "bar"))
+	fq.Add(metricWithValueAndLabels(t, 1, "__name__", "foo", "job", "foo"))
+	fq.Add(metricWithValueAndLabels(t, 2, "__name__", "foo", "job", "bar"))

-	_, err = rr.Exec(context.TODO(), time.Now(), 0)
+	_, err = rr.exec(context.TODO(), time.Now(), 0)
 	if err == nil {
 		t.Fatalf("expected to get err; got nil")
 	}
--- a/app/vmalert/rule/rule.go
+++ b/app/vmalert/rule/rule.go
@@ -0,0 +1,174 @@
+package rule
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"sync"
+	"time"
+
+	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/remotewrite"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
+)
+
+// Rule represents an alerting or recording rule
+// that has a unique ID, can be executed and
+// can be updated from another Rule.
+//
+// All methods except ID are unexported, so only types declared
+// in this package can implement the interface.
+type Rule interface {
+	// ID returns a unique ID that may be used for
+	// identifying this Rule among others.
+	ID() uint64
+	// exec executes the rule with the given context at the given timestamp and limit.
+	// It returns an error if the number of resulting time series exceeds the limit.
+	exec(ctx context.Context, ts time.Time, limit int) ([]prompbmarshal.TimeSeries, error)
+	// execRange executes the rule on the given time range.
+	execRange(ctx context.Context, start, end time.Time) ([]prompbmarshal.TimeSeries, error)
+	// updateWith modifies the current Rule
+	// with the fields of the given Rule.
+	updateWith(Rule) error
+	// close performs the shutdown procedures for the rule,
+	// such as unregistering its metrics.
+	close()
+}
+
+// errDuplicate reports that, once rule labels have been applied, the result
+// holds more than one metric with the same labelset.
+var errDuplicate = errors.New("result contains metrics with the same labelset after applying rule labels. See https://docs.victoriametrics.com/vmalert.html#series-with-the-same-labelset for details")
+
+// ruleState keeps a bounded history of StateEntry records for a rule.
+//
+// entries is used as a ring buffer: add advances cur and overwrites the slot
+// it lands on, so cur always points at the most recently stored entry.
+// The embedded RWMutex is taken by every method that touches entries or cur.
+type ruleState struct {
+	sync.RWMutex
+	entries []StateEntry
+	cur     int
+}
+
+// StateEntry stores the state of a single rule execution.
+type StateEntry struct {
+	// Time stores the last moment of time the rule was executed
+	Time time.Time `json:"time"`
+	// At stores the timestamp with which the rule was executed
+	At time.Time `json:"at"`
+	// Duration stores the duration of the last rule execution
+	Duration time.Duration `json:"duration"`
+	// Err stores the last error that happened during execution.
+	// It resets on every successful execution
+	// and may be used as a health indicator of the rule
+	Err error `json:"error"`
+	// Samples stores the number of samples returned during
+	// the last evaluation
+	Samples int `json:"samples"`
+	// SeriesFetched stores the number of time series fetched during
+	// the last evaluation.
+	// Is supported by VictoriaMetrics only, starting from v1.90.0
+	// If SeriesFetched == nil, then this attribute was missing in
+	// datasource response (unsupported).
+	SeriesFetched *int `json:"series_fetched"`
+	// Curl stores the curl command reflecting the HTTP request used during execution
+	Curl string `json:"curl"`
+}
+
+// GetLastEntry returns the most recent StateEntry recorded for r.
+// A zero StateEntry is returned when r is neither an *AlertingRule
+// nor a *RecordingRule.
+func GetLastEntry(r Rule) StateEntry {
+	switch v := r.(type) {
+	case *AlertingRule:
+		return v.state.getLast()
+	case *RecordingRule:
+		return v.state.getLast()
+	}
+	return StateEntry{}
+}
+
+// GetRuleStateSize returns how many state slots r keeps.
+// Zero is returned when r is neither an *AlertingRule
+// nor a *RecordingRule.
+func GetRuleStateSize(r Rule) int {
+	switch v := r.(type) {
+	case *AlertingRule:
+		return v.state.size()
+	case *RecordingRule:
+		return v.state.size()
+	}
+	return 0
+}
+
+// GetAllRuleState returns every StateEntry recorded for r.
+// An empty, non-nil slice is returned when r is neither an *AlertingRule
+// nor a *RecordingRule.
+func GetAllRuleState(r Rule) []StateEntry {
+	switch v := r.(type) {
+	case *AlertingRule:
+		return v.state.getAll()
+	case *RecordingRule:
+		return v.state.getAll()
+	}
+	return []StateEntry{}
+}
+
+// size reports the number of slots in the state, read under the read lock.
+func (s *ruleState) size() int {
+	s.RLock()
+	n := len(s.entries)
+	s.RUnlock()
+	return n
+}
+
+// getLast returns the entry at the current position,
+// or a zero StateEntry when the state has no slots.
+func (s *ruleState) getLast() StateEntry {
+	s.RLock()
+	defer s.RUnlock()
+
+	var last StateEntry
+	if len(s.entries) > 0 {
+		last = s.entries[s.cur]
+	}
+	return last
+}
+
+// getAll returns the recorded entries ordered from the newest to the oldest.
+// Slots that were never written (both Time and At are zero) are skipped.
+// The result is always non-nil; a state without slots yields an empty slice
+// instead of panicking on an out-of-range index.
+func (s *ruleState) getAll() []StateEntry {
+	s.RLock()
+	defer s.RUnlock()
+
+	// len, not cap, bounds the valid indexes of the slice.
+	n := len(s.entries)
+	entries := make([]StateEntry, 0, n)
+	// Walk the ring backwards starting at the newest slot; adding n before
+	// taking the remainder keeps the index non-negative when it wraps.
+	for i := 0; i < n; i++ {
+		e := s.entries[(s.cur-i+n)%n]
+		if e.Time.IsZero() && e.At.IsZero() {
+			continue // slot has not been written yet
+		}
+		entries = append(entries, e)
+	}
+	return entries
+}
+
+// add stores e in the slot following the current one, wrapping around to the
+// first slot once the end is reached, and makes that slot the current one.
+//
+// A state created without any slots gets a single slot on first use, so the
+// latest entry is always retained instead of add panicking on an empty slice.
+func (s *ruleState) add(e StateEntry) {
+	s.Lock()
+	defer s.Unlock()
+
+	if len(s.entries) == 0 {
+		s.entries = make([]StateEntry, 1)
+	}
+	// len, not cap, bounds the valid indexes of the slice.
+	s.cur = (s.cur + 1) % len(s.entries)
+	s.entries[s.cur] = e
+}
+
+// replayRule executes r over the [start, end] range and pushes every
+// resulting time series to rw.
+//
+// execRange is tried up to replayRuleRetryAttempts times with a one second
+// pause between attempts; the error of the last attempt is returned when all
+// of them fail. If replayRuleRetryAttempts is not positive the rule is never
+// executed and (0, nil) is returned.
+//
+// The returned int is the number of samples pushed so far; it is also
+// returned alongside the error when a push fails part-way.
+func replayRule(r Rule, start, end time.Time, rw remotewrite.RWClient, replayRuleRetryAttempts int) (int, error) {
+	var (
+		tss []prompbmarshal.TimeSeries
+		err error
+	)
+	for attempt := 1; attempt <= replayRuleRetryAttempts; attempt++ {
+		tss, err = r.execRange(context.Background(), start, end)
+		if err == nil {
+			break
+		}
+		logger.Errorf("attempt %d to execute rule %q failed: %s", attempt, r, err)
+		if attempt < replayRuleRetryAttempts {
+			// Pause only when another attempt follows: sleeping after the
+			// final failure would merely delay returning the error.
+			time.Sleep(time.Second)
+		}
+	}
+	if err != nil { // means all attempts failed
+		return 0, err
+	}
+
+	var n int
+	for i := range tss {
+		if err := rw.Push(tss[i]); err != nil {
+			return n, fmt.Errorf("remote write failure: %w", err)
+		}
+		n += len(tss[i].Samples)
+	}
+	return n, nil
+}
--- a/app/vmalert/rule/rule_test.go
+++ b/app/vmalert/rule/rule_test.go
@@ -0,0 +1,81 @@
+package rule
+
+import (
+	"sync"
+	"testing"
+	"time"
+)
+
+// TestRule_state checks that ruleState reports the latest entry after each
+// add and that the number of stored entries never exceeds the slot count.
+func TestRule_state(t *testing.T) {
+	stateEntriesN := 20
+	r := &AlertingRule{state: &ruleState{entries: make([]StateEntry, stateEntriesN)}}
+
+	// checkLast fails the test unless the newest entry carries the wanted timestamp.
+	checkLast := func(want time.Time) {
+		t.Helper()
+		if got := r.state.getLast().At; got != want {
+			t.Fatalf("expected entry at %v to be equal to %v",
+				got, want)
+		}
+	}
+
+	if e := r.state.getLast(); !e.At.IsZero() {
+		t.Fatalf("expected entry to be zero")
+	}
+
+	first := time.Now()
+	r.state.add(StateEntry{At: first})
+	checkLast(first)
+
+	time.Sleep(time.Millisecond)
+	second := time.Now()
+	r.state.add(StateEntry{At: second})
+	checkLast(second)
+
+	if n := len(r.state.getAll()); n != 2 {
+		t.Fatalf("expected for state to have 2 entries only; got %d",
+			n,
+		)
+	}
+
+	// overflow the state twice over so that every slot gets overwritten
+	var last time.Time
+	for i := 0; i < stateEntriesN*2; i++ {
+		last = time.Now()
+		r.state.add(StateEntry{At: last})
+	}
+	checkLast(last)
+
+	if n := len(r.state.getAll()); n != stateEntriesN {
+		t.Fatalf("expected for state to have %d entries only; got %d",
+			stateEntriesN, n,
+		)
+	}
+}
+
+// TestRule_stateConcurrent runs state writes and reads
+// from many goroutines at once.
+// It has no assertions of its own and is meant to be run with the -race flag.
+func TestRule_stateConcurrent(_ *testing.T) {
+	const (
+		workers    = 50
+		iterations = 100
+	)
+	r := &AlertingRule{state: &ruleState{entries: make([]StateEntry, 20)}}
+
+	// hammer calls the writer and both readers back to back.
+	hammer := func(wg *sync.WaitGroup) {
+		defer wg.Done()
+		for n := 0; n < iterations; n++ {
+			r.state.add(StateEntry{At: time.Now()})
+			r.state.getAll()
+			r.state.getLast()
+		}
+	}
+
+	var wg sync.WaitGroup
+	wg.Add(workers)
+	for w := 0; w < workers; w++ {
+		go hammer(&wg)
+	}
+	wg.Wait()
+}
--- a/app/vmalert/rule/test_helpers.go
+++ b/app/vmalert/rule/test_helpers.go
@@ -1,239 +1,18 @@
-package main
+package rule

 import (
-	"context"
 	"fmt"
-	"net/http"
 	"reflect"
 	"sort"
-	"sync"
 	"testing"
-	"time"

 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
 )

-type fakeQuerier struct {
-	sync.Mutex
-	metrics []datasource.Metric
-	err     error
-}
-
-func (fq *fakeQuerier) setErr(err error) {
-	fq.Lock()
-	fq.err = err
-	fq.Unlock()
-}
-
-func (fq *fakeQuerier) reset() {
-	fq.Lock()
-	fq.err = nil
-	fq.metrics = fq.metrics[:0]
-	fq.Unlock()
-}
-
-func (fq *fakeQuerier) add(metrics ...datasource.Metric) {
-	fq.Lock()
-	fq.metrics = append(fq.metrics, metrics...)
-	fq.Unlock()
-}
-
-func (fq *fakeQuerier) BuildWithParams(_ datasource.QuerierParams) datasource.Querier {
-	return fq
-}
-
-func (fq *fakeQuerier) QueryRange(ctx context.Context, q string, _, _ time.Time) (datasource.Result, error) {
-	req, _, err := fq.Query(ctx, q, time.Now())
-	return req, err
-}
-
-func (fq *fakeQuerier) Query(_ context.Context, _ string, _ time.Time) (datasource.Result, *http.Request, error) {
-	fq.Lock()
-	defer fq.Unlock()
-	if fq.err != nil {
-		return datasource.Result{}, nil, fq.err
-	}
-	cp := make([]datasource.Metric, len(fq.metrics))
-	copy(cp, fq.metrics)
-	req, _ := http.NewRequest(http.MethodPost, "foo.com", nil)
-	return datasource.Result{Data: cp}, req, nil
-}
-
-type fakeQuerierWithRegistry struct {
-	sync.Mutex
-	registry map[string][]datasource.Metric
-}
-
-func (fqr *fakeQuerierWithRegistry) set(key string, metrics ...datasource.Metric) {
-	fqr.Lock()
-	if fqr.registry == nil {
-		fqr.registry = make(map[string][]datasource.Metric)
-	}
-	fqr.registry[key] = metrics
-	fqr.Unlock()
-}
-
-func (fqr *fakeQuerierWithRegistry) reset() {
-	fqr.Lock()
-	fqr.registry = nil
-	fqr.Unlock()
-}
-
-func (fqr *fakeQuerierWithRegistry) BuildWithParams(_ datasource.QuerierParams) datasource.Querier {
-	return fqr
-}
-
-func (fqr *fakeQuerierWithRegistry) QueryRange(ctx context.Context, q string, _, _ time.Time) (datasource.Result, error) {
-	req, _, err := fqr.Query(ctx, q, time.Now())
-	return req, err
-}
-
-func (fqr *fakeQuerierWithRegistry) Query(_ context.Context, expr string, _ time.Time) (datasource.Result, *http.Request, error) {
-	fqr.Lock()
-	defer fqr.Unlock()
-
-	req, _ := http.NewRequest(http.MethodPost, "foo.com", nil)
-	metrics, ok := fqr.registry[expr]
-	if !ok {
-		return datasource.Result{}, req, nil
-	}
-	cp := make([]datasource.Metric, len(metrics))
-	copy(cp, metrics)
-	return datasource.Result{Data: cp}, req, nil
-}
-
-type fakeQuerierWithDelay struct {
-	fakeQuerier
-	delay time.Duration
-}
-
-func (fqd *fakeQuerierWithDelay) Query(ctx context.Context, expr string, ts time.Time) (datasource.Result, *http.Request, error) {
-	timer := time.NewTimer(fqd.delay)
-	select {
-	case <-ctx.Done():
-	case <-timer.C:
-	}
-	return fqd.fakeQuerier.Query(ctx, expr, ts)
-}
-
-func (fqd *fakeQuerierWithDelay) BuildWithParams(_ datasource.QuerierParams) datasource.Querier {
-	return fqd
-}
-
-type fakeNotifier struct {
-	sync.Mutex
-	alerts []notifier.Alert
-	// records number of received alerts in total
-	counter int
-}
-
-func (*fakeNotifier) Close()       {}
-func (*fakeNotifier) Addr() string { return "" }
-func (fn *fakeNotifier) Send(_ context.Context, alerts []notifier.Alert, _ map[string]string) error {
-	fn.Lock()
-	defer fn.Unlock()
-	fn.counter += len(alerts)
-	fn.alerts = alerts
-	return nil
-}
-
-func (fn *fakeNotifier) getCounter() int {
-	fn.Lock()
-	defer fn.Unlock()
-	return fn.counter
-}
-
-func (fn *fakeNotifier) getAlerts() []notifier.Alert {
-	fn.Lock()
-	defer fn.Unlock()
-	return fn.alerts
-}
-
-type faultyNotifier struct {
-	fakeNotifier
-}
-
-func (fn *faultyNotifier) Send(ctx context.Context, _ []notifier.Alert, _ map[string]string) error {
-	d, ok := ctx.Deadline()
-	if ok {
-		time.Sleep(time.Until(d))
-	}
-	return fmt.Errorf("send failed")
-}
-
-func metricWithValueAndLabels(t *testing.T, value float64, labels ...string) datasource.Metric {
-	return metricWithValuesAndLabels(t, []float64{value}, labels...)
-}
-
-func metricWithValuesAndLabels(t *testing.T, values []float64, labels ...string) datasource.Metric {
-	t.Helper()
-	m := metricWithLabels(t, labels...)
-	m.Values = values
-	for i := range values {
-		m.Timestamps = append(m.Timestamps, int64(i))
-	}
-	return m
-}
-
-func metricWithLabels(t *testing.T, labels ...string) datasource.Metric {
-	t.Helper()
-	if len(labels) == 0 || len(labels)%2 != 0 {
-		t.Fatalf("expected to get even number of labels")
-	}
-	m := datasource.Metric{Values: []float64{1}, Timestamps: []int64{1}}
-	for i := 0; i < len(labels); i += 2 {
-		m.Labels = append(m.Labels, datasource.Label{
-			Name:  labels[i],
-			Value: labels[i+1],
-		})
-	}
-	return m
-}
-
-func toPromLabels(t *testing.T, labels ...string) []prompbmarshal.Label {
-	t.Helper()
-	if len(labels) == 0 || len(labels)%2 != 0 {
-		t.Fatalf("expected to get even number of labels")
-	}
-	var ls []prompbmarshal.Label
-	for i := 0; i < len(labels); i += 2 {
-		ls = append(ls, prompbmarshal.Label{
-			Name:  labels[i],
-			Value: labels[i+1],
-		})
-	}
-	return ls
-}
-
-func compareGroups(t *testing.T, a, b *Group) {
-	t.Helper()
-	if a.Name != b.Name {
-		t.Fatalf("expected group name %q; got %q", a.Name, b.Name)
-	}
-	if a.File != b.File {
-		t.Fatalf("expected group %q file name %q; got %q", a.Name, a.File, b.File)
-	}
-	if a.Interval != b.Interval {
-		t.Fatalf("expected group %q interval %v; got %v", a.Name, a.Interval, b.Interval)
-	}
-	if len(a.Rules) != len(b.Rules) {
-		t.Fatalf("expected group %s to have %d rules; got: %d",
-			a.Name, len(a.Rules), len(b.Rules))
-	}
-	for i, r := range a.Rules {
-		got, want := r, b.Rules[i]
-		if a.ID() != b.ID() {
-			t.Fatalf("expected to have rule %q; got %q", want.ID(), got.ID())
-		}
-		if err := compareRules(t, want, got); err != nil {
-			t.Fatalf("comparison error: %s", err)
-		}
-	}
-}
-
-func compareRules(t *testing.T, a, b Rule) error {
+// CompareRules is a test helper func for other tests
+func CompareRules(t *testing.T, a, b Rule) error {
 	t.Helper()
 	switch v := a.(type) {
 	case *AlertingRule:
@@ -287,6 +66,50 @@ func compareAlertingRules(t *testing.T, a, b *AlertingRule) error {
 	return nil
 }

+// metricWithValueAndLabels builds a datasource.Metric holding the single
+// given value by delegating to metricWithValuesAndLabels.
+func metricWithValueAndLabels(t *testing.T, value float64, labels ...string) datasource.Metric {
+	single := []float64{value}
+	return metricWithValuesAndLabels(t, single, labels...)
+}
+
+// metricWithValuesAndLabels builds a datasource.Metric with the given labels,
+// sets its Values to values and appends one timestamp per value, using the
+// value's index as the timestamp.
+func metricWithValuesAndLabels(t *testing.T, values []float64, labels ...string) datasource.Metric {
+	t.Helper()
+	metric := metricWithLabels(t, labels...)
+	metric.Values = values
+	// NOTE(review): metricWithLabels already returns Timestamps with one
+	// element, so the result carries len(values)+1 timestamps - confirm
+	// callers expect that before changing it.
+	for idx := 0; idx < len(values); idx++ {
+		metric.Timestamps = append(metric.Timestamps, int64(idx))
+	}
+	return metric
+}
+
+// metricWithLabels builds a datasource.Metric with one value (1) at
+// timestamp 1 and labels taken from the name/value pairs in labels.
+// The test fails when labels is empty or has an odd number of items.
+func metricWithLabels(t *testing.T, labels ...string) datasource.Metric {
+	t.Helper()
+	if n := len(labels); n == 0 || n%2 != 0 {
+		t.Fatalf("expected to get even number of labels")
+	}
+	metric := datasource.Metric{Values: []float64{1}, Timestamps: []int64{1}}
+	// step over the pairs: labels[v-1] is the name, labels[v] is the value
+	for v := 1; v < len(labels); v += 2 {
+		pair := datasource.Label{
+			Name:  labels[v-1],
+			Value: labels[v],
+		}
+		metric.Labels = append(metric.Labels, pair)
+	}
+	return metric
+}
+
+// toPromLabels converts the name/value pairs in labels into a slice of
+// prompbmarshal.Label, keeping the order of the pairs.
+// The test fails when labels is empty or has an odd number of items.
+func toPromLabels(t *testing.T, labels ...string) []prompbmarshal.Label {
+	t.Helper()
+	if n := len(labels); n == 0 || n%2 != 0 {
+		t.Fatalf("expected to get even number of labels")
+	}
+	var result []prompbmarshal.Label
+	// step over the pairs: labels[v-1] is the name, labels[v] is the value
+	for v := 1; v < len(labels); v += 2 {
+		pair := prompbmarshal.Label{
+			Name:  labels[v-1],
+			Value: labels[v],
+		}
+		result = append(result, pair)
+	}
+	return result
+}
+
 func compareTimeSeries(t *testing.T, a, b []prompbmarshal.TimeSeries) error {
 	t.Helper()
 	if len(a) != len(b) {
--- a/app/vmalert/rule/utils.go
+++ b/app/vmalert/rule/utils.go
@@ -1,4 +1,4 @@
-package main
+package rule

 import (
 	"fmt"
@@ -7,6 +7,7 @@ import (
 	"strings"
 	"time"

+	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
 )

@@ -80,6 +81,9 @@ func requestToCurl(req *http.Request) string {

 	schema := req.URL.Scheme
 	requestURL := req.URL.String()
+	if !datasource.ShowDatasourceURL() {
+		requestURL = req.URL.Redacted()
+	}
 	if schema == "" {
 		schema = "http"
 		if req.TLS != nil {
@@ -103,9 +107,25 @@ func requestToCurl(req *http.Request) string {

 	for _, k := range keys {
 		cw.add("-H")
+		if !datasource.ShowDatasourceURL() && isSecreteHeader(k) {
+			cw.addWithEsc(fmt.Sprintf("%s: <secret>", k))
+			continue
+		}
 		cw.addWithEsc(fmt.Sprintf("%s: %s", k, strings.Join(req.Header[k], " ")))
 	}

 	cw.addWithEsc(requestURL)
 	return cw.string()
 }
+
+// secretWords lists lower-case fragments that mark a header name as sensitive.
+var secretWords = []string{"auth", "pass", "key", "secret", "token"}
+
+// isSecreteHeader reports whether str, compared case-insensitively,
+// contains any of the secretWords fragments.
+func isSecreteHeader(str string) bool {
+	lowered := strings.ToLower(str)
+	for i := range secretWords {
+		if strings.Contains(lowered, secretWords[i]) {
+			return true
+		}
+	}
+	return false
+}
--- a/app/vmalert/rule/utils_test.go
+++ b/app/vmalert/rule/utils_test.go
@@ -0,0 +1,75 @@
+package rule
+
+import (
+	"net/http"
+	"testing"
+)
+
+// TestRequestToCurl checks the exact curl command line produced by
+// requestToCurl for requests with different URLs, query args and headers.
+// NOTE(review): the redacted expectations below presumably rely on
+// datasource.ShowDatasourceURL() returning false in tests - confirm.
+func TestRequestToCurl(t *testing.T) {
+	// f compares the generated command with the expected one
+	f := func(req *http.Request, exp string) {
+		t.Helper()
+		got := requestToCurl(req)
+		if got != exp {
+			t.Fatalf("expected to have %q; got %q instead", exp, got)
+		}
+	}
+
+	// newReq builds a POST request to url; queryParams is a flat list of
+	// name/value pairs that are added to the URL query.
+	newReq := func(url string, queryParams ...string) *http.Request {
+		t.Helper()
+		r, err := http.NewRequest(http.MethodPost, url, nil)
+		if err != nil {
+			t.Fatal(err)
+		}
+		params := r.URL.Query()
+		for i := 0; i < len(queryParams); i += 2 {
+			params.Add(queryParams[i], queryParams[i+1])
+		}
+		r.URL.RawQuery = params.Encode()
+
+		return r
+	}
+
+	// URL without a scheme is expected to get the http:// prefix
+	req := newReq("foo.com")
+	f(req, "curl -X POST 'http://foo.com'")
+
+	// headers are expected in sorted order with canonical names
+	req = newReq("foo.com")
+	req.Header.Set("foo", "bar")
+	req.Header.Set("baz", "qux")
+	f(req, "curl -X POST -H 'Baz: qux' -H 'Foo: bar' 'http://foo.com'")
+
+	// query args are expected to be part of the URL
+	req = newReq("foo.com", "query", "up", "step", "10")
+	f(req, "curl -X POST 'http://foo.com?query=up&step=10'")
+
+	req = newReq("http://foo.com", "query", "up", "step", "10")
+	f(req, "curl -X POST 'http://foo.com?query=up&step=10'")
+
+	// https URLs are expected to add the -k flag
+	req = newReq("https://foo.com", "query", "up", "step", "10")
+	f(req, "curl -k -X POST 'https://foo.com?query=up&step=10'")
+
+	// password in the URL is expected to be replaced with xxxxx
+	req = newReq("https://user:pass@foo.com", "query", "up", "step", "10")
+	f(req, "curl -k -X POST 'https://user:xxxxx@foo.com?query=up&step=10'")
+
+	// values of headers with secret-looking names are expected
+	// to be replaced with <secret>
+	req = newReq("https://user:pass@foo.com")
+	req.Header.Set("Authorisation", "Bearer 123456")
+	f(req, "curl -k -X POST -H 'Authorisation: <secret>' 'https://user:xxxxx@foo.com'")
+
+	req = newReq("https://user:pass@foo.com")
+	req.Header.Set("Authorisation", "Basic 123456")
+	f(req, "curl -k -X POST -H 'Authorisation: <secret>' 'https://user:xxxxx@foo.com'")
+
+	req = newReq("https://foo.com")
+	req.Header.Set("My-Password", "mypassword")
+	f(req, "curl -k -X POST -H 'My-Password: <secret>' 'https://foo.com'")
+
+	req = newReq("https://foo.com")
+	req.Header.Set("key-for", "my-new-key")
+	f(req, "curl -k -X POST -H 'Key-For: <secret>' 'https://foo.com'")
+
+	req = newReq("https://foo.com")
+	req.Header.Set("My-Secret-Org", "secret-organisation")
+	f(req, "curl -k -X POST -H 'My-Secret-Org: <secret>' 'https://foo.com'")
+
+	req = newReq("https://foo.com")
+	req.Header.Set("Token", "secret-token")
+	f(req, "curl -k -X POST -H 'Token: <secret>' 'https://foo.com'")
+}
--- a/app/vmalert/rule_test.go
+++ b/app/vmalert/rule_test.go
@@ -1,100 +0,0 @@
-package main
-
-import (
-	"sync"
-	"testing"
-	"time"
-)
-
-func TestRule_stateDisabled(t *testing.T) {
-	state := newRuleState(-1)
-	e := state.getLast()
-	if !e.at.IsZero() {
-		t.Fatalf("expected entry to be zero")
-	}
-
-	state.add(ruleStateEntry{at: time.Now()})
-	state.add(ruleStateEntry{at: time.Now()})
-	state.add(ruleStateEntry{at: time.Now()})
-
-	if len(state.getAll()) != 1 {
-		// state should store at least one update at any circumstances
-		t.Fatalf("expected for state to have %d entries; got %d",
-			1, len(state.getAll()),
-		)
-	}
-}
-func TestRule_state(t *testing.T) {
-	stateEntriesN := 20
-	state := newRuleState(stateEntriesN)
-	e := state.getLast()
-	if !e.at.IsZero() {
-		t.Fatalf("expected entry to be zero")
-	}
-
-	now := time.Now()
-	state.add(ruleStateEntry{at: now})
-
-	e = state.getLast()
-	if e.at != now {
-		t.Fatalf("expected entry at %v to be equal to %v",
-			e.at, now)
-	}
-
-	time.Sleep(time.Millisecond)
-	now2 := time.Now()
-	state.add(ruleStateEntry{at: now2})
-
-	e = state.getLast()
-	if e.at != now2 {
-		t.Fatalf("expected entry at %v to be equal to %v",
-			e.at, now2)
-	}
-
-	if len(state.getAll()) != 2 {
-		t.Fatalf("expected for state to have 2 entries only; got %d",
-			len(state.getAll()),
-		)
-	}
-
-	var last time.Time
-	for i := 0; i < stateEntriesN*2; i++ {
-		last = time.Now()
-		state.add(ruleStateEntry{at: last})
-	}
-
-	e = state.getLast()
-	if e.at != last {
-		t.Fatalf("expected entry at %v to be equal to %v",
-			e.at, last)
-	}
-
-	if len(state.getAll()) != stateEntriesN {
-		t.Fatalf("expected for state to have %d entries only; got %d",
-			stateEntriesN, len(state.getAll()),
-		)
-	}
-}
-
-// TestRule_stateConcurrent supposed to test concurrent
-// execution of state updates.
-// Should be executed with -race flag
-func TestRule_stateConcurrent(t *testing.T) {
-	state := newRuleState(20)
-
-	const workers = 50
-	const iterations = 100
-	wg := sync.WaitGroup{}
-	wg.Add(workers)
-	for i := 0; i < workers; i++ {
-		go func() {
-			defer wg.Done()
-			for i := 0; i < iterations; i++ {
-				state.add(ruleStateEntry{at: time.Now()})
-				state.getAll()
-				state.getLast()
-			}
-		}()
-	}
-	wg.Wait()
-}
--- a/Show More
+++ b/Show More