Compare commits

..

2 Commits

Author SHA1 Message Date
Zakhar Bessarab
b819157995 docs: add vmanomaly docs about licensing
Signed-off-by: Zakhar Bessarab <z.bessarab@victoriametrics.com>
2023-08-11 20:35:36 +04:00
Zakhar Bessarab
783dc8fd88 wip
Signed-off-by: Zakhar Bessarab <z.bessarab@victoriametrics.com>
2023-08-08 10:59:55 +04:00
1626 changed files with 27516 additions and 145002 deletions

View File

@@ -6,7 +6,7 @@ body:
attributes:
value: |
Before filling a bug report it would be great to [upgrade](https://docs.victoriametrics.com/#how-to-upgrade)
to [the latest available release](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest)
to [the latest available release](https://github.com/VictoriaMetrics/VictoriaMetrics/releases)
and verify whether the bug is reproducible there.
It's also recommended to read the [troubleshooting docs](https://docs.victoriametrics.com/Troubleshooting.html) first.
- type: textarea

View File

@@ -17,7 +17,7 @@ jobs:
- name: Setup Go
uses: actions/setup-go@main
with:
go-version: 1.21.3
go-version: 1.20.7
id: go
- name: Code checkout
uses: actions/checkout@master

View File

@@ -33,7 +33,7 @@ jobs:
steps:
- name: Checkout repository
uses: actions/checkout@v4
uses: actions/checkout@v3
- name: Initialize CodeQL
uses: github/codeql-action/init@v2

View File

@@ -52,12 +52,12 @@ jobs:
steps:
- name: Checkout repository
uses: actions/checkout@v4
uses: actions/checkout@v3
- name: Set up Go
uses: actions/setup-go@v4
with:
go-version: 1.21.3
go-version: 1.20.7
check-latest: true
cache: true
if: ${{ matrix.language == 'go' }}

View File

@@ -27,12 +27,12 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Code checkout
uses: actions/checkout@v4
uses: actions/checkout@v3
- name: Setup Go
uses: actions/setup-go@v4
with:
go-version: 1.21.3
go-version: 1.20.7
check-latest: true
cache: true
@@ -51,12 +51,12 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Code checkout
uses: actions/checkout@v4
uses: actions/checkout@v3
- name: Setup Go
uses: actions/setup-go@v4
with:
go-version: 1.21.3
go-version: 1.20.7
check-latest: true
cache: true
@@ -75,13 +75,13 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Code checkout
uses: actions/checkout@v4
uses: actions/checkout@v3
- name: Setup Go
id: go
uses: actions/setup-go@v4
with:
go-version: 1.21.3
go-version: 1.20.7
check-latest: true
cache: true

View File

@@ -6,8 +6,6 @@ on:
paths:
- 'docs/**'
workflow_dispatch: {}
env:
PAGEFIND_VERSION: "1.0.3"
permissions:
contents: read # This is required for actions/checkout and to commit back image update
deployments: write
@@ -17,25 +15,16 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Code checkout
uses: actions/checkout@v4
uses: actions/checkout@v3
with:
path: main
- name: Checkout private code
uses: actions/checkout@v4
uses: actions/checkout@v3
with:
repository: VictoriaMetrics/vmdocs
token: ${{ secrets.VM_BOT_GH_TOKEN }}
path: docs
- uses: peaceiris/actions-hugo@v2
with:
hugo-version: 'latest'
extended: true
- name: Install PageFind #install the static search engine for index build
uses: supplypike/setup-bin@v3
with:
uri: "https://github.com/CloudCannon/pagefind/releases/download/v${{env.PAGEFIND_VERSION}}/pagefind-v${{env.PAGEFIND_VERSION}}-x86_64-unknown-linux-musl.tar.gz"
name: "pagefind"
version: ${{env.PAGEFIND_VERSION}}
- name: Import GPG key
uses: crazy-max/ghaction-import-gpg@v5
with:
@@ -56,7 +45,6 @@ jobs:
rm -rf content
cp -r ../main/docs content
make clean-after-copy
make build-search-index
git config --global user.name "${{ steps.import-gpg.outputs.email }}"
git config --global user.email "${{ steps.import-gpg.outputs.email }}"
git add .

80
.github/workflows/update-sandbox.yml vendored Normal file
View File

@@ -0,0 +1,80 @@
name: sandbox-release
on:
release:
types: [published]
permissions:
contents: write
jobs:
deploy-sandbox:
runs-on: ubuntu-latest
steps:
- name: check inputs
if: github.event.release.tag_name == ''
run: exit 1
- name: Check out code
uses: actions/checkout@v3
with:
repository: VictoriaMetrics/ops
token: ${{ secrets.VM_BOT_GH_TOKEN }}
- name: Import GPG key
id: import-gpg
uses: crazy-max/ghaction-import-gpg@v5
with:
gpg_private_key: ${{ secrets.VM_BOT_GPG_PRIVATE_KEY }}
passphrase: ${{ secrets.VM_BOT_PASSPHRASE }}
git_user_signingkey: true
git_commit_gpgsign: true
- name: update image tag
uses: fjogeleit/yaml-update-action@main
with:
valueFile: 'gcp-test/sandbox/manifests/benchmark-vm/vmcluster.yaml'
commitChange: false
createPR: false
changes: |
{
"gcp-test/sandbox/manifests/benchmark-vm/vmcluster.yaml": {
"spec.vminsert.image.tag": "${{ github.event.release.tag_name }}-enterprise-cluster",
"spec.vmselect.image.tag": "${{ github.event.release.tag_name }}-enterprise-cluster",
"spec.vmstorage.image.tag": "${{ github.event.release.tag_name }}-enterprise-cluster"
},
"gcp-test/sandbox/manifests/benchmark-vm/vmsingle.yaml": {
"spec.image.tag": "${{ github.event.release.tag_name }}-enterprise"
},
"gcp-test/sandbox/manifests/monitoring/monitoring-vmagent.yaml": {
"spec.image.tag": "${{ github.event.release.tag_name }}"
},
"gcp-test/sandbox/manifests/monitoring/monitoring-vmcluster.yaml": {
"spec.vminsert.image.tag": "${{ github.event.release.tag_name }}-enterprise-cluster",
"spec.vmselect.image.tag": "${{ github.event.release.tag_name }}-enterprise-cluster",
"spec.vmstorage.image.tag": "${{ github.event.release.tag_name }}-enterprise-cluster"
},
"gcp-test/sandbox/manifests/monitoring/vmalert.yaml": {
"spec.image.tag": "${{ github.event.release.tag_name }}-enterprise"
}
}
- name: commit changes
run: |
git config --global user.name "${{ steps.import-gpg.outputs.email }}"
git config --global user.email "${{ steps.import-gpg.outputs.email }}"
git add .
git commit -S -m "Deploy image tag ${RELEASE_TAG} to sandbox"
env:
RELEASE_TAG: ${{ github.event.release.tag_name }}
- name: Create Pull Request
uses: peter-evans/create-pull-request@v5
with:
author: ${{ github.actor }} <${{ github.actor }}@users.noreply.github.com>
branch: release-automation
token: ${{ secrets.VM_BOT_GH_TOKEN }}
delete-branch: true
title: "release ${{ github.event.release.tag_name }}"
body: |
Release [${{ github.event.release.tag_name }}](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/${{ github.event.release.tag_name }}) to sandbox
> Auto-generated by `Github Actions Bot`

View File

@@ -16,7 +16,6 @@ GO_BUILDINFO = -X '$(PKG_PREFIX)/lib/buildinfo.Version=$(APP_NAME)-$(DATEINFO_TA
include app/*/Makefile
include deployment/*/Makefile
include dashboards/Makefile
include snap/local/Makefile
include package/release/Makefile
@@ -28,21 +27,20 @@ all: \
vmauth-prod \
vmbackup-prod \
vmrestore-prod \
vmctl-prod \
vmalert-tool-prod
vmctl-prod
clean:
rm -rf bin/*
publish: package-base \
publish-victoria-metrics \
publish-victoria-logs \
publish-vmagent \
publish-vmalert \
publish-vmauth \
publish-vmbackup \
publish-vmrestore \
publish-vmctl \
publish-vmalert-tool
publish-vmctl
package: \
package-victoria-metrics \
@@ -52,8 +50,7 @@ package: \
package-vmauth \
package-vmbackup \
package-vmrestore \
package-vmctl \
package-vmalert-tool
package-vmctl
vmutils: \
vmagent \
@@ -61,8 +58,7 @@ vmutils: \
vmauth \
vmbackup \
vmrestore \
vmctl \
vmalert-tool
vmctl
vmutils-pure: \
vmagent-pure \
@@ -70,8 +66,7 @@ vmutils-pure: \
vmauth-pure \
vmbackup-pure \
vmrestore-pure \
vmctl-pure \
vmalert-tool-pure
vmctl-pure
vmutils-linux-amd64: \
vmagent-linux-amd64 \
@@ -79,8 +74,7 @@ vmutils-linux-amd64: \
vmauth-linux-amd64 \
vmbackup-linux-amd64 \
vmrestore-linux-amd64 \
vmctl-linux-amd64 \
vmalert-tool-linux-amd64
vmctl-linux-amd64
vmutils-linux-arm64: \
vmagent-linux-arm64 \
@@ -88,8 +82,7 @@ vmutils-linux-arm64: \
vmauth-linux-arm64 \
vmbackup-linux-arm64 \
vmrestore-linux-arm64 \
vmctl-linux-arm64 \
vmalert-tool-linux-arm64
vmctl-linux-arm64
vmutils-linux-arm: \
vmagent-linux-arm \
@@ -97,8 +90,7 @@ vmutils-linux-arm: \
vmauth-linux-arm \
vmbackup-linux-arm \
vmrestore-linux-arm \
vmctl-linux-arm \
vmalert-tool-linux-arm
vmctl-linux-arm
vmutils-linux-386: \
vmagent-linux-386 \
@@ -106,8 +98,7 @@ vmutils-linux-386: \
vmauth-linux-386 \
vmbackup-linux-386 \
vmrestore-linux-386 \
vmctl-linux-386 \
vmalert-tool-linux-386
vmctl-linux-386
vmutils-linux-ppc64le: \
vmagent-linux-ppc64le \
@@ -115,8 +106,7 @@ vmutils-linux-ppc64le: \
vmauth-linux-ppc64le \
vmbackup-linux-ppc64le \
vmrestore-linux-ppc64le \
vmctl-linux-ppc64le \
vmalert-tool-linux-ppc64le
vmctl-linux-ppc64le
vmutils-darwin-amd64: \
vmagent-darwin-amd64 \
@@ -124,8 +114,7 @@ vmutils-darwin-amd64: \
vmauth-darwin-amd64 \
vmbackup-darwin-amd64 \
vmrestore-darwin-amd64 \
vmctl-darwin-amd64 \
vmalert-tool-darwin-amd64
vmctl-darwin-amd64
vmutils-darwin-arm64: \
vmagent-darwin-arm64 \
@@ -133,8 +122,7 @@ vmutils-darwin-arm64: \
vmauth-darwin-arm64 \
vmbackup-darwin-arm64 \
vmrestore-darwin-arm64 \
vmctl-darwin-arm64 \
vmalert-tool-darwin-arm64
vmctl-darwin-arm64
vmutils-freebsd-amd64: \
vmagent-freebsd-amd64 \
@@ -142,8 +130,7 @@ vmutils-freebsd-amd64: \
vmauth-freebsd-amd64 \
vmbackup-freebsd-amd64 \
vmrestore-freebsd-amd64 \
vmctl-freebsd-amd64 \
vmalert-tool-freebsd-amd64
vmctl-freebsd-amd64
vmutils-openbsd-amd64: \
vmagent-openbsd-amd64 \
@@ -151,8 +138,7 @@ vmutils-openbsd-amd64: \
vmauth-openbsd-amd64 \
vmbackup-openbsd-amd64 \
vmrestore-openbsd-amd64 \
vmctl-openbsd-amd64 \
vmalert-tool-openbsd-amd64
vmctl-openbsd-amd64
vmutils-windows-amd64: \
vmagent-windows-amd64 \
@@ -160,8 +146,7 @@ vmutils-windows-amd64: \
vmauth-windows-amd64 \
vmbackup-windows-amd64 \
vmrestore-windows-amd64 \
vmctl-windows-amd64 \
vmalert-tool-windows-amd64
vmctl-windows-amd64
victoria-metrics-crossbuild: \
victoria-metrics-linux-386 \
@@ -189,7 +174,6 @@ vmutils-crossbuild: \
vmutils-windows-amd64
publish-release:
rm -rf bin/*
git checkout $(TAG) && LATEST_TAG=stable $(MAKE) release publish && \
git checkout $(TAG)-cluster && LATEST_TAG=cluster-stable $(MAKE) release publish && \
git checkout $(TAG)-enterprise && LATEST_TAG=enterprise-stable $(MAKE) release publish && \
@@ -197,6 +181,7 @@ publish-release:
release: \
release-victoria-metrics \
release-victoria-logs \
release-vmutils
release-victoria-metrics: \
@@ -357,8 +342,7 @@ release-vmutils-goos-goarch: \
vmauth-$(GOOS)-$(GOARCH)-prod \
vmbackup-$(GOOS)-$(GOARCH)-prod \
vmrestore-$(GOOS)-$(GOARCH)-prod \
vmctl-$(GOOS)-$(GOARCH)-prod \
vmalert-tool-$(GOOS)-$(GOARCH)-prod
vmctl-$(GOOS)-$(GOARCH)-prod
cd bin && \
tar --transform="flags=r;s|-$(GOOS)-$(GOARCH)||" -czf vmutils-$(GOOS)-$(GOARCH)-$(PKG_TAG).tar.gz \
vmagent-$(GOOS)-$(GOARCH)-prod \
@@ -367,7 +351,6 @@ release-vmutils-goos-goarch: \
vmbackup-$(GOOS)-$(GOARCH)-prod \
vmrestore-$(GOOS)-$(GOARCH)-prod \
vmctl-$(GOOS)-$(GOARCH)-prod \
vmalert-tool-$(GOOS)-$(GOARCH)-prod
&& sha256sum vmutils-$(GOOS)-$(GOARCH)-$(PKG_TAG).tar.gz \
vmagent-$(GOOS)-$(GOARCH)-prod \
vmalert-$(GOOS)-$(GOARCH)-prod \
@@ -375,7 +358,6 @@ release-vmutils-goos-goarch: \
vmbackup-$(GOOS)-$(GOARCH)-prod \
vmrestore-$(GOOS)-$(GOARCH)-prod \
vmctl-$(GOOS)-$(GOARCH)-prod \
vmalert-tool-$(GOOS)-$(GOARCH)-prod \
| sed s/-$(GOOS)-$(GOARCH)-prod/-prod/ > vmutils-$(GOOS)-$(GOARCH)-$(PKG_TAG)_checksums.txt
cd bin && rm -rf \
vmagent-$(GOOS)-$(GOARCH)-prod \
@@ -383,8 +365,7 @@ release-vmutils-goos-goarch: \
vmauth-$(GOOS)-$(GOARCH)-prod \
vmbackup-$(GOOS)-$(GOARCH)-prod \
vmrestore-$(GOOS)-$(GOARCH)-prod \
vmctl-$(GOOS)-$(GOARCH)-prod \
vmalert-tool-$(GOOS)-$(GOARCH)-prod
vmctl-$(GOOS)-$(GOARCH)-prod
release-vmutils-windows-goarch: \
vmagent-windows-$(GOARCH)-prod \
@@ -392,8 +373,7 @@ release-vmutils-windows-goarch: \
vmauth-windows-$(GOARCH)-prod \
vmbackup-windows-$(GOARCH)-prod \
vmrestore-windows-$(GOARCH)-prod \
vmctl-windows-$(GOARCH)-prod \
vmalert-tool-windows-$(GOARCH)-prod
vmctl-windows-$(GOARCH)-prod
cd bin && \
zip vmutils-windows-$(GOARCH)-$(PKG_TAG).zip \
vmagent-windows-$(GOARCH)-prod.exe \
@@ -402,7 +382,6 @@ release-vmutils-windows-goarch: \
vmbackup-windows-$(GOARCH)-prod.exe \
vmrestore-windows-$(GOARCH)-prod.exe \
vmctl-windows-$(GOARCH)-prod.exe \
vmalert-tool-windows-$(GOARCH)-prod.exe \
&& sha256sum vmutils-windows-$(GOARCH)-$(PKG_TAG).zip \
vmagent-windows-$(GOARCH)-prod.exe \
vmalert-windows-$(GOARCH)-prod.exe \
@@ -410,7 +389,6 @@ release-vmutils-windows-goarch: \
vmbackup-windows-$(GOARCH)-prod.exe \
vmrestore-windows-$(GOARCH)-prod.exe \
vmctl-windows-$(GOARCH)-prod.exe \
vmalert-tool-windows-$(GOARCH)-prod.exe \
> vmutils-windows-$(GOARCH)-$(PKG_TAG)_checksums.txt
cd bin && rm -rf \
vmagent-windows-$(GOARCH)-prod.exe \
@@ -418,8 +396,7 @@ release-vmutils-windows-goarch: \
vmauth-windows-$(GOARCH)-prod.exe \
vmbackup-windows-$(GOARCH)-prod.exe \
vmrestore-windows-$(GOARCH)-prod.exe \
vmctl-windows-$(GOARCH)-prod.exe \
vmalert-tool-windows-$(GOARCH)-prod.exe
vmctl-windows-$(GOARCH)-prod.exe
pprof-cpu:
go tool pprof -trim_path=github.com/VictoriaMetrics/VictoriaMetrics@ $(PPROF_FILE)
@@ -460,7 +437,7 @@ benchmark-pure:
vendor-update:
go get -u -d ./lib/...
go get -u -d ./app/...
go mod tidy -compat=1.20
go mod tidy -compat=1.19
go mod vendor
app-local:
@@ -486,7 +463,7 @@ golangci-lint: install-golangci-lint
golangci-lint run
install-golangci-lint:
which golangci-lint || curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(shell go env GOPATH)/bin v1.54.2
which golangci-lint || curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(shell go env GOPATH)/bin v1.51.2
govulncheck: install-govulncheck
govulncheck ./...
@@ -537,4 +514,3 @@ docs-sync:
SRC=app/vmctl/README.md DST=docs/vmctl.md OLD_URL='/vmctl.html' ORDER=8 TITLE=vmctl $(MAKE) copy-docs
SRC=app/vmgateway/README.md DST=docs/vmgateway.md OLD_URL='/vmgateway.html' ORDER=9 TITLE=vmgateway $(MAKE) copy-docs
SRC=app/vmbackupmanager/README.md DST=docs/vmbackupmanager.md OLD_URL='/vmbackupmanager.html' ORDER=10 TITLE=vmbackupmanager $(MAKE) copy-docs
SRC=app/vmalert-tool/README.md DST=docs/vmalert-tool.md OLD_URL='' ORDER=12 TITLE=vmalert-tool $(MAKE) copy-docs

341
README.md
View File

@@ -13,7 +13,7 @@
VictoriaMetrics is a fast, cost-effective and scalable monitoring solution and time series database.
VictoriaMetrics is available in [binary releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest),
VictoriaMetrics is available in [binary releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases),
[Docker images](https://hub.docker.com/r/victoriametrics/victoria-metrics/), [Snap packages](https://snapcraft.io/victoriametrics)
and [source code](https://github.com/VictoriaMetrics/VictoriaMetrics).
@@ -29,8 +29,7 @@ If you have questions about VictoriaMetrics, then feel free asking them at [Vict
[Contact us](mailto:info@victoriametrics.com) if you need enterprise support for VictoriaMetrics.
See [features available in enterprise package](https://docs.victoriametrics.com/enterprise.html).
Enterprise binaries can be downloaded and evaluated for free
from [the releases page](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest).
See how to request a free trial license [here](https://victoriametrics.com/products/enterprise/trial/).
from [the releases page](https://github.com/VictoriaMetrics/VictoriaMetrics/releases).
VictoriaMetrics is developed at a fast pace, so it is recommended periodically checking the [CHANGELOG](https://docs.victoriametrics.com/CHANGELOG.html) and performing [regular upgrades](#how-to-upgrade-victoriametrics).
@@ -111,7 +110,6 @@ Case studies:
* [Brandwatch](https://docs.victoriametrics.com/CaseStudies.html#brandwatch)
* [CERN](https://docs.victoriametrics.com/CaseStudies.html#cern)
* [COLOPL](https://docs.victoriametrics.com/CaseStudies.html#colopl)
* [Criteo](https://docs.victoriametrics.com/CaseStudies.html#criteo)
* [Dig Security](https://docs.victoriametrics.com/CaseStudies.html#dig-security)
* [Fly.io](https://docs.victoriametrics.com/CaseStudies.html#flyio)
* [German Research Center for Artificial Intelligence](https://docs.victoriametrics.com/CaseStudies.html#german-research-center-for-artificial-intelligence)
@@ -137,8 +135,7 @@ See also [articles and slides about VictoriaMetrics from our users](https://docs
### Install
To quickly try VictoriaMetrics, just download [VictoriaMetrics executable](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest)
or [Docker image](https://hub.docker.com/r/victoriametrics/victoria-metrics/) and start it with the desired command-line flags.
To quickly try VictoriaMetrics, just download [VictoriaMetrics executable](https://github.com/VictoriaMetrics/VictoriaMetrics/releases) or [Docker image](https://hub.docker.com/r/victoriametrics/victoria-metrics/) and start it with the desired command-line flags.
See also [QuickStart guide](https://docs.victoriametrics.com/Quick-Start.html) for additional information.
VictoriaMetrics can also be installed via these installation methods:
@@ -155,7 +152,7 @@ VictoriaMetrics can also be installed via these installation methods:
The following command-line flags are used the most:
* `-storageDataPath` - VictoriaMetrics stores all the data in this directory. Default path is `victoria-metrics-data` in the current working directory.
* `-retentionPeriod` - retention for stored data. Older data is automatically deleted. Default retention is 1 month (31 days). The minimum retention period is 24h or 1d. See [these docs](#retention) for more details.
* `-retentionPeriod` - retention for stored data. Older data is automatically deleted. Default retention is 1 month. The minimum retention period is 24h or 1d. See [the Retention section](#retention) for more details.
Other flags have good enough default values, so set them only if you really need this. Pass `-help` to see [all the available flags with description and default values](#list-of-command-line-flags).
@@ -176,8 +173,7 @@ VictoriaMetrics is developed at a fast pace, so it is recommended periodically c
### Environment variables
All the VictoriaMetrics components allow referring environment variables in `yaml` configuration files (such as `-promscrape.config`)
and in command-line flags via `%{ENV_VAR}` syntax.
All the VictoriaMetrics components allow referring environment variables in command-line flags via `%{ENV_VAR}` syntax.
For example, `-metricsAuthKey=%{METRICS_AUTH_KEY}` is automatically expanded to `-metricsAuthKey=top-secret`
if `METRICS_AUTH_KEY=top-secret` environment variable exists at VictoriaMetrics startup.
This expansion is performed by VictoriaMetrics itself.
@@ -213,61 +209,6 @@ vi $SNAP_DATA/var/snap/victoriametrics/current/etc/victoriametrics-scrape-config
After changes were made, trigger config re-read with the command `curl 127.0.0.1:8428/-/reload`.
### Running as Windows service
In order to run VictoriaMetrics as a Windows service it is required to create a service configuration for [WinSW](https://github.com/winsw/winsw)
and then install it as a service according to the following guide:
1. Create a service configuration:
```xml
<service>
<id>VictoriaMetrics</id>
<name>VictoriaMetrics</name>
<description>VictoriaMetrics</description>
<executable>%BASE%\victoria-metrics-windows-amd64-prod.exe"</executable>
<onfailure action="restart" delay="10 sec"/>
<onfailure action="restart" delay="20 sec"/>
<resetfailure>1 hour</resetfailure>
<arguments>-envflag.enable</arguments>
<priority>Normal</priority>
<stoptimeout>15 sec</stoptimeout>
<stopparentprocessfirst>true</stopparentprocessfirst>
<startmode>Automatic</startmode>
<waithint>15 sec</waithint>
<sleeptime>1 sec</sleeptime>
<logpath>%BASE%\logs</logpath>
<log mode="roll">
<sizeThreshold>10240</sizeThreshold>
<keepFiles>8</keepFiles>
</log>
<env name="loggerFormat" value="json" />
<env name="loggerOutput" value="stderr" />
<env name="promscrape_config" value="C:\Program Files\victoria-metrics\promscrape.yml" />
</service>
```
1. Install WinSW by following this [documentation](https://github.com/winsw/winsw#download).
1. Install VictoriaMetrics as a service by running the following from elevated PowerShell:
```console
winsw install VictoriaMetrics.xml
Get-Service VictoriaMetrics | Start-Service
```
See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3781) for more details.
## Prometheus setup
Add the following lines to Prometheus config file (it is usually located at `/etc/prometheus/prometheus.yml`) in order to send data to VictoriaMetrics:
@@ -329,7 +270,7 @@ too high memory consumption of Prometheus, then try to lower `max_samples_per_se
Keep in mind that these two params are tightly connected.
Read more about tuning remote write for Prometheus [here](https://prometheus.io/docs/practices/remote_write).
It is recommended upgrading Prometheus to [v2.12.0](https://github.com/prometheus/prometheus/releases/latest) or newer,
It is recommended upgrading Prometheus to [v2.12.0](https://github.com/prometheus/prometheus/releases) or newer,
since previous versions may have issues with `remote_write`.
Take a look also at [vmagent](https://docs.victoriametrics.com/vmagent.html)
@@ -356,11 +297,9 @@ See more in [description](https://github.com/VictoriaMetrics/grafana-datasource#
VictoriaMetrics is developed at a fast pace, so it is recommended periodically checking [the CHANGELOG page](https://docs.victoriametrics.com/CHANGELOG.html) and performing regular upgrades.
It is safe upgrading VictoriaMetrics to new versions unless [release notes](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest) say otherwise.
It is safe skipping multiple versions during the upgrade unless [release notes](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest) say otherwise.
It is recommended performing regular upgrades to the latest version, since it may contain important bug fixes, performance optimizations or new features.
It is safe upgrading VictoriaMetrics to new versions unless [release notes](https://github.com/VictoriaMetrics/VictoriaMetrics/releases) say otherwise. It is safe skipping multiple versions during the upgrade unless [release notes](https://github.com/VictoriaMetrics/VictoriaMetrics/releases) say otherwise. It is recommended performing regular upgrades to the latest version, since it may contain important bug fixes, performance optimizations or new features.
It is also safe downgrading to older versions unless [release notes](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest) say otherwise.
It is also safe downgrading to older versions unless [release notes](https://github.com/VictoriaMetrics/VictoriaMetrics/releases) say otherwise.
The following steps must be performed during the upgrade / downgrade procedure:
@@ -424,8 +363,6 @@ See the [example VMUI at VictoriaMetrics playground](https://play.victoriametric
* queries with the biggest average execution duration;
* queries that took the most summary time for execution.
This information is obtained from the `/api/v1/status/top_queries` HTTP endpoint.
## Active queries
[VMUI](#vmui) provides `active queries` tab, which shows currently execute queries.
@@ -435,8 +372,6 @@ It provides the following information per each query:
- The duration of the query execution.
- The client address, who initiated the query execution.
This information is obtained from the `/api/v1/status/active_queries` HTTP endpoint.
## Metrics explorer
[VMUI](#vmui) provides an ability to explore metrics exported by a particular `job` / `instance` in the following way:
@@ -468,16 +403,14 @@ matching the specified [series selector](https://prometheus.io/docs/prometheus/l
Cardinality explorer is built on top of [/api/v1/status/tsdb](#tsdb-stats).
See [cardinality explorer playground](https://play.victoriametrics.com/select/accounting/1/6a716b0f-38bc-4856-90ce-448fd713e3fe/prometheus/graph/#/cardinality).
See the example of using the cardinality explorer [here](https://victoriametrics.com/blog/cardinality-explorer/).
## Cardinality explorer statistic inaccuracy
In [cluster version of VictoriaMetrics](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html) each vmstorage tracks the stored time series individually.
vmselect requests stats via [/api/v1/status/tsdb](#tsdb-stats) API from each vmstorage node and merges the results by summing per-series stats.
This may lead to inflated values when samples for the same time series are spread across multiple vmstorage nodes
due to [replication](#replication) or [rerouting](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html?highlight=re-routes#cluster-availability).
See [cardinality explorer playground](https://play.victoriametrics.com/select/accounting/1/6a716b0f-38bc-4856-90ce-448fd713e3fe/prometheus/graph/#/cardinality).
See the example of using the cardinality explorer [here](https://victoriametrics.com/blog/cardinality-explorer/).
## How to apply new config to VictoriaMetrics
VictoriaMetrics is configured via command-line flags, so it must be restarted when new command-line flags should be applied:
@@ -539,7 +472,7 @@ dd_url: http://victoriametrics:8428/datadog
</div>
vmagent also can accept Datadog metrics format. Depending on where vmagent will forward data,
pick [single-node or cluster URL](https://docs.victoriametrics.com/url-examples.html#datadog) formats.
pick [single-node or cluster URL]((https://docs.victoriametrics.com/url-examples.html#datadog)) formats.
### Sending metrics to Datadog and VictoriaMetrics
@@ -682,28 +615,6 @@ Some plugins for Telegraf such as [fluentd](https://github.com/fangli/fluent-plu
or [Juniper/jitmon](https://github.com/Juniper/jtimon) send `SHOW DATABASES` query to `/query` and expect a particular database name in the response.
Comma-separated list of expected databases can be passed to VictoriaMetrics via `-influx.databaseNames` command-line flag.
### How to send data in InfluxDB v2 format
VictoriaMetrics exposes endpoint for InfluxDB v2 HTTP API at `/influx/api/v2/write` and `/api/v2/write`.
In order to write data with InfluxDB line protocol to local VictoriaMetrics using `curl`:
<div class="with-copy" markdown="1">
```console
curl -d 'measurement,tag1=value1,tag2=value2 field1=123,field2=1.23' -X POST 'http://localhost:8428/api/v2/write'
```
</div>
The `/api/v1/export` endpoint should return the following response:
```json
{"metric":{"__name__":"measurement_field1","tag1":"value1","tag2":"value2"},"values":[123],"timestamps":[1695902762311]}
{"metric":{"__name__":"measurement_field2","tag1":"value1","tag2":"value2"},"values":[1.23],"timestamps":[1695902762311]}
```
## How to send data from Graphite-compatible agents such as [StatsD](https://github.com/etsy/statsd)
Enable Graphite receiver in VictoriaMetrics by setting `-graphiteListenAddr` command line flag. For instance,
@@ -854,79 +765,6 @@ The `/api/v1/export` endpoint should return the following response:
Extra labels may be added to all the imported time series by passing `extra_label=name=value` query args.
For example, `/api/put?extra_label=foo=bar` would add `{foo="bar"}` label to all the ingested metrics.
## How to send data from NewRelic agent
VictoriaMetrics accepts data from [NewRelic infrastructure agent](https://docs.newrelic.com/docs/infrastructure/install-infrastructure-agent)
at `/newrelic/infra/v2/metrics/events/bulk` HTTP path.
VictoriaMetrics receives [Events](https://docs.newrelic.com/docs/infrastructure/manage-your-data/data-instrumentation/default-infrastructure-monitoring-data/#infrastructure-events)
from NewRelic agent at the given path, transforms them to [raw samples](https://docs.victoriametrics.com/keyConcepts.html#raw-samples)
according to [these docs](#newrelic-agent-data-mapping) before storing the raw samples to the database.
You need passing `COLLECTOR_URL` and `NRIA_LICENSE_KEY` environment variables to NewRelic infrastructure agent in order to send the collected metrics to VictoriaMetrics.
The `COLLECTOR_URL` must point to `/newrelic` HTTP endpoint at VictoriaMetrics, while the `NRIA_LICENSE_KEY` must contain NewRelic license key,
which can be obtained [here](https://newrelic.com/signup).
For example, if VictoriaMetrics runs at `localhost:8428`, then the following command can be used for running NewRelic infrastructure agent:
```console
COLLECTOR_URL="http://localhost:8428/newrelic" NRIA_LICENSE_KEY="NEWRELIC_LICENSE_KEY" ./newrelic-infra
```
### NewRelic agent data mapping
VictoriaMetrics maps [NewRelic Events](https://docs.newrelic.com/docs/infrastructure/manage-your-data/data-instrumentation/default-infrastructure-monitoring-data/#infrastructure-events)
to [raw samples](https://docs.victoriametrics.com/keyConcepts.html#raw-samples) in the following way:
1. Every numeric field is converted into a raw sample with the corresponding name.
1. The `eventType` and all the other fields with `string` value type are attached to every raw sample as [metric labels](https://docs.victoriametrics.com/keyConcepts.html#labels).
1. The `timestamp` field is used as timestamp for the ingested [raw sample](https://docs.victoriametrics.com/keyConcepts.html#raw-samples).
The `timestamp` field may be specified either in seconds or in milliseconds since the [Unix Epoch](https://en.wikipedia.org/wiki/Unix_time).
If the `timestamp` field is missing, then the raw sample is stored with the current timestamp.
For example, let's import the following NewRelic Events request to VictoriaMetrics:
```json
[
{
"Events":[
{
"eventType":"SystemSample",
"entityKey":"macbook-pro.local",
"cpuPercent":25.056660790748904,
"cpuUserPercent":8.687987912389374,
"cpuSystemPercent":16.36867287835953,
"cpuIOWaitPercent":0,
"cpuIdlePercent":74.94333920925109,
"cpuStealPercent":0,
"loadAverageOneMinute":5.42333984375,
"loadAverageFiveMinute":4.099609375,
"loadAverageFifteenMinute":3.58203125
}
]
}
]
```
Save this JSON into `newrelic.json` file and then use the following command in order to import it into VictoriaMetrics:
```console
curl -X POST -H 'Content-Type: application/json' --data-binary @newrelic.json http://localhost:8428/newrelic/infra/v2/metrics/events/bulk
```
Let's fetch the ingested data via [data export API](#how-to-export-data-in-json-line-format):
```console
curl http://localhost:8428/api/v1/export -d 'match={eventType="SystemSample"}'
{"metric":{"__name__":"cpuStealPercent","entityKey":"macbook-pro.local","eventType":"SystemSample"},"values":[0],"timestamps":[1697407970000]}
{"metric":{"__name__":"loadAverageFiveMinute","entityKey":"macbook-pro.local","eventType":"SystemSample"},"values":[4.099609375],"timestamps":[1697407970000]}
{"metric":{"__name__":"cpuIOWaitPercent","entityKey":"macbook-pro.local","eventType":"SystemSample"},"values":[0],"timestamps":[1697407970000]}
{"metric":{"__name__":"cpuSystemPercent","entityKey":"macbook-pro.local","eventType":"SystemSample"},"values":[16.368672878359],"timestamps":[1697407970000]}
{"metric":{"__name__":"loadAverageOneMinute","entityKey":"macbook-pro.local","eventType":"SystemSample"},"values":[5.42333984375],"timestamps":[1697407970000]}
{"metric":{"__name__":"cpuUserPercent","entityKey":"macbook-pro.local","eventType":"SystemSample"},"values":[8.687987912389],"timestamps":[1697407970000]}
{"metric":{"__name__":"cpuIdlePercent","entityKey":"macbook-pro.local","eventType":"SystemSample"},"values":[74.9433392092],"timestamps":[1697407970000]}
{"metric":{"__name__":"loadAverageFifteenMinute","entityKey":"macbook-pro.local","eventType":"SystemSample"},"values":[3.58203125],"timestamps":[1697407970000]}
{"metric":{"__name__":"cpuPercent","entityKey":"macbook-pro.local","eventType":"SystemSample"},"values":[25.056660790748],"timestamps":[1697407970000]}
```
## Prometheus querying API usage
VictoriaMetrics supports the following handlers from [Prometheus querying API](https://prometheus.io/docs/prometheus/latest/querying/api/):
@@ -991,7 +829,7 @@ Additionally, VictoriaMetrics provides the following handlers:
* `/api/v1/series/count` - returns the total number of time series in the database. Some notes:
* the handler scans all the inverted index, so it can be slow if the database contains tens of millions of time series;
* the handler may count [deleted time series](#how-to-delete-time-series) additionally to normal time series due to internal implementation restrictions;
* `/api/v1/status/active_queries` - returns the list of currently running queries. This list is also available at [`active queries` page at VMUI](#active-queries).
* `/api/v1/status/active_queries` - returns a list of currently running queries.
* `/api/v1/status/top_queries` - returns the following query lists:
* the most frequently executed queries - `topByCount`
* queries with the biggest average execution duration - `topByAvgDuration`
@@ -1001,8 +839,6 @@ Additionally, VictoriaMetrics provides the following handlers:
For example, request to `/api/v1/status/top_queries?topN=5&maxLifetime=30s` would return up to 5 queries per list, which were executed during the last 30 seconds.
VictoriaMetrics tracks the last `-search.queryStats.lastQueriesCount` queries with durations at least `-search.queryStats.minQueryDuration`.
See also [`top queries` page at VMUI](#top-queries).
### Timestamp formats
VictoriaMetrics accepts the following formats for `time`, `start` and `end` query args
@@ -1071,14 +907,14 @@ VictoriaMetrics supports the following handlers from [Graphite Tags API](https:/
## How to build from sources
We recommend using either [binary releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest) or
We recommend using either [binary releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases) or
[docker images](https://hub.docker.com/r/victoriametrics/victoria-metrics/) instead of building VictoriaMetrics
from sources. Building from sources is reasonable when developing additional features specific
to your needs or when testing bugfixes.
### Development build
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.20.
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.19.
1. Run `make victoria-metrics` from the root folder of [the repository](https://github.com/VictoriaMetrics/VictoriaMetrics).
It builds `victoria-metrics` binary and puts it into the `bin` folder.
@@ -1094,7 +930,7 @@ ARM build may run on Raspberry Pi or on [energy-efficient ARM servers](https://b
### Development ARM build
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.20.
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.19.
1. Run `make victoria-metrics-linux-arm` or `make victoria-metrics-linux-arm64` from the root folder of [the repository](https://github.com/VictoriaMetrics/VictoriaMetrics).
It builds `victoria-metrics-linux-arm` or `victoria-metrics-linux-arm64` binary respectively and puts it into the `bin` folder.
@@ -1108,7 +944,7 @@ ARM build may run on Raspberry Pi or on [energy-efficient ARM servers](https://b
`Pure Go` mode builds only Go code without [cgo](https://golang.org/cmd/cgo/) dependencies.
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.20.
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.19.
1. Run `make victoria-metrics-pure` from the root folder of [the repository](https://github.com/VictoriaMetrics/VictoriaMetrics).
It builds `victoria-metrics-pure` binary and puts it into the `bin` folder.
@@ -1229,9 +1065,7 @@ VictoriaMetrics provides the following handlers for exporting data:
Send a request to `http://<victoriametrics-addr>:8428/api/v1/export?match[]=<timeseries_selector_for_export>`,
where `<timeseries_selector_for_export>` may contain any [time series selector](https://prometheus.io/docs/prometheus/latest/querying/basics/#time-series-selectors)
for metrics to export. Use `{__name__!=""}` selector for fetching all the time series.
The response would contain all the data for the selected time series in JSON line format - see [these docs](#json-line-format) for details on this format.
The response would contain all the data for the selected time series in [JSON streaming format](http://ndjson.org/).
Each JSON line contains samples for a single time series. An example output:
```json
@@ -1361,8 +1195,6 @@ check for changes in `vm_rows_invalid_total` (exported by server side) metric.
### How to import data in JSON line format
VictoriaMetrics accepts metrics data in JSON line format at `/api/v1/import` endpoint. See [these docs](#json-line-format) for details on this format.
Example for importing data obtained via [/api/v1/export](#how-to-export-data-in-json-line-format):
```console
@@ -1527,54 +1359,13 @@ Note that it could be required to flush response cache after importing historica
VictoriaMetrics also may scrape Prometheus targets - see [these docs](#how-to-scrape-prometheus-exporters-such-as-node-exporter).
### Sending data via OpenTelemetry
## Sending data via OpenTelemetry
VictoriaMetrics supports data ingestion via [OpenTelemetry protocol for metrics](https://github.com/open-telemetry/opentelemetry-specification/blob/ffddc289462dfe0c2041e3ca42a7b1df805706de/specification/metrics/data-model.md) at `/opentelemetry/api/v1/push` path.
VictoriaMetrics expects `protobuf`-encoded requests at `/opentelemetry/api/v1/push`.
Set HTTP request header `Content-Encoding: gzip` when sending gzip-compressed data to `/opentelemetry/api/v1/push`.
## JSON line format
VictoriaMetrics accepts data in JSON line format at [/api/v1/import](#how-to-import-data-in-json-line-format)
and exports data in this format at [/api/v1/export](#how-to-export-data-in-json-line-format).
The format follows [JSON streaming concept](http://ndjson.org/), e.g. each line contains JSON object with metrics data in the following format:
```
{
// metric contans metric name plus labels for a particular time series
"metric":{
"__name__": "metric_name", // <- this is metric name
// Other labels for the time series
"label1": "value1",
"label2": "value2",
...
"labelN": "valueN"
},
// values contains raw sample values for the given time series
"values": [1, 2.345, -678],
// timestamps contains raw sample UNIX timestamps in milliseconds for the given time series
// every timestamp is associated with the value at the corresponding position
"timestamps": [1549891472010,1549891487724,1549891503438]
}
```
Note that every JSON object must be written in a single line, e.g. all the newline chars must be removed from it.
Every line length is limited by the value passed to `-import.maxLineLen` command-line flag (by default this is 100MB).
It is recommended passing 1K-10K samples per line for achieving the maximum data ingestion performance at [/api/v1/import](#how-to-import-data-in-json-line-format).
Too long JSON lines may increase RAM usage at VictoriaMetrics side.
It is OK to split [raw samples](https://docs.victoriametrics.com/keyConcepts.html#raw-samples)
for the same [time series](https://docs.victoriametrics.com/keyConcepts.html#time-series) across multiple lines.
The number of lines in JSON line document can be arbitrary.
## Relabeling
VictoriaMetrics supports Prometheus-compatible relabeling for all the ingested metrics if `-relabelConfig` command-line flag points
@@ -1818,8 +1609,8 @@ See also [how to work with snapshots](#how-to-work-with-snapshots).
## Retention
Retention is configured with the `-retentionPeriod` command-line flag, which takes a number followed by a time unit
character - `h(ours)`, `d(ays)`, `w(eeks)`, `y(ears)`. If the time unit is not specified, a month (31 days) is assumed.
For instance, `-retentionPeriod=3` means that the data will be stored for 3 months (93 days) and then deleted.
character - `h(ours)`, `d(ays)`, `w(eeks)`, `y(ears)`. If the time unit is not specified, a month is assumed.
For instance, `-retentionPeriod=3` means that the data will be stored for 3 months and then deleted.
The default retention period is one month. The **minimum retention** period is 24h or 1d.
Data is split in per-month partitions inside `<-storageDataPath>/data/{small,big}` folders.
@@ -1862,10 +1653,9 @@ See [these docs](https://docs.victoriametrics.com/guides/guide-vmcluster-multipl
which allow configuring multiple retentions for distinct sets of time series matching the configured [series filters](https://docs.victoriametrics.com/keyConcepts.html#filtering)
via `-retentionFilter` command-line flag. This flag accepts `filter:duration` options, where `filter` must be
a valid [series filter](https://docs.victoriametrics.com/keyConcepts.html#filtering), while the `duration`
must contain valid [retention](#retention) for time series matching the given `filter`.
The `duration` of the `-retentionFilter` must be lower or equal to [-retentionPeriod](#retention) flag value.
If series doesn't match any configured `-retentionFilter`, then the retention configured via [-retentionPeriod](#retention)
command-line flag is applied to it. If series matches multiple configured retention filters, then the smallest retention is applied.
must contain valid [retention](#retention) for time series matching the given `filter`. If series doesn't match
any configured `-retentionFilter`, then the retention configured via [-retentionPeriod](#retention) command-line flag is applied to it.
If series matches multiple configured retention filters, then the smallest retention is applied.
For example, the following config sets 3 days retention for time series with `team="juniors"` label,
30 days retention for time series with `env="dev"` or `env="staging"` label and 1 year retention for the remaining time series:
@@ -1886,8 +1676,7 @@ to historical data.
See [how to configure multiple retentions in VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#retention-filters).
Retention filters can be evaluated for free by downloading and using enterprise binaries from [the releases page](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest).
See how to request a free trial license [here](https://victoriametrics.com/products/enterprise/trial/).
Retention filters can be evaluated for free by downloading and using enterprise binaries from [the releases page](https://github.com/VictoriaMetrics/VictoriaMetrics/releases).
## Downsampling
@@ -1904,12 +1693,7 @@ Downsampling happens during [background merges](https://docs.victoriametrics.com
and can't be performed if there is not enough of free disk space or if vmstorage
is in [read-only mode](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#readonly-mode).
Please, note that intervals of `-downsampling.period` must be multiples of each other.
In case [deduplication](https://docs.victoriametrics.com/#deduplication) is enabled value of `-dedup.minScrapeInterval` must also be multiple of `-downsampling.period` intervals.
This is required to ensure consistency of deduplication and downsampling results.
The downsampling can be evaluated for free by downloading and using enterprise binaries from [the releases page](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest).
See how to request a free trial license [here](https://victoriametrics.com/products/enterprise/trial/).
The downsampling can be evaluated for free by downloading and using enterprise binaries from [the releases page](https://github.com/VictoriaMetrics/VictoriaMetrics/releases).
## Multi-tenancy
@@ -1972,7 +1756,7 @@ and [the general security page at VictoriaMetrics website](https://victoriametri
The only option is increasing the limit on [the number of open files in the OS](https://medium.com/@muhammadtriwibowo/set-permanently-ulimit-n-open-files-in-ubuntu-4d61064429a).
The recommendation is not specific for VictoriaMetrics only but also for any service which handles many HTTP connections and stores data on disk.
* VictoriaMetrics is a write-heavy application and its performance depends on disk performance. So be careful with other
applications or utilities (like [fstrim](https://manpages.ubuntu.com/manpages/lunar/en/man8/fstrim.8.html))
applications or utilities (like [fstrim](http://manpages.ubuntu.com/manpages/bionic/man8/fstrim.8.html))
which could [exhaust disk resources](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1521).
* The recommended filesystem is `ext4`, the recommended persistent storage is [persistent HDD-based disk on GCP](https://cloud.google.com/compute/docs/disks/#pdspecs),
since it is protected from hardware failures via internal replication and it can be [resized on the fly](https://cloud.google.com/compute/docs/disks/add-persistent-disk#resize_pd).
@@ -2001,9 +1785,9 @@ Graphs on the dashboards contain useful hints - hover the `i` icon in the top le
We recommend setting up [alerts](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/master/deployment/docker#alerts)
via [vmalert](https://docs.victoriametrics.com/vmalert.html) or via Prometheus.
VictoriaMetrics exposes currently running queries and their execution times at [`active queries` page](#active-queries).
VictoriaMetrics exposes currently running queries and their execution times at `/api/v1/status/active_queries` page.
VictoriaMetrics exposes queries, which take the most time to execute, at [`top queries` page](#top-queries).
VictoriaMetrics exposes queries, which take the most time to execute, at `/api/v1/status/top_queries` page.
See also [VictoriaMetrics Monitoring](https://victoriametrics.com/blog/victoriametrics-monitoring/)
and [troubleshooting docs](https://docs.victoriametrics.com/Troubleshooting.html).
@@ -2138,7 +1922,7 @@ and [cardinality explorer docs](#cardinality-explorer).
* It is recommended inspecting logs during troubleshooting, since they may contain useful information.
* It is recommended upgrading to the latest available release from [this page](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest),
* It is recommended upgrading to the latest available release from [this page](https://github.com/VictoriaMetrics/VictoriaMetrics/releases),
since the encountered issue could be already fixed there.
* It is recommended to have at least 50% of spare resources for CPU, disk IO and RAM, so VictoriaMetrics could handle short spikes in the workload without performance issues.
@@ -2148,11 +1932,9 @@ and [cardinality explorer docs](#cardinality-explorer).
has at least 20% of free space. The remaining amount of free space
can be [monitored](#monitoring) via `vm_free_disk_space_bytes` metric. The total size of data
stored on the disk can be monitored via sum of `vm_data_size_bytes` metrics.
* If you run VictoriaMetrics on a host with 16 or more CPU cores, then it may be needed to tune the `-search.maxWorkersPerQuery` command-line flag
in order to improve query performance. If VictoriaMetrics serves big number of concurrent `select` queries, then try reducing the value for this flag.
If VcitoriaMetrics serves heavy queries, which select `>10K` of [time series](https://docs.victoriametrics.com/keyConcepts.html#time-series) and/or process `>100M`
of [raw samples](https://docs.victoriametrics.com/keyConcepts.html#raw-samples) per query, then try setting the value for this flag to the number of available CPU cores.
See also `vm_merge_need_free_disk_space` metrics, which are set to values higher than 0
if background merge cannot be initiated due to free disk space shortage. The value shows the number of per-month partitions,
which would start background merge if they had more free disk space.
* VictoriaMetrics buffers incoming data in memory for up to a few seconds before flushing it to persistent storage.
This may lead to the following "issues":
@@ -2190,19 +1972,15 @@ and [cardinality explorer docs](#cardinality-explorer).
This suppresses default gap filling algorithm used by VictoriaMetrics - by default it assumes
each time series is continuous instead of discrete, so it fills gaps between real samples with regular intervals.
* Metrics and labels leading to [high cardinality](https://docs.victoriametrics.com/FAQ.html#what-is-high-cardinality)
or [high churn rate](https://docs.victoriametrics.com/FAQ.html#what-is-high-churn-rate) can be determined
via [cardinality explorer](#cardinality-explorer) and via [/api/v1/status/tsdb](#tsdb-stats) endpoint.
* Metrics and labels leading to [high cardinality](https://docs.victoriametrics.com/FAQ.html#what-is-high-cardinality) or [high churn rate](https://docs.victoriametrics.com/FAQ.html#what-is-high-churn-rate) can be determined via [cardinality explorer](#cardinality-explorer) and via [/api/v1/status/tsdb](#tsdb-stats) endpoint.
* New time series can be logged if `-logNewSeries` command-line flag is passed to VictoriaMetrics.
* VictoriaMetrics limits the number of labels per each metric with `-maxLabelsPerTimeseries` command-line flag
and drops superflouos labels. This prevents from ingesting metrics with too many labels.
It is recommended [monitoring](#monitoring) `vm_metrics_with_dropped_labels_total`
* VictoriaMetrics limits the number of labels per each metric with `-maxLabelsPerTimeseries` command-line flag.
This prevents from ingesting metrics with too many labels. It is recommended [monitoring](#monitoring) `vm_metrics_with_dropped_labels_total`
metric in order to determine whether `-maxLabelsPerTimeseries` must be adjusted for your workload.
* If you store Graphite metrics like `foo.bar.baz` in VictoriaMetrics, then `{__graphite__="foo.*.baz"}` filter can be used for selecting such metrics.
See [these docs](#selecting-graphite-metrics) for details. You can also query Graphite metrics with [Graphite querying API](#graphite-render-api-usage).
* If you store Graphite metrics like `foo.bar.baz` in VictoriaMetrics, then `{__graphite__="foo.*.baz"}` filter can be used for selecting such metrics. See [these docs](#selecting-graphite-metrics) for details.
* VictoriaMetrics ignores `NaN` values during data ingestion.
@@ -2344,8 +2122,7 @@ See also [high availability docs](#high-availability) and [backup docs](#backups
VictoriaMetrics supports backups via [vmbackup](https://docs.victoriametrics.com/vmbackup.html)
and [vmrestore](https://docs.victoriametrics.com/vmrestore.html) tools.
We also provide [vmbackupmanager](https://docs.victoriametrics.com/vmbackupmanager.html) tool for enterprise subscribers.
Enterprise binaries can be downloaded and evaluated for free from [the releases page](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest).
See how to request a free trial license [here](https://victoriametrics.com/products/enterprise/trial/).
Enterprise binaries can be downloaded and evaluated for free from [the releases page](https://github.com/VictoriaMetrics/VictoriaMetrics/releases).
## vmalert
@@ -2420,14 +2197,12 @@ Contact us with any questions regarding VictoriaMetrics at [info@victoriametrics
Feel free asking any questions regarding VictoriaMetrics:
* [Slack](https://slack.victoriametrics.com/)
* [Twitter](https://twitter.com/VictoriaMetrics/)
* [Linkedin](https://www.linkedin.com/company/victoriametrics/)
* [Reddit](https://www.reddit.com/r/VictoriaMetrics/)
* [Telegram-en](https://t.me/VictoriaMetrics_en)
* [Telegram-ru](https://t.me/VictoriaMetrics_ru1)
* [Google groups](https://groups.google.com/forum/#!forum/victorametrics-users)
* [Mastodon](https://mastodon.social/@victoriametrics/)
* [slack](https://slack.victoriametrics.com/)
* [linkedin](https://www.linkedin.com/company/victoriametrics/)
* [reddit](https://www.reddit.com/r/VictoriaMetrics/)
* [telegram-en](https://t.me/VictoriaMetrics_en)
* [telegram-ru](https://t.me/VictoriaMetrics_ru1)
* [google groups](https://groups.google.com/forum/#!forum/victorametrics-users)
If you like VictoriaMetrics and want to contribute, then we need the following:
@@ -2507,7 +2282,7 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
-denyQueryTracing
Whether to disable the ability to trace queries. See https://docs.victoriametrics.com/#query-tracing
-downsampling.period array
Comma-separated downsampling periods in the format 'offset:period'. For example, '30d:10m' instructs to leave a single sample per 10 minutes for samples older than 30 days. When setting multiple downsampling periods, it is necessary for the periods to be multiples of each other. See https://docs.victoriametrics.com/#downsampling for details. This flag is available only in Enterprise binaries. See https://docs.victoriametrics.com/enterprise.html
Comma-separated downsampling periods in the format 'offset:period'. For example, '30d:10m' instructs to leave a single sample per 10 minutes for samples older than 30 days. When setting multiple downsampling periods, it is necessary for the periods to be multiples of each other. See https://docs.victoriametrics.com/#downsampling for details. This flag is available only in VictoriaMetrics enterprise. See https://docs.victoriametrics.com/enterprise.html
Supports an array of values separated by comma or specified via multiple flags.
-dryRun
Whether to check config files without running VictoriaMetrics. The following config files are checked: -promscrape.config, -relabelConfig and -streamAggr.config. Unknown config entries aren't allowed in -promscrape.config by default. This can be changed with -promscrape.config.strictParse=false command-line flag
@@ -2518,9 +2293,7 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
-envflag.prefix string
Prefix for environment variables if -envflag.enable is set
-eula
Deprecated, please use -license or -licenseFile flags instead. By specifying this flag, you confirm that you have an enterprise license and accept the ESA https://victoriametrics.com/legal/esa/ . This flag is available only in Enterprise binaries. See https://docs.victoriametrics.com/enterprise.html
-filestream.disableFadvise
Whether to disable fadvise() syscall when reading large data files. The fadvise() syscall prevents from eviction of recently accessed data from OS page cache during background merges and backups. In some rare cases it is better to disable the syscall if it uses too much CPU
By specifying this flag, you confirm that you have an enterprise license and accept the EULA https://victoriametrics.com/assets/VM_EULA.pdf . This flag is available only in VictoriaMetrics enterprise. See https://docs.victoriametrics.com/enterprise.html
-finalMergeDelay duration
The delay before starting final merge for per-month partition after no new data is ingested into it. Final merge may require additional disk IO and CPU resources. Final merge may increase query speed and reduce disk space usage in some cases. Zero value disables final merge
-flagsAuthKey string
@@ -2590,12 +2363,6 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
Whether to disable caches for interned strings. This may reduce memory usage at the cost of higher CPU usage. See https://en.wikipedia.org/wiki/String_interning . See also -internStringCacheExpireDuration and -internStringMaxLen
-internStringMaxLen int
The maximum length for strings to intern. A lower limit may save memory at the cost of higher CPU usage. See https://en.wikipedia.org/wiki/String_interning . See also -internStringDisableCache and -internStringCacheExpireDuration (default 500)
-license string
Lisense key for VictoriaMetrics Enterprise. See https://victoriametrics.com/products/enterprise/ . Trial Enterprise license can be obtained from https://victoriametrics.com/products/enterprise/trial/ . This flag is available only in Enterprise binaries. The license key can be also passed via file specified by -licenseFile command-line flag
-license.forceOffline
Whether to enable offline verification for VictoriaMetrics Enterprise license key, which has been passed either via -license or via -licenseFile command-line flag. The issued license key must support offline verification feature. Contact info@victoriametrics.com if you need offline license verification. This flag is avilable only in Enterprise binaries
-licenseFile string
Path to file with license key for VictoriaMetrics Enterprise. See https://victoriametrics.com/products/enterprise/ . Trial Enterprise license can be obtained from https://victoriametrics.com/products/enterprise/trial/ . This flag is available only in Enterprise binaries. The license key can be also passed inline via -license command-line flag
-logNewSeries
Whether to log new series. This option is for debug purposes only. It can lead to performance issues when big number of new series are ingested into VictoriaMetrics
-loggerDisableTimestamps
@@ -2615,7 +2382,7 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
-loggerWarnsPerSecondLimit int
Per-second limit on the number of WARN messages. If more than the given number of warns are emitted per second, then the remaining warns are suppressed. Zero values disable the rate limit
-maxConcurrentInserts int
The maximum number of concurrent insert requests. Default value should work for most cases, since it minimizes the memory usage. The default value can be increased when clients send data over slow networks. See also -insert.maxQueueDuration (default 8)
The maximum number of concurrent insert requests. The default value should work for most cases, since it minimizes memory usage. The default value can be increased when clients send data over slow networks. See also -insert.maxQueueDuration (default 8)
-maxInsertRequestSize size
The maximum size in bytes of a single Prometheus remote_write API request
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 33554432)
@@ -2653,16 +2420,14 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
Items in the previous caches are removed when the percent of requests it serves becomes lower than this value. Higher values reduce memory usage at the cost of higher CPU usage. See also -cacheExpireDuration (default 0.1)
-promscrape.azureSDCheckInterval duration
Interval for checking for changes in Azure. This works only if azure_sd_configs is configured in '-promscrape.config' file. See https://docs.victoriametrics.com/sd_configs.html#azure_sd_configs for details (default 1m0s)
-promscrape.cluster.memberLabel string
If non-empty, then the label with this name and the -promscrape.cluster.memberNum value is added to all the scraped metrics. See https://docs.victoriametrics.com/vmagent.html#scraping-big-number-of-targets for more info
-promscrape.cluster.memberNum string
The number of vmagent instance in the cluster of scrapers. It must be a unique value in the range 0 ... promscrape.cluster.membersCount-1 across scrapers in the cluster. Can be specified as pod name of Kubernetes StatefulSet - pod-name-Num, where Num is a numeric part of pod name. See also -promscrape.cluster.memberLabel . See https://docs.victoriametrics.com/vmagent.html#scraping-big-number-of-targets for more info (default "0")
The number of number in the cluster of scrapers. It must be a unique value in the range 0 ... promscrape.cluster.membersCount-1 across scrapers in the cluster. Can be specified as pod name of Kubernetes StatefulSet - pod-name-Num, where Num is a numeric part of pod name (default "0")
-promscrape.cluster.membersCount int
The number of members in a cluster of scrapers. Each member must have a unique -promscrape.cluster.memberNum in the range 0 ... promscrape.cluster.membersCount-1 . Each member then scrapes roughly 1/N of all the targets. By default, cluster scraping is disabled, i.e. a single scraper scrapes all the targets. See https://docs.victoriametrics.com/vmagent.html#scraping-big-number-of-targets for more info (default 1)
The number of members in a cluster of scrapers. Each member must have a unique -promscrape.cluster.memberNum in the range 0 ... promscrape.cluster.membersCount-1 . Each member then scrapes roughly 1/N of all the targets. By default, cluster scraping is disabled, i.e. a single scraper scrapes all the targets
-promscrape.cluster.name string
Optional name of the cluster. If multiple vmagent clusters scrape the same targets, then each cluster must have unique name in order to properly de-duplicate samples received from these clusters. See https://docs.victoriametrics.com/vmagent.html#scraping-big-number-of-targets for more info
Optional name of the cluster. If multiple vmagent clusters scrape the same targets, then each cluster must have unique name in order to properly de-duplicate samples received from these clusters. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2679
-promscrape.cluster.replicationFactor int
The number of members in the cluster, which scrape the same targets. If the replication factor is greater than 1, then the deduplication must be enabled at remote storage side. See https://docs.victoriametrics.com/vmagent.html#scraping-big-number-of-targets for more info (default 1)
The number of members in the cluster, which scrape the same targets. If the replication factor is greater than 1, then the deduplication must be enabled at remote storage side. See https://docs.victoriametrics.com/#deduplication (default 1)
-promscrape.config string
Optional path to Prometheus config file with 'scrape_configs' section containing targets to scrape. The path can point to local file and to http url. See https://docs.victoriametrics.com/#how-to-scrape-prometheus-exporters-such-as-node-exporter for details
-promscrape.config.dryRun
@@ -2753,7 +2518,7 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
-relabelConfig string
Optional path to a file with relabeling rules, which are applied to all the ingested metrics. The path can point either to local file or to http url. See https://docs.victoriametrics.com/#relabeling for details. The config is reloaded on SIGHUP signal
-retentionFilter array
Retention filter in the format 'filter:retention'. For example, '{env="dev"}:3d' configures the retention for time series with env="dev" label to 3 days. See https://docs.victoriametrics.com/#retention-filters for details. This flag is available only in Enterprise binaries. See https://docs.victoriametrics.com/enterprise.html
Retention filter in the format 'filter:retention'. For example, '{env="dev"}:3d' configures the retention for time series with env="dev" label to 3 days. See https://docs.victoriametrics.com/#retention-filters for details. This flag is available only in VictoriaMetrics enterprise. See https://docs.victoriametrics.com/enterprise.html
Supports an array of values separated by comma or specified via multiple flags.
-retentionPeriod value
Data with timestamps outside the retentionPeriod is automatically deleted. The minimum retentionPeriod is 24h or 1d. See also -retentionFilter
@@ -2831,8 +2596,6 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
The maximum number of tag values returned from /api/v1/label/<label_name>/values (default 100000)
-search.maxUniqueTimeseries int
The maximum number of unique time series, which can be selected during /api/v1/query and /api/v1/query_range queries. This option allows limiting memory usage (default 300000)
-search.maxWorkersPerQuery int
The maximum number of CPU cores a single query can use. The default value should work good for most cases. The flag can be set to lower values for improving performance of big number of concurrently executed queries. The flag can be set to bigger values for improving performance of heavy queries, which scan big number of time series (>10K) and/or big number of samples (>100M). There is no sense in setting this flag to values bigger than the number of CPU cores available on the system (default 4)
-search.minStalenessInterval duration
The minimum interval for staleness calculations. This flag could be useful for removing gaps on graphs generated from time series with irregular intervals between samples. See also '-search.maxStalenessInterval'
-search.noStaleMarkers

View File

@@ -5,8 +5,8 @@
| Version | Supported |
|---------|--------------------|
| [latest release](https://docs.victoriametrics.com/CHANGELOG.html) | :white_check_mark: |
| v1.93.x LTS release | :white_check_mark: |
| v1.87.x LTS release | :white_check_mark: |
| v1.79.x LTS release | :white_check_mark: |
| other releases | :x: |
## Reporting a Vulnerability

View File

@@ -117,7 +117,6 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
{"expand-with-exprs", "WITH expressions' tutorial"},
{"api/v1/targets", "advanced information about discovered targets in JSON format"},
{"config", "-promscrape.config contents"},
{"stream-agg", "streaming aggregation status"},
{"metrics", "available service metrics"},
{"flags", "command-line flags"},
{"api/v1/status/tsdb", "tsdb status page"},

View File

@@ -3,7 +3,6 @@ package elasticsearch
import (
"bufio"
"errors"
"flag"
"fmt"
"io"
"math"
@@ -12,8 +11,6 @@ import (
"strings"
"time"
"github.com/VictoriaMetrics/metrics"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vlinsert/insertutils"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vlstorage"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bufferedwriter"
@@ -24,10 +21,7 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
)
var (
elasticsearchVersion = flag.String("elasticsearch.version", "8.9.0", "Elasticsearch version to report to client")
"github.com/VictoriaMetrics/metrics"
)
// RequestHandler processes Elasticsearch insert requests
@@ -66,9 +60,9 @@ func RequestHandler(path string, w http.ResponseWriter, r *http.Request) bool {
// See the latest available version for Elasticsearch at https://github.com/elastic/elasticsearch/releases
fmt.Fprintf(w, `{
"version": {
"number": %q
"number": "8.8.0"
}
}`, *elasticsearchVersion)
}`)
case http.MethodHead:
// Return empty response for Logstash ping request.
}
@@ -94,32 +88,22 @@ func RequestHandler(path string, w http.ResponseWriter, r *http.Request) bool {
httpserver.Errorf(w, r, "%s", err)
return true
}
if err := vlstorage.CanWriteData(); err != nil {
httpserver.Errorf(w, r, "%s", err)
return true
}
lr := logstorage.GetLogRows(cp.StreamFields, cp.IgnoreFields)
processLogMessage := cp.GetProcessLogMessageFunc(lr)
isGzip := r.Header.Get("Content-Encoding") == "gzip"
n, err := readBulkRequest(r.Body, isGzip, cp.TimeField, cp.MsgField, processLogMessage)
vlstorage.MustAddRows(lr)
logstorage.PutLogRows(lr)
if err != nil {
logger.Warnf("cannot decode log message #%d in /_bulk request: %s", n, err)
return true
}
vlstorage.MustAddRows(lr)
logstorage.PutLogRows(lr)
tookMs := time.Since(startTime).Milliseconds()
bw := bufferedwriter.Get(w)
defer bufferedwriter.Put(bw)
WriteBulkResponse(bw, n, tookMs)
_ = bw.Flush()
// update bulkRequestDuration only for successfully parsed requests
// There is no need in updating bulkRequestDuration for request errors,
// since their timings are usually much smaller than the timing for successful request parsing.
bulkRequestDuration.UpdateDuration(startTime)
return true
default:
return false
@@ -127,9 +111,7 @@ func RequestHandler(path string, w http.ResponseWriter, r *http.Request) bool {
}
var (
bulkRequestsTotal = metrics.NewCounter(`vl_http_requests_total{path="/insert/elasticsearch/_bulk"}`)
rowsIngestedTotal = metrics.NewCounter(`vl_rows_ingested_total{type="elasticsearch_bulk"}`)
bulkRequestDuration = metrics.NewHistogram(`vl_http_request_duration_seconds{path="/insert/elasticsearch/_bulk"}`)
bulkRequestsTotal = metrics.NewCounter(`vl_http_requests_total{path="/insert/elasticsearch/_bulk"}`)
)
func readBulkRequest(r io.Reader, isGzip bool, timeField, msgField string,
@@ -175,6 +157,8 @@ func readBulkRequest(r io.Reader, isGzip bool, timeField, msgField string,
var lineBufferPool bytesutil.ByteBufferPool
var rowsIngestedTotal = metrics.NewCounter(`vl_rows_ingested_total{type="elasticsearch_bulk"}`)
func readBulkLine(sc *bufio.Scanner, timeField, msgField string,
processLogMessage func(timestamp int64, fields []logstorage.Field),
) (bool, error) {
@@ -225,7 +209,6 @@ func readBulkLine(sc *bufio.Scanner, timeField, msgField string,
p.RenameField(msgField, "_msg")
processLogMessage(ts, p.Fields)
logjson.PutParser(p)
return true, nil
}

View File

@@ -3,13 +3,12 @@ package insertutils
import (
"net/http"
"github.com/VictoriaMetrics/metrics"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vlstorage"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httputils"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage"
"github.com/VictoriaMetrics/metrics"
)
// CommonParams contains common HTTP parameters used by log ingestion APIs.
@@ -74,19 +73,12 @@ func GetCommonParams(r *http.Request) (*CommonParams, error) {
// GetProcessLogMessageFunc returns a function, which adds parsed log messages to lr.
func (cp *CommonParams) GetProcessLogMessageFunc(lr *logstorage.LogRows) func(timestamp int64, fields []logstorage.Field) {
return func(timestamp int64, fields []logstorage.Field) {
if len(fields) > *MaxFieldsPerLine {
rf := logstorage.RowFormatter(fields)
logger.Warnf("dropping log line with %d fields; it exceeds -insert.maxFieldsPerLine=%d; %s", len(fields), *MaxFieldsPerLine, rf)
rowsDroppedTotalTooManyFields.Inc()
return
}
lr.MustAdd(cp.TenantID, timestamp, fields)
if cp.Debug {
s := lr.GetRowString(0)
lr.ResetKeepSettings()
logger.Infof("remoteAddr=%s; requestURI=%s; ignoring log entry because of `debug` query arg: %s", cp.DebugRemoteAddr, cp.DebugRequestURI, s)
rowsDroppedTotalDebug.Inc()
rowsDroppedTotal.Inc()
return
}
if lr.NeedFlush() {
@@ -96,5 +88,4 @@ func (cp *CommonParams) GetProcessLogMessageFunc(lr *logstorage.LogRows) func(ti
}
}
var rowsDroppedTotalDebug = metrics.NewCounter(`vl_rows_dropped_total{reason="debug"}`)
var rowsDroppedTotalTooManyFields = metrics.NewCounter(`vl_rows_dropped_total{reason="too_many_fields"}`)
var rowsDroppedTotal = metrics.NewCounter(`vl_rows_dropped_total{reason="debug"}`)

View File

@@ -1,15 +1,10 @@
package insertutils
import (
"flag"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
)
var (
// MaxLineSizeBytes is the maximum length of a single line for /insert/* handlers
MaxLineSizeBytes = flagutil.NewBytes("insert.maxLineSizeBytes", 256*1024, "The maximum size of a single line, which can be read by /insert/* handlers")
// MaxFieldsPerLine is the maximum number of fields per line for /insert/* handlers
MaxFieldsPerLine = flag.Int("insert.maxFieldsPerLine", 1000, "The maximum number of log fields per line, which can be read by /insert/* handlers")
)

View File

@@ -21,7 +21,6 @@ import (
// RequestHandler processes jsonline insert requests
func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
startTime := time.Now()
w.Header().Add("Content-Type", "application/json")
if r.Method != "POST" {
@@ -36,10 +35,6 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
httpserver.Errorf(w, r, "%s", err)
return true
}
if err := vlstorage.CanWriteData(); err != nil {
httpserver.Errorf(w, r, "%s", err)
return true
}
lr := logstorage.GetLogRows(cp.StreamFields, cp.IgnoreFields)
processLogMessage := cp.GetProcessLogMessageFunc(lr)
@@ -82,11 +77,6 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
vlstorage.MustAddRows(lr)
logstorage.PutLogRows(lr)
// update jsonlineRequestDuration only for successfully parsed requests.
// There is no need in updating jsonlineRequestDuration for request errors,
// since their timings are usually much smaller than the timing for successful request parsing.
jsonlineRequestDuration.UpdateDuration(startTime)
return true
}
@@ -119,7 +109,6 @@ func readLine(sc *bufio.Scanner, timeField, msgField string, processLogMessage f
p.RenameField(msgField, "_msg")
processLogMessage(ts, p.Fields)
logjson.PutParser(p)
return true, nil
}
@@ -155,7 +144,6 @@ func parseISO8601Timestamp(s string) (int64, error) {
var lineBufferPool bytesutil.ByteBufferPool
var (
requestsTotal = metrics.NewCounter(`vl_http_requests_total{path="/insert/jsonline"}`)
rowsIngestedTotal = metrics.NewCounter(`vl_rows_ingested_total{type="jsonline"}`)
jsonlineRequestDuration = metrics.NewHistogram(`vl_http_request_duration_seconds{path="/insert/jsonline"}`)
requestsTotal = metrics.NewCounter(`vl_http_requests_total{path="/insert/jsonline"}`)
rowsIngestedTotal = metrics.NewCounter(`vl_rows_ingested_total{type="jsonline"}`)
)

View File

@@ -5,31 +5,29 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/app/vlinsert/insertutils"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage"
"github.com/VictoriaMetrics/metrics"
)
var (
lokiRequestsJSONTotal = metrics.NewCounter(`vl_http_requests_total{path="/insert/loki/api/v1/push",format="json"}`)
lokiRequestsProtobufTotal = metrics.NewCounter(`vl_http_requests_total{path="/insert/loki/api/v1/push",format="protobuf"}`)
)
// RequestHandler processes Loki insert requests
//
// See https://grafana.com/docs/loki/latest/api/#push-log-entries-to-loki
func RequestHandler(path string, w http.ResponseWriter, r *http.Request) bool {
switch path {
case "/api/v1/push":
return handleInsert(r, w)
case "/ready":
// See https://grafana.com/docs/loki/latest/api/#identify-ready-loki-instance
w.WriteHeader(http.StatusOK)
w.Write([]byte("ready"))
return true
default:
if path != "/api/v1/push" {
return false
}
}
// See https://grafana.com/docs/loki/latest/api/#push-log-entries-to-loki
func handleInsert(r *http.Request, w http.ResponseWriter) bool {
contentType := r.Header.Get("Content-Type")
switch contentType {
case "application/json":
lokiRequestsJSONTotal.Inc()
return handleJSON(r, w)
default:
// Protobuf request body should be handled by default according to https://grafana.com/docs/loki/latest/api/#push-log-entries-to-loki
// Protobuf request body should be handled by default accoring to https://grafana.com/docs/loki/latest/api/#push-log-entries-to-loki
lokiRequestsProtobufTotal.Inc()
return handleProtobuf(r, w)
}
}

View File

@@ -18,11 +18,12 @@ import (
"github.com/valyala/fastjson"
)
var parserPool fastjson.ParserPool
var (
rowsIngestedJSONTotal = metrics.NewCounter(`vl_rows_ingested_total{type="loki",format="json"}`)
parserPool fastjson.ParserPool
)
func handleJSON(r *http.Request, w http.ResponseWriter) bool {
startTime := time.Now()
lokiRequestsJSONTotal.Inc()
reader := r.Body
if r.Header.Get("Content-Encoding") == "gzip" {
zr, err := common.GetGzipReader(reader)
@@ -47,36 +48,19 @@ func handleJSON(r *http.Request, w http.ResponseWriter) bool {
httpserver.Errorf(w, r, "cannot parse common params from request: %s", err)
return true
}
if err := vlstorage.CanWriteData(); err != nil {
httpserver.Errorf(w, r, "%s", err)
return true
}
lr := logstorage.GetLogRows(cp.StreamFields, cp.IgnoreFields)
processLogMessage := cp.GetProcessLogMessageFunc(lr)
n, err := parseJSONRequest(data, processLogMessage)
vlstorage.MustAddRows(lr)
logstorage.PutLogRows(lr)
if err != nil {
httpserver.Errorf(w, r, "cannot parse Loki json request: %s", err)
httpserver.Errorf(w, r, "cannot parse Loki request: %s", err)
return true
}
rowsIngestedJSONTotal.Add(n)
// update lokiRequestJSONDuration only for successfully parsed requests
// There is no need in updating lokiRequestJSONDuration for request errors,
// since their timings are usually much smaller than the timing for successful request parsing.
lokiRequestJSONDuration.UpdateDuration(startTime)
return true
}
var (
lokiRequestsJSONTotal = metrics.NewCounter(`vl_http_requests_total{path="/insert/loki/api/v1/push",format="json"}`)
rowsIngestedJSONTotal = metrics.NewCounter(`vl_rows_ingested_total{type="loki",format="json"}`)
lokiRequestJSONDuration = metrics.NewHistogram(`vl_http_request_duration_seconds{path="/insert/loki/api/v1/push",format="json"}`)
)
func parseJSONRequest(data []byte, processLogMessage func(timestamp int64, fields []logstorage.Field)) (int, error) {
p := parserPool.Get()
defer parserPool.Put(p)
@@ -171,6 +155,7 @@ func parseJSONRequest(data []byte, processLogMessage func(timestamp int64, field
Value: bytesutil.ToUnsafeString(msg),
})
processLogMessage(ts, fields)
}
rowsIngested += len(lines)
}

View File

@@ -19,13 +19,12 @@ import (
)
var (
bytesBufPool bytesutil.ByteBufferPool
pushReqsPool sync.Pool
rowsIngestedProtobufTotal = metrics.NewCounter(`vl_rows_ingested_total{type="loki",format="protobuf"}`)
bytesBufPool bytesutil.ByteBufferPool
pushReqsPool sync.Pool
)
func handleProtobuf(r *http.Request, w http.ResponseWriter) bool {
startTime := time.Now()
lokiRequestsProtobufTotal.Inc()
wcr := writeconcurrencylimiter.GetReader(r.Body)
data, err := io.ReadAll(wcr)
writeconcurrencylimiter.PutReader(wcr)
@@ -39,36 +38,19 @@ func handleProtobuf(r *http.Request, w http.ResponseWriter) bool {
httpserver.Errorf(w, r, "cannot parse common params from request: %s", err)
return true
}
if err := vlstorage.CanWriteData(); err != nil {
httpserver.Errorf(w, r, "%s", err)
return true
}
lr := logstorage.GetLogRows(cp.StreamFields, cp.IgnoreFields)
processLogMessage := cp.GetProcessLogMessageFunc(lr)
n, err := parseProtobufRequest(data, processLogMessage)
vlstorage.MustAddRows(lr)
logstorage.PutLogRows(lr)
if err != nil {
httpserver.Errorf(w, r, "cannot parse Loki protobuf request: %s", err)
httpserver.Errorf(w, r, "cannot parse loki request: %s", err)
return true
}
rowsIngestedProtobufTotal.Add(n)
// update lokiRequestProtobufDuration only for successfully parsed requests
// There is no need in updating lokiRequestProtobufDuration for request errors,
// since their timings are usually much smaller than the timing for successful request parsing.
lokiRequestProtobufDuration.UpdateDuration(startTime)
return true
}
var (
lokiRequestsProtobufTotal = metrics.NewCounter(`vl_http_requests_total{path="/insert/loki/api/v1/push",format="protobuf"}`)
rowsIngestedProtobufTotal = metrics.NewCounter(`vl_rows_ingested_total{type="loki",format="protobuf"}`)
lokiRequestProtobufDuration = metrics.NewHistogram(`vl_http_request_duration_seconds{path="/insert/loki/api/v1/push",format="protobuf"}`)
)
func parseProtobufRequest(data []byte, processLogMessage func(timestamp int64, fields []logstorage.Field)) (int, error) {
bb := bytesBufPool.Get()
defer bytesBufPool.Put(bb)

View File

@@ -6,9 +6,8 @@ import (
"testing"
"time"
"github.com/golang/snappy"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage"
"github.com/golang/snappy"
)
func BenchmarkParseProtobufRequest(b *testing.B) {

View File

@@ -88,12 +88,6 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
return true
}
if strings.HasPrefix(path, "/vmui/") {
if strings.HasPrefix(path, "/vmui/static/") {
// Allow clients caching static contents for long period of time, since it shouldn't change over time.
// Path to static contents (such as js and css) must be changed whenever its contents is changed.
// See https://developer.chrome.com/docs/lighthouse/performance/uses-long-cache-ttl/
w.Header().Set("Cache-Control", "max-age=31536000")
}
r.URL.Path = path
vmuiFileServer.ServeHTTP(w, r)
return true

View File

@@ -1,13 +1,14 @@
{
"files": {
"main.css": "./static/css/main.9a224445.css",
"main.js": "./static/js/main.02178f4b.js",
"main.css": "./static/css/main.5f461a27.css",
"main.js": "./static/js/main.7566144a.js",
"static/js/522.b5ae4365.chunk.js": "./static/js/522.b5ae4365.chunk.js",
"static/media/MetricsQL.md": "./static/media/MetricsQL.957b90ab4cb4852eec26.md",
"static/media/Lato-Regular.ttf": "./static/media/Lato-Regular.d714fec1633b69a9c2e9.ttf",
"static/media/Lato-Bold.ttf": "./static/media/Lato-Bold.32360ba4b57802daa4d6.ttf",
"index.html": "./index.html"
},
"entrypoints": [
"static/css/main.9a224445.css",
"static/js/main.02178f4b.js"
"static/css/main.5f461a27.css",
"static/js/main.7566144a.js"
]
}

View File

@@ -1 +1 @@
<!doctype html><html lang="en"><head><meta charset="utf-8"/><link rel="icon" href="./favicon.ico"/><meta name="viewport" content="width=device-width,initial-scale=1,maximum-scale=5"/><meta name="theme-color" content="#000000"/><meta name="description" content="UI for VictoriaMetrics"/><link rel="apple-touch-icon" href="./apple-touch-icon.png"/><link rel="icon" type="image/png" sizes="32x32" href="./favicon-32x32.png"><link rel="manifest" href="./manifest.json"/><title>VM UI</title><script src="./dashboards/index.js" type="module"></script><meta name="twitter:card" content="summary_large_image"><meta name="twitter:image" content="./preview.jpg"><meta name="twitter:title" content="UI for VictoriaMetrics"><meta name="twitter:description" content="Explore and troubleshoot your VictoriaMetrics data"><meta name="twitter:site" content="@VictoriaMetrics"><meta property="og:title" content="Metric explorer for VictoriaMetrics"><meta property="og:description" content="Explore and troubleshoot your VictoriaMetrics data"><meta property="og:image" content="./preview.jpg"><meta property="og:type" content="website"><script defer="defer" src="./static/js/main.02178f4b.js"></script><link href="./static/css/main.9a224445.css" rel="stylesheet"></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>
<!doctype html><html lang="en"><head><meta charset="utf-8"/><link rel="icon" href="./favicon.ico"/><meta name="viewport" content="width=device-width,initial-scale=1,maximum-scale=1,user-scalable=no"/><meta name="theme-color" content="#000000"/><meta name="description" content="UI for VictoriaMetrics"/><link rel="apple-touch-icon" href="./apple-touch-icon.png"/><link rel="icon" type="image/png" sizes="32x32" href="./favicon-32x32.png"><link rel="manifest" href="./manifest.json"/><title>VM UI</title><script src="./dashboards/index.js" type="module"></script><meta name="twitter:card" content="summary_large_image"><meta name="twitter:image" content="./preview.jpg"><meta name="twitter:title" content="UI for VictoriaMetrics"><meta name="twitter:description" content="Explore and troubleshoot your VictoriaMetrics data"><meta name="twitter:site" content="@VictoriaMetrics"><meta property="og:title" content="Metric explorer for VictoriaMetrics"><meta property="og:description" content="Explore and troubleshoot your VictoriaMetrics data"><meta property="og:image" content="./preview.jpg"><meta property="og:type" content="website"><script defer="defer" src="./static/js/main.7566144a.js"></script><link href="./static/css/main.5f461a27.css" rel="stylesheet"></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -3,17 +3,14 @@ package vlstorage
import (
"flag"
"fmt"
"net/http"
"sync"
"time"
"github.com/VictoriaMetrics/metrics"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage"
"github.com/VictoriaMetrics/metrics"
)
var (
@@ -32,8 +29,6 @@ var (
"see https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#stream-fields ; see also -logIngestedRows")
logIngestedRows = flag.Bool("logIngestedRows", false, "Whether to log all the ingested log entries; this can be useful for debugging of data ingestion; "+
"see https://docs.victoriametrics.com/VictoriaLogs/data-ingestion/ ; see also -logNewStreams")
minFreeDiskSpaceBytes = flagutil.NewBytes("storage.minFreeDiskSpaceBytes", 10e6, "The minimum free disk space at -storageDataPath after which "+
"the storage stops accepting new data")
)
// Init initializes vlstorage.
@@ -44,16 +39,15 @@ func Init() {
logger.Panicf("BUG: Init() has been already called")
}
if retentionPeriod.Duration() < 24*time.Hour {
if retentionPeriod.Msecs < 24*3600*1000 {
logger.Fatalf("-retentionPeriod cannot be smaller than a day; got %s", retentionPeriod)
}
cfg := &logstorage.StorageConfig{
Retention: retentionPeriod.Duration(),
FlushInterval: *inmemoryDataFlushInterval,
FutureRetention: futureRetention.Duration(),
LogNewStreams: *logNewStreams,
LogIngestedRows: *logIngestedRows,
MinFreeDiskSpaceBytes: minFreeDiskSpaceBytes.N,
Retention: time.Millisecond * time.Duration(retentionPeriod.Msecs),
FlushInterval: *inmemoryDataFlushInterval,
FutureRetention: time.Millisecond * time.Duration(futureRetention.Msecs),
LogNewStreams: *logNewStreams,
LogIngestedRows: *logIngestedRows,
}
logger.Infof("opening storage at -storageDataPath=%s", *storageDataPath)
startTime := time.Now()
@@ -80,21 +74,7 @@ func Stop() {
var strg *logstorage.Storage
var storageMetrics *metrics.Set
// CanWriteData returns non-nil error if it cannot write data to vlstorage.
func CanWriteData() error {
if strg.IsReadOnly() {
return &httpserver.ErrorWithStatusCode{
Err: fmt.Errorf("cannot add rows into storage in read-only mode; the storage can be in read-only mode "+
"because of lack of free disk space at -storageDataPath=%s", *storageDataPath),
StatusCode: http.StatusTooManyRequests,
}
}
return nil
}
// MustAddRows adds lr to vlstorage
//
// It is advised to call CanWriteData() before calling MustAddRows()
func MustAddRows(lr *logstorage.LogRows) {
strg.MustAddRows(lr)
}
@@ -127,12 +107,6 @@ func initStorageMetrics(strg *logstorage.Storage) *metrics.Set {
ms.NewGauge(fmt.Sprintf(`vl_free_disk_space_bytes{path=%q}`, *storageDataPath), func() float64 {
return float64(fs.MustGetFreeSpace(*storageDataPath))
})
ms.NewGauge(fmt.Sprintf(`vl_storage_is_read_only{path=%q}`, *storageDataPath), func() float64 {
if m().IsReadOnly {
return 1
}
return 0
})
ms.NewGauge(`vl_active_merges{type="inmemory"}`, func() float64 {
return float64(m().InmemoryActiveMerges)

View File

@@ -3,8 +3,7 @@
`vmagent` is a tiny agent which helps you collect metrics from various sources,
[relabel and filter the collected metrics](#relabeling)
and store them in [VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics)
or any other storage systems via Prometheus `remote_write` protocol
or via [VictoriaMetrics `remote_write` protocol](#victoriametrics-remote-write-protocol).
or any other storage systems via Prometheus `remote_write` protocol.
See [Quick Start](#quick-start) for details.
@@ -46,7 +45,7 @@ additionally to [discovering Prometheus-compatible targets and scraping metrics
## Quick Start
Please download `vmutils-*` archive from [releases page](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest) (
Please download `vmutils-*` archive from [releases page](https://github.com/VictoriaMetrics/VictoriaMetrics/releases) (
`vmagent` is also available in [docker images](https://hub.docker.com/r/victoriametrics/vmagent/tags)),
unpack it and pass the following flags to the `vmagent` binary in order to start scraping Prometheus-compatible targets
and sending the data to the Prometheus-compatible remote storage:
@@ -755,10 +754,10 @@ as soon as it is parsed in stream parsing mode.
A single `vmagent` instance can scrape tens of thousands of scrape targets. Sometimes this isn't enough due to limitations on CPU, network, RAM, etc.
In this case scrape targets can be split among multiple `vmagent` instances (aka `vmagent` horizontal scaling, sharding and clustering).
The number of `vmagent` instances in the cluster must be passed to `-promscrape.cluster.membersCount` command-line flag.
Each `vmagent` instance in the cluster must use identical `-promscrape.config` files with distinct `-promscrape.cluster.memberNum` values
in the range `0 ... N-1`, where `N` is the number of `vmagent` instances in the cluster specified via `-promscrape.cluster.membersCount`.
For example, the following commands spread scrape targets among a cluster of two `vmagent` instances:
Each `vmagent` instance in the cluster must use identical `-promscrape.config` files with distinct `-promscrape.cluster.memberNum` values.
The flag value must be in the range `0 ... N-1`, where `N` is the number of `vmagent` instances in the cluster.
The number of `vmagent` instances in the cluster must be passed to `-promscrape.cluster.membersCount` command-line flag. For example, the following commands
spread scrape targets among a cluster of two `vmagent` instances:
```
/path/to/vmagent -promscrape.cluster.membersCount=2 -promscrape.cluster.memberNum=0 -promscrape.config=/path/to/config.yml ...
@@ -766,7 +765,7 @@ For example, the following commands spread scrape targets among a cluster of two
```
The `-promscrape.cluster.memberNum` can be set to a StatefulSet pod name when `vmagent` runs in Kubernetes.
The pod name must end with a number in the range `0 ... promscrape.cluster.membersCount-1`. For example, `-promscrape.cluster.memberNum=vmagent-0`.
The pod name must end with a number in the range `0 ... promscrape.cluster.memberNum-1`. For example, `-promscrape.cluster.memberNum=vmagent-0`.
By default, each scrape target is scraped only by a single `vmagent` instance in the cluster. If there is a need for replicating scrape targets among multiple `vmagent` instances,
then `-promscrape.cluster.replicationFactor` command-line flag must be set to the desired number of replicas. For example, the following commands
@@ -782,14 +781,6 @@ If each target is scraped by multiple `vmagent` instances, then data deduplicati
The `-dedup.minScrapeInterval` must be set to the `scrape_interval` configured at `-promscrape.config`.
See [these docs](https://docs.victoriametrics.com/#deduplication) for details.
The `-promscrape.cluster.memberLabel` command-line flag allows specifying a name for `member num` label to add to all the scraped metrics.
The value of the `member num` label is set to `-promscrape.cluster.memberNum`. For example, the following config instructs adding `vmagent_instance="0"` label
to all the metrics scraped by the given `vmagent` instance:
```
/path/to/vmagent -promscrape.cluster.membersCount=2 -promscrape.cluster.memberNum=0 -promscrape.cluster.memberLabel=vmagent_instance
```
See also [how to shard data among multiple remote storage systems](#sharding-among-remote-storages).
@@ -1025,9 +1016,8 @@ See also [troubleshooting docs](https://docs.victoriametrics.com/Troubleshooting
* [Reading metrics from Kafka](#reading-metrics-from-kafka)
* [Writing metrics to Kafka](#writing-metrics-to-kafka)
The enterprise version of vmagent is available for evaluation at [releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest) page
The enterprise version of vmagent is available for evaluation at [releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases) page
in `vmutils-...-enterprise.tar.gz` archives and in [docker images](https://hub.docker.com/r/victoriametrics/vmagent/tags) with tags containing `enterprise` suffix.
See how to request a free trial license [here](https://victoriametrics.com/products/enterprise/trial/).
### Reading metrics from Kafka
@@ -1074,7 +1064,7 @@ data_format = "influx"
#### Command-line flags for Kafka consumer
These command-line flags are available only in [enterprise](https://docs.victoriametrics.com/enterprise.html) version of `vmagent`,
which can be downloaded for evaluation from [releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest) page
which can be downloaded for evaluation from [releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases) page
(see `vmutils-...-enterprise.tar.gz` archives) and from [docker images](https://hub.docker.com/r/victoriametrics/vmagent/tags) with tags containing `enterprise` suffix.
```
@@ -1134,13 +1124,13 @@ Two types of auth are supported:
## How to build from sources
We recommend using [official binary releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest) - `vmagent` is located in the `vmutils-...` archives.
We recommend using [official binary releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases) - `vmagent` is located in the `vmutils-...` archives.
It may be needed to build `vmagent` from source code when developing or testing new feature or bugfix.
### Development build
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.20.
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.19.
1. Run `make vmagent` from the root folder of [the repository](https://github.com/VictoriaMetrics/VictoriaMetrics).
It builds the `vmagent` binary and puts it into the `bin` folder.
@@ -1169,7 +1159,7 @@ ARM build may run on Raspberry Pi or on [energy-efficient ARM servers](https://b
### Development ARM build
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.20.
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.19.
1. Run `make vmagent-linux-arm` or `make vmagent-linux-arm64` from the root folder of [the repository](https://github.com/VictoriaMetrics/VictoriaMetrics)
It builds `vmagent-linux-arm` or `vmagent-linux-arm64` binary respectively and puts it into the `bin` folder.
@@ -1246,9 +1236,7 @@ See the docs at https://docs.victoriametrics.com/vmagent.html .
-envflag.prefix string
Prefix for environment variables if -envflag.enable is set
-eula
Deprecated, please use -license or -licenseFile flags instead. By specifying this flag, you confirm that you have an enterprise license and accept the ESA https://victoriametrics.com/legal/esa/ . This flag is available only in Enterprise binaries. See https://docs.victoriametrics.com/enterprise.html
-filestream.disableFadvise
Whether to disable fadvise() syscall when reading large data files. The fadvise() syscall prevents from eviction of recently accessed data from OS page cache during background merges and backups. In some rare cases it is better to disable the syscall if it uses too much CPU
By specifying this flag, you confirm that you have an enterprise license and accept the EULA https://victoriametrics.com/assets/VM_EULA.pdf . This flag is available only in VictoriaMetrics enterprise. See https://docs.victoriametrics.com/enterprise.html
-flagsAuthKey string
Auth key for /flags endpoint. It must be passed via authKey query arg. It overrides httpAuth.* settings
-fs.disableMmap
@@ -1311,40 +1299,34 @@ See the docs at https://docs.victoriametrics.com/vmagent.html .
-internStringMaxLen int
The maximum length for strings to intern. A lower limit may save memory at the cost of higher CPU usage. See https://en.wikipedia.org/wiki/String_interning . See also -internStringDisableCache and -internStringCacheExpireDuration (default 500)
-kafka.consumer.topic array
Kafka topic names for data consumption. This flag is available only in Enterprise binaries. See https://docs.victoriametrics.com/enterprise.html
Kafka topic names for data consumption. This flag is available only in VictoriaMetrics enterprise. See https://docs.victoriametrics.com/enterprise.html
Supports an array of values separated by comma or specified via multiple flags.
-kafka.consumer.topic.basicAuth.password array
Optional basic auth password for -kafka.consumer.topic. Must be used in conjunction with any supported auth methods for kafka client, specified by flag -kafka.consumer.topic.options='security.protocol=SASL_SSL;sasl.mechanisms=PLAIN' . This flag is available only in Enterprise binaries. See https://docs.victoriametrics.com/enterprise.html
Optional basic auth password for -kafka.consumer.topic. Must be used in conjunction with any supported auth methods for kafka client, specified by flag -kafka.consumer.topic.options='security.protocol=SASL_SSL;sasl.mechanisms=PLAIN' . This flag is available only in VictoriaMetrics enterprise. See https://docs.victoriametrics.com/enterprise.html
Supports an array of values separated by comma or specified via multiple flags.
-kafka.consumer.topic.basicAuth.username array
Optional basic auth username for -kafka.consumer.topic. Must be used in conjunction with any supported auth methods for kafka client, specified by flag -kafka.consumer.topic.options='security.protocol=SASL_SSL;sasl.mechanisms=PLAIN' . This flag is available only in Enterprise binaries. See https://docs.victoriametrics.com/enterprise.html
Optional basic auth username for -kafka.consumer.topic. Must be used in conjunction with any supported auth methods for kafka client, specified by flag -kafka.consumer.topic.options='security.protocol=SASL_SSL;sasl.mechanisms=PLAIN' . This flag is available only in VictoriaMetrics enterprise. See https://docs.victoriametrics.com/enterprise.html
Supports an array of values separated by comma or specified via multiple flags.
-kafka.consumer.topic.brokers array
List of brokers to connect for given topic, e.g. -kafka.consumer.topic.broker=host-1:9092;host-2:9092 . This flag is available only in Enterprise binaries. See https://docs.victoriametrics.com/enterprise.html
List of brokers to connect for given topic, e.g. -kafka.consumer.topic.broker=host-1:9092;host-2:9092 . This flag is available only in VictoriaMetrics enterprise. See https://docs.victoriametrics.com/enterprise.html
Supports an array of values separated by comma or specified via multiple flags.
-kafka.consumer.topic.concurrency array
Configures consumer concurrency for topic specified via -kafka.consumer.topic flag.This flag is available only in Enterprise binaries. See https://docs.victoriametrics.com/enterprise.html (default 1)
Configures consumer concurrency for topic specified via -kafka.consumer.topic flag.This flag is available only in VictoriaMetrics enterprise. See https://docs.victoriametrics.com/enterprise.html
Supports array of values separated by comma or specified via multiple flags.
-kafka.consumer.topic.defaultFormat string
Expected data format in the topic if -kafka.consumer.topic.format is skipped. This flag is available only in Enterprise binaries. See https://docs.victoriametrics.com/enterprise.html (default "promremotewrite")
Expected data format in the topic if -kafka.consumer.topic.format is skipped. This flag is available only in VictoriaMetrics enterprise. See https://docs.victoriametrics.com/enterprise.html (default "promremotewrite")
-kafka.consumer.topic.format array
data format for corresponding kafka topic. Valid formats: influx, prometheus, promremotewrite, graphite, jsonline . This flag is available only in Enterprise binaries. See https://docs.victoriametrics.com/enterprise.html
data format for corresponding kafka topic. Valid formats: influx, prometheus, promremotewrite, graphite, jsonline . This flag is available only in VictoriaMetrics enterprise. See https://docs.victoriametrics.com/enterprise.html
Supports an array of values separated by comma or specified via multiple flags.
-kafka.consumer.topic.groupID array
Defines group.id for topic. This flag is available only in Enterprise binaries. See https://docs.victoriametrics.com/enterprise.html
Defines group.id for topic. This flag is available only in VictoriaMetrics enterprise. See https://docs.victoriametrics.com/enterprise.html
Supports an array of values separated by comma or specified via multiple flags.
-kafka.consumer.topic.isGzipped array
Enables gzip setting for topic messages payload. Only prometheus, jsonline and influx formats accept gzipped messages.This flag is available only in Enterprise binaries. See https://docs.victoriametrics.com/enterprise.html
Enables gzip setting for topic messages payload. Only prometheus, jsonline and influx formats accept gzipped messages.This flag is available only in VictoriaMetrics enterprise. See https://docs.victoriametrics.com/enterprise.html
Supports array of values separated by comma or specified via multiple flags.
-kafka.consumer.topic.options array
Optional key=value;key1=value2 settings for topic consumer. See full configuration options at https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md . This flag is available only in Enterprise binaries. See https://docs.victoriametrics.com/enterprise.html
Optional key=value;key1=value2 settings for topic consumer. See full configuration options at https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md . This flag is available only in VictoriaMetrics enterprise. See https://docs.victoriametrics.com/enterprise.html
Supports an array of values separated by comma or specified via multiple flags.
-license string
Lisense key for VictoriaMetrics Enterprise. See https://victoriametrics.com/products/enterprise/ . Trial Enterprise license can be obtained from https://victoriametrics.com/products/enterprise/trial/ . This flag is available only in Enterprise binaries. The license key can be also passed via file specified by -licenseFile command-line flag
-license.forceOffline
Whether to enable offline verification for VictoriaMetrics Enterprise license key, which has been passed either via -license or via -licenseFile command-line flag. The issued license key must support offline verification feature. Contact info@victoriametrics.com if you need offline license verification. This flag is avilable only in Enterprise binaries
-licenseFile string
Path to file with license key for VictoriaMetrics Enterprise. See https://victoriametrics.com/products/enterprise/ . Trial Enterprise license can be obtained from https://victoriametrics.com/products/enterprise/trial/ . This flag is available only in Enterprise binaries. The license key can be also passed inline via -license command-line flag
-loggerDisableTimestamps
Whether to disable writing timestamps in logs
-loggerErrorsPerSecondLimit int
@@ -1362,7 +1344,7 @@ See the docs at https://docs.victoriametrics.com/vmagent.html .
-loggerWarnsPerSecondLimit int
Per-second limit on the number of WARN messages. If more than the given number of warns are emitted per second, then the remaining warns are suppressed. Zero values disable the rate limit
-maxConcurrentInserts int
The maximum number of concurrent insert requests. Default value should work for most cases, since it minimizes the memory usage. The default value can be increased when clients send data over slow networks. See also -insert.maxQueueDuration (default 8)
The maximum number of concurrent insert requests. The default value should work for most cases, since it minimizes memory usage. The default value can be increased when clients send data over slow networks. See also -insert.maxQueueDuration (default 8)
-maxInsertRequestSize size
The maximum size in bytes of a single Prometheus remote_write API request
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 33554432)
@@ -1394,16 +1376,14 @@ See the docs at https://docs.victoriametrics.com/vmagent.html .
Items in the previous caches are removed when the percent of requests it serves becomes lower than this value. Higher values reduce memory usage at the cost of higher CPU usage. See also -cacheExpireDuration (default 0.1)
-promscrape.azureSDCheckInterval duration
Interval for checking for changes in Azure. This works only if azure_sd_configs is configured in '-promscrape.config' file. See https://docs.victoriametrics.com/sd_configs.html#azure_sd_configs for details (default 1m0s)
-promscrape.cluster.memberLabel string
If non-empty, then the label with this name and the -promscrape.cluster.memberNum value is added to all the scraped metrics. See https://docs.victoriametrics.com/vmagent.html#scraping-big-number-of-targets for more info
-promscrape.cluster.memberNum string
The number of vmagent instance in the cluster of scrapers. It must be a unique value in the range 0 ... promscrape.cluster.membersCount-1 across scrapers in the cluster. Can be specified as pod name of Kubernetes StatefulSet - pod-name-Num, where Num is a numeric part of pod name. See also -promscrape.cluster.memberLabel . See https://docs.victoriametrics.com/vmagent.html#scraping-big-number-of-targets for more info (default "0")
The number of number in the cluster of scrapers. It must be a unique value in the range 0 ... promscrape.cluster.membersCount-1 across scrapers in the cluster. Can be specified as pod name of Kubernetes StatefulSet - pod-name-Num, where Num is a numeric part of pod name (default "0")
-promscrape.cluster.membersCount int
The number of members in a cluster of scrapers. Each member must have a unique -promscrape.cluster.memberNum in the range 0 ... promscrape.cluster.membersCount-1 . Each member then scrapes roughly 1/N of all the targets. By default, cluster scraping is disabled, i.e. a single scraper scrapes all the targets. See https://docs.victoriametrics.com/vmagent.html#scraping-big-number-of-targets for more info (default 1)
The number of members in a cluster of scrapers. Each member must have a unique -promscrape.cluster.memberNum in the range 0 ... promscrape.cluster.membersCount-1 . Each member then scrapes roughly 1/N of all the targets. By default, cluster scraping is disabled, i.e. a single scraper scrapes all the targets
-promscrape.cluster.name string
Optional name of the cluster. If multiple vmagent clusters scrape the same targets, then each cluster must have unique name in order to properly de-duplicate samples received from these clusters. See https://docs.victoriametrics.com/vmagent.html#scraping-big-number-of-targets for more info
Optional name of the cluster. If multiple vmagent clusters scrape the same targets, then each cluster must have unique name in order to properly de-duplicate samples received from these clusters. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2679
-promscrape.cluster.replicationFactor int
The number of members in the cluster, which scrape the same targets. If the replication factor is greater than 1, then the deduplication must be enabled at remote storage side. See https://docs.victoriametrics.com/vmagent.html#scraping-big-number-of-targets for more info (default 1)
The number of members in the cluster, which scrape the same targets. If the replication factor is greater than 1, then the deduplication must be enabled at remote storage side. See https://docs.victoriametrics.com/#deduplication (default 1)
-promscrape.config string
Optional path to Prometheus config file with 'scrape_configs' section containing targets to scrape. The path can point to local file and to http url. See https://docs.victoriametrics.com/#how-to-scrape-prometheus-exporters-such-as-node-exporter for details
-promscrape.config.dryRun
@@ -1553,7 +1533,7 @@ See the docs at https://docs.victoriametrics.com/vmagent.html .
The maximum number of unique series vmagent can send to remote storage systems during the last 24 hours. Excess series are logged and dropped. This can be useful for limiting series churn rate. See https://docs.victoriametrics.com/vmagent.html#cardinality-limiter
-remoteWrite.maxDiskUsagePerURL array
The maximum file-based buffer size in bytes at -remoteWrite.tmpDataPath for each -remoteWrite.url. When buffer size reaches the configured maximum, then old data is dropped when adding new data to the buffer. Buffered data is stored in ~500MB chunks. It is recommended to set the value for this flag to a multiple of the block size 500MB. Disk usage is unlimited if the value is set to 0
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB. (default 0)
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB.
Supports array of values separated by comma or specified via multiple flags.
-remoteWrite.maxHourlySeries int
The maximum number of unique series vmagent can send to remote storage systems during the last hour. Excess series are logged and dropped. This can be useful for limiting series cardinality. See https://docs.victoriametrics.com/vmagent.html#cardinality-limiter
@@ -1583,28 +1563,28 @@ See the docs at https://docs.victoriametrics.com/vmagent.html .
-remoteWrite.queues int
The number of concurrent queues to each -remoteWrite.url. Set more queues if default number of queues isn't enough for sending high volume of collected data to remote storage. Default value is 2 * numberOfAvailableCPUs (default 8)
-remoteWrite.rateLimit array
Optional rate limit in bytes per second for data sent to the corresponding -remoteWrite.url. By default, the rate limit is disabled. It can be useful for limiting load on remote storage when big amounts of buffered data is sent after temporary unavailability of the remote storage (default 0)
Optional rate limit in bytes per second for data sent to the corresponding -remoteWrite.url. By default, the rate limit is disabled. It can be useful for limiting load on remote storage when big amounts of buffered data is sent after temporary unavailability of the remote storage
Supports array of values separated by comma or specified via multiple flags.
-remoteWrite.relabelConfig string
Optional path to file with relabeling configs, which are applied to all the metrics before sending them to -remoteWrite.url. See also -remoteWrite.urlRelabelConfig. The path can point either to local file or to http url. See https://docs.victoriametrics.com/vmagent.html#relabeling
-remoteWrite.roundDigits array
Round metric values to this number of decimal digits after the point before writing them to remote storage. Examples: -remoteWrite.roundDigits=2 would round 1.236 to 1.24, while -remoteWrite.roundDigits=-1 would round 126.78 to 130. By default, digits rounding is disabled. Set it to 100 for disabling it for a particular remote storage. This option may be used for improving data compression for the stored metrics (default 100)
Round metric values to this number of decimal digits after the point before writing them to remote storage. Examples: -remoteWrite.roundDigits=2 would round 1.236 to 1.24, while -remoteWrite.roundDigits=-1 would round 126.78 to 130. By default, digits rounding is disabled. Set it to 100 for disabling it for a particular remote storage. This option may be used for improving data compression for the stored metrics
Supports array of values separated by comma or specified via multiple flags.
-remoteWrite.sendTimeout array
Timeout for sending a single block of data to the corresponding -remoteWrite.url (default 1m0s)
Timeout for sending a single block of data to the corresponding -remoteWrite.url (default 1m)
Supports array of values separated by comma or specified via multiple flags.
-remoteWrite.shardByURL
Whether to shard outgoing series across all the remote storage systems enumerated via -remoteWrite.url . By default the data is replicated across all the -remoteWrite.url . See https://docs.victoriametrics.com/vmagent.html#sharding-among-remote-storages
-remoteWrite.showURL
Whether to show -remoteWrite.url in the exported metrics. It is hidden by default, since it can contain sensitive info such as auth key
-remoteWrite.significantFigures array
The number of significant figures to leave in metric values before writing them to remote storage. See https://en.wikipedia.org/wiki/Significant_figures . Zero value saves all the significant figures. This option may be used for improving data compression for the stored metrics. See also -remoteWrite.roundDigits (default 0)
The number of significant figures to leave in metric values before writing them to remote storage. See https://en.wikipedia.org/wiki/Significant_figures . Zero value saves all the significant figures. This option may be used for improving data compression for the stored metrics. See also -remoteWrite.roundDigits
Supports array of values separated by comma or specified via multiple flags.
-remoteWrite.streamAggr.config array
Optional path to file with stream aggregation config. See https://docs.victoriametrics.com/stream-aggregation.html . See also -remoteWrite.streamAggr.keepInput, -remoteWrite.streamAggr.dropInput and -remoteWrite.streamAggr.dedupInterval
Supports an array of values separated by comma or specified via multiple flags.
-remoteWrite.streamAggr.dedupInterval array
Input samples are de-duplicated with this interval before being aggregated. Only the last sample per each time series per each interval is aggregated if the interval is greater than zero (default 0s)
Input samples are de-duplicated with this interval before being aggregated. Only the last sample per each time series per each interval is aggregated if the interval is greater than zero
Supports array of values separated by comma or specified via multiple flags.
-remoteWrite.streamAggr.dropInput array
Whether to drop all the input samples after the aggregation with -remoteWrite.streamAggr.config. By default, only aggregates samples are dropped, while the remaining samples are written to the corresponding -remoteWrite.url . See also -remoteWrite.streamAggr.keepInput and https://docs.victoriametrics.com/stream-aggregation.html

View File

@@ -8,7 +8,7 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
parserCommon "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/datadog"
parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/datadog"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/datadog/stream"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
"github.com/VictoriaMetrics/metrics"
@@ -29,12 +29,12 @@ func InsertHandlerForHTTP(at *auth.Token, req *http.Request) error {
return err
}
ce := req.Header.Get("Content-Encoding")
return stream.Parse(req.Body, ce, func(series []datadog.Series) error {
return stream.Parse(req.Body, ce, func(series []parser.Series) error {
return insertRows(at, series, extraLabels)
})
}
func insertRows(at *auth.Token, series []datadog.Series, extraLabels []prompbmarshal.Label) error {
func insertRows(at *auth.Token, series []parser.Series, extraLabels []prompbmarshal.Label) error {
ctx := common.GetPushCtx()
defer common.PutPushCtx(ctx)
@@ -63,7 +63,7 @@ func insertRows(at *auth.Token, series []datadog.Series, extraLabels []prompbmar
})
}
for _, tag := range ss.Tags {
name, value := datadog.SplitTag(tag)
name, value := parser.SplitTag(tag)
if name == "host" {
name = "exported_host"
}

View File

@@ -11,14 +11,11 @@ import (
"sync/atomic"
"time"
"github.com/VictoriaMetrics/metrics"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/csvimport"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/datadog"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/graphite"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/influx"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/native"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/newrelic"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/opentelemetry"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/opentsdb"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/opentsdbhttp"
@@ -42,7 +39,7 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/pushmetrics"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/streamaggr"
"github.com/VictoriaMetrics/metrics"
)
var (
@@ -211,7 +208,7 @@ func getAuthTokenFromPath(path string) (*auth.Token, error) {
if p.Suffix != "opentsdb/api/put" {
return nil, fmt.Errorf("unsupported path requested: %q; expecting 'opentsdb/api/put'", p.Suffix)
}
return auth.NewTokenPossibleMultitenant(p.AuthToken)
return auth.NewToken(p.AuthToken)
}
func requestHandler(w http.ResponseWriter, r *http.Request) bool {
@@ -229,7 +226,6 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
{"metric-relabel-debug", "debug metric relabeling"},
{"api/v1/targets", "advanced information about discovered targets in JSON format"},
{"config", "-promscrape.config contents"},
{"stream-agg", "streaming aggregation status"},
{"metrics", "available service metrics"},
{"flags", "command-line flags"},
{"-/reload", "reload configuration"},
@@ -255,7 +251,7 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
w.WriteHeader(statusCode)
return true
}
if strings.HasPrefix(path, "/datadog/") {
if strings.HasPrefix(path, "datadog/") {
// Trim suffix from paths starting from /datadog/ in order to support legacy DataDog agent.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/pull/2670
path = strings.TrimSuffix(path, "/")
@@ -322,29 +318,6 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
}
w.WriteHeader(http.StatusOK)
return true
case "/newrelic":
newrelicCheckRequest.Inc()
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(202)
fmt.Fprintf(w, `{"status":"ok"}`)
return true
case "/newrelic/inventory/deltas":
newrelicInventoryRequests.Inc()
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(202)
fmt.Fprintf(w, `{"payload":{"version": 1, "state": {}, "reset": "false"}}`)
return true
case "/newrelic/infra/v2/metrics/events/bulk":
newrelicWriteRequests.Inc()
if err := newrelic.InsertHandlerForHTTP(nil, r); err != nil {
newrelicWriteErrors.Inc()
httpserver.Errorf(w, r, "%s", err)
return true
}
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(202)
fmt.Fprintf(w, `{"status":"ok"}`)
return true
case "/datadog/api/v1/series":
datadogWriteRequests.Inc()
if err := datadog.InsertHandlerForHTTP(nil, r); err != nil {
@@ -434,9 +407,6 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
procutil.SelfSIGHUP()
w.WriteHeader(http.StatusOK)
return true
case "/stream-agg":
streamaggr.WriteHumanReadableState(w, r, remotewrite.GetAggregators())
return true
case "/ready":
if rdy := atomic.LoadInt32(&promscrape.PendingScrapeConfigs); rdy > 0 {
errMsg := fmt.Sprintf("waiting for scrapes to init, left: %d", rdy)
@@ -548,29 +518,6 @@ func processMultitenantRequest(w http.ResponseWriter, r *http.Request, path stri
}
w.WriteHeader(http.StatusOK)
return true
case "newrelic":
newrelicCheckRequest.Inc()
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(202)
fmt.Fprintf(w, `{"status":"ok"}`)
return true
case "newrelic/inventory/deltas":
newrelicInventoryRequests.Inc()
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(202)
fmt.Fprintf(w, `{"payload":{"version": 1, "state": {}, "reset": "false"}}`)
return true
case "newrelic/infra/v2/metrics/events/bulk":
newrelicWriteRequests.Inc()
if err := newrelic.InsertHandlerForHTTP(at, r); err != nil {
newrelicWriteErrors.Inc()
httpserver.Errorf(w, r, "%s", err)
return true
}
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(202)
fmt.Fprintf(w, `{"status":"ok"}`)
return true
case "datadog/api/v1/series":
datadogWriteRequests.Inc()
if err := datadog.InsertHandlerForHTTP(at, r); err != nil {
@@ -643,12 +590,6 @@ var (
opentelemetryPushRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/opentelemetry/api/v1/push", protocol="opentelemetry"}`)
opentelemetryPushErrors = metrics.NewCounter(`vmagent_http_request_errors_total{path="/opentelemetry/api/v1/push", protocol="opentelemetry"}`)
newrelicWriteRequests = metrics.NewCounter(`vm_http_requests_total{path="/newrelic/infra/v2/metrics/events/bulk", protocol="newrelic"}`)
newrelicWriteErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/newrelic/infra/v2/metrics/events/bulk", protocol="newrelic"}`)
newrelicInventoryRequests = metrics.NewCounter(`vm_http_requests_total{path="/newrelic/inventory/deltas", protocol="newrelic"}`)
newrelicCheckRequest = metrics.NewCounter(`vm_http_requests_total{path="/newrelic", protocol="newrelic"}`)
promscrapeTargetsRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/targets"}`)
promscrapeServiceDiscoveryRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/service-discovery"}`)

View File

@@ -1,86 +0,0 @@
package newrelic
import (
"net/http"
"github.com/VictoriaMetrics/metrics"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
parserCommon "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/newrelic"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/newrelic/stream"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
)
var (
rowsInserted = metrics.NewCounter(`vmagent_rows_inserted_total{type="newrelic"}`)
rowsTenantInserted = tenantmetrics.NewCounterMap(`vmagent_tenant_inserted_rows_total{type="newrelic"}`)
rowsPerInsert = metrics.NewHistogram(`vmagent_rows_per_insert{type="newrelic"}`)
)
// InsertHandlerForHTTP processes remote write for NewRelic POST /infra/v2/metrics/events/bulk request.
func InsertHandlerForHTTP(at *auth.Token, req *http.Request) error {
extraLabels, err := parserCommon.GetExtraLabels(req)
if err != nil {
return err
}
ce := req.Header.Get("Content-Encoding")
isGzip := ce == "gzip"
return stream.Parse(req.Body, isGzip, func(rows []newrelic.Row) error {
return insertRows(at, rows, extraLabels)
})
}
func insertRows(at *auth.Token, rows []newrelic.Row, extraLabels []prompbmarshal.Label) error {
ctx := common.GetPushCtx()
defer common.PutPushCtx(ctx)
samplesCount := 0
tssDst := ctx.WriteRequest.Timeseries[:0]
labels := ctx.Labels[:0]
samples := ctx.Samples[:0]
for i := range rows {
r := &rows[i]
tags := r.Tags
srcSamples := r.Samples
for j := range srcSamples {
s := &srcSamples[j]
labelsLen := len(labels)
labels = append(labels, prompbmarshal.Label{
Name: "__name__",
Value: bytesutil.ToUnsafeString(s.Name),
})
for k := range tags {
t := &tags[k]
labels = append(labels, prompbmarshal.Label{
Name: bytesutil.ToUnsafeString(t.Key),
Value: bytesutil.ToUnsafeString(t.Value),
})
}
samples = append(samples, prompbmarshal.Sample{
Value: s.Value,
Timestamp: r.Timestamp,
})
tssDst = append(tssDst, prompbmarshal.TimeSeries{
Labels: labels[labelsLen:],
Samples: samples[len(samples)-1:],
})
labels = append(labels, extraLabels...)
}
samplesCount += len(srcSamples)
}
ctx.WriteRequest.Timeseries = tssDst
ctx.Labels = labels
ctx.Samples = samples
remotewrite.Push(at, &ctx.WriteRequest)
rowsInserted.Add(len(rows))
if at != nil {
rowsTenantInserted.Get(at).Add(samplesCount)
}
rowsPerInsert.Update(float64(samplesCount))
return nil
}

View File

@@ -1,7 +1,6 @@
package opentelemetry
import (
"fmt"
"net/http"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
@@ -27,9 +26,6 @@ func InsertHandler(at *auth.Token, req *http.Request) error {
return err
}
isGzipped := req.Header.Get("Content-Encoding") == "gzip"
if req.Header.Get("Content-Type") == "application/json" {
return fmt.Errorf("json encoding isn't supported for opentelemetry format. Use protobuf encoding")
}
return stream.ParseStream(req.Body, isGzipped, func(tss []prompbmarshal.TimeSeries) error {
return insertRows(at, tss, extraLabels)
})

View File

@@ -32,7 +32,7 @@ func InsertHandler(at *auth.Token, req *http.Request) error {
return err
}
isGzipped := req.Header.Get("Content-Encoding") == "gzip"
return stream.Parse(req.Body, defaultTimestamp, isGzipped, true, func(rows []parser.Row) error {
return stream.Parse(req.Body, defaultTimestamp, isGzipped, func(rows []parser.Row) error {
return insertRows(at, rows, extraLabels)
}, func(s string) {
httpserver.LogError(req, s)

View File

@@ -2,7 +2,6 @@ package remotewrite
import (
"bytes"
"errors"
"fmt"
"io"
"net/http"
@@ -27,10 +26,10 @@ var (
forceVMProto = flagutil.NewArrayBool("remoteWrite.forceVMProto", "Whether to force VictoriaMetrics remote write protocol for sending data "+
"to the corresponding -remoteWrite.url . See https://docs.victoriametrics.com/vmagent.html#victoriametrics-remote-write-protocol")
rateLimit = flagutil.NewArrayInt("remoteWrite.rateLimit", 0, "Optional rate limit in bytes per second for data sent to the corresponding -remoteWrite.url. "+
rateLimit = flagutil.NewArrayInt("remoteWrite.rateLimit", "Optional rate limit in bytes per second for data sent to the corresponding -remoteWrite.url. "+
"By default, the rate limit is disabled. It can be useful for limiting load on remote storage when big amounts of buffered data "+
"is sent after temporary unavailability of the remote storage")
sendTimeout = flagutil.NewArrayDuration("remoteWrite.sendTimeout", time.Minute, "Timeout for sending a single block of data to the corresponding -remoteWrite.url")
sendTimeout = flagutil.NewArrayDuration("remoteWrite.sendTimeout", "Timeout for sending a single block of data to the corresponding -remoteWrite.url (default 1m)")
proxyURL = flagutil.NewArrayString("remoteWrite.proxyURL", "Optional proxy URL for writing data to the corresponding -remoteWrite.url. "+
"Supported proxies: http, https, socks5. Example: -remoteWrite.proxyURL=socks5://proxy:1234")
@@ -135,7 +134,7 @@ func newHTTPClient(argIdx int, remoteWriteURL, sanitizedURL string, fq *persiste
}
hc := &http.Client{
Transport: tr,
Timeout: sendTimeout.GetOptionalArg(argIdx),
Timeout: sendTimeout.GetOptionalArgOrDefault(argIdx, time.Minute),
}
c := &client{
sanitizedURL: sanitizedURL,
@@ -170,7 +169,7 @@ func newHTTPClient(argIdx int, remoteWriteURL, sanitizedURL string, fq *persiste
}
func (c *client) init(argIdx, concurrency int, sanitizedURL string) {
if bytesPerSec := rateLimit.GetOptionalArg(argIdx); bytesPerSec > 0 {
if bytesPerSec := rateLimit.GetOptionalArgOrDefault(argIdx, 0); bytesPerSec > 0 {
logger.Infof("applying %d bytes per second rate limit for -remoteWrite.url=%q", bytesPerSec, sanitizedURL)
c.rl.perSecondLimit = int64(bytesPerSec)
}
@@ -179,7 +178,7 @@ func (c *client) init(argIdx, concurrency int, sanitizedURL string) {
c.bytesSent = metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_bytes_sent_total{url=%q}`, c.sanitizedURL))
c.blocksSent = metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_blocks_sent_total{url=%q}`, c.sanitizedURL))
c.rateLimit = metrics.GetOrCreateGauge(fmt.Sprintf(`vmagent_remotewrite_rate_limit{url=%q}`, c.sanitizedURL), func() float64 {
return float64(rateLimit.GetOptionalArg(argIdx))
return float64(rateLimit.GetOptionalArgOrDefault(argIdx, 0))
})
c.requestDuration = metrics.GetOrCreateHistogram(fmt.Sprintf(`vmagent_remotewrite_duration_seconds{url=%q}`, c.sanitizedURL))
c.requestsOKCount = metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_requests_total{url=%q, status_code="2XX"}`, c.sanitizedURL))
@@ -323,32 +322,12 @@ func (c *client) runWorker() {
}
func (c *client) doRequest(url string, body []byte) (*http.Response, error) {
req, err := c.newRequest(url, body)
if err != nil {
return nil, err
}
resp, err := c.hc.Do(req)
if err != nil && errors.Is(err, io.EOF) {
// it is likely connection became stale.
// So we do one more attempt in hope request will succeed.
// If not, the error should be handled by the caller as usual.
// This should help with https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4139
req, _ = c.newRequest(url, body)
resp, err = c.hc.Do(req)
}
return resp, err
}
func (c *client) newRequest(url string, body []byte) (*http.Request, error) {
reqBody := bytes.NewBuffer(body)
req, err := http.NewRequest(http.MethodPost, url, reqBody)
if err != nil {
logger.Panicf("BUG: unexpected error from http.NewRequest(%q): %s", url, err)
}
err = c.authCfg.SetHeaders(req, true)
if err != nil {
return nil, err
}
c.authCfg.SetHeaders(req, true)
h := req.Header
h.Set("User-Agent", "vmagent")
h.Set("Content-Type", "application/x-protobuf")
@@ -366,7 +345,7 @@ func (c *client) newRequest(url string, body []byte) (*http.Request, error) {
logger.Warnf("cannot sign remoteWrite request with AWS sigv4: %s", err)
}
}
return req, nil
return c.hc.Do(req)
}
// sendBlockHTTP sends the given block to c.remoteWriteURL.

View File

@@ -87,8 +87,8 @@ func initLabelsGlobal() {
}
}
func (rctx *relabelCtx) applyRelabeling(tss []prompbmarshal.TimeSeries, pcs *promrelabel.ParsedConfigs) []prompbmarshal.TimeSeries {
if pcs.Len() == 0 && !*usePromCompatibleNaming {
func (rctx *relabelCtx) applyRelabeling(tss []prompbmarshal.TimeSeries, extraLabels []prompbmarshal.Label, pcs *promrelabel.ParsedConfigs) []prompbmarshal.TimeSeries {
if len(extraLabels) == 0 && pcs.Len() == 0 && !*usePromCompatibleNaming {
// Nothing to change.
return tss
}
@@ -98,15 +98,34 @@ func (rctx *relabelCtx) applyRelabeling(tss []prompbmarshal.TimeSeries, pcs *pro
ts := &tss[i]
labelsLen := len(labels)
labels = append(labels, ts.Labels...)
// extraLabels must be added before applying relabeling according to https://prometheus.io/docs/prometheus/latest/configuration/configuration/#remote_write
for j := range extraLabels {
extraLabel := &extraLabels[j]
tmp := promrelabel.GetLabelByName(labels[labelsLen:], extraLabel.Name)
if tmp != nil {
tmp.Value = extraLabel.Value
} else {
labels = append(labels, *extraLabel)
}
}
if *usePromCompatibleNaming {
// Replace unsupported Prometheus chars in label names and metric names with underscores.
tmpLabels := labels[labelsLen:]
for j := range tmpLabels {
label := &tmpLabels[j]
if label.Name == "__name__" {
label.Value = promrelabel.SanitizeName(label.Value)
} else {
label.Name = promrelabel.SanitizeName(label.Name)
}
}
}
labels = pcs.Apply(labels, labelsLen)
labels = promrelabel.FinalizeLabels(labels[:labelsLen], labels[labelsLen:])
if len(labels) == labelsLen {
// Drop the current time series, since relabeling removed all the labels.
continue
}
if *usePromCompatibleNaming {
fixPromCompatibleNaming(labels[labelsLen:])
}
tssDst = append(tssDst, prompbmarshal.TimeSeries{
Labels: labels[labelsLen:],
Samples: ts.Samples,
@@ -116,29 +135,6 @@ func (rctx *relabelCtx) applyRelabeling(tss []prompbmarshal.TimeSeries, pcs *pro
return tssDst
}
func (rctx *relabelCtx) appendExtraLabels(tss []prompbmarshal.TimeSeries, extraLabels []prompbmarshal.Label) {
if len(extraLabels) == 0 {
return
}
labels := rctx.labels[:0]
for i := range tss {
ts := &tss[i]
labelsLen := len(labels)
labels = append(labels, ts.Labels...)
for j := range extraLabels {
extraLabel := extraLabels[j]
tmp := promrelabel.GetLabelByName(labels[labelsLen:], extraLabel.Name)
if tmp != nil {
tmp.Value = extraLabel.Value
} else {
labels = append(labels, extraLabel)
}
}
ts.Labels = labels[labelsLen:]
}
rctx.labels = labels
}
type relabelCtx struct {
// pool for labels, which are used during the relabeling.
labels []prompbmarshal.Label
@@ -163,15 +159,3 @@ func putRelabelCtx(rctx *relabelCtx) {
rctx.labels = rctx.labels[:0]
relabelCtxPool.Put(rctx)
}
func fixPromCompatibleNaming(labels []prompbmarshal.Label) {
// Replace unsupported Prometheus chars in label names and metric names with underscores.
for i := range labels {
label := &labels[i]
if label.Name == "__name__" {
label.Value = promrelabel.SanitizeMetricName(label.Value)
} else {
label.Name = promrelabel.SanitizeLabelName(label.Name)
}
}
}

View File

@@ -10,16 +10,18 @@ import (
)
func TestApplyRelabeling(t *testing.T) {
f := func(pcs *promrelabel.ParsedConfigs, sTss, sExpTss string) {
f := func(extraLabels []prompbmarshal.Label, pcs *promrelabel.ParsedConfigs, sTss, sExpTss string) {
rctx := &relabelCtx{}
tss, expTss := parseSeries(sTss), parseSeries(sExpTss)
gotTss := rctx.applyRelabeling(tss, pcs)
gotTss := rctx.applyRelabeling(tss, extraLabels, pcs)
if !reflect.DeepEqual(gotTss, expTss) {
t.Fatalf("expected to have: \n%v;\ngot: \n%v", expTss, gotTss)
}
}
f(nil, "up", "up")
f(nil, nil, "up", "up")
f([]prompbmarshal.Label{{Name: "foo", Value: "bar"}}, nil, "up", `up{foo="bar"}`)
f([]prompbmarshal.Label{{Name: "foo", Value: "bar"}}, nil, `up{foo="baz"}`, `up{foo="bar"}`)
pcs, err := promrelabel.ParseRelabelConfigsData([]byte(`
- target_label: "foo"
@@ -30,33 +32,11 @@ func TestApplyRelabeling(t *testing.T) {
if err != nil {
t.Fatalf("unexpected error: %s", err)
}
f(pcs, `up{foo="baz", env="prod"}`, `up{foo="aaa"}`)
f(nil, pcs, `up{foo="baz", env="prod"}`, `up{foo="aaa"}`)
oldVal := *usePromCompatibleNaming
*usePromCompatibleNaming = true
f(nil, `foo.bar`, `foo_bar`)
*usePromCompatibleNaming = oldVal
}
func TestAppendExtraLabels(t *testing.T) {
f := func(extraLabels []prompbmarshal.Label, sTss, sExpTss string) {
t.Helper()
rctx := &relabelCtx{}
tss, expTss := parseSeries(sTss), parseSeries(sExpTss)
rctx.appendExtraLabels(tss, extraLabels)
if !reflect.DeepEqual(tss, expTss) {
t.Fatalf("expected to have: \n%v;\ngot: \n%v", expTss, tss)
}
}
f(nil, "up", "up")
f([]prompbmarshal.Label{{Name: "foo", Value: "bar"}}, "up", `up{foo="bar"}`)
f([]prompbmarshal.Label{{Name: "foo", Value: "bar"}}, `up{foo="baz"}`, `up{foo="bar"}`)
f([]prompbmarshal.Label{{Name: "baz", Value: "qux"}}, `up{foo="baz"}`, `up{foo="baz",baz="qux"}`)
oldVal := *usePromCompatibleNaming
*usePromCompatibleNaming = true
f([]prompbmarshal.Label{{Name: "foo.bar", Value: "baz"}}, "up", `up{foo.bar="baz"}`)
f(nil, nil, `foo.bar`, `foo_bar`)
*usePromCompatibleNaming = oldVal
}

View File

@@ -6,11 +6,12 @@ import (
"net/url"
"path/filepath"
"strconv"
"strings"
"sync"
"sync/atomic"
"time"
"github.com/cespare/xxhash/v2"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bloomfilter"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
@@ -27,7 +28,6 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/streamaggr"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
"github.com/VictoriaMetrics/metrics"
"github.com/cespare/xxhash/v2"
)
var (
@@ -41,8 +41,6 @@ var (
"Pass multiple -remoteWrite.multitenantURL flags in order to replicate data to multiple remote storage systems. See also -remoteWrite.url")
shardByURL = flag.Bool("remoteWrite.shardByURL", false, "Whether to shard outgoing series across all the remote storage systems enumerated via -remoteWrite.url . "+
"By default the data is replicated across all the -remoteWrite.url . See https://docs.victoriametrics.com/vmagent.html#sharding-among-remote-storages")
shardByURLLabels = flag.String("remoteWrite.shardByURL.labels", "", "Comma-separated list of label names for sharding across all the -remoteWrite.url. All labels of timeseries are used by default. "+
"See also -remoteWrite.shardByURL and https://docs.victoriametrics.com/vmagent.html#sharding-among-remote-storages")
tmpDataPath = flag.String("remoteWrite.tmpDataPath", "vmagent-remotewrite-data", "Path to directory where temporary data for remote write component is stored. "+
"See also -remoteWrite.maxDiskUsagePerURL")
keepDanglingQueues = flag.Bool("remoteWrite.keepDanglingQueues", false, "Keep persistent queues contents at -remoteWrite.tmpDataPath in case there are no matching -remoteWrite.url. "+
@@ -51,15 +49,14 @@ var (
"isn't enough for sending high volume of collected data to remote storage. Default value is 2 * numberOfAvailableCPUs")
showRemoteWriteURL = flag.Bool("remoteWrite.showURL", false, "Whether to show -remoteWrite.url in the exported metrics. "+
"It is hidden by default, since it can contain sensitive info such as auth key")
maxPendingBytesPerURL = flagutil.NewArrayBytes("remoteWrite.maxDiskUsagePerURL", 0, "The maximum file-based buffer size in bytes at -remoteWrite.tmpDataPath "+
maxPendingBytesPerURL = flagutil.NewArrayBytes("remoteWrite.maxDiskUsagePerURL", "The maximum file-based buffer size in bytes at -remoteWrite.tmpDataPath "+
"for each -remoteWrite.url. When buffer size reaches the configured maximum, then old data is dropped when adding new data to the buffer. "+
"Buffered data is stored in ~500MB chunks. It is recommended to set the value for this flag to a multiple of the block size 500MB. "+
"Disk usage is unlimited if the value is set to 0")
significantFigures = flagutil.NewArrayInt("remoteWrite.significantFigures", 0, "The number of significant figures to leave in metric values before writing them "+
significantFigures = flagutil.NewArrayInt("remoteWrite.significantFigures", "The number of significant figures to leave in metric values before writing them "+
"to remote storage. See https://en.wikipedia.org/wiki/Significant_figures . Zero value saves all the significant figures. "+
"This option may be used for improving data compression for the stored metrics. See also -remoteWrite.roundDigits")
roundDigits = flagutil.NewArrayInt("remoteWrite.roundDigits", 100, "Round metric values to this number of decimal digits after the point before "+
"writing them to remote storage. "+
roundDigits = flagutil.NewArrayInt("remoteWrite.roundDigits", "Round metric values to this number of decimal digits after the point before writing them to remote storage. "+
"Examples: -remoteWrite.roundDigits=2 would round 1.236 to 1.24, while -remoteWrite.roundDigits=-1 would round 126.78 to 130. "+
"By default, digits rounding is disabled. Set it to 100 for disabling it for a particular remote storage. "+
"This option may be used for improving data compression for the stored metrics")
@@ -81,7 +78,7 @@ var (
streamAggrDropInput = flagutil.NewArrayBool("remoteWrite.streamAggr.dropInput", "Whether to drop all the input samples after the aggregation "+
"with -remoteWrite.streamAggr.config. By default, only aggregates samples are dropped, while the remaining samples "+
"are written to the corresponding -remoteWrite.url . See also -remoteWrite.streamAggr.keepInput and https://docs.victoriametrics.com/stream-aggregation.html")
streamAggrDedupInterval = flagutil.NewArrayDuration("remoteWrite.streamAggr.dedupInterval", 0, "Input samples are de-duplicated with this interval before being aggregated. "+
streamAggrDedupInterval = flagutil.NewArrayDuration("remoteWrite.streamAggr.dedupInterval", "Input samples are de-duplicated with this interval before being aggregated. "+
"Only the last sample per each time series per each interval is aggregated if the interval is greater than zero")
)
@@ -95,8 +92,6 @@ var (
// Data without tenant id is written to defaultAuthToken if -remoteWrite.multitenantURL is specified.
defaultAuthToken = &auth.Token{}
shardLabelsFilter map[string]struct{}
)
// MultitenancyEnabled returns true if -remoteWrite.multitenantURL is specified.
@@ -176,12 +171,6 @@ func Init() {
rwctxsDefault = newRemoteWriteCtxs(nil, *remoteWriteURLs)
}
if *shardByURLLabels != "" {
for _, label := range strings.Split(*shardByURLLabels, ",") {
shardLabelsFilter[strings.TrimSpace(label)] = struct{}{}
}
}
// Start config reloader.
configReloaderWG.Add(1)
go func() {
@@ -269,7 +258,7 @@ func newRemoteWriteCtxs(at *auth.Token, urls []string) []*remoteWriteCtx {
if *showRemoteWriteURL {
sanitizedURL = fmt.Sprintf("%d:%s", i+1, remoteWriteURL)
}
rwctxs[i] = newRemoteWriteCtx(i, remoteWriteURL, maxInmemoryBlocks, sanitizedURL)
rwctxs[i] = newRemoteWriteCtx(i, at, remoteWriteURL, maxInmemoryBlocks, sanitizedURL)
}
if !*keepDanglingQueues {
@@ -366,7 +355,7 @@ func Push(at *auth.Token, wr *prompbmarshal.WriteRequest) {
var rctx *relabelCtx
rcs := allRelabelConfigs.Load()
pcsGlobal := rcs.global
if pcsGlobal.Len() > 0 {
if pcsGlobal.Len() > 0 || len(labelsGlobal) > 0 {
rctx = getRelabelCtx()
}
tss := wr.Timeseries
@@ -397,7 +386,7 @@ func Push(at *auth.Token, wr *prompbmarshal.WriteRequest) {
}
if rctx != nil {
rowsCountBeforeRelabel := getRowsCount(tssBlock)
tssBlock = rctx.applyRelabeling(tssBlock, pcsGlobal)
tssBlock = rctx.applyRelabeling(tssBlock, labelsGlobal, pcsGlobal)
rowsCountAfterRelabel := getRowsCount(tssBlock)
rowsDroppedByGlobalRelabel.Add(rowsCountBeforeRelabel - rowsCountAfterRelabel)
}
@@ -430,7 +419,7 @@ func pushBlockToRemoteStorages(rwctxs []*remoteWriteCtx, tssBlock []prompbmarsha
// Shard the data among rwctxs
tssByURL := make([][]prompbmarshal.TimeSeries, len(rwctxs))
for _, ts := range tssBlock {
h := getLabelsHash(ts.Labels, shardLabelsFilter)
h := getLabelsHash(ts.Labels)
idx := h % uint64(len(tssByURL))
tssByURL[idx] = append(tssByURL[idx], ts)
}
@@ -483,7 +472,7 @@ func limitSeriesCardinality(tss []prompbmarshal.TimeSeries) []prompbmarshal.Time
dst := make([]prompbmarshal.TimeSeries, 0, len(tss))
for i := range tss {
labels := tss[i].Labels
h := getLabelsHash(labels, nil)
h := getLabelsHash(labels)
if hourlySeriesLimiter != nil && !hourlySeriesLimiter.Add(h) {
hourlySeriesLimitRowsDropped.Add(len(tss[i].Samples))
logSkippedSeries(labels, "-remoteWrite.maxHourlySeries", hourlySeriesLimiter.MaxItems())
@@ -507,16 +496,10 @@ var (
dailySeriesLimitRowsDropped = metrics.NewCounter(`vmagent_daily_series_limit_rows_dropped_total`)
)
func getLabelsHash(labels []prompbmarshal.Label, filterLabels map[string]struct{}) uint64 {
func getLabelsHash(labels []prompbmarshal.Label) uint64 {
bb := labelsHashBufPool.Get()
b := bb.B[:0]
for _, label := range labels {
if len(filterLabels) > 0 {
_, ok := filterLabels[label.Name]
if !ok {
continue
}
}
b = append(b, label.Name...)
b = append(b, label.Value...)
}
@@ -576,14 +559,14 @@ type remoteWriteCtx struct {
rowsDroppedByRelabel *metrics.Counter
}
func newRemoteWriteCtx(argIdx int, remoteWriteURL *url.URL, maxInmemoryBlocks int, sanitizedURL string) *remoteWriteCtx {
func newRemoteWriteCtx(argIdx int, at *auth.Token, remoteWriteURL *url.URL, maxInmemoryBlocks int, sanitizedURL string) *remoteWriteCtx {
// strip query params, otherwise changing params resets pq
pqURL := *remoteWriteURL
pqURL.RawQuery = ""
pqURL.Fragment = ""
h := xxhash.Sum64([]byte(pqURL.String()))
queuePath := filepath.Join(*tmpDataPath, persistentQueueDirname, fmt.Sprintf("%d_%016X", argIdx+1, h))
maxPendingBytes := maxPendingBytesPerURL.GetOptionalArg(argIdx)
maxPendingBytes := maxPendingBytesPerURL.GetOptionalArgOrDefault(argIdx, 0)
if maxPendingBytes != 0 && maxPendingBytes < persistentqueue.DefaultChunkFileSize {
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4195
logger.Warnf("rounding the -remoteWrite.maxDiskUsagePerURL=%d to the minimum supported value: %d", maxPendingBytes, persistentqueue.DefaultChunkFileSize)
@@ -607,8 +590,8 @@ func newRemoteWriteCtx(argIdx int, remoteWriteURL *url.URL, maxInmemoryBlocks in
c.init(argIdx, *queues, sanitizedURL)
// Initialize pss
sf := significantFigures.GetOptionalArg(argIdx)
rd := roundDigits.GetOptionalArg(argIdx)
sf := significantFigures.GetOptionalArgOrDefault(argIdx, 0)
rd := roundDigits.GetOptionalArgOrDefault(argIdx, 100)
pssLen := *queues
if n := cgroup.AvailableCPUs(); pssLen > n {
// There is no sense in running more than availableCPUs concurrent pendingSeries,
@@ -633,7 +616,7 @@ func newRemoteWriteCtx(argIdx int, remoteWriteURL *url.URL, maxInmemoryBlocks in
// Initialize sas
sasFile := streamAggrConfig.GetOptionalArg(argIdx)
if sasFile != "" {
dedupInterval := streamAggrDedupInterval.GetOptionalArg(argIdx)
dedupInterval := streamAggrDedupInterval.GetOptionalArgOrDefault(argIdx, 0)
sas, err := streamaggr.LoadFromFile(sasFile, rwctx.pushInternal, dedupInterval)
if err != nil {
logger.Fatalf("cannot initialize stream aggregators from -remoteWrite.streamAggr.config=%q: %s", sasFile, err)
@@ -685,7 +668,7 @@ func (rwctx *remoteWriteCtx) Push(tss []prompbmarshal.TimeSeries) {
v = tssPool.Get().(*[]prompbmarshal.TimeSeries)
tss = append(*v, tss...)
rowsCountBeforeRelabel := getRowsCount(tss)
tss = rctx.applyRelabeling(tss, pcs)
tss = rctx.applyRelabeling(tss, nil, pcs)
rowsCountAfterRelabel := getRowsCount(tss)
rwctx.rowsDroppedByRelabel.Add(rowsCountBeforeRelabel - rowsCountAfterRelabel)
}
@@ -722,13 +705,11 @@ var matchIdxsPool bytesutil.ByteBufferPool
func dropAggregatedSeries(src []prompbmarshal.TimeSeries, matchIdxs []byte, dropInput bool) []prompbmarshal.TimeSeries {
dst := src[:0]
if !dropInput {
for i, match := range matchIdxs {
if match == 1 {
continue
}
dst = append(dst, src[i])
for i, match := range matchIdxs {
if match == 0 {
continue
}
dst = append(dst, src[i])
}
tail := src[len(dst):]
_ = prompbmarshal.ResetTimeSeries(tail)
@@ -736,38 +717,22 @@ func dropAggregatedSeries(src []prompbmarshal.TimeSeries, matchIdxs []byte, drop
}
func (rwctx *remoteWriteCtx) pushInternal(tss []prompbmarshal.TimeSeries) {
var rctx *relabelCtx
var v *[]prompbmarshal.TimeSeries
if len(labelsGlobal) > 0 {
// Make a copy of tss before adding extra labels in order to prevent
// from affecting time series for other remoteWrite.url configs.
rctx = getRelabelCtx()
v = tssPool.Get().(*[]prompbmarshal.TimeSeries)
tss = append(*v, tss...)
rctx.appendExtraLabels(tss, labelsGlobal)
}
pss := rwctx.pss
idx := atomic.AddUint64(&rwctx.pssNextIdx, 1) % uint64(len(pss))
pss[idx].Push(tss)
if rctx != nil {
*v = prompbmarshal.ResetTimeSeries(tss)
tssPool.Put(v)
putRelabelCtx(rctx)
}
}
func (rwctx *remoteWriteCtx) reinitStreamAggr() {
sasFile := streamAggrConfig.GetOptionalArg(rwctx.idx)
if sasFile == "" {
sas := rwctx.sas.Load()
if sas == nil {
// There is no stream aggregation for rwctx
return
}
sasFile := streamAggrConfig.GetOptionalArg(rwctx.idx)
logger.Infof("reloading stream aggregation configs pointed by -remoteWrite.streamAggr.config=%q", sasFile)
metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_streamaggr_config_reloads_total{path=%q}`, sasFile)).Inc()
dedupInterval := streamAggrDedupInterval.GetOptionalArg(rwctx.idx)
dedupInterval := streamAggrDedupInterval.GetOptionalArgOrDefault(rwctx.idx, 0)
sasNew, err := streamaggr.LoadFromFile(sasFile, rwctx.pushInternal, dedupInterval)
if err != nil {
metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_streamaggr_config_reloads_errors_total{path=%q}`, sasFile)).Inc()
@@ -775,7 +740,6 @@ func (rwctx *remoteWriteCtx) reinitStreamAggr() {
logger.Errorf("cannot reload stream aggregation config from -remoteWrite.streamAggr.config=%q; continue using the previously loaded config; error: %s", sasFile, err)
return
}
sas := rwctx.sas.Load()
if !sasNew.Equal(sas) {
sasOld := rwctx.sas.Swap(sasNew)
sasOld.MustStop()
@@ -810,7 +774,7 @@ func CheckStreamAggrConfigs() error {
if sasFile == "" {
continue
}
dedupInterval := streamAggrDedupInterval.GetOptionalArg(idx)
dedupInterval := streamAggrDedupInterval.GetOptionalArgOrDefault(idx, 0)
sas, err := streamaggr.LoadFromFile(sasFile, pushNoop, dedupInterval)
if err != nil {
return fmt.Errorf("cannot load -remoteWrite.streamAggr.config=%q: %w", sasFile, err)
@@ -819,23 +783,3 @@ func CheckStreamAggrConfigs() error {
}
return nil
}
func GetAggregators() map[string]*streamaggr.Aggregators {
var result = map[string]*streamaggr.Aggregators{}
if len(*remoteWriteMultitenantURLs) > 0 {
rwctxsMapLock.Lock()
for tenant, rwctxs := range rwctxsMap {
for rwNum, rw := range rwctxs {
result[fmt.Sprintf("rw %d for tenant %v:%v", rwNum, tenant.AccountID, tenant.ProjectID)] = rw.sas.Load()
}
}
rwctxsMapLock.Unlock()
} else {
for rwNum, rw := range rwctxsDefault {
result[fmt.Sprintf("remote write %d", rwNum)] = rw.sas.Load()
}
}
return result
}

View File

@@ -27,7 +27,7 @@ var (
stdDialerOnce sync.Once
)
func statDial(ctx context.Context, _, addr string) (conn net.Conn, err error) {
func statDial(ctx context.Context, networkUnused, addr string) (conn net.Conn, err error) {
network := netutil.GetTCPNetwork()
d := getStdDialer()
conn, err = d.DialContext(ctx, network, addr)

View File

@@ -1,103 +0,0 @@
# All these commands must run from repository root.
vmalert-tool:
APP_NAME=vmalert-tool $(MAKE) app-local
vmalert-tool-race:
APP_NAME=vmalert-tool RACE=-race $(MAKE) app-local
vmalert-tool-prod:
APP_NAME=vmalert-tool $(MAKE) app-via-docker
vmalert-tool-pure-prod:
APP_NAME=vmalert-tool $(MAKE) app-via-docker-pure
vmalert-tool-linux-amd64-prod:
APP_NAME=vmalert-tool $(MAKE) app-via-docker-linux-amd64
vmalert-tool-linux-arm-prod:
APP_NAME=vmalert-tool $(MAKE) app-via-docker-linux-arm
vmalert-tool-linux-arm64-prod:
APP_NAME=vmalert-tool $(MAKE) app-via-docker-linux-arm64
vmalert-tool-linux-ppc64le-prod:
APP_NAME=vmalert-tool $(MAKE) app-via-docker-linux-ppc64le
vmalert-tool-linux-386-prod:
APP_NAME=vmalert-tool $(MAKE) app-via-docker-linux-386
vmalert-tool-darwin-amd64-prod:
APP_NAME=vmalert-tool $(MAKE) app-via-docker-darwin-amd64
vmalert-tool-darwin-arm64-prod:
APP_NAME=vmalert-tool $(MAKE) app-via-docker-darwin-arm64
vmalert-tool-freebsd-amd64-prod:
APP_NAME=vmalert-tool $(MAKE) app-via-docker-freebsd-amd64
vmalert-tool-openbsd-amd64-prod:
APP_NAME=vmalert-tool $(MAKE) app-via-docker-openbsd-amd64
vmalert-tool-windows-amd64-prod:
APP_NAME=vmalert-tool $(MAKE) app-via-docker-windows-amd64
package-vmalert-tool:
APP_NAME=vmalert-tool $(MAKE) package-via-docker
package-vmalert-tool-pure:
APP_NAME=vmalert-tool $(MAKE) package-via-docker-pure
package-vmalert-tool-amd64:
APP_NAME=vmalert-tool $(MAKE) package-via-docker-amd64
package-vmalert-tool-arm:
APP_NAME=vmalert-tool $(MAKE) package-via-docker-arm
package-vmalert-tool-arm64:
APP_NAME=vmalert-tool $(MAKE) package-via-docker-arm64
package-vmalert-tool-ppc64le:
APP_NAME=vmalert-tool $(MAKE) package-via-docker-ppc64le
package-vmalert-tool-386:
APP_NAME=vmalert-tool $(MAKE) package-via-docker-386
publish-vmalert-tool:
APP_NAME=vmalert-tool $(MAKE) publish-via-docker
vmalert-tool-linux-amd64:
APP_NAME=vmalert-tool CGO_ENABLED=1 GOOS=linux GOARCH=amd64 $(MAKE) app-local-goos-goarch
vmalert-tool-linux-arm:
APP_NAME=vmalert-tool CGO_ENABLED=0 GOOS=linux GOARCH=arm $(MAKE) app-local-goos-goarch
vmalert-tool-linux-arm64:
APP_NAME=vmalert-tool CGO_ENABLED=0 GOOS=linux GOARCH=arm64 $(MAKE) app-local-goos-goarch
vmalert-tool-linux-ppc64le:
APP_NAME=vmalert-tool CGO_ENABLED=0 GOOS=linux GOARCH=ppc64le $(MAKE) app-local-goos-goarch
vmalert-tool-linux-s390x:
APP_NAME=vmalert-tool CGO_ENABLED=0 GOOS=linux GOARCH=s390x $(MAKE) app-local-goos-goarch
vmalert-tool-linux-386:
APP_NAME=vmalert-tool CGO_ENABLED=0 GOOS=linux GOARCH=386 $(MAKE) app-local-goos-goarch
vmalert-tool-darwin-amd64:
APP_NAME=vmalert-tool CGO_ENABLED=0 GOOS=darwin GOARCH=amd64 $(MAKE) app-local-goos-goarch
vmalert-tool-darwin-arm64:
APP_NAME=vmalert-tool CGO_ENABLED=0 GOOS=darwin GOARCH=arm64 $(MAKE) app-local-goos-goarch
vmalert-tool-freebsd-amd64:
APP_NAME=vmalert-tool CGO_ENABLED=0 GOOS=freebsd GOARCH=amd64 $(MAKE) app-local-goos-goarch
vmalert-tool-openbsd-amd64:
APP_NAME=vmalert-tool CGO_ENABLED=0 GOOS=openbsd GOARCH=amd64 $(MAKE) app-local-goos-goarch
vmalert-tool-windows-amd64:
GOARCH=amd64 APP_NAME=vmalert-tool $(MAKE) app-local-windows-goarch
vmalert-tool-pure:
APP_NAME=vmalert-tool $(MAKE) app-local-pure

View File

@@ -1,246 +0,0 @@
# vmalert-tool
VMAlert command-line tool
## Unit testing for rules
You can use `vmalert-tool` to run unit tests for alerting and recording rules.
It will perform the following actions:
* sets up an isolated VictoriaMetrics instance;
* simulates the periodic ingestion of time series;
* queries the ingested data for recording and alerting rules evaluation like [vmalert](https://docs.victoriametrics.com/vmalert.html);
* checks whether the firing alerts or resulting recording rules match the expected results.
See how to run vmalert-tool for unit test below:
```
# Run vmalert-tool with one or multiple test files via --files cmd-line flag
./vmalert-tool unittest --files test1.yaml --files test2.yaml
```
vmalert-tool unittest is compatible with [Prometheus config format for tests](https://prometheus.io/docs/prometheus/latest/configuration/unit_testing_rules/#test-file-format)
except `promql_expr_test` field. Use `metricsql_expr_test` field name instead. The name is different because vmalert-tool
validates and executes [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html) expressions,
which aren't always backward compatible with [PromQL](https://prometheus.io/docs/prometheus/latest/querying/basics/).
### Test file format
The configuration format for files specified in `--files` cmd-line flag is the following:
```yaml
# Path to the files or http url containing [rule groups](https://docs.victoriametrics.com/vmalert.html#groups) configuration.
# Enterprise version of vmalert-tool supports S3 and GCS paths to rules.
rule_files:
[ - <string> ]
# The evaluation interval for rules specified in `rule_files`
[ evaluation_interval: <duration> | default = 1m ]
# Groups listed below will be evaluated by order.
# Not All the groups need not be mentioned, if not, they will be evaluated by define order in rule_files.
group_eval_order:
[ - <string> ]
# The list of unit test files to be checked during evaluation.
tests:
[ - <test_group> ]
```
#### `<test_group>`
```yaml
# Interval between samples for input series
interval: <duration>
# Time series to persist into the database according to configured <interval> before running tests.
input_series:
[ - <series> ]
# Name of the test group, optional
[ name: <string> ]
# Unit tests for alerting rules
alert_rule_test:
[ - <alert_test_case> ]
# Unit tests for Metricsql expressions.
metricsql_expr_test:
[ - <metricsql_expr_test> ]
# External labels accessible for templating.
external_labels:
[ <labelname>: <string> ... ]
```
#### `<series>`
```yaml
# series in the following format '<metric name>{<label name>=<label value>, ...}'
# Examples:
# series_name{label1="value1", label2="value2"}
# go_goroutines{job="prometheus", instance="localhost:9090"}
series: <string>
# values support several special equations:
# 'a+bxc' becomes 'a a+b a+(2*b) a+(3*b) … a+(c*b)'
# Read this as series starts at a, then c further samples incrementing by b.
# 'a-bxc' becomes 'a a-b a-(2*b) a-(3*b) … a-(c*b)'
# Read this as series starts at a, then c further samples decrementing by b (or incrementing by negative b).
# '_' represents a missing sample from scrape
# 'stale' indicates a stale sample
# Examples:
# 1. '-2+4x3' becomes '-2 2 6 10' - series starts at -2, then 3 further samples incrementing by 4.
# 2. ' 1-2x4' becomes '1 -1 -3 -5 -7' - series starts at 1, then 4 further samples decrementing by 2.
# 3. ' 1x4' becomes '1 1 1 1 1' - shorthand for '1+0x4', series starts at 1, then 4 further samples incrementing by 0.
# 4. ' 1 _x3 stale' becomes '1 _ _ _ stale' - the missing sample cannot increment, so 3 missing samples are produced by the '_x3' expression.
values: <string>
```
#### `<alert_test_case>`
vmalert by default adds `alertgroup` and `alertname` to the generated alerts and time series.
So you will need to specify both `groupname` and `alertname` under a single `<alert_test_case>`,
but no need to add them under `exp_alerts`.
You can also pass `--disableAlertgroupLabel` to skip `alertgroup` check.
```yaml
# The time elapsed from time=0s when this alerting rule should be checked.
# Means this rule should be firing at this point, or shouldn't be firing if 'exp_alerts' is empty.
eval_time: <duration>
# Name of the group name to be tested.
groupname: <string>
# Name of the alert to be tested.
alertname: <string>
# List of the expected alerts that are firing under the given alertname at
# the given evaluation time. If you want to test if an alerting rule should
# not be firing, then you can mention only the fields above and leave 'exp_alerts' empty.
exp_alerts:
[ - <alert> ]
```
#### `<alert>`
```yaml
# These are the expanded labels and annotations of the expected alert.
# Note: labels also include the labels of the sample associated with the alert
exp_labels:
[ <labelname>: <string> ]
exp_annotations:
[ <labelname>: <string> ]
```
#### `<metricsql_expr_test>`
```yaml
# Expression to evaluate
expr: <string>
# The time elapsed from time=0s when this expression be evaluated.
eval_time: <duration>
# Expected samples at the given evaluation time.
exp_samples:
[ - <sample> ]
```
#### `<sample>`
```yaml
# Labels of the sample in usual series notation '<metric name>{<label name>=<label value>, ...}'
# Examples:
# series_name{label1="value1", label2="value2"}
# go_goroutines{job="prometheus", instance="localhost:9090"}
labels: <string>
# The expected value of the Metricsql expression.
value: <number>
```
### Example
This is an example input file for unit testing which will pass.
`test.yaml` is the test file which follows the syntax above and `alerts.yaml` contains the alerting rules.
With `rules.yaml` in the same directory, run `./vmalert-tool unittest --files=./unittest/testdata/test.yaml`.
#### `test.yaml`
```yaml
rule_files:
- rules.yaml
evaluation_interval: 1m
tests:
- interval: 1m
input_series:
- series: 'up{job="prometheus", instance="localhost:9090"}'
values: "0+0x1440"
metricsql_expr_test:
- expr: suquery_interval_test
eval_time: 4m
exp_samples:
- labels: '{__name__="suquery_interval_test", datacenter="dc-123", instance="localhost:9090", job="prometheus"}'
value: 1
alert_rule_test:
- eval_time: 2h
groupname: group1
alertname: InstanceDown
exp_alerts:
- exp_labels:
job: prometheus
severity: page
instance: localhost:9090
datacenter: dc-123
exp_annotations:
summary: "Instance localhost:9090 down"
description: "localhost:9090 of job prometheus has been down for more than 5 minutes."
- eval_time: 0
groupname: group1
alertname: AlwaysFiring
exp_alerts:
- exp_labels:
datacenter: dc-123
- eval_time: 0
groupname: group1
alertname: InstanceDown
exp_alerts: []
external_labels:
datacenter: dc-123
```
#### `alerts.yaml`
```yaml
# This is the rules file.
groups:
- name: group1
rules:
- alert: InstanceDown
expr: up == 0
for: 5m
labels:
severity: page
annotations:
summary: "Instance {{ $labels.instance }} down"
description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 5 minutes."
- alert: AlwaysFiring
expr: 1
- name: group2
rules:
- record: job:test:count_over_time1m
expr: sum without(instance) (count_over_time(test[1m]))
- record: suquery_interval_test
expr: count_over_time(up[5m:])
```

View File

@@ -1,54 +0,0 @@
package main
import (
"fmt"
"log"
"os"
"time"
"github.com/urfave/cli/v2"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert-tool/unittest"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
)
func main() {
start := time.Now()
app := &cli.App{
Name: "vmalert-tool",
Usage: "VMAlert command-line tool",
UsageText: "More info in https://docs.victoriametrics.com/vmalert-tool.html",
Version: buildinfo.Version,
Commands: []*cli.Command{
{
Name: "unittest",
Usage: "Run unittest for alerting and recording rules.",
UsageText: "More info in https://docs.victoriametrics.com/vmalert-tool.html#Unit-testing-for-rules",
Flags: []cli.Flag{
&cli.StringSliceFlag{
Name: "files",
Usage: "files to run unittest with. Supports an array of values separated by comma or specified via multiple flags.",
Required: true,
},
&cli.BoolFlag{
Name: "disableAlertgroupLabel",
Usage: "disable adding group's Name as label to generated alerts and time series.",
Required: false,
},
},
Action: func(c *cli.Context) error {
if failed := unittest.UnitTest(c.StringSlice("files"), c.Bool("disableAlertgroupLabel")); failed {
return fmt.Errorf("unittest failed")
}
return nil
},
},
},
}
err := app.Run(os.Args)
if err != nil {
log.Fatalln(err)
}
log.Printf("Total time: %v", time.Since(start))
}

View File

@@ -1,19 +0,0 @@
package unittest
import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutils"
)
// alertTestCase holds alert_rule_test cases defined in test file
type alertTestCase struct {
EvalTime *promutils.Duration `yaml:"eval_time"`
GroupName string `yaml:"groupname"`
Alertname string `yaml:"alertname"`
ExpAlerts []expAlert `yaml:"exp_alerts"`
}
// expAlert holds exp_alerts defined in test file
type expAlert struct {
ExpLabels map[string]string `yaml:"exp_labels"`
ExpAnnotations map[string]string `yaml:"exp_annotations"`
}

View File

@@ -1,182 +0,0 @@
package unittest
import (
"bytes"
"fmt"
"io"
"net/http"
"regexp"
"strconv"
"strings"
"time"
testutil "github.com/VictoriaMetrics/VictoriaMetrics/app/victoria-metrics/test"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmstorage"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/decimal"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutils"
"github.com/VictoriaMetrics/metricsql"
)
// series holds input_series defined in the test file
type series struct {
Series string `yaml:"series"`
Values string `yaml:"values"`
}
// sequenceValue is an omittable value in a sequence of time series values.
type sequenceValue struct {
Value float64
Omitted bool
}
func httpWrite(address string, r io.Reader) {
resp, err := http.Post(address, "", r)
if err != nil {
logger.Fatalf("failed to send to storage: %v", err)
}
resp.Body.Close()
}
// writeInputSeries send input series to vmstorage and flush them
func writeInputSeries(input []series, interval *promutils.Duration, startStamp time.Time, dst string) error {
r := testutil.WriteRequest{}
for _, data := range input {
expr, err := metricsql.Parse(data.Series)
if err != nil {
return fmt.Errorf("failed to parse series %s: %v", data.Series, err)
}
promvals, err := parseInputValue(data.Values, true)
if err != nil {
return fmt.Errorf("failed to parse input series value %s: %v", data.Values, err)
}
metricExpr, ok := expr.(*metricsql.MetricExpr)
if !ok {
return fmt.Errorf("failed to parse series %s to metric expr: %v", data.Series, err)
}
samples := make([]testutil.Sample, 0, len(promvals))
ts := startStamp
for _, v := range promvals {
if !v.Omitted {
samples = append(samples, testutil.Sample{
Timestamp: ts.UnixMilli(),
Value: v.Value,
})
}
ts = ts.Add(interval.Duration())
}
var ls []testutil.Label
for _, filter := range metricExpr.LabelFilterss[0] {
ls = append(ls, testutil.Label{Name: filter.Label, Value: filter.Value})
}
r.Timeseries = append(r.Timeseries, testutil.TimeSeries{Labels: ls, Samples: samples})
}
data, err := testutil.Compress(r)
if err != nil {
return fmt.Errorf("failed to compress data: %v", err)
}
// write input series to vm
httpWrite(dst, bytes.NewBuffer(data))
vmstorage.Storage.DebugFlush()
return nil
}
// parseInputValue support input like "1", "1+1x1 _ -4 3+20x1", see more examples in test.
func parseInputValue(input string, origin bool) ([]sequenceValue, error) {
var res []sequenceValue
items := strings.Split(input, " ")
reg := regexp.MustCompile(`\D?\d*\D?`)
for _, item := range items {
if item == "stale" {
res = append(res, sequenceValue{Value: decimal.StaleNaN})
continue
}
vals := reg.FindAllString(item, -1)
switch len(vals) {
case 1:
if vals[0] == "_" {
res = append(res, sequenceValue{Omitted: true})
continue
}
v, err := strconv.ParseFloat(vals[0], 64)
if err != nil {
return nil, err
}
res = append(res, sequenceValue{Value: v})
continue
case 2:
p1 := vals[0][:len(vals[0])-1]
v2, err := strconv.ParseInt(vals[1], 10, 64)
if err != nil {
return nil, err
}
option := vals[0][len(vals[0])-1]
switch option {
case '+':
v1, err := strconv.ParseFloat(p1, 64)
if err != nil {
return nil, err
}
res = append(res, sequenceValue{Value: v1 + float64(v2)})
case 'x':
for i := int64(0); i <= v2; i++ {
if p1 == "_" {
if i == 0 {
i = 1
}
res = append(res, sequenceValue{Omitted: true})
continue
}
v1, err := strconv.ParseFloat(p1, 64)
if err != nil {
return nil, err
}
if !origin || v1 == 0 {
res = append(res, sequenceValue{Value: v1 * float64(i)})
continue
}
newVal := fmt.Sprintf("%s+0x%s", p1, vals[1])
newRes, err := parseInputValue(newVal, false)
if err != nil {
return nil, err
}
res = append(res, newRes...)
break
}
default:
return nil, fmt.Errorf("got invalid operation %b", option)
}
case 3:
r1, err := parseInputValue(fmt.Sprintf("%s%s", vals[1], vals[2]), false)
if err != nil {
return nil, err
}
p1 := vals[0][:len(vals[0])-1]
v1, err := strconv.ParseFloat(p1, 64)
if err != nil {
return nil, err
}
option := vals[0][len(vals[0])-1]
var isAdd bool
if option == '+' {
isAdd = true
}
for _, r := range r1 {
if isAdd {
res = append(res, sequenceValue{
Value: r.Value + v1,
})
} else {
res = append(res, sequenceValue{
Value: v1 - r.Value,
})
}
}
default:
return nil, fmt.Errorf("unsupported input %s", input)
}
}
return res, nil
}

View File

@@ -1,93 +0,0 @@
package unittest
import (
"testing"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/decimal"
)
func TestParseInputValue(t *testing.T) {
testCases := []struct {
input string
exp []sequenceValue
failed bool
}{
{
"",
nil,
true,
},
{
"testfailed",
nil,
true,
},
// stale doesn't support operations
{
"stalex3",
nil,
true,
},
{
"-4",
[]sequenceValue{{Value: -4}},
false,
},
{
"_",
[]sequenceValue{{Omitted: true}},
false,
},
{
"stale",
[]sequenceValue{{Value: decimal.StaleNaN}},
false,
},
{
"-4x1",
[]sequenceValue{{Value: -4}, {Value: -4}},
false,
},
{
"_x1",
[]sequenceValue{{Omitted: true}},
false,
},
{
"1+1x4",
[]sequenceValue{{Value: 1}, {Value: 2}, {Value: 3}, {Value: 4}, {Value: 5}},
false,
},
{
"2-1x4",
[]sequenceValue{{Value: 2}, {Value: 1}, {Value: 0}, {Value: -1}, {Value: -2}},
false,
},
{
"1+1x1 _ -4 stale 3+20x1",
[]sequenceValue{{Value: 1}, {Value: 2}, {Omitted: true}, {Value: -4}, {Value: decimal.StaleNaN}, {Value: 3}, {Value: 23}},
false,
},
}
for _, tc := range testCases {
output, err := parseInputValue(tc.input, true)
if err != nil != tc.failed {
t.Fatalf("failed to parse %s, expect %t, got %t", tc.input, tc.failed, err != nil)
}
if len(tc.exp) != len(output) {
t.Fatalf("expect %v, got %v", tc.exp, output)
}
for i := 0; i < len(tc.exp); i++ {
if tc.exp[i].Omitted != output[i].Omitted {
t.Fatalf("expect %v, got %v", tc.exp, output)
}
if tc.exp[i].Value != output[i].Value {
if decimal.IsStaleNaN(tc.exp[i].Value) && decimal.IsStaleNaN(output[i].Value) {
continue
}
t.Fatalf("expect %v, got %v", tc.exp, output)
}
}
}
}

View File

@@ -1,100 +0,0 @@
package unittest
import (
"context"
"fmt"
"net/url"
"reflect"
"sort"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutils"
"github.com/VictoriaMetrics/metricsql"
)
// metricsqlTestCase holds metricsql_expr_test cases defined in test file
type metricsqlTestCase struct {
Expr string `yaml:"expr"`
EvalTime *promutils.Duration `yaml:"eval_time"`
ExpSamples []expSample `yaml:"exp_samples"`
}
type expSample struct {
Labels string `yaml:"labels"`
Value float64 `yaml:"value"`
}
// checkMetricsqlCase will check metricsql_expr_test cases
func checkMetricsqlCase(cases []metricsqlTestCase, q datasource.QuerierBuilder) (checkErrs []error) {
queries := q.BuildWithParams(datasource.QuerierParams{QueryParams: url.Values{"nocache": {"1"}, "latency_offset": {"1ms"}}, DataSourceType: "prometheus"})
Outer:
for _, mt := range cases {
result, _, err := queries.Query(context.Background(), mt.Expr, durationToTime(mt.EvalTime))
if err != nil {
checkErrs = append(checkErrs, fmt.Errorf(" expr: %q, time: %s, err: %w", mt.Expr,
mt.EvalTime.Duration().String(), err))
continue
}
var gotSamples []parsedSample
for _, s := range result.Data {
sort.Slice(s.Labels, func(i, j int) bool {
return s.Labels[i].Name < s.Labels[j].Name
})
gotSamples = append(gotSamples, parsedSample{
Labels: s.Labels,
Value: s.Values[0],
})
}
var expSamples []parsedSample
for _, s := range mt.ExpSamples {
expLb := datasource.Labels{}
if s.Labels != "" {
metricsqlExpr, err := metricsql.Parse(s.Labels)
if err != nil {
checkErrs = append(checkErrs, fmt.Errorf("\n expr: %q, time: %s, err: %v", mt.Expr,
mt.EvalTime.Duration().String(), fmt.Errorf("failed to parse labels %q: %w", s.Labels, err)))
continue Outer
}
metricsqlMetricExpr, ok := metricsqlExpr.(*metricsql.MetricExpr)
if !ok {
checkErrs = append(checkErrs, fmt.Errorf("\n expr: %q, time: %s, err: %v", mt.Expr,
mt.EvalTime.Duration().String(), fmt.Errorf("got unsupported metricsql type")))
continue Outer
}
for _, l := range metricsqlMetricExpr.LabelFilterss[0] {
expLb = append(expLb, datasource.Label{
Name: l.Label,
Value: l.Value,
})
}
}
sort.Slice(expLb, func(i, j int) bool {
return expLb[i].Name < expLb[j].Name
})
expSamples = append(expSamples, parsedSample{
Labels: expLb,
Value: s.Value,
})
}
sort.Slice(expSamples, func(i, j int) bool {
return datasource.LabelCompare(expSamples[i].Labels, expSamples[j].Labels) <= 0
})
sort.Slice(gotSamples, func(i, j int) bool {
return datasource.LabelCompare(gotSamples[i].Labels, gotSamples[j].Labels) <= 0
})
if !reflect.DeepEqual(expSamples, gotSamples) {
checkErrs = append(checkErrs, fmt.Errorf("\n expr: %q, time: %s,\n exp: %v\n got: %v", mt.Expr,
mt.EvalTime.Duration().String(), parsedSamplesString(expSamples), parsedSamplesString(gotSamples)))
}
}
return
}
func durationToTime(pd *promutils.Duration) time.Time {
if pd == nil {
return time.Time{}
}
return time.UnixMilli(pd.Duration().Milliseconds())
}

View File

@@ -1,43 +0,0 @@
rule_files:
- rules.yaml
evaluation_interval: 1m
tests:
- interval: 1m
input_series:
- series: 'up{job="vmagent2", instance="localhost:9090"}'
values: "0+0x1440"
metricsql_expr_test:
- expr: suquery_interval_test
eval_time: 4m
exp_samples:
- labels: '{__name__="suquery_interval_test",datacenter="dc-123", instance="localhost:9090", job="vmagent2"}'
value: 1
alert_rule_test:
- eval_time: 2h
alertname: InstanceDown
exp_alerts:
- exp_labels:
job: vmagent2
severity: page
instance: localhost:9090
datacenter: dc-123
exp_annotations:
summary: "Instance localhost:9090 down"
description: "localhost:9090 of job vmagent2 has been down for more than 5 minutes."
- eval_time: 0
alertname: AlwaysFiring
exp_alerts:
- exp_labels:
datacenter: dc-123
- eval_time: 0
alertname: InstanceDown
exp_alerts: []
external_labels:
datacenter: dc-123

View File

@@ -1,49 +0,0 @@
rule_files:
- rules.yaml
tests:
- interval: 1m
name: "Failing test"
input_series:
- series: test
values: "0"
metricsql_expr_test:
- expr: test
eval_time: 0m
exp_samples:
- value: 0
labels: test
# will failed cause there is no "Test" group and rule defined
alert_rule_test:
- eval_time: 0m
groupname: Test
alertname: Test
exp_alerts:
- exp_labels: {}
- interval: 1m
name: Failing alert test
input_series:
- series: 'up{job="test"}'
values: 0x10
alert_rule_test:
# will failed cause rule is firing
- eval_time: 5m
groupname: group1
alertname: InstanceDown
exp_alerts: []
- interval: 1m
name: Failing alert test with missing groupname
input_series:
- series: 'up{job="test"}'
values: 0x10
alert_rule_test:
# will failed cause missing groupname
- eval_time: 5m
alertname: AlwaysFiring
exp_alerts: []

View File

@@ -1,30 +0,0 @@
# can be executed successfully but will take more than 1 minute
# not included in unit test now
evaluation_interval: 100d
rule_files:
- rules.yaml
tests:
- interval: 1d
input_series:
- series: test
# Max time in time.Duration is 106751d from 1970 (2^63/10^9), i.e. 2262.
# But VictoriaMetrics supports maxTimestamp value +2 days from now. see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/827.
# We input series to 2024-01-01T00:00:00 here.
values: "0+1x19723"
metricsql_expr_test:
- expr: timestamp(test)
eval_time: 0m
exp_samples:
- value: 0
- expr: test
eval_time: 100d
exp_samples:
- labels: test
value: 100
- expr: timestamp(test)
eval_time: 19000d
exp_samples:
- value: 1641600000 # 19000d -> seconds.

View File

@@ -1,39 +0,0 @@
groups:
- name: group1
rules:
- alert: InstanceDown
expr: up == 0
for: 5m
labels:
severity: page
annotations:
summary: "Instance {{ $labels.instance }} down"
description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 5 minutes."
- alert: AlwaysFiring
expr: 1
- alert: SameAlertNameWithDifferentGroup
expr: absent(test)
for: 1m
- name: group2
rules:
- record: t1
expr: test
- record: job:test:count_over_time1m
expr: sum without(instance) (count_over_time(test[1m]))
- record: suquery_interval_test
expr: count_over_time(up[5m:])
- alert: SameAlertNameWithDifferentGroup
expr: absent(test)
for: 5m
- name: group3
rules:
- record: t2
expr: t1
- name: group4
rules:
- record: t3
expr: t1

View File

@@ -1,99 +0,0 @@
rule_files:
- rules.yaml
evaluation_interval: 1m
group_eval_order: ["group4", "group2", "group3"]
tests:
- interval: 1m
name: "basic test"
input_series:
- series: "test"
values: "_x5 1x5 _ stale"
alert_rule_test:
- eval_time: 1m
groupname: group1
alertname: SameAlertNameWithDifferentGroup
exp_alerts:
- {}
- eval_time: 1m
groupname: group2
alertname: SameAlertNameWithDifferentGroup
exp_alerts: []
- eval_time: 6m
groupname: group1
alertname: SameAlertNameWithDifferentGroup
exp_alerts: []
metricsql_expr_test:
- expr: test
eval_time: 11m
exp_samples:
- labels: '{__name__="test"}'
value: 1
- expr: test
eval_time: 12m
exp_samples: []
- interval: 1m
name: "basic test2"
input_series:
- series: 'up{job="vmagent1", instance="localhost:9090"}'
values: "0+0x1440"
- series: "test"
values: "0+1x1440"
metricsql_expr_test:
- expr: count(ALERTS) by (alertgroup, alertname, alertstate)
eval_time: 4m
exp_samples:
- labels: '{alertgroup="group1", alertname="AlwaysFiring", alertstate="firing"}'
value: 1
- labels: '{alertgroup="group1", alertname="InstanceDown", alertstate="pending"}'
value: 1
- expr: t1
eval_time: 4m
exp_samples:
- value: 4
labels: '{__name__="t1", datacenter="dc-123"}'
- expr: t2
eval_time: 4m
exp_samples:
- value: 4
labels: '{__name__="t2", datacenter="dc-123"}'
- expr: t3
eval_time: 4m
exp_samples:
# t3 is 3 instead of 4 cause it's rules3 is evaluated before rules1
- value: 3
labels: '{__name__="t3", datacenter="dc-123"}'
alert_rule_test:
- eval_time: 10m
groupname: group1
alertname: InstanceDown
exp_alerts:
- exp_labels:
job: vmagent1
severity: page
instance: localhost:9090
datacenter: dc-123
exp_annotations:
summary: "Instance localhost:9090 down"
description: "localhost:9090 of job vmagent1 has been down for more than 5 minutes."
- eval_time: 0
groupname: group1
alertname: AlwaysFiring
exp_alerts:
- exp_labels:
datacenter: dc-123
- eval_time: 0
groupname: alerts
alertname: InstanceDown
exp_alerts: []
external_labels:
datacenter: dc-123

View File

@@ -1,46 +0,0 @@
rule_files:
- rules.yaml
evaluation_interval: 1m
tests:
- interval: 1m
input_series:
- series: 'up{job="vmagent2", instance="localhost:9090"}'
values: "0+0x1440"
metricsql_expr_test:
- expr: suquery_interval_test
eval_time: 4m
exp_samples:
- labels: '{__name__="suquery_interval_test",datacenter="dc-123", instance="localhost:9090", job="vmagent2"}'
value: 1
alert_rule_test:
- eval_time: 2h
groupname: group1
alertname: InstanceDown
exp_alerts:
- exp_labels:
job: vmagent2
severity: page
instance: localhost:9090
datacenter: dc-123
exp_annotations:
summary: "Instance localhost:9090 down"
description: "localhost:9090 of job vmagent2 has been down for more than 5 minutes."
- eval_time: 0
groupname: group1
alertname: AlwaysFiring
exp_alerts:
- exp_labels:
datacenter: dc-123
- eval_time: 0
groupname: group1
alertname: InstanceDown
exp_alerts: []
external_labels:
datacenter: dc-123

View File

@@ -1,83 +0,0 @@
package unittest
import (
"fmt"
"strconv"
"strings"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
)
// parsedSample is a sample with parsed Labels
type parsedSample struct {
Labels datasource.Labels
Value float64
}
func (ps *parsedSample) String() string {
return ps.Labels.String() + " " + strconv.FormatFloat(ps.Value, 'E', -1, 64)
}
func parsedSamplesString(pss []parsedSample) string {
if len(pss) == 0 {
return "nil"
}
s := pss[0].String()
for _, ps := range pss[1:] {
s += ", " + ps.String()
}
return s
}
// labelAndAnnotation holds labels and annotations
type labelAndAnnotation struct {
Labels datasource.Labels
Annotations datasource.Labels
}
func (la *labelAndAnnotation) String() string {
return "Labels:" + la.Labels.String() + "\nAnnotations:" + la.Annotations.String()
}
// labelsAndAnnotations is collection of LabelAndAnnotation
type labelsAndAnnotations []labelAndAnnotation
func (la labelsAndAnnotations) Len() int { return len(la) }
func (la labelsAndAnnotations) Swap(i, j int) { la[i], la[j] = la[j], la[i] }
func (la labelsAndAnnotations) Less(i, j int) bool {
diff := datasource.LabelCompare(la[i].Labels, la[j].Labels)
if diff != 0 {
return diff < 0
}
return datasource.LabelCompare(la[i].Annotations, la[j].Annotations) < 0
}
func (la labelsAndAnnotations) String() string {
if len(la) == 0 {
return "[]"
}
s := "[\n0:" + indentLines("\n"+la[0].String(), " ")
for i, l := range la[1:] {
s += ",\n" + fmt.Sprintf("%d", i+1) + ":" + indentLines("\n"+l.String(), " ")
}
s += "\n]"
return s
}
// indentLines prefixes each line in the supplied string with the given "indent" string.
func indentLines(lines, indent string) string {
sb := strings.Builder{}
n := strings.Split(lines, "\n")
for i, l := range n {
if i > 0 {
sb.WriteString(indent)
}
sb.WriteString(l)
if i != len(n)-1 {
sb.WriteRune('\n')
}
}
return sb.String()
}

View File

@@ -1,443 +0,0 @@
package unittest
import (
"context"
"flag"
"fmt"
"net/http"
"os"
"path/filepath"
"reflect"
"sort"
"time"
"gopkg.in/yaml.v2"
vmalertconfig "github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/remotewrite"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/rule"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/templates"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/promremotewrite"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/prometheus"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/promql"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmstorage"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutils"
"github.com/VictoriaMetrics/metrics"
)
var (
storagePath string
httpListenAddr = ":8880"
// insert series from 1970-01-01T00:00:00
testStartTime = time.Unix(0, 0).UTC()
testPromWriteHTTPPath = "http://127.0.0.1" + httpListenAddr + "/api/v1/write"
testDataSourcePath = "http://127.0.0.1" + httpListenAddr + "/prometheus"
testRemoteWritePath = "http://127.0.0.1" + httpListenAddr
testHealthHTTPPath = "http://127.0.0.1" + httpListenAddr + "/health"
disableAlertgroupLabel bool
)
const (
testStoragePath = "vmalert-unittest"
testLogLevel = "ERROR"
)
// UnitTest runs unittest for files
func UnitTest(files []string, disableGroupLabel bool) bool {
if err := templates.Load([]string{}, true); err != nil {
logger.Fatalf("failed to load template: %v", err)
}
storagePath = filepath.Join(os.TempDir(), testStoragePath)
processFlags()
vminsert.Init()
vmselect.Init()
// storagePath will be created again when closing vmselect, so remove it again.
defer fs.MustRemoveAll(storagePath)
defer vminsert.Stop()
defer vmselect.Stop()
disableAlertgroupLabel = disableGroupLabel
return rulesUnitTest(files)
}
func rulesUnitTest(files []string) bool {
var failed bool
for _, f := range files {
if err := ruleUnitTest(f); err != nil {
fmt.Println(" FAILED")
fmt.Printf("\nfailed to run unit test for file %q: \n%v", f, err)
failed = true
} else {
fmt.Println(" SUCCESS")
}
}
return failed
}
func ruleUnitTest(filename string) []error {
fmt.Println("\nUnit Testing: ", filename)
b, err := os.ReadFile(filename)
if err != nil {
return []error{fmt.Errorf("failed to read file: %w", err)}
}
var unitTestInp unitTestFile
if err := yaml.UnmarshalStrict(b, &unitTestInp); err != nil {
return []error{fmt.Errorf("failed to unmarshal file: %w", err)}
}
if err := resolveAndGlobFilepaths(filepath.Dir(filename), &unitTestInp); err != nil {
return []error{fmt.Errorf("failed to resolve path for `rule_files`: %w", err)}
}
if unitTestInp.EvaluationInterval.Duration() == 0 {
fmt.Println("evaluation_interval set to 1m by default")
unitTestInp.EvaluationInterval = &promutils.Duration{D: 1 * time.Minute}
}
groupOrderMap := make(map[string]int)
for i, gn := range unitTestInp.GroupEvalOrder {
if _, ok := groupOrderMap[gn]; ok {
return []error{fmt.Errorf("group name repeated in `group_eval_order`: %s", gn)}
}
groupOrderMap[gn] = i
}
testGroups, err := vmalertconfig.Parse(unitTestInp.RuleFiles, nil, true)
if err != nil {
return []error{fmt.Errorf("failed to parse `rule_files`: %w", err)}
}
var errs []error
for _, t := range unitTestInp.Tests {
if err := verifyTestGroup(t); err != nil {
errs = append(errs, err)
continue
}
testErrs := t.test(unitTestInp.EvaluationInterval.Duration(), groupOrderMap, testGroups)
errs = append(errs, testErrs...)
}
if len(errs) > 0 {
return errs
}
return nil
}
func verifyTestGroup(group testGroup) error {
var testGroupName string
if group.TestGroupName != "" {
testGroupName = fmt.Sprintf("testGroupName: %s\n", group.TestGroupName)
}
for _, at := range group.AlertRuleTests {
if at.Alertname == "" {
return fmt.Errorf("\n%s missing required filed \"alertname\"", testGroupName)
}
if !disableAlertgroupLabel && at.GroupName == "" {
return fmt.Errorf("\n%s missing required filed \"groupname\" when flag \"disableAlertGroupLabel\" is false", testGroupName)
}
if disableAlertgroupLabel && at.GroupName != "" {
return fmt.Errorf("\n%s shouldn't set filed \"groupname\" when flag \"disableAlertGroupLabel\" is true", testGroupName)
}
if at.EvalTime == nil {
return fmt.Errorf("\n%s missing required filed \"eval_time\"", testGroupName)
}
}
for _, et := range group.MetricsqlExprTests {
if et.Expr == "" {
return fmt.Errorf("\n%s missing required filed \"expr\"", testGroupName)
}
if et.EvalTime == nil {
return fmt.Errorf("\n%s missing required filed \"eval_time\"", testGroupName)
}
}
return nil
}
func processFlags() {
flag.Parse()
for _, fv := range []struct {
flag string
value string
}{
{flag: "storageDataPath", value: storagePath},
{flag: "loggerLevel", value: testLogLevel},
{flag: "search.disableCache", value: "true"},
// set storage retention time to 100 years, allow to store series from 1970-01-01T00:00:00.
{flag: "retentionPeriod", value: "100y"},
{flag: "datasource.url", value: testDataSourcePath},
{flag: "remoteWrite.url", value: testRemoteWritePath},
} {
// panics if flag doesn't exist
if err := flag.Lookup(fv.flag).Value.Set(fv.value); err != nil {
logger.Fatalf("unable to set %q with value %q, err: %v", fv.flag, fv.value, err)
}
}
}
func setUp() {
vmstorage.Init(promql.ResetRollupResultCacheIfNeeded)
go httpserver.Serve(httpListenAddr, false, func(w http.ResponseWriter, r *http.Request) bool {
switch r.URL.Path {
case "/prometheus/api/v1/query":
if err := prometheus.QueryHandler(nil, time.Now(), w, r); err != nil {
httpserver.Errorf(w, r, "%s", err)
}
return true
case "/prometheus/api/v1/write", "/api/v1/write":
if err := promremotewrite.InsertHandler(r); err != nil {
httpserver.Errorf(w, r, "%s", err)
}
return true
default:
}
return false
})
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
defer cancel()
readyCheckFunc := func() bool {
resp, err := http.Get(testHealthHTTPPath)
if err != nil {
return false
}
_ = resp.Body.Close()
return resp.StatusCode == 200
}
checkCheck:
for {
select {
case <-ctx.Done():
logger.Fatalf("http server can't be ready in 30s")
default:
if readyCheckFunc() {
break checkCheck
}
time.Sleep(3 * time.Second)
}
}
}
func tearDown() {
if err := httpserver.Stop(httpListenAddr); err != nil {
logger.Errorf("cannot stop the webservice: %s", err)
}
vmstorage.Stop()
metrics.UnregisterAllMetrics()
fs.MustRemoveAll(storagePath)
}
// resolveAndGlobFilepaths joins all relative paths in a configuration
// with a given base directory and replaces all globs with matching files.
func resolveAndGlobFilepaths(baseDir string, utf *unitTestFile) error {
for i, rf := range utf.RuleFiles {
if rf != "" && !filepath.IsAbs(rf) {
utf.RuleFiles[i] = filepath.Join(baseDir, rf)
}
}
var globbedFiles []string
for _, rf := range utf.RuleFiles {
m, err := filepath.Glob(rf)
if err != nil {
return err
}
if len(m) == 0 {
fmt.Fprintln(os.Stderr, " WARNING: no file match pattern", rf)
}
globbedFiles = append(globbedFiles, m...)
}
utf.RuleFiles = globbedFiles
return nil
}
func (tg *testGroup) test(evalInterval time.Duration, groupOrderMap map[string]int, testGroups []vmalertconfig.Group) (checkErrs []error) {
// set up vmstorage and http server for ingest and read queries
setUp()
// tear down vmstorage and clean the data dir
defer tearDown()
err := writeInputSeries(tg.InputSeries, tg.Interval, testStartTime, testPromWriteHTTPPath)
if err != nil {
return []error{err}
}
q, err := datasource.Init(nil)
if err != nil {
return []error{fmt.Errorf("failed to init datasource: %v", err)}
}
rw, err := remotewrite.NewDebugClient()
if err != nil {
return []error{fmt.Errorf("failed to init wr: %v", err)}
}
alertEvalTimesMap := map[time.Duration]struct{}{}
alertExpResultMap := map[time.Duration]map[string]map[string][]expAlert{}
for _, at := range tg.AlertRuleTests {
et := at.EvalTime.Duration()
alertEvalTimesMap[et] = struct{}{}
if _, ok := alertExpResultMap[et]; !ok {
alertExpResultMap[et] = make(map[string]map[string][]expAlert)
}
if _, ok := alertExpResultMap[et][at.GroupName]; !ok {
alertExpResultMap[et][at.GroupName] = make(map[string][]expAlert)
}
alertExpResultMap[et][at.GroupName][at.Alertname] = at.ExpAlerts
}
alertEvalTimes := make([]time.Duration, 0, len(alertEvalTimesMap))
for k := range alertEvalTimesMap {
alertEvalTimes = append(alertEvalTimes, k)
}
sort.Slice(alertEvalTimes, func(i, j int) bool {
return alertEvalTimes[i] < alertEvalTimes[j]
})
// sort group eval order according to the given "group_eval_order".
sort.Slice(testGroups, func(i, j int) bool {
return groupOrderMap[testGroups[i].Name] < groupOrderMap[testGroups[j].Name]
})
// create groups with given rule
var groups []*rule.Group
for _, group := range testGroups {
ng := rule.NewGroup(group, q, time.Minute, tg.ExternalLabels)
groups = append(groups, ng)
}
evalIndex := 0
maxEvalTime := testStartTime.Add(tg.maxEvalTime())
for ts := testStartTime; ts.Before(maxEvalTime) || ts.Equal(maxEvalTime); ts = ts.Add(evalInterval) {
for _, g := range groups {
errs := g.ExecOnce(context.Background(), func() []notifier.Notifier { return nil }, rw, ts)
for err := range errs {
if err != nil {
checkErrs = append(checkErrs, fmt.Errorf("\nfailed to exec group: %q, time: %s, err: %w", g.Name,
ts, err))
}
}
// flush series after each group evaluation
vmstorage.Storage.DebugFlush()
}
// check alert_rule_test case at every eval time
for evalIndex < len(alertEvalTimes) {
if ts.Sub(testStartTime) > alertEvalTimes[evalIndex] ||
alertEvalTimes[evalIndex] >= ts.Add(evalInterval).Sub(testStartTime) {
break
}
gotAlertsMap := map[string]map[string]labelsAndAnnotations{}
for _, g := range groups {
if disableAlertgroupLabel {
g.Name = ""
}
if _, ok := alertExpResultMap[time.Duration(ts.UnixNano())][g.Name]; !ok {
continue
}
if _, ok := gotAlertsMap[g.Name]; !ok {
gotAlertsMap[g.Name] = make(map[string]labelsAndAnnotations)
}
for _, r := range g.Rules {
ar, isAlertRule := r.(*rule.AlertingRule)
if !isAlertRule {
continue
}
if _, ok := alertExpResultMap[time.Duration(ts.UnixNano())][g.Name][ar.Name]; ok {
for _, got := range ar.GetAlerts() {
if got.State != notifier.StateFiring {
continue
}
if disableAlertgroupLabel {
delete(got.Labels, "alertgroup")
}
laa := labelAndAnnotation{
Labels: datasource.ConvertToLabels(got.Labels),
Annotations: datasource.ConvertToLabels(got.Annotations),
}
gotAlertsMap[g.Name][ar.Name] = append(gotAlertsMap[g.Name][ar.Name], laa)
}
}
}
}
for groupname, gres := range alertExpResultMap[alertEvalTimes[evalIndex]] {
for alertname, res := range gres {
var expAlerts labelsAndAnnotations
for _, expAlert := range res {
if expAlert.ExpLabels == nil {
expAlert.ExpLabels = make(map[string]string)
}
// alertGroupNameLabel is added as additional labels when `disableAlertGroupLabel` is false
if !disableAlertgroupLabel {
expAlert.ExpLabels["alertgroup"] = groupname
}
// alertNameLabel is added as additional labels in vmalert.
expAlert.ExpLabels["alertname"] = alertname
expAlerts = append(expAlerts, labelAndAnnotation{
Labels: datasource.ConvertToLabels(expAlert.ExpLabels),
Annotations: datasource.ConvertToLabels(expAlert.ExpAnnotations),
})
}
sort.Sort(expAlerts)
gotAlerts := gotAlertsMap[groupname][alertname]
sort.Sort(gotAlerts)
if !reflect.DeepEqual(expAlerts, gotAlerts) {
var testGroupName string
if tg.TestGroupName != "" {
testGroupName = fmt.Sprintf("testGroupName: %s,\n", tg.TestGroupName)
}
expString := indentLines(expAlerts.String(), " ")
gotString := indentLines(gotAlerts.String(), " ")
checkErrs = append(checkErrs, fmt.Errorf("\n%s groupname: %s, alertname: %s, time: %s, \n exp:%v, \n got:%v ",
testGroupName, groupname, alertname, alertEvalTimes[evalIndex].String(), expString, gotString))
}
}
}
evalIndex++
}
}
checkErrs = append(checkErrs, checkMetricsqlCase(tg.MetricsqlExprTests, q)...)
return checkErrs
}
// unitTestFile holds the contents of a single unit test file
type unitTestFile struct {
RuleFiles []string `yaml:"rule_files"`
EvaluationInterval *promutils.Duration `yaml:"evaluation_interval"`
GroupEvalOrder []string `yaml:"group_eval_order"`
Tests []testGroup `yaml:"tests"`
}
// testGroup is a group of input series and test cases associated with it
type testGroup struct {
Interval *promutils.Duration `yaml:"interval"`
InputSeries []series `yaml:"input_series"`
AlertRuleTests []alertTestCase `yaml:"alert_rule_test"`
MetricsqlExprTests []metricsqlTestCase `yaml:"metricsql_expr_test"`
ExternalLabels map[string]string `yaml:"external_labels"`
TestGroupName string `yaml:"name"`
}
// maxEvalTime returns the max eval time among all alert_rule_test and metricsql_expr_test
func (tg *testGroup) maxEvalTime() time.Duration {
var maxd time.Duration
for _, alert := range tg.AlertRuleTests {
if alert.EvalTime.Duration() > maxd {
maxd = alert.EvalTime.Duration()
}
}
for _, met := range tg.MetricsqlExprTests {
if met.EvalTime.Duration() > maxd {
maxd = met.EvalTime.Duration()
}
}
return maxd
}

View File

@@ -1,47 +0,0 @@
package unittest
import (
"os"
"testing"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/templates"
)
func TestMain(m *testing.M) {
if err := templates.Load([]string{}, true); err != nil {
os.Exit(1)
}
os.Exit(m.Run())
}
func TestUnitRule(t *testing.T) {
testCases := []struct {
name string
disableGroupLabel bool
files []string
failed bool
}{
{
name: "run multi files",
files: []string{"./testdata/test1.yaml", "./testdata/test2.yaml"},
failed: false,
},
{
name: "disable group label",
disableGroupLabel: true,
files: []string{"./testdata/disable-group-label.yaml"},
failed: false,
},
{
name: "failing test",
files: []string{"./testdata/failed-test.yaml"},
failed: true,
},
}
for _, tc := range testCases {
fail := UnitTest(tc.files, tc.disableGroupLabel)
if fail != tc.failed {
t.Fatalf("failed to test %s, expect %t, got %t", tc.name, tc.failed, fail)
}
}
}

View File

@@ -112,34 +112,18 @@ name: <string>
# How often rules in the group are evaluated.
[ interval: <duration> | default = -evaluationInterval flag ]
# Optional
# Group will be evaluated at the exact offset in the range of [0...interval].
# E.g. for Group with `interval: 1h` and `eval_offset: 5m` the evaluation will
# start at 5th minute of the hour. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3409
# `eval_offset` can't be bigger than `interval`.
[ eval_offset: <duration> ]
# Limit the number of alerts an alerting rule and series a recording
# rule can produce. 0 is no limit.
[ limit: <int> | default = 0 ]
# How many rules execute at once within a group. Increasing concurrency may speed
# up group's evaluation duration (exposed via `vmalert_iteration_duration_seconds` metric).
# up round execution speed.
[ concurrency: <integer> | default = 1 ]
# Optional type for expressions inside the rules. Supported values: "graphite" and "prometheus".
# By default, "prometheus" type is used.
# By default "prometheus" type is used.
[ type: <string> ]
# Optional
# The evaluation timestamp will be aligned with group's interval,
# instead of using the actual timestamp that evaluation happens at.
# By default, it's enabled to get more predictable results
# and to visually align with results plotted via Grafana or vmui.
# See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5049
# Available starting from v1.95
[ eval_alignment: <bool> | default true]
# Optional list of HTTP URL parameters
# applied for all rules requests within a group
# For example:
@@ -439,7 +423,7 @@ If `-clusterMode` is enabled and the `tenant` in a particular group is missing,
is obtained from `-defaultTenant.prometheus` or `-defaultTenant.graphite` depending on the `type` of the group.
The enterprise version of vmalert is available in `vmutils-*-enterprise.tar.gz` files
at [release page](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest) and in `*-enterprise`
at [release page](https://github.com/VictoriaMetrics/VictoriaMetrics/releases) and in `*-enterprise`
tags at [Docker Hub](https://hub.docker.com/r/victoriametrics/vmalert/tags).
### Reading rules from object storage
@@ -535,7 +519,11 @@ Alertmanagers.
To avoid recording rules results and alerts state duplication in VictoriaMetrics server
don't forget to configure [deduplication](https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#deduplication).
The recommended value for `-dedup.minScrapeInterval` must be multiple of vmalert's `-evaluationInterval`.
The recommended value for `-dedup.minScrapeInterval` must be multiple of vmalert's `evaluation_interval`.
If you observe inconsistent or "jumping" values in series produced by vmalert, try disabling `-datasource.queryTimeAlignment`
command line flag. Because of alignment, two or more vmalert HA pairs will produce results with the same timestamps.
But due of backfilling (data delivered to the datasource with some delay) values of such results may differ,
which would affect deduplication logic and result into "jumping" datapoints.
Alertmanager will automatically deduplicate alerts with identical labels, so ensure that
all `vmalert`s are having the same config.
@@ -754,11 +742,6 @@ See full description for these flags in `./vmalert -help`.
* `limit` group's param has no effect during replay (might be changed in future);
* `keep_firing_for` alerting rule param has no effect during replay (might be changed in future).
## Unit Testing for Rules
You can use `vmalert-tool` to test your alerting and recording rules like [promtool does](https://prometheus.io/docs/prometheus/latest/configuration/unit_testing_rules/).
See more details [here](https://docs.victoriametrics.com/vmalert-tool.html#Unit-testing-for-rules).
## Monitoring
`vmalert` exports various metrics in Prometheus exposition format at `http://vmalert-host:8880/metrics` page.
@@ -788,7 +771,7 @@ may get empty response from the datasource and produce empty recording rules or
Try the following recommendations to reduce the chance of hitting the data delay issue:
* Always configure group's `-evaluationInterval` to be bigger or at least equal to
* Always configure group's `evaluationInterval` to be bigger or at least equal to
[time series resolution](https://docs.victoriametrics.com/keyConcepts.html#time-series-resolution);
* Ensure that `[duration]` value is at least twice bigger than
[time series resolution](https://docs.victoriametrics.com/keyConcepts.html#time-series-resolution). For example,
@@ -830,8 +813,7 @@ and check the `Last updates` section:
Rows in the section represent ordered rule evaluations and their results. The column `curl` contains an example of
HTTP request sent by vmalert to the `-datasource.url` during evaluation. If specific state shows that there were
no samples returned and curl command returns data - then it is very likely there was no data in datasource on the
moment when rule was evaluated. Sensitive info is stripped from the `curl` examples - see [security](#security) section
for more details.
moment when rule was evaluated.
### Debug mode
@@ -847,8 +829,6 @@ Just set `debug: true` in rule's configuration and vmalert will start printing a
2022-09-15T13:36:56.153Z DEBUG rule "TestGroup":"Conns" (2601299393013563564) at 2022-09-15T15:36:56+02:00: alert 10705778000901301787 {alertgroup="TestGroup",alertname="Conns",cluster="east-1",instance="localhost:8429",replica="a"} PENDING => FIRING: 1m0s since becoming active at 2022-09-15 15:35:56.126006 +0200 CEST m=+39.384575417
```
Sensitive info is stripped from the `curl` examples - see [security](#security) section for more details.
### Never-firing alerts
vmalert can detect if alert's expression doesn't match any time series in runtime
@@ -893,20 +873,6 @@ The same issue can be caused by collision of configured `labels` on [Group](#gro
To fix it one should avoid collisions by carefully picking label overrides in configuration.
## Security
See general recommendations regarding security [here](https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#security).
vmalert [web UI](#web) exposes configuration details such as list of [Groups](#groups), active alerts,
[alerts state](#alerts-state), [notifiers](#notifier-configuration-file). Notifier addresses (sanitized) are attached
as labels to metrics `vmalert_alerts_sent_.*` on `http://<vmalert>/metrics` page. Consider limiting user's access
to the web UI or `/metrics` page if this information is sensitive.
[Alerts state](#alerts-state) page or [debug mode](#debug-mode) could emit additional information about configured
datasource URL, GET params and headers. Sensitive information such as passwords or auth tokens is stripped by default.
To disable stripping of such info pass `-datasource.showURL` cmd-line flag to vmalert.
## Profiling
`vmalert` provides handlers for collecting the following [Go profiles](https://blog.golang.org/profiling-go-programs):
@@ -947,7 +913,7 @@ The shortlist of configuration flags is the following:
{% raw %}
```console
-clusterMode
If clusterMode is enabled, then vmalert automatically adds the tenant specified in config groups to -datasource.url, -remoteWrite.url and -remoteRead.url. See https://docs.victoriametrics.com/vmalert.html#multitenancy . This flag is available only in Enterprise binaries. See https://docs.victoriametrics.com/enterprise.html
If clusterMode is enabled, then vmalert automatically adds the tenant specified in config groups to -datasource.url, -remoteWrite.url and -remoteRead.url. See https://docs.victoriametrics.com/vmalert.html#multitenancy . This flag is available only in VictoriaMetrics enterprise. See https://docs.victoriametrics.com/enterprise.html
-configCheckInterval duration
Interval for checking for changes in '-rule' or '-notifier.config' files. By default, the checking is disabled. Send SIGHUP signal in order to force config check for changes.
-datasource.appendTypePrefix
@@ -985,11 +951,11 @@ The shortlist of configuration flags is the following:
-datasource.queryStep duration
How far a value can fallback to when evaluating queries. For example, if -datasource.queryStep=15s then param "step" with value "15s" will be added to every query. If set to 0, rule's evaluation interval will be used instead. (default 5m0s)
-datasource.queryTimeAlignment
Deprecated: please use "eval_alignment" in rule group instead. Whether to align "time" parameter with evaluation interval. Alignment supposed to produce deterministic results despite number of vmalert replicas or time they were started. See more details at https://github.com/VictoriaMetrics/VictoriaMetrics/pull/1257 (default true)
Whether to align "time" parameter with evaluation interval.Alignment supposed to produce deterministic results despite number of vmalert replicas or time they were started. See more details here https://github.com/VictoriaMetrics/VictoriaMetrics/pull/1257 (default true)
-datasource.roundDigits int
Adds "round_digits" GET param to datasource requests. In VM "round_digits" limits the number of digits after the decimal point in response values.
-datasource.showURL
Whether to avoid stripping sensitive information such as auth headers or passwords from URLs in log messages or UI and exported metrics. It is hidden by default, since it can contain sensitive info such as auth key
Whether to show -datasource.url in the exported metrics. It is hidden by default, since it can contain sensitive info such as auth key
-datasource.tlsCAFile string
Optional path to TLS CA file to use for verifying connections to -datasource.url. By default, system CA is used
-datasource.tlsCertFile string
@@ -1003,9 +969,9 @@ The shortlist of configuration flags is the following:
-datasource.url string
Datasource compatible with Prometheus HTTP API. It can be single node VictoriaMetrics or vmselect URL. Required parameter. E.g. http://127.0.0.1:8428 . See also -remoteRead.disablePathAppend and -datasource.showURL
-defaultTenant.graphite string
Default tenant for Graphite alerting groups. See https://docs.victoriametrics.com/vmalert.html#multitenancy .This flag is available only in Enterprise binaries. See https://docs.victoriametrics.com/enterprise.html
Default tenant for Graphite alerting groups. See https://docs.victoriametrics.com/vmalert.html#multitenancy .This flag is available only in VictoriaMetrics enterprise. See https://docs.victoriametrics.com/enterprise.html
-defaultTenant.prometheus string
Default tenant for Prometheus alerting groups. See https://docs.victoriametrics.com/vmalert.html#multitenancy . This flag is available only in Enterprise binaries. See https://docs.victoriametrics.com/enterprise.html
Default tenant for Prometheus alerting groups. See https://docs.victoriametrics.com/vmalert.html#multitenancy . This flag is available only in VictoriaMetrics enterprise. See https://docs.victoriametrics.com/enterprise.html
-disableAlertgroupLabel
Whether to disable adding group's Name as label to generated alerts and time series.
-dryRun
@@ -1017,7 +983,7 @@ The shortlist of configuration flags is the following:
-envflag.prefix string
Prefix for environment variables if -envflag.enable is set
-eula
Deprecated, please use -license or -licenseFile flags instead. By specifying this flag, you confirm that you have an enterprise license and accept the ESA https://victoriametrics.com/legal/esa/ . This flag is available only in Enterprise binaries. See https://docs.victoriametrics.com/enterprise.html
By specifying this flag, you confirm that you have an enterprise license and accept the EULA https://victoriametrics.com/assets/VM_EULA.pdf . This flag is available only in VictoriaMetrics enterprise. See https://docs.victoriametrics.com/enterprise.html
-evaluationInterval duration
How often to evaluate the rules (default 1m0s)
-external.alert.source string
@@ -1027,8 +993,6 @@ The shortlist of configuration flags is the following:
Supports an array of values separated by comma or specified via multiple flags.
-external.url string
External URL is used as alert's source for sent alerts to the notifier. By default, hostname is used as address.
-filestream.disableFadvise
Whether to disable fadvise() syscall when reading large data files. The fadvise() syscall prevents from eviction of recently accessed data from OS page cache during background merges and backups. In some rare cases it is better to disable the syscall if it uses too much CPU
-flagsAuthKey string
Auth key for /flags endpoint. It must be passed via authKey query arg. It overrides httpAuth.* settings
-fs.disableMmap
@@ -1059,12 +1023,6 @@ The shortlist of configuration flags is the following:
Whether to disable caches for interned strings. This may reduce memory usage at the cost of higher CPU usage. See https://en.wikipedia.org/wiki/String_interning . See also -internStringCacheExpireDuration and -internStringMaxLen
-internStringMaxLen int
The maximum length for strings to intern. A lower limit may save memory at the cost of higher CPU usage. See https://en.wikipedia.org/wiki/String_interning . See also -internStringDisableCache and -internStringCacheExpireDuration (default 500)
-license string
Lisense key for VictoriaMetrics Enterprise. See https://victoriametrics.com/products/enterprise/ . Trial Enterprise license can be obtained from https://victoriametrics.com/products/enterprise/trial/ . This flag is available only in Enterprise binaries. The license key can be also passed via file specified by -licenseFile command-line flag
-license.forceOffline
Whether to enable offline verification for VictoriaMetrics Enterprise license key, which has been passed either via -license or via -licenseFile command-line flag. The issued license key must support offline verification feature. Contact info@victoriametrics.com if you need offline license verification. This flag is avilable only in Enterprise binaries
-licenseFile string
Path to file with license key for VictoriaMetrics Enterprise. See https://victoriametrics.com/products/enterprise/ . Trial Enterprise license can be obtained from https://victoriametrics.com/products/enterprise/trial/ . This flag is available only in Enterprise binaries. The license key can be also passed inline via -license command-line flag
-loggerDisableTimestamps
Whether to disable writing timestamps in logs
-loggerErrorsPerSecondLimit int
@@ -1103,8 +1061,6 @@ The shortlist of configuration flags is the following:
-notifier.bearerTokenFile array
Optional path to bearer token file for -notifier.url
Supports an array of values separated by comma or specified via multiple flags.
-notifier.blackhole -notifier.url
Whether to blackhole alerting notifications. Enable this flag if you want vmalert to evaluate alerting rules without sending any notifications to external receivers (eg. alertmanager). -notifier.url, `-notifier.config` and `-notifier.blackhole` are mutually exclusive.
-notifier.config string
Path to configuration file for notifiers
-notifier.oauth2.clientID array
@@ -1122,8 +1078,6 @@ The shortlist of configuration flags is the following:
-notifier.oauth2.tokenUrl array
Optional OAuth2 tokenURL to use for -notifier.url. If multiple args are set, then they are applied independently for the corresponding -notifier.url
Supports an array of values separated by comma or specified via multiple flags.
-notifier.showURL
Whether to avoid stripping sensitive information such as passwords from URL in log messages or UI for -notifier.url. It is hidden by default, since it can contain sensitive info such as auth key
-notifier.suppressDuplicateTargetErrors
Whether to suppress 'duplicate target' errors during discovery
-notifier.tlsCAFile array
@@ -1144,6 +1098,8 @@ The shortlist of configuration flags is the following:
-notifier.url array
Prometheus Alertmanager URL, e.g. http://127.0.0.1:9093. List all Alertmanager URLs if it runs in the cluster mode to ensure high availability.
Supports an array of values separated by comma or specified via multiple flags.
-notifier.blackhole bool
Whether to blackhole alerting notifications. Enable this flag if you want vmalert to evaluate alerting rules without sending any notifications to external receivers (eg. alertmanager). `-notifier.url`, `-notifier.config` and `-notifier.blackhole` are mutually exclusive.
-pprofAuthKey string
Auth key for /debug/pprof/* endpoints. It must be passed via authKey query arg. It overrides httpAuth.* settings
-promscrape.consul.waitTime duration
@@ -1307,18 +1263,22 @@ The shortlist of configuration flags is the following:
Whether to validate rules expressions via MetricsQL engine (default true)
-rule.validateTemplates
Whether to validate annotation and label templates (default true)
-s2a_enable_appengine_dialer
If true, opportunistically use AppEngine-specific dialer to call S2A.
-s2a_timeout duration
Timeout enforced on the connection to the S2A service for handshake. (default 3s)
-s3.configFilePath string
Path to file with S3 configs. Configs are loaded from default location if not set.
See https://docs.aws.amazon.com/general/latest/gr/aws-security-credentials.html . This flag is available only in Enterprise binaries. See https://docs.victoriametrics.com/enterprise.html
See https://docs.aws.amazon.com/general/latest/gr/aws-security-credentials.html . This flag is available only in VictoriaMetrics enterprise. See https://docs.victoriametrics.com/enterprise.html
-s3.configProfile string
Profile name for S3 configs. If no set, the value of the environment variable will be loaded (AWS_PROFILE or AWS_DEFAULT_PROFILE), or if both not set, DefaultSharedConfigProfile is used. This flag is available only in Enterprise binaries. See https://docs.victoriametrics.com/enterprise.html
Profile name for S3 configs. If no set, the value of the environment variable will be loaded (AWS_PROFILE or AWS_DEFAULT_PROFILE), or if both not set, DefaultSharedConfigProfile is used. This flag is available only in VictoriaMetrics enterprise. See https://docs.victoriametrics.com/enterprise.html
-s3.credsFilePath string
Path to file with GCS or S3 credentials. Credentials are loaded from default locations if not set.
See https://cloud.google.com/iam/docs/creating-managing-service-account-keys and https://docs.aws.amazon.com/general/latest/gr/aws-security-credentials.html . This flag is available only in Enterprise binaries. See https://docs.victoriametrics.com/enterprise.html
See https://cloud.google.com/iam/docs/creating-managing-service-account-keys and https://docs.aws.amazon.com/general/latest/gr/aws-security-credentials.html . This flag is available only in VictoriaMetrics enterprise. See https://docs.victoriametrics.com/enterprise.html
-s3.customEndpoint string
Custom S3 endpoint for use with S3-compatible storages (e.g. MinIO). S3 is used if not set. This flag is available only in Enterprise binaries. See https://docs.victoriametrics.com/enterprise.html
Custom S3 endpoint for use with S3-compatible storages (e.g. MinIO). S3 is used if not set. This flag is available only in VictoriaMetrics enterprise. See https://docs.victoriametrics.com/enterprise.html
-s3.forcePathStyle
Prefixing endpoint with bucket name when set false, true by default. This flag is available only in Enterprise binaries. See https://docs.victoriametrics.com/enterprise.html (default true)
Prefixing endpoint with bucket name when set false, true by default. This flag is available only in VictoriaMetrics enterprise. See https://docs.victoriametrics.com/enterprise.html (default true)
-tls
Whether to enable TLS for incoming HTTP requests at -httpListenAddr (aka https). -tlsCertFile and -tlsKeyFile must be set if -tls is set
-tlsCertFile string
@@ -1508,7 +1468,7 @@ software. Please keep simplicity as the main priority.
## How to build from sources
It is recommended using
[binary releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest)
[binary releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases)
* `vmalert` is located in `vmutils-*` archives there.
@@ -1534,7 +1494,7 @@ spec:
### Development build
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.20.
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.19.
1. Run `make vmalert` from the root folder of [the repository](https://github.com/VictoriaMetrics/VictoriaMetrics).
It builds `vmalert` binary and puts it into the `bin` folder.
@@ -1550,7 +1510,7 @@ ARM build may run on Raspberry Pi or on [energy-efficient ARM servers](https://b
### Development ARM build
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.20.
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.19.
1. Run `make vmalert-linux-arm` or `make vmalert-linux-arm64` from the root folder of [the repository](https://github.com/VictoriaMetrics/VictoriaMetrics).
It builds `vmalert-linux-arm` or `vmalert-linux-arm64` binary respectively and puts it into the `bin` folder.

View File

@@ -1,10 +1,11 @@
package rule
package main
import (
"context"
"fmt"
"hash/fnv"
"sort"
"strconv"
"strings"
"sync"
"time"
@@ -54,8 +55,7 @@ type alertingRuleMetrics struct {
seriesFetched *utils.Gauge
}
// NewAlertingRule creates a new AlertingRule
func NewAlertingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rule) *AlertingRule {
func newAlertingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rule) *AlertingRule {
ar := &AlertingRule{
Type: group.Type,
RuleID: cfg.ID,
@@ -80,15 +80,10 @@ func NewAlertingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rule
metrics: &alertingRuleMetrics{},
}
entrySize := *ruleUpdateEntriesLimit
if cfg.UpdateEntriesLimit != nil {
entrySize = *cfg.UpdateEntriesLimit
}
if entrySize < 1 {
entrySize = 1
}
ar.state = &ruleState{
entries: make([]StateEntry, entrySize),
ar.state = newRuleState(*cfg.UpdateEntriesLimit)
} else {
ar.state = newRuleState(*ruleUpdateEntriesLimit)
}
labels := fmt.Sprintf(`alertname=%q, group=%q, id="%d"`, ar.Name, group.Name, ar.ID())
@@ -119,7 +114,7 @@ func NewAlertingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rule
ar.metrics.errors = utils.GetOrCreateGauge(fmt.Sprintf(`vmalert_alerting_rules_error{%s}`, labels),
func() float64 {
e := ar.state.getLast()
if e.Err == nil {
if e.err == nil {
return 0
}
return 1
@@ -127,28 +122,28 @@ func NewAlertingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rule
ar.metrics.samples = utils.GetOrCreateGauge(fmt.Sprintf(`vmalert_alerting_rules_last_evaluation_samples{%s}`, labels),
func() float64 {
e := ar.state.getLast()
return float64(e.Samples)
return float64(e.samples)
})
ar.metrics.seriesFetched = utils.GetOrCreateGauge(fmt.Sprintf(`vmalert_alerting_rules_last_evaluation_series_fetched{%s}`, labels),
func() float64 {
e := ar.state.getLast()
if e.SeriesFetched == nil {
if e.seriesFetched == nil {
// means seriesFetched is unsupported
return -1
}
seriesFetched := float64(*e.SeriesFetched)
if seriesFetched == 0 && e.Samples > 0 {
seriesFetched := float64(*e.seriesFetched)
if seriesFetched == 0 && e.samples > 0 {
// `alert: 0.95` will fetch no series
// but will get one time series in response.
seriesFetched = float64(e.Samples)
seriesFetched = float64(e.samples)
}
return seriesFetched
})
return ar
}
// close unregisters rule metrics
func (ar *AlertingRule) close() {
// Close unregisters rule metrics
func (ar *AlertingRule) Close() {
ar.metrics.active.Unregister()
ar.metrics.pending.Unregister()
ar.metrics.errors.Unregister()
@@ -167,27 +162,6 @@ func (ar *AlertingRule) ID() uint64 {
return ar.RuleID
}
// GetAlerts returns active alerts of rule
func (ar *AlertingRule) GetAlerts() []*notifier.Alert {
ar.alertsMu.RLock()
defer ar.alertsMu.RUnlock()
var alerts []*notifier.Alert
for _, a := range ar.alerts {
alerts = append(alerts, a)
}
return alerts
}
// GetAlert returns alert if id exists
func (ar *AlertingRule) GetAlert(id uint64) *notifier.Alert {
ar.alertsMu.RLock()
defer ar.alertsMu.RUnlock()
if ar.alerts == nil {
return nil
}
return ar.alerts[id]
}
func (ar *AlertingRule) logDebugf(at time.Time, a *notifier.Alert, format string, args ...interface{}) {
if !ar.Debug {
return
@@ -214,26 +188,6 @@ func (ar *AlertingRule) logDebugf(at time.Time, a *notifier.Alert, format string
logger.Infof("%s", prefix+msg)
}
// updateWith copies all significant fields.
// alerts state isn't copied since
// it should be updated in next 2 Execs
func (ar *AlertingRule) updateWith(r Rule) error {
nr, ok := r.(*AlertingRule)
if !ok {
return fmt.Errorf("BUG: attempt to update alerting rule with wrong type %#v", r)
}
ar.Expr = nr.Expr
ar.For = nr.For
ar.KeepFiringFor = nr.KeepFiringFor
ar.Labels = nr.Labels
ar.Annotations = nr.Annotations
ar.EvalInterval = nr.EvalInterval
ar.Debug = nr.Debug
ar.q = nr.q
ar.state = nr.state
return nil
}
type labelSet struct {
// origin labels extracted from received time series
// plus extra labels (group labels, service labels like alertNameLabel).
@@ -294,11 +248,11 @@ func (ar *AlertingRule) toLabels(m datasource.Metric, qFn templates.QueryFn) (*l
return ls, nil
}
// execRange executes alerting rule on the given time range similarly to exec.
// ExecRange executes alerting rule on the given time range similarly to Exec.
// It doesn't update internal states of the Rule and meant to be used just
// to get time series for backfilling.
// It returns ALERT and ALERT_FOR_STATE time series as result.
func (ar *AlertingRule) execRange(ctx context.Context, start, end time.Time) ([]prompbmarshal.TimeSeries, error) {
func (ar *AlertingRule) ExecRange(ctx context.Context, start, end time.Time) ([]prompbmarshal.TimeSeries, error) {
res, err := ar.q.QueryRange(ctx, ar.Expr, start, end)
if err != nil {
return nil, err
@@ -343,19 +297,19 @@ func (ar *AlertingRule) execRange(ctx context.Context, start, end time.Time) ([]
// is kept in memory state and consequently repeatedly sent to the AlertManager.
const resolvedRetention = 15 * time.Minute
// exec executes AlertingRule expression via the given Querier.
// Exec executes AlertingRule expression via the given Querier.
// Based on the Querier results AlertingRule maintains notifier.Alerts
func (ar *AlertingRule) exec(ctx context.Context, ts time.Time, limit int) ([]prompbmarshal.TimeSeries, error) {
func (ar *AlertingRule) Exec(ctx context.Context, ts time.Time, limit int) ([]prompbmarshal.TimeSeries, error) {
start := time.Now()
res, req, err := ar.q.Query(ctx, ar.Expr, ts)
curState := StateEntry{
Time: start,
At: ts,
Duration: time.Since(start),
Samples: len(res.Data),
SeriesFetched: res.SeriesFetched,
Err: err,
Curl: requestToCurl(req),
curState := ruleStateEntry{
time: start,
at: ts,
duration: time.Since(start),
samples: len(res.Data),
seriesFetched: res.SeriesFetched,
err: err,
curl: requestToCurl(req),
}
defer func() {
@@ -369,7 +323,7 @@ func (ar *AlertingRule) exec(ctx context.Context, ts time.Time, limit int) ([]pr
return nil, fmt.Errorf("failed to execute query %q: %w", ar.Expr, err)
}
ar.logDebugf(ts, nil, "query returned %d samples (elapsed: %s)", curState.Samples, curState.Duration)
ar.logDebugf(ts, nil, "query returned %d samples (elapsed: %s)", curState.samples, curState.duration)
for h, a := range ar.alerts {
// cleanup inactive alerts from previous Exec
@@ -388,15 +342,15 @@ func (ar *AlertingRule) exec(ctx context.Context, ts time.Time, limit int) ([]pr
for _, m := range res.Data {
ls, err := ar.toLabels(m, qFn)
if err != nil {
curState.Err = fmt.Errorf("failed to expand labels: %s", err)
return nil, curState.Err
curState.err = fmt.Errorf("failed to expand labels: %s", err)
return nil, curState.err
}
h := hash(ls.processed)
if _, ok := updated[h]; ok {
// duplicate may be caused by extra labels
// conflicting with the metric labels
curState.Err = fmt.Errorf("labels %v: %w", ls.processed, errDuplicate)
return nil, curState.Err
curState.err = fmt.Errorf("labels %v: %w", ls.processed, errDuplicate)
return nil, curState.err
}
updated[h] = struct{}{}
if a, ok := ar.alerts[h]; ok {
@@ -419,8 +373,8 @@ func (ar *AlertingRule) exec(ctx context.Context, ts time.Time, limit int) ([]pr
}
a, err := ar.newAlert(m, ls, start, qFn)
if err != nil {
curState.Err = fmt.Errorf("failed to create alert: %w", err)
return nil, curState.Err
curState.err = fmt.Errorf("failed to create alert: %w", err)
return nil, curState.err
}
a.ID = h
a.State = notifier.StatePending
@@ -469,8 +423,8 @@ func (ar *AlertingRule) exec(ctx context.Context, ts time.Time, limit int) ([]pr
}
if limit > 0 && numActivePending > limit {
ar.alerts = map[uint64]*notifier.Alert{}
curState.Err = fmt.Errorf("exec exceeded limit of %d with %d alerts", limit, numActivePending)
return nil, curState.Err
curState.err = fmt.Errorf("exec exceeded limit of %d with %d alerts", limit, numActivePending)
return nil, curState.err
}
return ar.toTimeSeries(ts.Unix()), nil
}
@@ -487,6 +441,26 @@ func (ar *AlertingRule) toTimeSeries(timestamp int64) []prompbmarshal.TimeSeries
return tss
}
// UpdateWith copies all significant fields.
// alerts state isn't copied since
// it should be updated in next 2 Execs
func (ar *AlertingRule) UpdateWith(r Rule) error {
nr, ok := r.(*AlertingRule)
if !ok {
return fmt.Errorf("BUG: attempt to update alerting rule with wrong type %#v", r)
}
ar.Expr = nr.Expr
ar.For = nr.For
ar.KeepFiringFor = nr.KeepFiringFor
ar.Labels = nr.Labels
ar.Annotations = nr.Annotations
ar.EvalInterval = nr.EvalInterval
ar.Debug = nr.Debug
ar.q = nr.q
ar.state = nr.state
return nil
}
// TODO: consider hashing algorithm in VM
func hash(labels map[string]string) uint64 {
hash := fnv.New64a()
@@ -529,6 +503,102 @@ func (ar *AlertingRule) newAlert(m datasource.Metric, ls *labelSet, start time.T
return a, err
}
// AlertAPI generates APIAlert object from alert by its id(hash)
func (ar *AlertingRule) AlertAPI(id uint64) *APIAlert {
ar.alertsMu.RLock()
defer ar.alertsMu.RUnlock()
a, ok := ar.alerts[id]
if !ok {
return nil
}
return ar.newAlertAPI(*a)
}
// ToAPI returns Rule representation in form of APIRule
// Isn't thread-safe. Call must be protected by AlertingRule mutex.
func (ar *AlertingRule) ToAPI() APIRule {
lastState := ar.state.getLast()
r := APIRule{
Type: "alerting",
DatasourceType: ar.Type.String(),
Name: ar.Name,
Query: ar.Expr,
Duration: ar.For.Seconds(),
KeepFiringFor: ar.KeepFiringFor.Seconds(),
Labels: ar.Labels,
Annotations: ar.Annotations,
LastEvaluation: lastState.time,
EvaluationTime: lastState.duration.Seconds(),
Health: "ok",
State: "inactive",
Alerts: ar.AlertsToAPI(),
LastSamples: lastState.samples,
LastSeriesFetched: lastState.seriesFetched,
MaxUpdates: ar.state.size(),
Updates: ar.state.getAll(),
Debug: ar.Debug,
// encode as strings to avoid rounding in JSON
ID: fmt.Sprintf("%d", ar.ID()),
GroupID: fmt.Sprintf("%d", ar.GroupID),
}
if lastState.err != nil {
r.LastError = lastState.err.Error()
r.Health = "err"
}
// satisfy APIRule.State logic
if len(r.Alerts) > 0 {
r.State = notifier.StatePending.String()
stateFiring := notifier.StateFiring.String()
for _, a := range r.Alerts {
if a.State == stateFiring {
r.State = stateFiring
break
}
}
}
return r
}
// AlertsToAPI generates list of APIAlert objects from existing alerts
func (ar *AlertingRule) AlertsToAPI() []*APIAlert {
var alerts []*APIAlert
ar.alertsMu.RLock()
for _, a := range ar.alerts {
if a.State == notifier.StateInactive {
continue
}
alerts = append(alerts, ar.newAlertAPI(*a))
}
ar.alertsMu.RUnlock()
return alerts
}
func (ar *AlertingRule) newAlertAPI(a notifier.Alert) *APIAlert {
aa := &APIAlert{
// encode as strings to avoid rounding
ID: fmt.Sprintf("%d", a.ID),
GroupID: fmt.Sprintf("%d", a.GroupID),
RuleID: fmt.Sprintf("%d", ar.RuleID),
Name: a.Name,
Expression: ar.Expr,
Labels: a.Labels,
Annotations: a.Annotations,
State: a.State.String(),
ActiveAt: a.ActiveAt,
Restored: a.Restored,
Value: strconv.FormatFloat(a.Value, 'f', -1, 32),
}
if alertURLGeneratorFn != nil {
aa.SourceLink = alertURLGeneratorFn(a)
}
if a.State == notifier.StateFiring && !a.KeepFiringSince.IsZero() {
aa.Stabilizing = true
}
return aa
}
const (
// alertMetricName is the metric name for synthetic alert timeseries.
alertMetricName = "ALERTS"
@@ -576,10 +646,10 @@ func alertForToTimeSeries(a *notifier.Alert, timestamp int64) prompbmarshal.Time
return newTimeSeries([]float64{float64(a.ActiveAt.Unix())}, []int64{timestamp}, labels)
}
// restore restores the value of ActiveAt field for active alerts,
// Restore restores the value of ActiveAt field for active alerts,
// based on previously written time series `alertForStateMetricName`.
// Only rules with For > 0 can be restored.
func (ar *AlertingRule) restore(ctx context.Context, q datasource.Querier, ts time.Time, lookback time.Duration) error {
func (ar *AlertingRule) Restore(ctx context.Context, q datasource.Querier, ts time.Time, lookback time.Duration) error {
if ar.For < 1 {
return nil
}

View File

@@ -1,4 +1,4 @@
package rule
package main
import (
"context"
@@ -303,13 +303,13 @@ func TestAlertingRule_Exec(t *testing.T) {
fakeGroup := Group{Name: "TestRule_Exec"}
for _, tc := range testCases {
t.Run(tc.rule.Name, func(t *testing.T) {
fq := &datasource.FakeQuerier{}
fq := &fakeQuerier{}
tc.rule.q = fq
tc.rule.GroupID = fakeGroup.ID()
for i, step := range tc.steps {
fq.Reset()
fq.Add(step...)
if _, err := tc.rule.exec(context.TODO(), time.Now(), 0); err != nil {
fq.reset()
fq.add(step...)
if _, err := tc.rule.Exec(context.TODO(), time.Now(), 0); err != nil {
t.Fatalf("unexpected err: %s", err)
}
// artificial delay between applying steps
@@ -427,8 +427,7 @@ func TestAlertingRule_ExecRange(t *testing.T) {
newTestAlertingRule("multi-series-for=>pending=>pending=>firing", 3*time.Second),
[]datasource.Metric{
{Values: []float64{1, 1, 1}, Timestamps: []int64{1, 3, 5}},
{
Values: []float64{1, 1}, Timestamps: []int64{1, 5},
{Values: []float64{1, 1}, Timestamps: []int64{1, 5},
Labels: []datasource.Label{{Name: "foo", Value: "bar"}},
},
},
@@ -437,26 +436,21 @@ func TestAlertingRule_ExecRange(t *testing.T) {
{State: notifier.StatePending, ActiveAt: time.Unix(1, 0)},
{State: notifier.StateFiring, ActiveAt: time.Unix(1, 0)},
//
{
State: notifier.StatePending, ActiveAt: time.Unix(1, 0),
{State: notifier.StatePending, ActiveAt: time.Unix(1, 0),
Labels: map[string]string{
"foo": "bar",
},
},
{
State: notifier.StatePending, ActiveAt: time.Unix(5, 0),
}},
{State: notifier.StatePending, ActiveAt: time.Unix(5, 0),
Labels: map[string]string{
"foo": "bar",
},
},
}},
},
},
{
newTestRuleWithLabels("multi-series-firing", "source", "vm"),
[]datasource.Metric{
{Values: []float64{1, 1}, Timestamps: []int64{1, 100}},
{
Values: []float64{1, 1}, Timestamps: []int64{1, 5},
{Values: []float64{1, 1}, Timestamps: []int64{1, 5},
Labels: []datasource.Label{{Name: "foo", Value: "bar"}},
},
},
@@ -482,11 +476,11 @@ func TestAlertingRule_ExecRange(t *testing.T) {
fakeGroup := Group{Name: "TestRule_ExecRange"}
for _, tc := range testCases {
t.Run(tc.rule.Name, func(t *testing.T) {
fq := &datasource.FakeQuerier{}
fq := &fakeQuerier{}
tc.rule.q = fq
tc.rule.GroupID = fakeGroup.ID()
fq.Add(tc.data...)
gotTS, err := tc.rule.execRange(context.TODO(), time.Now(), time.Now())
fq.add(tc.data...)
gotTS, err := tc.rule.ExecRange(context.TODO(), time.Now(), time.Now())
if err != nil {
t.Fatalf("unexpected err: %s", err)
}
@@ -518,24 +512,24 @@ func TestAlertingRule_ExecRange(t *testing.T) {
func TestGroup_Restore(t *testing.T) {
defaultTS := time.Now()
fqr := &datasource.FakeQuerierWithRegistry{}
fqr := &fakeQuerierWithRegistry{}
fn := func(rules []config.Rule, expAlerts map[uint64]*notifier.Alert) {
t.Helper()
defer fqr.Reset()
defer fqr.reset()
for _, r := range rules {
fqr.Set(r.Expr, metricWithValueAndLabels(t, 0, "__name__", r.Alert))
fqr.set(r.Expr, metricWithValueAndLabels(t, 0, "__name__", r.Alert))
}
fg := NewGroup(config.Group{Name: "TestRestore", Rules: rules}, fqr, time.Second, nil)
fg := newGroup(config.Group{Name: "TestRestore", Rules: rules}, fqr, time.Second, nil)
wg := sync.WaitGroup{}
wg.Add(1)
go func() {
nts := func() []notifier.Notifier { return []notifier.Notifier{&notifier.FakeNotifier{}} }
fg.Start(context.Background(), nts, nil, fqr)
nts := func() []notifier.Notifier { return []notifier.Notifier{&fakeNotifier{}} }
fg.start(context.Background(), nts, nil, fqr)
wg.Done()
}()
fg.Close()
fg.close()
wg.Wait()
gotAlerts := make(map[uint64]*notifier.Alert)
@@ -582,23 +576,22 @@ func TestGroup_Restore(t *testing.T) {
ActiveAt: defaultTS,
},
})
fqr.Reset()
fqr.reset()
// one active alert with state restore
ts := time.Now().Truncate(time.Hour)
fqr.Set(`last_over_time(ALERTS_FOR_STATE{alertgroup="TestRestore",alertname="foo"}[3600s])`,
fqr.set(`last_over_time(ALERTS_FOR_STATE{alertgroup="TestRestore",alertname="foo"}[3600s])`,
stateMetric("foo", ts))
fn(
[]config.Rule{{Alert: "foo", Expr: "foo", For: promutils.NewDuration(time.Second)}},
map[uint64]*notifier.Alert{
hash(map[string]string{alertNameLabel: "foo", alertGroupNameLabel: "TestRestore"}): {
ActiveAt: ts,
},
ActiveAt: ts},
})
// two rules, two active alerts, one with state restored
ts = time.Now().Truncate(time.Hour)
fqr.Set(`last_over_time(ALERTS_FOR_STATE{alertgroup="TestRestore",alertname="bar"}[3600s])`,
fqr.set(`last_over_time(ALERTS_FOR_STATE{alertgroup="TestRestore",alertname="bar"}[3600s])`,
stateMetric("foo", ts))
fn(
[]config.Rule{
@@ -610,15 +603,14 @@ func TestGroup_Restore(t *testing.T) {
ActiveAt: defaultTS,
},
hash(map[string]string{alertNameLabel: "bar", alertGroupNameLabel: "TestRestore"}): {
ActiveAt: ts,
},
ActiveAt: ts},
})
// two rules, two active alerts, two with state restored
ts = time.Now().Truncate(time.Hour)
fqr.Set(`last_over_time(ALERTS_FOR_STATE{alertgroup="TestRestore",alertname="foo"}[3600s])`,
fqr.set(`last_over_time(ALERTS_FOR_STATE{alertgroup="TestRestore",alertname="foo"}[3600s])`,
stateMetric("foo", ts))
fqr.Set(`last_over_time(ALERTS_FOR_STATE{alertgroup="TestRestore",alertname="bar"}[3600s])`,
fqr.set(`last_over_time(ALERTS_FOR_STATE{alertgroup="TestRestore",alertname="bar"}[3600s])`,
stateMetric("bar", ts))
fn(
[]config.Rule{
@@ -630,13 +622,12 @@ func TestGroup_Restore(t *testing.T) {
ActiveAt: ts,
},
hash(map[string]string{alertNameLabel: "bar", alertGroupNameLabel: "TestRestore"}): {
ActiveAt: ts,
},
ActiveAt: ts},
})
// one active alert but wrong state restore
ts = time.Now().Truncate(time.Hour)
fqr.Set(`last_over_time(ALERTS_FOR_STATE{alertname="bar",alertgroup="TestRestore"}[3600s])`,
fqr.set(`last_over_time(ALERTS_FOR_STATE{alertname="bar",alertgroup="TestRestore"}[3600s])`,
stateMetric("wrong alert", ts))
fn(
[]config.Rule{{Alert: "foo", Expr: "foo", For: promutils.NewDuration(time.Second)}},
@@ -648,7 +639,7 @@ func TestGroup_Restore(t *testing.T) {
// one active alert with labels
ts = time.Now().Truncate(time.Hour)
fqr.Set(`last_over_time(ALERTS_FOR_STATE{alertgroup="TestRestore",alertname="foo",env="dev"}[3600s])`,
fqr.set(`last_over_time(ALERTS_FOR_STATE{alertgroup="TestRestore",alertname="foo",env="dev"}[3600s])`,
stateMetric("foo", ts, "env", "dev"))
fn(
[]config.Rule{{Alert: "foo", Expr: "foo", Labels: map[string]string{"env": "dev"}, For: promutils.NewDuration(time.Second)}},
@@ -660,7 +651,7 @@ func TestGroup_Restore(t *testing.T) {
// one active alert with restore labels missmatch
ts = time.Now().Truncate(time.Hour)
fqr.Set(`last_over_time(ALERTS_FOR_STATE{alertgroup="TestRestore",alertname="foo",env="dev"}[3600s])`,
fqr.set(`last_over_time(ALERTS_FOR_STATE{alertgroup="TestRestore",alertname="foo",env="dev"}[3600s])`,
stateMetric("foo", ts, "env", "dev", "team", "foo"))
fn(
[]config.Rule{{Alert: "foo", Expr: "foo", Labels: map[string]string{"env": "dev"}, For: promutils.NewDuration(time.Second)}},
@@ -672,30 +663,30 @@ func TestGroup_Restore(t *testing.T) {
}
func TestAlertingRule_Exec_Negative(t *testing.T) {
fq := &datasource.FakeQuerier{}
fq := &fakeQuerier{}
ar := newTestAlertingRule("test", 0)
ar.Labels = map[string]string{"job": "test"}
ar.q = fq
// successful attempt
fq.Add(metricWithValueAndLabels(t, 1, "__name__", "foo", "job", "bar"))
_, err := ar.exec(context.TODO(), time.Now(), 0)
fq.add(metricWithValueAndLabels(t, 1, "__name__", "foo", "job", "bar"))
_, err := ar.Exec(context.TODO(), time.Now(), 0)
if err != nil {
t.Fatal(err)
}
// label `job` will collide with rule extra label and will make both time series equal
fq.Add(metricWithValueAndLabels(t, 1, "__name__", "foo", "job", "baz"))
_, err = ar.exec(context.TODO(), time.Now(), 0)
fq.add(metricWithValueAndLabels(t, 1, "__name__", "foo", "job", "baz"))
_, err = ar.Exec(context.TODO(), time.Now(), 0)
if !errors.Is(err, errDuplicate) {
t.Fatalf("expected to have %s error; got %s", errDuplicate, err)
}
fq.Reset()
fq.reset()
expErr := "connection reset by peer"
fq.SetErr(errors.New(expErr))
_, err = ar.exec(context.TODO(), time.Now(), 0)
fq.setErr(errors.New(expErr))
_, err = ar.Exec(context.TODO(), time.Now(), 0)
if err == nil {
t.Fatalf("expected to get err; got nil")
}
@@ -705,7 +696,7 @@ func TestAlertingRule_Exec_Negative(t *testing.T) {
}
func TestAlertingRuleLimit(t *testing.T) {
fq := &datasource.FakeQuerier{}
fq := &fakeQuerier{}
ar := newTestAlertingRule("test", 0)
ar.Labels = map[string]string{"job": "test"}
ar.q = fq
@@ -737,15 +728,15 @@ func TestAlertingRuleLimit(t *testing.T) {
err error
timestamp = time.Now()
)
fq.Add(metricWithValueAndLabels(t, 1, "__name__", "foo", "job", "bar"))
fq.Add(metricWithValueAndLabels(t, 1, "__name__", "foo", "bar", "job"))
fq.add(metricWithValueAndLabels(t, 1, "__name__", "foo", "job", "bar"))
fq.add(metricWithValueAndLabels(t, 1, "__name__", "foo", "bar", "job"))
for _, testCase := range testCases {
_, err = ar.exec(context.TODO(), timestamp, testCase.limit)
_, err = ar.Exec(context.TODO(), timestamp, testCase.limit)
if err != nil && !strings.EqualFold(err.Error(), testCase.err) {
t.Fatal(err)
}
}
fq.Reset()
fq.reset()
}
func TestAlertingRule_Template(t *testing.T) {
@@ -853,8 +844,7 @@ func TestAlertingRule_Template(t *testing.T) {
hash(map[string]string{
alertNameLabel: "OriginLabels",
alertGroupNameLabel: "Testing",
"instance": "foo",
}): {
"instance": "foo"}): {
Labels: map[string]string{
alertNameLabel: "OriginLabels",
alertGroupNameLabel: "Testing",
@@ -870,18 +860,19 @@ func TestAlertingRule_Template(t *testing.T) {
fakeGroup := Group{Name: "TestRule_Exec"}
for _, tc := range testCases {
t.Run(tc.rule.Name, func(t *testing.T) {
fq := &datasource.FakeQuerier{}
fq := &fakeQuerier{}
tc.rule.GroupID = fakeGroup.ID()
tc.rule.q = fq
tc.rule.state = &ruleState{entries: make([]StateEntry, 10)}
fq.Add(tc.metrics...)
if _, err := tc.rule.exec(context.TODO(), time.Now(), 0); err != nil {
tc.rule.state = newRuleState(10)
fq.add(tc.metrics...)
if _, err := tc.rule.Exec(context.TODO(), time.Now(), 0); err != nil {
t.Fatalf("unexpected err: %s", err)
}
for hash, expAlert := range tc.expAlerts {
gotAlert := tc.rule.alerts[hash]
if gotAlert == nil {
t.Fatalf("alert %d is missing; labels: %v; annotations: %v", hash, expAlert.Labels, expAlert.Annotations)
break
}
if !reflect.DeepEqual(expAlert.Annotations, gotAlert.Annotations) {
t.Fatalf("expected to have annotations %#v; got %#v", expAlert.Annotations, gotAlert.Annotations)
@@ -989,7 +980,7 @@ func newTestAlertingRule(name string, waitFor time.Duration) *AlertingRule {
For: waitFor,
EvalInterval: waitFor,
alerts: make(map[uint64]*notifier.Alert),
state: &ruleState{entries: make([]StateEntry, 10)},
state: newRuleState(10),
}
return &rule
}

View File

@@ -23,7 +23,6 @@ type Group struct {
File string
Name string `yaml:"name"`
Interval *promutils.Duration `yaml:"interval,omitempty"`
EvalOffset *promutils.Duration `yaml:"eval_offset,omitempty"`
Limit int `yaml:"limit,omitempty"`
Rules []Rule `yaml:"rules"`
Concurrency int `yaml:"concurrency"`
@@ -39,8 +38,6 @@ type Group struct {
Headers []Header `yaml:"headers,omitempty"`
// NotifierHeaders contains optional HTTP headers sent to notifiers for generated notifications
NotifierHeaders []Header `yaml:"notifier_headers,omitempty"`
// EvalAlignment will make the timestamp of group query requests be aligned with interval
EvalAlignment *bool `yaml:"eval_alignment,omitempty"`
// Catches all undefined fields and must be empty after parsing.
XXX map[string]interface{} `yaml:",inline"`
}
@@ -66,27 +63,11 @@ func (g *Group) UnmarshalYAML(unmarshal func(interface{}) error) error {
return nil
}
// Validate checks configuration errors for group and internal rules
// Validate check for internal Group or Rule configuration errors
func (g *Group) Validate(validateTplFn ValidateTplFn, validateExpressions bool) error {
if g.Name == "" {
return fmt.Errorf("group name must be set")
}
if g.Interval.Duration() < 0 {
return fmt.Errorf("interval shouldn't be lower than 0")
}
if g.EvalOffset.Duration() < 0 {
return fmt.Errorf("eval_offset shouldn't be lower than 0")
}
// if `eval_offset` is set, interval won't use global evaluationInterval flag and must bigger than offset.
if g.EvalOffset.Duration() > g.Interval.Duration() {
return fmt.Errorf("eval_offset should be smaller than interval; now eval_offset: %v, interval: %v", g.EvalOffset.Duration(), g.Interval.Duration())
}
if g.Limit < 0 {
return fmt.Errorf("invalid limit %d, shouldn't be less than 0", g.Limit)
}
if g.Concurrency < 0 {
return fmt.Errorf("invalid concurrency %d, shouldn't be less than 0", g.Concurrency)
}
uniqueRules := map[uint64]struct{}{}
for _, r := range g.Rules {
@@ -95,26 +76,26 @@ func (g *Group) Validate(validateTplFn ValidateTplFn, validateExpressions bool)
ruleName = r.Alert
}
if _, ok := uniqueRules[r.ID]; ok {
return fmt.Errorf("%q is a duplicate in group", r.String())
return fmt.Errorf("%q is a duplicate within the group %q", r.String(), g.Name)
}
uniqueRules[r.ID] = struct{}{}
if err := r.Validate(); err != nil {
return fmt.Errorf("invalid rule %q: %w", ruleName, err)
return fmt.Errorf("invalid rule %q.%q: %w", g.Name, ruleName, err)
}
if validateExpressions {
// its needed only for tests.
// because correct types must be inherited after unmarshalling.
exprValidator := g.Type.ValidateExpr
if err := exprValidator(r.Expr); err != nil {
return fmt.Errorf("invalid expression for rule %q: %w", ruleName, err)
return fmt.Errorf("invalid expression for rule %q.%q: %w", g.Name, ruleName, err)
}
}
if validateTplFn != nil {
if err := validateTplFn(r.Annotations); err != nil {
return fmt.Errorf("invalid annotations for rule %q: %w", ruleName, err)
return fmt.Errorf("invalid annotations for rule %q.%q: %w", g.Name, ruleName, err)
}
if err := validateTplFn(r.Labels); err != nil {
return fmt.Errorf("invalid labels for rule %q: %w", ruleName, err)
return fmt.Errorf("invalid labels for rule %q.%q: %w", g.Name, ruleName, err)
}
}
}

View File

@@ -68,10 +68,6 @@ func TestParseBad(t *testing.T) {
path []string
expErr string
}{
{
[]string{"testdata/rules/rules_interval_bad.rules"},
"eval_offset should be smaller than interval",
},
{
[]string{"testdata/rules/rules0-bad.rules"},
"unexpected token",
@@ -145,35 +141,6 @@ func TestGroup_Validate(t *testing.T) {
group: &Group{},
expErr: "group name must be set",
},
{
group: &Group{
Name: "negative interval",
Interval: promutils.NewDuration(-1),
},
expErr: "interval shouldn't be lower than 0",
},
{
group: &Group{
Name: "wrong eval_offset",
Interval: promutils.NewDuration(time.Minute),
EvalOffset: promutils.NewDuration(2 * time.Minute),
},
expErr: "eval_offset should be smaller than interval",
},
{
group: &Group{
Name: "wrong limit",
Limit: -1,
},
expErr: "invalid limit",
},
{
group: &Group{
Name: "wrong concurrency",
Concurrency: -1,
},
expErr: "invalid concurrency",
},
{
group: &Group{
Name: "test",

View File

@@ -1,13 +0,0 @@
groups:
- name: groupTest
## default interval is 1min, eval_offset shouldn't be greater than interval
eval_offset: 2m
rules:
- alert: VMRows
for: 2s
expr: sum(rate(vm_http_request_errors_total[2s])) > 0
labels:
label: bar
host: "{{ $labels.instance }}"
annotations:
summary: "{{ $value }}"

View File

@@ -1,131 +0,0 @@
package datasource
import (
"context"
"net/http"
"sync"
"time"
)
// FakeQuerier is a mock querier that return predefined results and error message
type FakeQuerier struct {
sync.Mutex
metrics []Metric
err error
}
// SetErr sets query error message
func (fq *FakeQuerier) SetErr(err error) {
fq.Lock()
fq.err = err
fq.Unlock()
}
// Reset reset querier's error message and results
func (fq *FakeQuerier) Reset() {
fq.Lock()
fq.err = nil
fq.metrics = fq.metrics[:0]
fq.Unlock()
}
// Add appends metrics to querier result metrics
func (fq *FakeQuerier) Add(metrics ...Metric) {
fq.Lock()
fq.metrics = append(fq.metrics, metrics...)
fq.Unlock()
}
// BuildWithParams return FakeQuerier itself
func (fq *FakeQuerier) BuildWithParams(_ QuerierParams) Querier {
return fq
}
// QueryRange performs query
func (fq *FakeQuerier) QueryRange(ctx context.Context, q string, _, _ time.Time) (Result, error) {
req, _, err := fq.Query(ctx, q, time.Now())
return req, err
}
// Query returns metrics restored in querier
func (fq *FakeQuerier) Query(_ context.Context, _ string, _ time.Time) (Result, *http.Request, error) {
fq.Lock()
defer fq.Unlock()
if fq.err != nil {
return Result{}, nil, fq.err
}
cp := make([]Metric, len(fq.metrics))
copy(cp, fq.metrics)
req, _ := http.NewRequest(http.MethodPost, "foo.com", nil)
return Result{Data: cp}, req, nil
}
// FakeQuerierWithRegistry can store different results for different query expr
type FakeQuerierWithRegistry struct {
sync.Mutex
registry map[string][]Metric
}
// Set stores query result for given key
func (fqr *FakeQuerierWithRegistry) Set(key string, metrics ...Metric) {
fqr.Lock()
if fqr.registry == nil {
fqr.registry = make(map[string][]Metric)
}
fqr.registry[key] = metrics
fqr.Unlock()
}
// Reset clean querier's results registry
func (fqr *FakeQuerierWithRegistry) Reset() {
fqr.Lock()
fqr.registry = nil
fqr.Unlock()
}
// BuildWithParams returns itself
func (fqr *FakeQuerierWithRegistry) BuildWithParams(_ QuerierParams) Querier {
return fqr
}
// QueryRange performs query
func (fqr *FakeQuerierWithRegistry) QueryRange(ctx context.Context, q string, _, _ time.Time) (Result, error) {
req, _, err := fqr.Query(ctx, q, time.Now())
return req, err
}
// Query returns metrics restored in querier registry
func (fqr *FakeQuerierWithRegistry) Query(_ context.Context, expr string, _ time.Time) (Result, *http.Request, error) {
fqr.Lock()
defer fqr.Unlock()
req, _ := http.NewRequest(http.MethodPost, "foo.com", nil)
metrics, ok := fqr.registry[expr]
if !ok {
return Result{}, req, nil
}
cp := make([]Metric, len(metrics))
copy(cp, metrics)
return Result{Data: cp}, req, nil
}
// FakeQuerierWithDelay mock querier with given delay duration
type FakeQuerierWithDelay struct {
FakeQuerier
Delay time.Duration
}
// Query returns query result after delay duration
func (fqd *FakeQuerierWithDelay) Query(ctx context.Context, expr string, ts time.Time) (Result, *http.Request, error) {
timer := time.NewTimer(fqd.Delay)
select {
case <-ctx.Done():
case <-timer.C:
}
return fqd.FakeQuerier.Query(ctx, expr, ts)
}
// BuildWithParams returns itself
func (fqd *FakeQuerierWithDelay) BuildWithParams(_ QuerierParams) Querier {
return fqd
}

View File

@@ -10,14 +10,13 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/utils"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
)
var (
addr = flag.String("datasource.url", "", "Datasource compatible with Prometheus HTTP API. It can be single node VictoriaMetrics or vmselect URL. Required parameter. "+
"E.g. http://127.0.0.1:8428 . See also -remoteRead.disablePathAppend and -datasource.showURL")
appendTypePrefix = flag.Bool("datasource.appendTypePrefix", false, "Whether to add type prefix to -datasource.url based on the query type. Set to true if sending different query types to the vmselect URL.")
showDatasourceURL = flag.Bool("datasource.showURL", false, "Whether to avoid stripping sensitive information such as auth headers or passwords from URLs in log messages or UI and exported metrics. "+
showDatasourceURL = flag.Bool("datasource.showURL", false, "Whether to show -datasource.url in the exported metrics. "+
"It is hidden by default, since it can contain sensitive info such as auth key")
headers = flag.String("datasource.headers", "", "Optional HTTP extraHeaders to send with each request to the corresponding -datasource.url. "+
@@ -47,10 +46,8 @@ var (
queryStep = flag.Duration("datasource.queryStep", 5*time.Minute, "How far a value can fallback to when evaluating queries. "+
"For example, if -datasource.queryStep=15s then param \"step\" with value \"15s\" will be added to every query. "+
"If set to 0, rule's evaluation interval will be used instead.")
queryTimeAlignment = flag.Bool("datasource.queryTimeAlignment", true, `Deprecated: please use "eval_alignment" in rule group instead. `+
`Whether to align "time" parameter with evaluation interval. `+
"Alignment supposed to produce deterministic results despite number of vmalert replicas or time they were started. "+
"See more details at https://github.com/VictoriaMetrics/VictoriaMetrics/pull/1257")
queryTimeAlignment = flag.Bool("datasource.queryTimeAlignment", true, `Whether to align "time" parameter with evaluation interval.`+
"Alignment supposed to produce deterministic results despite number of vmalert replicas or time they were started. See more details here https://github.com/VictoriaMetrics/VictoriaMetrics/pull/1257")
maxIdleConnections = flag.Int("datasource.maxIdleConnections", 100, `Defines the number of idle (keep-alive connections) to each configured datasource. Consider setting this value equal to the value: groups_total * group.concurrency. Too low a value may result in a high number of sockets in TIME_WAIT state.`)
disableKeepAlive = flag.Bool("datasource.disableKeepAlive", false, `Whether to disable long-lived connections to the datasource. `+
`If true, disables HTTP keep-alives and will only use the connection to the server for a single HTTP request.`)
@@ -65,11 +62,6 @@ func InitSecretFlags() {
}
}
// ShowDatasourceURL whether to show -datasource.url with sensitive information
func ShowDatasourceURL() bool {
return *showDatasourceURL
}
// Param represents an HTTP GET param
type Param struct {
Key, Value string
@@ -82,9 +74,6 @@ func Init(extraParams url.Values) (QuerierBuilder, error) {
if *addr == "" {
return nil, fmt.Errorf("datasource.url is empty")
}
if !*queryTimeAlignment {
logger.Warnf("flag `datasource.queryTimeAlignment` is deprecated and will be removed in next releases, please use `eval_alignment` in rule group instead")
}
tr, err := utils.Transport(*addr, *tlsCertFile, *tlsKeyFile, *tlsCAFile, *tlsServerName, *tlsInsecureSkipVerify)
if err != nil {
@@ -111,10 +100,6 @@ func Init(extraParams url.Values) (QuerierBuilder, error) {
if err != nil {
return nil, fmt.Errorf("failed to configure auth: %w", err)
}
_, err = authCfg.GetAuthHeader()
if err != nil {
return nil, fmt.Errorf("failed to set request auth header to datasource %q: %s", *addr, err)
}
return &VMStorage{
c: &http.Client{Transport: tr},

View File

@@ -37,14 +37,11 @@ type VMStorage struct {
appendTypePrefix bool
lookBack time.Duration
queryStep time.Duration
dataSourceType datasourceType
// evaluationInterval will help setting request's `step` param.
dataSourceType datasourceType
evaluationInterval time.Duration
// extraParams contains params to be attached to each HTTP request
extraParams url.Values
// extraHeaders are headers to be attached to each HTTP request
extraHeaders []keyValue
extraParams url.Values
extraHeaders []keyValue
// whether to print additional log messages
// for each sent request
@@ -94,15 +91,8 @@ func (s *VMStorage) ApplyParams(params QuerierParams) *VMStorage {
s.extraParams = url.Values{}
}
for k, vl := range params.QueryParams {
// custom query params are prior to default ones
if s.extraParams.Has(k) {
s.extraParams.Del(k)
}
for _, v := range vl {
// don't use .Set() instead of Del/Add since it is allowed
// for GET params to be duplicated
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4908
s.extraParams.Add(k, v)
for _, v := range vl { // custom query params are prior to default ones
s.extraParams.Set(k, v)
}
}
}
@@ -137,17 +127,21 @@ func NewVMStorage(baseURL string, authCfg *promauth.Config, lookBack time.Durati
// Query executes the given query and returns parsed response
func (s *VMStorage) Query(ctx context.Context, query string, ts time.Time) (Result, *http.Request, error) {
req, err := s.newQueryRequest(query, ts)
req, err := s.newRequestPOST()
if err != nil {
return Result{}, nil, err
}
resp, err := s.do(ctx, req)
if errors.Is(err, io.EOF) || errors.Is(err, io.ErrUnexpectedEOF) {
// something in the middle between client and datasource might be closing
// the connection. So we do a one more attempt in hope request will succeed.
req, _ = s.newQueryRequest(query, ts)
resp, err = s.do(ctx, req)
switch s.dataSourceType {
case "", datasourcePrometheus:
s.setPrometheusInstantReqParams(req, query, ts)
case datasourceGraphite:
s.setGraphiteReqParams(req, query, ts)
default:
return Result{}, nil, fmt.Errorf("engine not found: %q", s.dataSourceType)
}
resp, err := s.do(ctx, req)
if err != nil {
return Result{}, req, err
}
@@ -170,23 +164,18 @@ func (s *VMStorage) QueryRange(ctx context.Context, query string, start, end tim
if s.dataSourceType != datasourcePrometheus {
return res, fmt.Errorf("%q is not supported for QueryRange", s.dataSourceType)
}
req, err := s.newRequestPOST()
if err != nil {
return res, err
}
if start.IsZero() {
return res, fmt.Errorf("start param is missing")
}
if end.IsZero() {
return res, fmt.Errorf("end param is missing")
}
req, err := s.newQueryRangeRequest(query, start, end)
if err != nil {
return Result{}, err
}
s.setPrometheusRangeReqParams(req, query, start, end)
resp, err := s.do(ctx, req)
if errors.Is(err, io.EOF) || errors.Is(err, io.ErrUnexpectedEOF) {
// something in the middle between client and datasource might be closing
// the connection. So we do a one more attempt in hope request will succeed.
req, _ = s.newQueryRangeRequest(query, start, end)
resp, err = s.do(ctx, req)
}
if err != nil {
return res, err
}
@@ -197,61 +186,34 @@ func (s *VMStorage) QueryRange(ctx context.Context, query string, start, end tim
}
func (s *VMStorage) do(ctx context.Context, req *http.Request) (*http.Response, error) {
ru := req.URL.Redacted()
if *showDatasourceURL {
ru = req.URL.String()
}
if s.debug {
logger.Infof("DEBUG datasource request: executing %s request with params %q", req.Method, ru)
logger.Infof("DEBUG datasource request: executing %s request with params %q", req.Method, req.URL.RawQuery)
}
resp, err := s.c.Do(req.WithContext(ctx))
if errors.Is(err, io.EOF) || errors.Is(err, io.ErrUnexpectedEOF) {
// something in the middle between client and datasource might be closing
// the connection. So we do a one more attempt in hope request will succeed.
resp, err = s.c.Do(req.WithContext(ctx))
}
if err != nil {
return nil, fmt.Errorf("error getting response from %s: %w", ru, err)
return nil, fmt.Errorf("error getting response from %s: %w", req.URL.Redacted(), err)
}
if resp.StatusCode != http.StatusOK {
body, _ := io.ReadAll(resp.Body)
_ = resp.Body.Close()
return nil, fmt.Errorf("unexpected response code %d for %s. Response body %s", resp.StatusCode, ru, body)
return nil, fmt.Errorf("unexpected response code %d for %s. Response body %s", resp.StatusCode, req.URL.Redacted(), body)
}
return resp, nil
}
func (s *VMStorage) newQueryRangeRequest(query string, start, end time.Time) (*http.Request, error) {
req, err := s.newRequest()
if err != nil {
return nil, fmt.Errorf("cannot create query_range request to datasource %q: %s", s.datasourceURL, err)
}
s.setPrometheusRangeReqParams(req, query, start, end)
return req, nil
}
func (s *VMStorage) newQueryRequest(query string, ts time.Time) (*http.Request, error) {
req, err := s.newRequest()
if err != nil {
return nil, fmt.Errorf("cannot create query request to datasource %q: %s", s.datasourceURL, err)
}
switch s.dataSourceType {
case "", datasourcePrometheus:
s.setPrometheusInstantReqParams(req, query, ts)
case datasourceGraphite:
s.setGraphiteReqParams(req, query, ts)
default:
logger.Panicf("BUG: engine not found: %q", s.dataSourceType)
}
return req, nil
}
func (s *VMStorage) newRequest() (*http.Request, error) {
func (s *VMStorage) newRequestPOST() (*http.Request, error) {
req, err := http.NewRequest(http.MethodPost, s.datasourceURL, nil)
if err != nil {
logger.Panicf("BUG: unexpected error from http.NewRequest(%q): %s", s.datasourceURL, err)
return nil, err
}
req.Header.Set("Content-Type", "application/json")
if s.authCfg != nil {
err = s.authCfg.SetHeaders(req, true)
if err != nil {
return nil, err
}
s.authCfg.SetHeaders(req, true)
}
for _, h := range s.extraHeaders {
req.Header.Set(h.key, h.value)

View File

@@ -164,6 +164,10 @@ func (s *VMStorage) setPrometheusInstantReqParams(r *http.Request, query string,
if s.lookBack > 0 {
timestamp = timestamp.Add(-s.lookBack)
}
if *queryTimeAlignment && s.evaluationInterval > 0 {
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1232
timestamp = timestamp.Truncate(s.evaluationInterval)
}
q.Set("time", timestamp.Format(time.RFC3339))
if !*disableStepParam && s.evaluationInterval > 0 { // set step as evaluationInterval by default
// always convert to seconds to keep compatibility with older

View File

@@ -506,7 +506,8 @@ func TestRequestParams(t *testing.T) {
},
func(t *testing.T, r *http.Request) {
evalInterval := 15 * time.Second
exp := url.Values{"query": {query}, "step": {evalInterval.String()}, "time": {timestamp.Format(time.RFC3339)}}
tt := timestamp.Truncate(evalInterval)
exp := url.Values{"query": {query}, "step": {evalInterval.String()}, "time": {tt.Format(time.RFC3339)}}
checkEqualString(t, exp.Encode(), r.URL.RawQuery)
},
},
@@ -520,6 +521,7 @@ func TestRequestParams(t *testing.T) {
func(t *testing.T, r *http.Request) {
evalInterval := 15 * time.Second
tt := timestamp.Add(-time.Minute)
tt = tt.Truncate(evalInterval)
exp := url.Values{"query": {query}, "step": {evalInterval.String()}, "time": {tt.Format(time.RFC3339)}}
checkEqualString(t, exp.Encode(), r.URL.RawQuery)
},
@@ -547,7 +549,8 @@ func TestRequestParams(t *testing.T) {
},
func(t *testing.T, r *http.Request) {
evalInterval := 3 * time.Hour
exp := url.Values{"query": {query}, "step": {fmt.Sprintf("%ds", int(evalInterval.Seconds()))}, "time": {timestamp.Format(time.RFC3339)}}
tt := timestamp.Truncate(evalInterval)
exp := url.Values{"query": {query}, "step": {fmt.Sprintf("%ds", int(evalInterval.Seconds()))}, "time": {tt.Format(time.RFC3339)}}
checkEqualString(t, exp.Encode(), r.URL.RawQuery)
},
},
@@ -593,17 +596,6 @@ func TestRequestParams(t *testing.T) {
checkEqualString(t, exp.Encode(), r.URL.RawQuery)
},
},
{
"allow duplicates in query params",
false,
storage.Clone().ApplyParams(QuerierParams{
QueryParams: url.Values{"extra_labels": {"env=dev", "foo=bar"}},
}),
func(t *testing.T, r *http.Request) {
exp := url.Values{"query": {query}, "round_digits": {"10"}, "extra_labels": {"env=dev", "foo=bar"}, "time": {timestamp.Format(time.RFC3339)}}
checkEqualString(t, exp.Encode(), r.URL.RawQuery)
},
},
{
"graphite extra params",
false,
@@ -637,9 +629,9 @@ func TestRequestParams(t *testing.T) {
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
req, err := tc.vm.newRequest()
req, err := tc.vm.newRequestPOST()
if err != nil {
t.Fatal(err)
t.Fatalf("unexpected error: %s", err)
}
switch tc.vm.dataSourceType {
case "", datasourcePrometheus:
@@ -735,9 +727,9 @@ func TestHeaders(t *testing.T) {
for _, tt := range testCases {
t.Run(tt.name, func(t *testing.T) {
vm := tt.vmFn()
req, err := vm.newQueryRequest("foo", time.Now())
req, err := vm.newRequestPOST()
if err != nil {
t.Fatal(err)
t.Fatalf("unexpected error: %s", err)
}
tt.checkFn(t, req)
})

View File

@@ -1,10 +1,8 @@
package rule
package main
import (
"context"
"encoding/json"
"errors"
"flag"
"fmt"
"hash/fnv"
"net/url"
@@ -13,7 +11,7 @@ import (
"sync"
"time"
"github.com/cheggaaa/pb/v3"
"github.com/VictoriaMetrics/metrics"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
@@ -23,18 +21,6 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/decimal"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
"github.com/VictoriaMetrics/metrics"
)
var (
ruleUpdateEntriesLimit = flag.Int("rule.updateEntriesLimit", 20, "Defines the max number of rule's state updates stored in-memory. "+
"Rule's updates are available on rule's Details page and are used for debugging purposes. The number of stored updates can be overridden per rule via update_entries_limit param.")
resendDelay = flag.Duration("rule.resendDelay", 0, "MiniMum amount of time to wait before resending an alert to notifier")
maxResolveDuration = flag.Duration("rule.maxResolveDuration", 0, "Limits the maxiMum duration for automatic alert expiration, "+
"which by default is 4 times evaluationInterval of the parent ")
disableAlertGroupLabel = flag.Bool("disableAlertgroupLabel", false, "Whether to disable adding group's Name as label to generated alerts and time series.")
remoteReadLookBack = flag.Duration("remoteRead.lookback", time.Hour, "Lookback defines how far to look into past for alerts timeseries."+
" For example, if lookback=1h then range from now() to now()-1h will be scanned.")
)
// Group is an entity for grouping rules
@@ -45,7 +31,6 @@ type Group struct {
Rules []Rule
Type config.Type
Interval time.Duration
EvalOffset *time.Duration
Limit int
Concurrency int
Checksum string
@@ -66,9 +51,6 @@ type Group struct {
evalCancel context.CancelFunc
metrics *groupMetrics
// evalAlignment will make the timestamp of group query
// requests be aligned with interval
evalAlignment *bool
}
type groupMetrics struct {
@@ -110,8 +92,7 @@ func mergeLabels(groupName, ruleName string, set1, set2 map[string]string) map[s
return r
}
// NewGroup returns a new group
func NewGroup(cfg config.Group, qb datasource.QuerierBuilder, defaultInterval time.Duration, labels map[string]string) *Group {
func newGroup(cfg config.Group, qb datasource.QuerierBuilder, defaultInterval time.Duration, labels map[string]string) *Group {
g := &Group{
Type: cfg.Type,
Name: cfg.Name,
@@ -124,7 +105,6 @@ func NewGroup(cfg config.Group, qb datasource.QuerierBuilder, defaultInterval ti
Headers: make(map[string]string),
NotifierHeaders: make(map[string]string),
Labels: cfg.Labels,
evalAlignment: cfg.EvalAlignment,
doneCh: make(chan struct{}),
finishedCh: make(chan struct{}),
@@ -136,9 +116,6 @@ func NewGroup(cfg config.Group, qb datasource.QuerierBuilder, defaultInterval ti
if g.Concurrency < 1 {
g.Concurrency = 1
}
if cfg.EvalOffset != nil {
g.EvalOffset = &cfg.EvalOffset.D
}
for _, h := range cfg.Headers {
g.Headers[h.Key] = h.Value
}
@@ -168,11 +145,11 @@ func NewGroup(cfg config.Group, qb datasource.QuerierBuilder, defaultInterval ti
return g
}
func (g *Group) newRule(qb datasource.QuerierBuilder, r config.Rule) Rule {
if r.Alert != "" {
return NewAlertingRule(qb, g, r)
func (g *Group) newRule(qb datasource.QuerierBuilder, rule config.Rule) Rule {
if rule.Alert != "" {
return newAlertingRule(qb, g, rule)
}
return NewRecordingRule(qb, g, r)
return newRecordingRule(qb, g, rule)
}
// ID return unique group ID that consists of
@@ -186,15 +163,11 @@ func (g *Group) ID() uint64 {
hash.Write([]byte("\xff"))
hash.Write([]byte(g.Name))
hash.Write([]byte(g.Type.Get()))
hash.Write([]byte(g.Interval.String()))
if g.EvalOffset != nil {
hash.Write([]byte(g.EvalOffset.String()))
}
return hash.Sum64()
}
// restore restores alerts state for group rules
func (g *Group) restore(ctx context.Context, qb datasource.QuerierBuilder, ts time.Time, lookback time.Duration) error {
// Restore restores alerts state for group rules
func (g *Group) Restore(ctx context.Context, qb datasource.QuerierBuilder, ts time.Time, lookback time.Duration) error {
for _, rule := range g.Rules {
ar, ok := rule.(*AlertingRule)
if !ok {
@@ -210,7 +183,7 @@ func (g *Group) restore(ctx context.Context, qb datasource.QuerierBuilder, ts ti
Headers: g.Headers,
Debug: ar.Debug,
})
if err := ar.restore(ctx, q, ts, lookback); err != nil {
if err := ar.Restore(ctx, q, ts, lookback); err != nil {
return fmt.Errorf("error while restoring rule %q: %w", rule, err)
}
}
@@ -220,7 +193,7 @@ func (g *Group) restore(ctx context.Context, qb datasource.QuerierBuilder, ts ti
// updateWith updates existing group with
// passed group object. This function ignores group
// evaluation interval change. It supposed to be updated
// in group.Start function.
// in group.start function.
// Not thread-safe.
func (g *Group) updateWith(newGroup *Group) error {
rulesRegistry := make(map[uint64]Rule)
@@ -233,11 +206,11 @@ func (g *Group) updateWith(newGroup *Group) error {
if !ok {
// old rule is not present in the new list
// so we mark it for removing
g.Rules[i].close()
g.Rules[i].Close()
g.Rules[i] = nil
continue
}
if err := or.updateWith(nr); err != nil {
if err := or.UpdateWith(nr); err != nil {
return err
}
delete(rulesRegistry, nr.ID())
@@ -270,10 +243,10 @@ func (g *Group) updateWith(newGroup *Group) error {
return nil
}
// InterruptEval interrupts in-flight rules evaluations
// interruptEval interrupts in-flight rules evaluations
// within the group. It is expected that g.evalCancel
// will be repopulated after the call.
func (g *Group) InterruptEval() {
func (g *Group) interruptEval() {
g.mu.RLock()
defer g.mu.RUnlock()
@@ -282,13 +255,12 @@ func (g *Group) InterruptEval() {
}
}
// Close stops the group and it's rules, unregisters group metrics
func (g *Group) Close() {
func (g *Group) close() {
if g.doneCh == nil {
return
}
close(g.doneCh)
g.InterruptEval()
g.interruptEval()
<-g.finishedCh
g.metrics.iterationDuration.Unregister()
@@ -296,25 +268,24 @@ func (g *Group) Close() {
g.metrics.iterationMissed.Unregister()
g.metrics.iterationInterval.Unregister()
for _, rule := range g.Rules {
rule.close()
rule.Close()
}
}
// SkipRandSleepOnGroupStart will skip random sleep delay in group first evaluation
var SkipRandSleepOnGroupStart bool
var skipRandSleepOnGroupStart bool
// Start starts group's evaluation
func (g *Group) Start(ctx context.Context, nts func() []notifier.Notifier, rw remotewrite.RWClient, rr datasource.QuerierBuilder) {
func (g *Group) start(ctx context.Context, nts func() []notifier.Notifier, rw *remotewrite.Client, rr datasource.QuerierBuilder) {
defer func() { close(g.finishedCh) }()
evalTS := time.Now()
// sleep random duration to spread group rules evaluation
// over time in order to reduce load on datasource.
if !SkipRandSleepOnGroupStart {
sleepBeforeStart := delayBeforeStart(evalTS, g.ID(), g.Interval, g.EvalOffset)
g.infof("will start in %v", sleepBeforeStart)
sleepTimer := time.NewTimer(sleepBeforeStart)
// Spread group rules evaluation over time in order to reduce load on VictoriaMetrics.
if !skipRandSleepOnGroupStart {
randSleep := uint64(float64(g.Interval) * (float64(g.ID()) / (1 << 64)))
sleepOffset := uint64(time.Now().UnixNano()) % uint64(g.Interval)
if randSleep < sleepOffset {
randSleep += uint64(g.Interval)
}
randSleep -= sleepOffset
sleepTimer := time.NewTimer(time.Duration(randSleep))
select {
case <-ctx.Done():
sleepTimer.Stop()
@@ -324,17 +295,18 @@ func (g *Group) Start(ctx context.Context, nts func() []notifier.Notifier, rw re
return
case <-sleepTimer.C:
}
evalTS = evalTS.Add(sleepBeforeStart)
}
e := &executor{
Rw: rw,
Notifiers: nts,
rw: rw,
notifiers: nts,
notifierHeaders: g.NotifierHeaders,
PreviouslySentSeriesToRW: make(map[uint64]map[string][]prompbmarshal.Label),
previouslySentSeriesToRW: make(map[uint64]map[string][]prompbmarshal.Label),
}
g.infof("started")
evalTS := time.Now()
logger.Infof("group %q started; interval=%v; concurrency=%d", g.Name, g.Interval, g.Concurrency)
eval := func(ctx context.Context, ts time.Time) {
g.metrics.iterationTotal.Inc()
@@ -348,7 +320,6 @@ func (g *Group) Start(ctx context.Context, nts func() []notifier.Notifier, rw re
}
resolveDuration := getResolveDuration(g.Interval, *resendDelay, *maxResolveDuration)
ts = g.adjustReqTimestamp(ts)
errs := e.execConcurrently(ctx, g.Rules, ts, g.Concurrency, resolveDuration, g.Limit)
for err := range errs {
if err != nil {
@@ -373,7 +344,7 @@ func (g *Group) Start(ctx context.Context, nts func() []notifier.Notifier, rw re
// restore the rules state after the first evaluation
// so only active alerts can be restored.
if rr != nil {
err := g.restore(ctx, rr, evalTS, *remoteReadLookBack)
err := g.Restore(ctx, rr, evalTS, *remoteReadLookBack)
if err != nil {
logger.Errorf("error while restoring ruleState for group %q: %s", g.Name, err)
}
@@ -404,12 +375,19 @@ func (g *Group) Start(ctx context.Context, nts func() []notifier.Notifier, rw re
continue
}
// ensure that staleness is tracked for existing rules only
// ensure that staleness is tracked or existing rules only
e.purgeStaleSeries(g.Rules)
e.notifierHeaders = g.NotifierHeaders
g.mu.Unlock()
g.infof("re-started")
e.notifierHeaders = g.NotifierHeaders
if g.Interval != ng.Interval {
g.Interval = ng.Interval
t.Stop()
t = time.NewTicker(g.Interval)
evalTS = time.Now()
}
g.mu.Unlock()
logger.Infof("group %q re-started; interval=%v; concurrency=%d", g.Name, g.Interval, g.Concurrency)
case <-t.C:
missed := (time.Since(evalTS) / g.Interval) - 1
if missed < 0 {
@@ -427,134 +405,6 @@ func (g *Group) Start(ctx context.Context, nts func() []notifier.Notifier, rw re
}
}
// UpdateWith inserts new group to updateCh
func (g *Group) UpdateWith(new *Group) {
g.updateCh <- new
}
// DeepCopy returns a deep copy of group
func (g *Group) DeepCopy() *Group {
g.mu.RLock()
data, _ := json.Marshal(g)
g.mu.RUnlock()
newG := Group{}
_ = json.Unmarshal(data, &newG)
newG.Rules = g.Rules
return &newG
}
// delayBeforeStart returns a duration on the interval between [ts..ts+interval].
// delayBeforeStart accounts for `offset`, so returned duration should be always
// bigger than the `offset`.
func delayBeforeStart(ts time.Time, key uint64, interval time.Duration, offset *time.Duration) time.Duration {
var randSleep time.Duration
randSleep = time.Duration(float64(interval) * (float64(key) / (1 << 64)))
sleepOffset := time.Duration(ts.UnixNano() % interval.Nanoseconds())
if randSleep < sleepOffset {
randSleep += interval
}
randSleep -= sleepOffset
// check if `ts` after randSleep is before `offset`,
// if it is, add extra eval_offset to randSleep.
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3409.
if offset != nil {
tmpEvalTS := ts.Add(randSleep)
if tmpEvalTS.Before(tmpEvalTS.Truncate(interval).Add(*offset)) {
randSleep += *offset
}
}
return randSleep
}
func (g *Group) infof(format string, args ...interface{}) {
msg := fmt.Sprintf(format, args...)
logger.Infof("group %q %s; interval=%v; eval_offset=%v; concurrency=%d",
g.Name, msg, g.Interval, g.EvalOffset, g.Concurrency)
}
// Replay performs group replay
func (g *Group) Replay(start, end time.Time, rw remotewrite.RWClient, maxDataPoint, replayRuleRetryAttempts int, replayDelay time.Duration, disableProgressBar bool) int {
var total int
step := g.Interval * time.Duration(maxDataPoint)
ri := rangeIterator{start: start, end: end, step: step}
iterations := int(end.Sub(start)/step) + 1
fmt.Printf("\nGroup %q"+
"\ninterval: \t%v"+
"\nrequests to make: \t%d"+
"\nmax range per request: \t%v\n",
g.Name, g.Interval, iterations, step)
if g.Limit > 0 {
fmt.Printf("\nPlease note, `limit: %d` param has no effect during replay.\n",
g.Limit)
}
for _, rule := range g.Rules {
fmt.Printf("> Rule %q (ID: %d)\n", rule, rule.ID())
var bar *pb.ProgressBar
if !disableProgressBar {
bar = pb.StartNew(iterations)
}
ri.reset()
for ri.next() {
n, err := replayRule(rule, ri.s, ri.e, rw, replayRuleRetryAttempts)
if err != nil {
logger.Fatalf("rule %q: %s", rule, err)
}
total += n
if bar != nil {
bar.Increment()
}
}
if bar != nil {
bar.Finish()
}
// sleep to let remote storage to flush data on-disk
// so chained rules could be calculated correctly
time.Sleep(replayDelay)
}
return total
}
// ExecOnce evaluates all the rules under group for once with given timestamp.
func (g *Group) ExecOnce(ctx context.Context, nts func() []notifier.Notifier, rw remotewrite.RWClient, evalTS time.Time) chan error {
e := &executor{
Rw: rw,
Notifiers: nts,
notifierHeaders: g.NotifierHeaders,
PreviouslySentSeriesToRW: make(map[uint64]map[string][]prompbmarshal.Label),
}
if len(g.Rules) < 1 {
return nil
}
resolveDuration := getResolveDuration(g.Interval, *resendDelay, *maxResolveDuration)
return e.execConcurrently(ctx, g.Rules, evalTS, g.Concurrency, resolveDuration, g.Limit)
}
type rangeIterator struct {
step time.Duration
start, end time.Time
iter int
s, e time.Time
}
func (ri *rangeIterator) reset() {
ri.iter = 0
ri.s, ri.e = time.Time{}, time.Time{}
}
func (ri *rangeIterator) next() bool {
ri.s = ri.start.Add(ri.step * time.Duration(ri.iter))
if !ri.end.After(ri.s) {
return false
}
ri.e = ri.s.Add(ri.step)
if ri.e.After(ri.end) {
ri.e = ri.end
}
ri.iter++
return true
}
// getResolveDuration returns the duration after which firing alert
// can be considered as resolved.
func getResolveDuration(groupInterval, delta, maxDuration time.Duration) time.Duration {
@@ -568,48 +418,20 @@ func getResolveDuration(groupInterval, delta, maxDuration time.Duration) time.Du
return resolveDuration
}
func (g *Group) adjustReqTimestamp(timestamp time.Time) time.Time {
if g.EvalOffset != nil {
// calculate the min timestamp on the evaluationInterval
intervalStart := timestamp.Truncate(g.Interval)
ts := intervalStart.Add(*g.EvalOffset)
if timestamp.Before(ts) {
// if passed timestamp is before the expected evaluation offset,
// then we should adjust it to the previous evaluation round.
// E.g. request with evaluationInterval=1h and evaluationOffset=30m
// was evaluated at 11:20. Then the timestamp should be adjusted
// to 10:30, to the previous evaluationInterval.
return ts.Add(-g.Interval)
}
// EvalOffset shouldn't interfere with evalAlignment,
// so we return it immediately
return ts
}
if g.evalAlignment == nil || *g.evalAlignment {
// align query time with interval to get similar result with grafana when plotting time series.
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5049
// and https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1232
return timestamp.Truncate(g.Interval)
}
return timestamp
}
// executor contains group's notify and rw configs
type executor struct {
Notifiers func() []notifier.Notifier
notifiers func() []notifier.Notifier
notifierHeaders map[string]string
Rw remotewrite.RWClient
rw *remotewrite.Client
previouslySentSeriesToRWMu sync.Mutex
// PreviouslySentSeriesToRW stores series sent to RW on previous iteration
// previouslySentSeriesToRW stores series sent to RW on previous iteration
// map[ruleID]map[ruleLabels][]prompb.Label
// where `ruleID` is ID of the Rule within a Group
// and `ruleLabels` is []prompb.Label marshalled to a string
PreviouslySentSeriesToRW map[uint64]map[string][]prompbmarshal.Label
previouslySentSeriesToRW map[uint64]map[string][]prompbmarshal.Label
}
// execConcurrently executes rules concurrently if concurrency>1
func (e *executor) execConcurrently(ctx context.Context, rules []Rule, ts time.Time, concurrency int, resolveDuration time.Duration, limit int) chan error {
res := make(chan error, len(rules))
if concurrency == 1 {
@@ -624,14 +446,14 @@ func (e *executor) execConcurrently(ctx context.Context, rules []Rule, ts time.T
sem := make(chan struct{}, concurrency)
go func() {
wg := sync.WaitGroup{}
for _, r := range rules {
for _, rule := range rules {
sem <- struct{}{}
wg.Add(1)
go func(r Rule) {
res <- e.exec(ctx, r, ts, resolveDuration, limit)
<-sem
wg.Done()
}(r)
}(rule)
}
wg.Wait()
close(res)
@@ -649,10 +471,10 @@ var (
remoteWriteTotal = metrics.NewCounter(`vmalert_remotewrite_total`)
)
func (e *executor) exec(ctx context.Context, r Rule, ts time.Time, resolveDuration time.Duration, limit int) error {
func (e *executor) exec(ctx context.Context, rule Rule, ts time.Time, resolveDuration time.Duration, limit int) error {
execTotal.Inc()
tss, err := r.exec(ctx, ts, limit)
tss, err := rule.Exec(ctx, ts, limit)
if err != nil {
if errors.Is(err, context.Canceled) {
// the context can be cancelled on graceful shutdown
@@ -660,17 +482,17 @@ func (e *executor) exec(ctx context.Context, r Rule, ts time.Time, resolveDurati
return nil
}
execErrors.Inc()
return fmt.Errorf("rule %q: failed to execute: %w", r, err)
return fmt.Errorf("rule %q: failed to execute: %w", rule, err)
}
if e.Rw != nil {
if e.rw != nil {
pushToRW := func(tss []prompbmarshal.TimeSeries) error {
var lastErr error
for _, ts := range tss {
remoteWriteTotal.Inc()
if err := e.Rw.Push(ts); err != nil {
if err := e.rw.Push(ts); err != nil {
remoteWriteErrors.Inc()
lastErr = fmt.Errorf("rule %q: remote write failure: %w", r, err)
lastErr = fmt.Errorf("rule %q: remote write failure: %w", rule, err)
}
}
return lastErr
@@ -679,13 +501,13 @@ func (e *executor) exec(ctx context.Context, r Rule, ts time.Time, resolveDurati
return err
}
staleSeries := e.getStaleSeries(r, tss, ts)
staleSeries := e.getStaleSeries(rule, tss, ts)
if err := pushToRW(staleSeries); err != nil {
return err
}
}
ar, ok := r.(*AlertingRule)
ar, ok := rule.(*AlertingRule)
if !ok {
return nil
}
@@ -697,11 +519,11 @@ func (e *executor) exec(ctx context.Context, r Rule, ts time.Time, resolveDurati
wg := sync.WaitGroup{}
errGr := new(utils.ErrGroup)
for _, nt := range e.Notifiers() {
for _, nt := range e.notifiers() {
wg.Add(1)
go func(nt notifier.Notifier) {
if err := nt.Send(ctx, alerts, e.notifierHeaders); err != nil {
errGr.Add(fmt.Errorf("rule %q: failed to send alerts to addr %q: %w", r, nt.Addr(), err))
errGr.Add(fmt.Errorf("rule %q: failed to send alerts to addr %q: %w", rule, nt.Addr(), err))
}
wg.Done()
}(nt)
@@ -711,7 +533,7 @@ func (e *executor) exec(ctx context.Context, r Rule, ts time.Time, resolveDurati
}
// getStaledSeries checks whether there are stale series from previously sent ones.
func (e *executor) getStaleSeries(r Rule, tss []prompbmarshal.TimeSeries, timestamp time.Time) []prompbmarshal.TimeSeries {
func (e *executor) getStaleSeries(rule Rule, tss []prompbmarshal.TimeSeries, timestamp time.Time) []prompbmarshal.TimeSeries {
ruleLabels := make(map[string][]prompbmarshal.Label, len(tss))
for _, ts := range tss {
// convert labels to strings so we can compare with previously sent series
@@ -719,11 +541,11 @@ func (e *executor) getStaleSeries(r Rule, tss []prompbmarshal.TimeSeries, timest
ruleLabels[key] = ts.Labels
}
rID := r.ID()
rID := rule.ID()
var staleS []prompbmarshal.TimeSeries
// check whether there are series which disappeared and need to be marked as stale
e.previouslySentSeriesToRWMu.Lock()
for key, labels := range e.PreviouslySentSeriesToRW[rID] {
for key, labels := range e.previouslySentSeriesToRW[rID] {
if _, ok := ruleLabels[key]; ok {
continue
}
@@ -732,7 +554,7 @@ func (e *executor) getStaleSeries(r Rule, tss []prompbmarshal.TimeSeries, timest
staleS = append(staleS, ss)
}
// set previous series to current
e.PreviouslySentSeriesToRW[rID] = ruleLabels
e.previouslySentSeriesToRW[rID] = ruleLabels
e.previouslySentSeriesToRWMu.Unlock()
return staleS
@@ -750,14 +572,14 @@ func (e *executor) purgeStaleSeries(activeRules []Rule) {
for _, rule := range activeRules {
id := rule.ID()
prev, ok := e.PreviouslySentSeriesToRW[id]
prev, ok := e.previouslySentSeriesToRW[id]
if ok {
// keep previous series for staleness detection
newPreviouslySentSeriesToRW[id] = prev
}
}
e.PreviouslySentSeriesToRW = nil
e.PreviouslySentSeriesToRW = newPreviouslySentSeriesToRW
e.previouslySentSeriesToRW = nil
e.previouslySentSeriesToRW = newPreviouslySentSeriesToRW
e.previouslySentSeriesToRWMu.Unlock()
}

View File

@@ -1,22 +1,16 @@
package rule
package main
import (
"context"
"fmt"
"math"
"os"
"reflect"
"sort"
"testing"
"time"
"gopkg.in/yaml.v2"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/remotewrite"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/templates"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/decimal"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutils"
@@ -25,15 +19,7 @@ import (
func init() {
// Disable rand sleep on group start during tests in order to speed up test execution.
// Rand sleep is needed only in prod code.
SkipRandSleepOnGroupStart = true
}
func TestMain(m *testing.M) {
if err := templates.Load([]string{}, true); err != nil {
fmt.Println("failed to load template for test")
os.Exit(1)
}
os.Exit(m.Run())
skipRandSleepOnGroupStart = true
}
func TestUpdateWith(t *testing.T) {
@@ -49,19 +35,18 @@ func TestUpdateWith(t *testing.T) {
},
{
"update alerting rule",
[]config.Rule{
{
Alert: "foo",
Expr: "up > 0",
For: promutils.NewDuration(time.Second),
Labels: map[string]string{
"bar": "baz",
},
Annotations: map[string]string{
"summary": "{{ $value|humanize }}",
"description": "{{$labels}}",
},
[]config.Rule{{
Alert: "foo",
Expr: "up > 0",
For: promutils.NewDuration(time.Second),
Labels: map[string]string{
"bar": "baz",
},
Annotations: map[string]string{
"summary": "{{ $value|humanize }}",
"description": "{{$labels}}",
},
},
{
Alert: "bar",
Expr: "up > 0",
@@ -69,8 +54,7 @@ func TestUpdateWith(t *testing.T) {
Labels: map[string]string{
"bar": "baz",
},
},
},
}},
[]config.Rule{
{
Alert: "foo",
@@ -91,8 +75,7 @@ func TestUpdateWith(t *testing.T) {
Labels: map[string]string{
"bar": "baz",
},
},
},
}},
},
{
"update recording rule",
@@ -151,7 +134,7 @@ func TestUpdateWith(t *testing.T) {
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
g := &Group{Name: "test"}
qb := &datasource.FakeQuerier{}
qb := &fakeQuerier{}
for _, r := range tc.currentRules {
r.ID = config.HashRule(r)
g.Rules = append(g.Rules, g.newRule(qb, r))
@@ -183,7 +166,7 @@ func TestUpdateWith(t *testing.T) {
if got.ID() != want.ID() {
t.Fatalf("expected to have rule %q; got %q", want, got)
}
if err := CompareRules(t, got, want); err != nil {
if err := compareRules(t, got, want); err != nil {
t.Fatalf("comparison error: %s", err)
}
}
@@ -192,31 +175,17 @@ func TestUpdateWith(t *testing.T) {
}
func TestGroupStart(t *testing.T) {
const (
rules = `
- name: groupTest
rules:
- alert: VMRows
for: 1ms
expr: vm_rows > 0
labels:
label: bar
host: "{{ $labels.instance }}"
annotations:
summary: "{{ $value }}"
`
)
var groups []config.Group
err := yaml.Unmarshal([]byte(rules), &groups)
// TODO: make parsing from string instead of file
groups, err := config.Parse([]string{"config/testdata/rules/rules1-good.rules"}, notifier.ValidateTemplates, true)
if err != nil {
t.Fatalf("failed to parse rules: %s", err)
}
fs := &datasource.FakeQuerier{}
fn := &notifier.FakeNotifier{}
fs := &fakeQuerier{}
fn := &fakeNotifier{}
const evalInterval = time.Millisecond
g := NewGroup(groups[0], fs, evalInterval, map[string]string{"cluster": "east-1"})
g := newGroup(groups[0], fs, evalInterval, map[string]string{"cluster": "east-1"})
g.Concurrency = 2
const inst1, inst2, job = "foo", "bar", "baz"
@@ -231,7 +200,7 @@ func TestGroupStart(t *testing.T) {
alert1.State = notifier.StateFiring
// add external label
alert1.Labels["cluster"] = "east-1"
// add rule labels
// add rule labels - see config/testdata/rules1-good.rules
alert1.Labels["label"] = "bar"
alert1.Labels["host"] = inst1
// add service labels
@@ -246,7 +215,7 @@ func TestGroupStart(t *testing.T) {
alert2.State = notifier.StateFiring
// add external label
alert2.Labels["cluster"] = "east-1"
// add rule labels
// add rule labels - see config/testdata/rules1-good.rules
alert2.Labels["label"] = "bar"
alert2.Labels["host"] = inst2
// add service labels
@@ -255,40 +224,40 @@ func TestGroupStart(t *testing.T) {
alert2.ID = hash(alert2.Labels)
finished := make(chan struct{})
fs.Add(m1)
fs.Add(m2)
fs.add(m1)
fs.add(m2)
go func() {
g.Start(context.Background(), func() []notifier.Notifier { return []notifier.Notifier{fn} }, nil, fs)
g.start(context.Background(), func() []notifier.Notifier { return []notifier.Notifier{fn} }, nil, fs)
close(finished)
}()
// wait for multiple evals
time.Sleep(20 * evalInterval)
gotAlerts := fn.GetAlerts()
gotAlerts := fn.getAlerts()
expectedAlerts := []notifier.Alert{*alert1, *alert2}
compareAlerts(t, expectedAlerts, gotAlerts)
gotAlertsNum := fn.GetCounter()
gotAlertsNum := fn.getCounter()
if gotAlertsNum < len(expectedAlerts)*2 {
t.Fatalf("expected to receive at least %d alerts; got %d instead",
len(expectedAlerts)*2, gotAlertsNum)
}
// reset previous data
fs.Reset()
fs.reset()
// and set only one datapoint for response
fs.Add(m1)
fs.add(m1)
// wait for multiple evals
time.Sleep(20 * evalInterval)
gotAlerts = fn.GetAlerts()
gotAlerts = fn.getAlerts()
alert2.State = notifier.StateInactive
expectedAlerts = []notifier.Alert{*alert1, *alert2}
compareAlerts(t, expectedAlerts, gotAlerts)
g.Close()
g.close()
<-finished
}
@@ -321,15 +290,15 @@ func TestResolveDuration(t *testing.T) {
func TestGetStaleSeries(t *testing.T) {
ts := time.Now()
e := &executor{
PreviouslySentSeriesToRW: make(map[uint64]map[string][]prompbmarshal.Label),
previouslySentSeriesToRW: make(map[uint64]map[string][]prompbmarshal.Label),
}
f := func(r Rule, labels, expLabels [][]prompbmarshal.Label) {
f := func(rule Rule, labels, expLabels [][]prompbmarshal.Label) {
t.Helper()
var tss []prompbmarshal.TimeSeries
for _, l := range labels {
tss = append(tss, newTimeSeriesPB([]float64{1}, []int64{ts.Unix()}, l))
}
staleS := e.getStaleSeries(r, tss, ts)
staleS := e.getStaleSeries(rule, tss, ts)
if staleS == nil && expLabels == nil {
return
}
@@ -414,7 +383,7 @@ func TestPurgeStaleSeries(t *testing.T) {
f := func(curRules, newRules, expStaleRules []Rule) {
t.Helper()
e := &executor{
PreviouslySentSeriesToRW: make(map[uint64]map[string][]prompbmarshal.Label),
previouslySentSeriesToRW: make(map[uint64]map[string][]prompbmarshal.Label),
}
// seed executor with series for
// current rules
@@ -424,13 +393,13 @@ func TestPurgeStaleSeries(t *testing.T) {
e.purgeStaleSeries(newRules)
if len(e.PreviouslySentSeriesToRW) != len(expStaleRules) {
if len(e.previouslySentSeriesToRW) != len(expStaleRules) {
t.Fatalf("expected to get %d stale series, got %d",
len(expStaleRules), len(e.PreviouslySentSeriesToRW))
len(expStaleRules), len(e.previouslySentSeriesToRW))
}
for _, exp := range expStaleRules {
if _, ok := e.PreviouslySentSeriesToRW[exp.ID()]; !ok {
if _, ok := e.previouslySentSeriesToRW[exp.ID()]; !ok {
t.Fatalf("expected to have rule %d; got nil instead", exp.ID())
}
}
@@ -465,17 +434,17 @@ func TestPurgeStaleSeries(t *testing.T) {
}
func TestFaultyNotifier(t *testing.T) {
fq := &datasource.FakeQuerier{}
fq.Add(metricWithValueAndLabels(t, 1, "__name__", "foo", "job", "bar"))
fq := &fakeQuerier{}
fq.add(metricWithValueAndLabels(t, 1, "__name__", "foo", "job", "bar"))
r := newTestAlertingRule("instant", 0)
r.q = fq
fn := &notifier.FakeNotifier{}
fn := &fakeNotifier{}
e := &executor{
Notifiers: func() []notifier.Notifier {
notifiers: func() []notifier.Notifier {
return []notifier.Notifier{
&notifier.FaultyNotifier{},
&faultyNotifier{},
fn,
}
},
@@ -491,7 +460,7 @@ func TestFaultyNotifier(t *testing.T) {
tn := time.Now()
deadline := tn.Add(delay / 2)
for {
if fn.GetCounter() > 0 {
if fn.getCounter() > 0 {
return
}
if tn.After(deadline) {
@@ -504,18 +473,18 @@ func TestFaultyNotifier(t *testing.T) {
}
func TestFaultyRW(t *testing.T) {
fq := &datasource.FakeQuerier{}
fq.Add(metricWithValueAndLabels(t, 1, "__name__", "foo", "job", "bar"))
fq := &fakeQuerier{}
fq.add(metricWithValueAndLabels(t, 1, "__name__", "foo", "job", "bar"))
r := &RecordingRule{
Name: "test",
state: newRuleState(10),
q: fq,
state: &ruleState{entries: make([]StateEntry, 10)},
}
e := &executor{
Rw: &remotewrite.Client{},
PreviouslySentSeriesToRW: make(map[uint64]map[string][]prompbmarshal.Label),
rw: &remotewrite.Client{},
previouslySentSeriesToRW: make(map[uint64]map[string][]prompbmarshal.Label),
}
err := e.exec(context.Background(), r, time.Now(), 0, 10)
@@ -525,38 +494,23 @@ func TestFaultyRW(t *testing.T) {
}
func TestCloseWithEvalInterruption(t *testing.T) {
const (
rules = `
- name: groupTest
rules:
- alert: VMRows
for: 1ms
expr: vm_rows > 0
labels:
label: bar
host: "{{ $labels.instance }}"
annotations:
summary: "{{ $value }}"
`
)
var groups []config.Group
err := yaml.Unmarshal([]byte(rules), &groups)
groups, err := config.Parse([]string{"config/testdata/rules/rules1-good.rules"}, notifier.ValidateTemplates, true)
if err != nil {
t.Fatalf("failed to parse rules: %s", err)
}
const delay = time.Second * 2
fq := &datasource.FakeQuerierWithDelay{Delay: delay}
fq := &fakeQuerierWithDelay{delay: delay}
const evalInterval = time.Millisecond
g := NewGroup(groups[0], fq, evalInterval, nil)
g := newGroup(groups[0], fq, evalInterval, nil)
go g.Start(context.Background(), nil, nil, nil)
go g.start(context.Background(), nil, nil, nil)
time.Sleep(evalInterval * 20)
go func() {
g.Close()
g.close()
}()
deadline := time.Tick(delay / 2)
@@ -566,194 +520,3 @@ func TestCloseWithEvalInterruption(t *testing.T) {
case <-g.finishedCh:
}
}
func TestGroupStartDelay(t *testing.T) {
g := &Group{}
// interval of 5min and key generate a static delay of 30s
g.Interval = time.Minute * 5
key := uint64(math.MaxUint64 / 10)
f := func(atS, expS string) {
t.Helper()
at, err := time.Parse(time.RFC3339Nano, atS)
if err != nil {
t.Fatal(err)
}
expTS, err := time.Parse(time.RFC3339Nano, expS)
if err != nil {
t.Fatal(err)
}
delay := delayBeforeStart(at, key, g.Interval, g.EvalOffset)
gotStart := at.Add(delay)
if expTS != gotStart {
t.Errorf("expected to get %v; got %v instead", expTS, gotStart)
}
}
// test group without offset
f("2023-01-01T00:00:00.000+00:00", "2023-01-01T00:00:30.000+00:00")
f("2023-01-01T00:00:00.999+00:00", "2023-01-01T00:00:30.000+00:00")
f("2023-01-01T00:00:29.000+00:00", "2023-01-01T00:00:30.000+00:00")
f("2023-01-01T00:00:31.000+00:00", "2023-01-01T00:05:30.000+00:00")
// test group with offset smaller than above fixed randSleep,
// this way randSleep will always be enough
offset := 20 * time.Second
g.EvalOffset = &offset
f("2023-01-01T00:00:00.000+00:00", "2023-01-01T00:00:30.000+00:00")
f("2023-01-01T00:00:29.000+00:00", "2023-01-01T00:00:30.000+00:00")
f("2023-01-01T00:00:31.000+00:00", "2023-01-01T00:05:30.000+00:00")
// test group with offset bigger than above fixed randSleep,
// this way offset will be added to delay
offset = 3 * time.Minute
g.EvalOffset = &offset
f("2023-01-01T00:00:00.000+00:00", "2023-01-01T00:03:30.000+00:00")
f("2023-01-01T00:00:29.000+00:00", "2023-01-01T00:03:30.000+00:00")
f("2023-01-01T00:01:00.000+00:00", "2023-01-01T00:08:30.000+00:00")
f("2023-01-01T00:03:30.000+00:00", "2023-01-01T00:08:30.000+00:00")
f("2023-01-01T00:07:30.000+00:00", "2023-01-01T00:13:30.000+00:00")
offset = 10 * time.Minute
g.EvalOffset = &offset
// interval of 1h and key generate a static delay of 6m
g.Interval = time.Hour
f("2023-01-01T00:00:00.000+00:00", "2023-01-01T00:16:00.000+00:00")
f("2023-01-01T00:05:00.000+00:00", "2023-01-01T00:16:00.000+00:00")
f("2023-01-01T00:30:00.000+00:00", "2023-01-01T01:16:00.000+00:00")
}
func TestGetPrometheusReqTimestamp(t *testing.T) {
offset := 30 * time.Minute
disableAlign := false
testCases := []struct {
name string
g *Group
originTS, expTS string
}{
{
"with query align",
&Group{
Interval: time.Hour,
},
"2023-08-28T11:11:00+00:00",
"2023-08-28T11:00:00+00:00",
},
{
"without query align",
&Group{
Interval: time.Hour,
evalAlignment: &disableAlign,
},
"2023-08-28T11:11:00+00:00",
"2023-08-28T11:11:00+00:00",
},
{
"with eval_offset, find previous offset point",
&Group{
EvalOffset: &offset,
Interval: time.Hour,
},
"2023-08-28T11:11:00+00:00",
"2023-08-28T10:30:00+00:00",
},
{
"with eval_offset",
&Group{
EvalOffset: &offset,
Interval: time.Hour,
},
"2023-08-28T11:41:00+00:00",
"2023-08-28T11:30:00+00:00",
},
}
for _, tc := range testCases {
originT, _ := time.Parse(time.RFC3339, tc.originTS)
expT, _ := time.Parse(time.RFC3339, tc.expTS)
gotTS := tc.g.adjustReqTimestamp(originT)
if !gotTS.Equal(expT) {
t.Fatalf("get wrong prometheus request timestamp, expect %s, got %s", expT, gotTS)
}
}
}
func TestRangeIterator(t *testing.T) {
testCases := []struct {
ri rangeIterator
result [][2]time.Time
}{
{
ri: rangeIterator{
start: parseTime(t, "2021-01-01T12:00:00.000Z"),
end: parseTime(t, "2021-01-01T12:30:00.000Z"),
step: 5 * time.Minute,
},
result: [][2]time.Time{
{parseTime(t, "2021-01-01T12:00:00.000Z"), parseTime(t, "2021-01-01T12:05:00.000Z")},
{parseTime(t, "2021-01-01T12:05:00.000Z"), parseTime(t, "2021-01-01T12:10:00.000Z")},
{parseTime(t, "2021-01-01T12:10:00.000Z"), parseTime(t, "2021-01-01T12:15:00.000Z")},
{parseTime(t, "2021-01-01T12:15:00.000Z"), parseTime(t, "2021-01-01T12:20:00.000Z")},
{parseTime(t, "2021-01-01T12:20:00.000Z"), parseTime(t, "2021-01-01T12:25:00.000Z")},
{parseTime(t, "2021-01-01T12:25:00.000Z"), parseTime(t, "2021-01-01T12:30:00.000Z")},
},
},
{
ri: rangeIterator{
start: parseTime(t, "2021-01-01T12:00:00.000Z"),
end: parseTime(t, "2021-01-01T12:30:00.000Z"),
step: 45 * time.Minute,
},
result: [][2]time.Time{
{parseTime(t, "2021-01-01T12:00:00.000Z"), parseTime(t, "2021-01-01T12:30:00.000Z")},
{parseTime(t, "2021-01-01T12:30:00.000Z"), parseTime(t, "2021-01-01T12:30:00.000Z")},
},
},
{
ri: rangeIterator{
start: parseTime(t, "2021-01-01T12:00:12.000Z"),
end: parseTime(t, "2021-01-01T12:00:17.000Z"),
step: time.Second,
},
result: [][2]time.Time{
{parseTime(t, "2021-01-01T12:00:12.000Z"), parseTime(t, "2021-01-01T12:00:13.000Z")},
{parseTime(t, "2021-01-01T12:00:13.000Z"), parseTime(t, "2021-01-01T12:00:14.000Z")},
{parseTime(t, "2021-01-01T12:00:14.000Z"), parseTime(t, "2021-01-01T12:00:15.000Z")},
{parseTime(t, "2021-01-01T12:00:15.000Z"), parseTime(t, "2021-01-01T12:00:16.000Z")},
{parseTime(t, "2021-01-01T12:00:16.000Z"), parseTime(t, "2021-01-01T12:00:17.000Z")},
},
},
}
for i, tc := range testCases {
t.Run(fmt.Sprintf("case %d", i), func(t *testing.T) {
var j int
for tc.ri.next() {
if len(tc.result) < j+1 {
t.Fatalf("unexpected result for iterator on step %d: %v - %v",
j, tc.ri.s, tc.ri.e)
}
s, e := tc.ri.s, tc.ri.e
expS, expE := tc.result[j][0], tc.result[j][1]
if s != expS {
t.Fatalf("expected to get start=%v; got %v", expS, s)
}
if e != expE {
t.Fatalf("expected to get end=%v; got %v", expE, e)
}
j++
}
})
}
}
func parseTime(t *testing.T, s string) time.Time {
t.Helper()
tt, err := time.Parse("2006-01-02T15:04:05.000Z", s)
if err != nil {
t.Fatal(err)
}
return tt
}

View File

@@ -1,18 +1,239 @@
package rule
package main
import (
"context"
"fmt"
"net/http"
"reflect"
"sort"
"sync"
"testing"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
)
// CompareRules is a test helper func for other tests
func CompareRules(t *testing.T, a, b Rule) error {
type fakeQuerier struct {
sync.Mutex
metrics []datasource.Metric
err error
}
func (fq *fakeQuerier) setErr(err error) {
fq.Lock()
fq.err = err
fq.Unlock()
}
func (fq *fakeQuerier) reset() {
fq.Lock()
fq.err = nil
fq.metrics = fq.metrics[:0]
fq.Unlock()
}
func (fq *fakeQuerier) add(metrics ...datasource.Metric) {
fq.Lock()
fq.metrics = append(fq.metrics, metrics...)
fq.Unlock()
}
func (fq *fakeQuerier) BuildWithParams(_ datasource.QuerierParams) datasource.Querier {
return fq
}
func (fq *fakeQuerier) QueryRange(ctx context.Context, q string, _, _ time.Time) (datasource.Result, error) {
req, _, err := fq.Query(ctx, q, time.Now())
return req, err
}
func (fq *fakeQuerier) Query(_ context.Context, _ string, _ time.Time) (datasource.Result, *http.Request, error) {
fq.Lock()
defer fq.Unlock()
if fq.err != nil {
return datasource.Result{}, nil, fq.err
}
cp := make([]datasource.Metric, len(fq.metrics))
copy(cp, fq.metrics)
req, _ := http.NewRequest(http.MethodPost, "foo.com", nil)
return datasource.Result{Data: cp}, req, nil
}
type fakeQuerierWithRegistry struct {
sync.Mutex
registry map[string][]datasource.Metric
}
func (fqr *fakeQuerierWithRegistry) set(key string, metrics ...datasource.Metric) {
fqr.Lock()
if fqr.registry == nil {
fqr.registry = make(map[string][]datasource.Metric)
}
fqr.registry[key] = metrics
fqr.Unlock()
}
func (fqr *fakeQuerierWithRegistry) reset() {
fqr.Lock()
fqr.registry = nil
fqr.Unlock()
}
func (fqr *fakeQuerierWithRegistry) BuildWithParams(_ datasource.QuerierParams) datasource.Querier {
return fqr
}
func (fqr *fakeQuerierWithRegistry) QueryRange(ctx context.Context, q string, _, _ time.Time) (datasource.Result, error) {
req, _, err := fqr.Query(ctx, q, time.Now())
return req, err
}
func (fqr *fakeQuerierWithRegistry) Query(_ context.Context, expr string, _ time.Time) (datasource.Result, *http.Request, error) {
fqr.Lock()
defer fqr.Unlock()
req, _ := http.NewRequest(http.MethodPost, "foo.com", nil)
metrics, ok := fqr.registry[expr]
if !ok {
return datasource.Result{}, req, nil
}
cp := make([]datasource.Metric, len(metrics))
copy(cp, metrics)
return datasource.Result{Data: cp}, req, nil
}
type fakeQuerierWithDelay struct {
fakeQuerier
delay time.Duration
}
func (fqd *fakeQuerierWithDelay) Query(ctx context.Context, expr string, ts time.Time) (datasource.Result, *http.Request, error) {
timer := time.NewTimer(fqd.delay)
select {
case <-ctx.Done():
case <-timer.C:
}
return fqd.fakeQuerier.Query(ctx, expr, ts)
}
func (fqd *fakeQuerierWithDelay) BuildWithParams(_ datasource.QuerierParams) datasource.Querier {
return fqd
}
type fakeNotifier struct {
sync.Mutex
alerts []notifier.Alert
// records number of received alerts in total
counter int
}
func (*fakeNotifier) Close() {}
func (*fakeNotifier) Addr() string { return "" }
func (fn *fakeNotifier) Send(_ context.Context, alerts []notifier.Alert, _ map[string]string) error {
fn.Lock()
defer fn.Unlock()
fn.counter += len(alerts)
fn.alerts = alerts
return nil
}
func (fn *fakeNotifier) getCounter() int {
fn.Lock()
defer fn.Unlock()
return fn.counter
}
func (fn *fakeNotifier) getAlerts() []notifier.Alert {
fn.Lock()
defer fn.Unlock()
return fn.alerts
}
type faultyNotifier struct {
fakeNotifier
}
func (fn *faultyNotifier) Send(ctx context.Context, _ []notifier.Alert, _ map[string]string) error {
d, ok := ctx.Deadline()
if ok {
time.Sleep(time.Until(d))
}
return fmt.Errorf("send failed")
}
func metricWithValueAndLabels(t *testing.T, value float64, labels ...string) datasource.Metric {
return metricWithValuesAndLabels(t, []float64{value}, labels...)
}
func metricWithValuesAndLabels(t *testing.T, values []float64, labels ...string) datasource.Metric {
t.Helper()
m := metricWithLabels(t, labels...)
m.Values = values
for i := range values {
m.Timestamps = append(m.Timestamps, int64(i))
}
return m
}
func metricWithLabels(t *testing.T, labels ...string) datasource.Metric {
t.Helper()
if len(labels) == 0 || len(labels)%2 != 0 {
t.Fatalf("expected to get even number of labels")
}
m := datasource.Metric{Values: []float64{1}, Timestamps: []int64{1}}
for i := 0; i < len(labels); i += 2 {
m.Labels = append(m.Labels, datasource.Label{
Name: labels[i],
Value: labels[i+1],
})
}
return m
}
func toPromLabels(t *testing.T, labels ...string) []prompbmarshal.Label {
t.Helper()
if len(labels) == 0 || len(labels)%2 != 0 {
t.Fatalf("expected to get even number of labels")
}
var ls []prompbmarshal.Label
for i := 0; i < len(labels); i += 2 {
ls = append(ls, prompbmarshal.Label{
Name: labels[i],
Value: labels[i+1],
})
}
return ls
}
func compareGroups(t *testing.T, a, b *Group) {
t.Helper()
if a.Name != b.Name {
t.Fatalf("expected group name %q; got %q", a.Name, b.Name)
}
if a.File != b.File {
t.Fatalf("expected group %q file name %q; got %q", a.Name, a.File, b.File)
}
if a.Interval != b.Interval {
t.Fatalf("expected group %q interval %v; got %v", a.Name, a.Interval, b.Interval)
}
if len(a.Rules) != len(b.Rules) {
t.Fatalf("expected group %s to have %d rules; got: %d",
a.Name, len(a.Rules), len(b.Rules))
}
for i, r := range a.Rules {
got, want := r, b.Rules[i]
if a.ID() != b.ID() {
t.Fatalf("expected to have rule %q; got %q", want.ID(), got.ID())
}
if err := compareRules(t, want, got); err != nil {
t.Fatalf("comparison error: %s", err)
}
}
}
func compareRules(t *testing.T, a, b Rule) error {
t.Helper()
switch v := a.(type) {
case *AlertingRule:
@@ -66,50 +287,6 @@ func compareAlertingRules(t *testing.T, a, b *AlertingRule) error {
return nil
}
func metricWithValueAndLabels(t *testing.T, value float64, labels ...string) datasource.Metric {
return metricWithValuesAndLabels(t, []float64{value}, labels...)
}
func metricWithValuesAndLabels(t *testing.T, values []float64, labels ...string) datasource.Metric {
t.Helper()
m := metricWithLabels(t, labels...)
m.Values = values
for i := range values {
m.Timestamps = append(m.Timestamps, int64(i))
}
return m
}
func metricWithLabels(t *testing.T, labels ...string) datasource.Metric {
t.Helper()
if len(labels) == 0 || len(labels)%2 != 0 {
t.Fatalf("expected to get even number of labels")
}
m := datasource.Metric{Values: []float64{1}, Timestamps: []int64{1}}
for i := 0; i < len(labels); i += 2 {
m.Labels = append(m.Labels, datasource.Label{
Name: labels[i],
Value: labels[i+1],
})
}
return m
}
func toPromLabels(t *testing.T, labels ...string) []prompbmarshal.Label {
t.Helper()
if len(labels) == 0 || len(labels)%2 != 0 {
t.Fatalf("expected to get even number of labels")
}
var ls []prompbmarshal.Label
for i := 0; i < len(labels); i += 2 {
ls = append(ls, prompbmarshal.Label{
Name: labels[i],
Value: labels[i+1],
})
}
return ls
}
func compareTimeSeries(t *testing.T, a, b []prompbmarshal.TimeSeries) error {
t.Helper()
if len(a) != len(b) {

View File

@@ -18,7 +18,6 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/remoteread"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/remotewrite"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/rule"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/templates"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/envflag"
@@ -67,6 +66,11 @@ absolute path to all .tpl files in root.
validateTemplates = flag.Bool("rule.validateTemplates", true, "Whether to validate annotation and label templates")
validateExpressions = flag.Bool("rule.validateExpressions", true, "Whether to validate rules expressions via MetricsQL engine")
maxResolveDuration = flag.Duration("rule.maxResolveDuration", 0, "Limits the maximum duration for automatic alert expiration, "+
"which by default is 4 times evaluationInterval of the parent group.")
resendDelay = flag.Duration("rule.resendDelay", 0, "Minimum amount of time to wait before resending an alert to notifier")
ruleUpdateEntriesLimit = flag.Int("rule.updateEntriesLimit", 20, "Defines the max number of rule's state updates stored in-memory. "+
"Rule's updates are available on rule's Details page and are used for debugging purposes. The number of stored updates can be overridden per rule via update_entries_limit param.")
externalURL = flag.String("external.url", "", "External URL is used as alert's source for sent alerts to the notifier. By default, hostname is used as address.")
externalAlertSource = flag.String("external.alert.source", "", `External Alert Source allows to override the Source link for alerts sent to AlertManager `+
@@ -78,8 +82,12 @@ absolute path to all .tpl files in root.
externalLabels = flagutil.NewArrayString("external.label", "Optional label in the form 'Name=value' to add to all generated recording rules and alerts. "+
"Pass multiple -label flags in order to add multiple label sets.")
remoteReadLookBack = flag.Duration("remoteRead.lookback", time.Hour, "Lookback defines how far to look into past for alerts timeseries."+
" For example, if lookback=1h then range from now() to now()-1h will be scanned.")
remoteReadIgnoreRestoreErrors = flag.Bool("remoteRead.ignoreRestoreErrors", true, "Whether to ignore errors from remote storage when restoring alerts state on startup. DEPRECATED - this flag has no effect and will be removed in the next releases.")
disableAlertGroupLabel = flag.Bool("disableAlertgroupLabel", false, "Whether to disable adding group's Name as label to generated alerts and time series.")
dryRun = flag.Bool("dryRun", false, "Whether to check only config files without running vmalert. The rules file are validated. The -rule flag must be specified.")
)
@@ -93,7 +101,6 @@ func main() {
remoteread.InitSecretFlags()
remotewrite.InitSecretFlags()
datasource.InitSecretFlags()
notifier.InitSecretFlags()
buildinfo.Init()
logger.Init()
pushmetrics.Init()
@@ -221,7 +228,7 @@ func newManager(ctx context.Context) (*manager, error) {
return nil, fmt.Errorf("failed to init notifier: %w", err)
}
manager := &manager{
groups: make(map[uint64]*rule.Group),
groups: make(map[uint64]*Group),
querierBuilder: q,
notifiers: nts,
labels: labels,

View File

@@ -8,19 +8,11 @@ import (
"testing"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/remotewrite"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/rule"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
)
func init() {
// Disable rand sleep on group start during tests in order to speed up test execution.
// Rand sleep is needed only in prod code.
rule.SkipRandSleepOnGroupStart = true
}
func TestGetExternalURL(t *testing.T) {
expURL := "https://vicotriametrics.com/path"
u, err := getExternalURL(expURL, "", false)
@@ -95,10 +87,10 @@ groups:
)
f, err := os.CreateTemp("", "")
defer os.Remove(f.Name())
if err != nil {
t.Fatal(err)
}
defer func() { _ = os.Remove(f.Name()) }()
writeToFile(t, f.Name(), rules1)
*configCheckInterval = 200 * time.Millisecond
@@ -106,10 +98,10 @@ groups:
ctx, cancel := context.WithCancel(context.Background())
m := &manager{
querierBuilder: &datasource.FakeQuerier{},
groups: make(map[uint64]*rule.Group),
querierBuilder: &fakeQuerier{},
groups: make(map[uint64]*Group),
labels: map[string]string{},
notifiers: func() []notifier.Notifier { return []notifier.Notifier{&notifier.FakeNotifier{}} },
notifiers: func() []notifier.Notifier { return []notifier.Notifier{&fakeNotifier{}} },
rw: &remotewrite.Client{},
}

View File

@@ -3,13 +3,14 @@ package main
import (
"context"
"fmt"
"net/url"
"sort"
"sync"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/remotewrite"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/rule"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
)
@@ -18,7 +19,7 @@ type manager struct {
querierBuilder datasource.QuerierBuilder
notifiers func() []notifier.Notifier
rw remotewrite.RWClient
rw *remotewrite.Client
// remote read builder.
rr datasource.QuerierBuilder
@@ -26,28 +27,28 @@ type manager struct {
labels map[string]string
groupsMu sync.RWMutex
groups map[uint64]*rule.Group
groups map[uint64]*Group
}
// ruleAPI generates apiRule object from alert by its ID(hash)
func (m *manager) ruleAPI(gID, rID uint64) (apiRule, error) {
// RuleAPI generates APIRule object from alert by its ID(hash)
func (m *manager) RuleAPI(gID, rID uint64) (APIRule, error) {
m.groupsMu.RLock()
defer m.groupsMu.RUnlock()
g, ok := m.groups[gID]
if !ok {
return apiRule{}, fmt.Errorf("can't find group with id %d", gID)
return APIRule{}, fmt.Errorf("can't find group with id %d", gID)
}
for _, rule := range g.Rules {
if rule.ID() == rID {
return ruleToAPI(rule), nil
return rule.ToAPI(), nil
}
}
return apiRule{}, fmt.Errorf("can't find rule with id %d in group %q", rID, g.Name)
return APIRule{}, fmt.Errorf("can't find rule with id %d in group %q", rID, g.Name)
}
// alertAPI generates apiAlert object from alert by its ID(hash)
func (m *manager) alertAPI(gID, aID uint64) (*apiAlert, error) {
// AlertAPI generates APIAlert object from alert by its ID(hash)
func (m *manager) AlertAPI(gID, aID uint64) (*APIAlert, error) {
m.groupsMu.RLock()
defer m.groupsMu.RUnlock()
@@ -55,12 +56,12 @@ func (m *manager) alertAPI(gID, aID uint64) (*apiAlert, error) {
if !ok {
return nil, fmt.Errorf("can't find group with id %d", gID)
}
for _, r := range g.Rules {
ar, ok := r.(*rule.AlertingRule)
for _, rule := range g.Rules {
ar, ok := rule.(*AlertingRule)
if !ok {
continue
}
if apiAlert := alertToAPI(ar, aID); apiAlert != nil {
if apiAlert := ar.AlertAPI(aID); apiAlert != nil {
return apiAlert, nil
}
}
@@ -81,15 +82,15 @@ func (m *manager) close() {
m.wg.Wait()
}
func (m *manager) startGroup(ctx context.Context, g *rule.Group, restore bool) error {
func (m *manager) startGroup(ctx context.Context, g *Group, restore bool) error {
m.wg.Add(1)
id := g.ID()
go func() {
defer m.wg.Done()
if restore {
g.Start(ctx, m.notifiers, m.rw, m.rr)
g.start(ctx, m.notifiers, m.rw, m.rr)
} else {
g.Start(ctx, m.notifiers, m.rw, nil)
g.start(ctx, m.notifiers, m.rw, nil)
}
}()
m.groups[id] = g
@@ -98,7 +99,7 @@ func (m *manager) startGroup(ctx context.Context, g *rule.Group, restore bool) e
func (m *manager) update(ctx context.Context, groupsCfg []config.Group, restore bool) error {
var rrPresent, arPresent bool
groupsRegistry := make(map[uint64]*rule.Group)
groupsRegistry := make(map[uint64]*Group)
for _, cfg := range groupsCfg {
for _, r := range cfg.Rules {
if rrPresent && arPresent {
@@ -111,7 +112,7 @@ func (m *manager) update(ctx context.Context, groupsCfg []config.Group, restore
arPresent = true
}
}
ng := rule.NewGroup(cfg, m.querierBuilder, *evaluationInterval, m.labels)
ng := newGroup(cfg, m.querierBuilder, *evaluationInterval, m.labels)
groupsRegistry[ng.ID()] = ng
}
@@ -123,8 +124,8 @@ func (m *manager) update(ctx context.Context, groupsCfg []config.Group, restore
}
type updateItem struct {
old *rule.Group
new *rule.Group
old *Group
new *Group
}
var toUpdate []updateItem
@@ -134,7 +135,7 @@ func (m *manager) update(ctx context.Context, groupsCfg []config.Group, restore
if !ok {
// old group is not present in new list,
// so must be stopped and deleted
og.Close()
og.close()
delete(m.groups, og.ID())
og = nil
continue
@@ -156,13 +157,81 @@ func (m *manager) update(ctx context.Context, groupsCfg []config.Group, restore
var wg sync.WaitGroup
for _, item := range toUpdate {
wg.Add(1)
go func(old *rule.Group, new *rule.Group) {
old.UpdateWith(new)
go func(old *Group, new *Group) {
old.updateCh <- new
wg.Done()
}(item.old, item.new)
item.old.InterruptEval()
item.old.interruptEval()
}
wg.Wait()
}
return nil
}
func (g *Group) toAPI() APIGroup {
g.mu.RLock()
defer g.mu.RUnlock()
ag := APIGroup{
// encode as string to avoid rounding
ID: fmt.Sprintf("%d", g.ID()),
Name: g.Name,
Type: g.Type.String(),
File: g.File,
Interval: g.Interval.Seconds(),
LastEvaluation: g.LastEvaluation,
Concurrency: g.Concurrency,
Params: urlValuesToStrings(g.Params),
Headers: headersToStrings(g.Headers),
NotifierHeaders: headersToStrings(g.NotifierHeaders),
Labels: g.Labels,
}
ag.Rules = make([]APIRule, 0)
for _, r := range g.Rules {
ag.Rules = append(ag.Rules, r.ToAPI())
}
return ag
}
func urlValuesToStrings(values url.Values) []string {
if len(values) < 1 {
return nil
}
keys := make([]string, 0, len(values))
for k := range values {
keys = append(keys, k)
}
sort.Strings(keys)
var res []string
for _, k := range keys {
params := values[k]
for _, v := range params {
res = append(res, fmt.Sprintf("%s=%s", k, v))
}
}
return res
}
func headersToStrings(headers map[string]string) []string {
if len(headers) < 1 {
return nil
}
keys := make([]string, 0, len(headers))
for k := range headers {
keys = append(keys, k)
}
sort.Strings(keys)
var res []string
for _, k := range keys {
v := headers[k]
res = append(res, fmt.Sprintf("%s: %s", k, v))
}
return res
}

View File

@@ -10,10 +10,8 @@ import (
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/remotewrite"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/rule"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/templates"
)
@@ -28,7 +26,7 @@ func TestMain(m *testing.M) {
// successful cases of
// starting with empty rules folder
func TestManagerEmptyRulesDir(t *testing.T) {
m := &manager{groups: make(map[uint64]*rule.Group)}
m := &manager{groups: make(map[uint64]*Group)}
cfg := loadCfg(t, []string{"foo/bar"}, true, true)
if err := m.update(context.Background(), cfg, false); err != nil {
t.Fatalf("expected to load successfully with empty rules dir; got err instead: %v", err)
@@ -40,9 +38,9 @@ func TestManagerEmptyRulesDir(t *testing.T) {
// Should be executed with -race flag
func TestManagerUpdateConcurrent(t *testing.T) {
m := &manager{
groups: make(map[uint64]*rule.Group),
querierBuilder: &datasource.FakeQuerier{},
notifiers: func() []notifier.Notifier { return []notifier.Notifier{&notifier.FakeNotifier{}} },
groups: make(map[uint64]*Group),
querierBuilder: &fakeQuerier{},
notifiers: func() []notifier.Notifier { return []notifier.Notifier{&fakeNotifier{}} },
}
paths := []string{
"config/testdata/dir/rules0-good.rules",
@@ -93,7 +91,7 @@ func TestManagerUpdate(t *testing.T) {
}()
var (
VMRows = &rule.AlertingRule{
VMRows = &AlertingRule{
Name: "VMRows",
Expr: "vm_rows > 0",
For: 10 * time.Second,
@@ -106,7 +104,7 @@ func TestManagerUpdate(t *testing.T) {
"description": "{{$labels}}",
},
}
Conns = &rule.AlertingRule{
Conns = &AlertingRule{
Name: "Conns",
Expr: "sum(vm_tcplistener_conns) by(instance) > 1",
Annotations: map[string]string{
@@ -114,7 +112,7 @@ func TestManagerUpdate(t *testing.T) {
"description": "It is {{ $value }} connections for {{$labels.instance}}",
},
}
ExampleAlertAlwaysFiring = &rule.AlertingRule{
ExampleAlertAlwaysFiring = &AlertingRule{
Name: "ExampleAlertAlwaysFiring",
Expr: "sum by(job) (up == 1)",
}
@@ -124,20 +122,20 @@ func TestManagerUpdate(t *testing.T) {
name string
initPath string
updatePath string
want []*rule.Group
want []*Group
}{
{
name: "update good rules",
initPath: "config/testdata/rules/rules0-good.rules",
updatePath: "config/testdata/dir/rules1-good.rules",
want: []*rule.Group{
want: []*Group{
{
File: "config/testdata/dir/rules1-good.rules",
Name: "duplicatedGroupDiffFiles",
Type: config.NewPrometheusType(),
Interval: defaultEvalInterval,
Rules: []rule.Rule{
&rule.AlertingRule{
Rules: []Rule{
&AlertingRule{
Name: "VMRows",
Expr: "vm_rows > 0",
For: 5 * time.Minute,
@@ -155,68 +153,64 @@ func TestManagerUpdate(t *testing.T) {
name: "update good rules from 1 to 2 groups",
initPath: "config/testdata/dir/rules/rules1-good.rules",
updatePath: "config/testdata/rules/rules0-good.rules",
want: []*rule.Group{
want: []*Group{
{
File: "config/testdata/rules/rules0-good.rules",
Name: "groupGorSingleAlert",
Type: config.NewPrometheusType(),
Rules: []Rule{VMRows},
Interval: defaultEvalInterval,
Rules: []rule.Rule{VMRows},
},
{
File: "config/testdata/rules/rules0-good.rules",
Interval: defaultEvalInterval,
Type: config.NewPrometheusType(),
Name: "TestGroup",
Rules: []rule.Rule{
Name: "TestGroup", Rules: []Rule{
Conns,
ExampleAlertAlwaysFiring,
},
},
}},
},
},
{
name: "update with one bad rule file",
initPath: "config/testdata/rules/rules0-good.rules",
updatePath: "config/testdata/dir/rules2-bad.rules",
want: []*rule.Group{
want: []*Group{
{
File: "config/testdata/rules/rules0-good.rules",
Name: "groupGorSingleAlert",
Type: config.NewPrometheusType(),
Interval: defaultEvalInterval,
Rules: []rule.Rule{VMRows},
Rules: []Rule{VMRows},
},
{
File: "config/testdata/rules/rules0-good.rules",
Interval: defaultEvalInterval,
Name: "TestGroup",
Type: config.NewPrometheusType(),
Rules: []rule.Rule{
Rules: []Rule{
Conns,
ExampleAlertAlwaysFiring,
},
},
}},
},
},
{
name: "update empty dir rules from 0 to 2 groups",
initPath: "config/testdata/empty/*",
updatePath: "config/testdata/rules/rules0-good.rules",
want: []*rule.Group{
want: []*Group{
{
File: "config/testdata/rules/rules0-good.rules",
Name: "groupGorSingleAlert",
Type: config.NewPrometheusType(),
Interval: defaultEvalInterval,
Rules: []rule.Rule{VMRows},
Rules: []Rule{VMRows},
},
{
File: "config/testdata/rules/rules0-good.rules",
Interval: defaultEvalInterval,
Type: config.NewPrometheusType(),
Name: "TestGroup",
Rules: []rule.Rule{
Name: "TestGroup", Rules: []Rule{
Conns,
ExampleAlertAlwaysFiring,
},
@@ -228,9 +222,9 @@ func TestManagerUpdate(t *testing.T) {
t.Run(tc.name, func(t *testing.T) {
ctx, cancel := context.WithCancel(context.TODO())
m := &manager{
groups: make(map[uint64]*rule.Group),
querierBuilder: &datasource.FakeQuerier{},
notifiers: func() []notifier.Notifier { return []notifier.Notifier{&notifier.FakeNotifier{}} },
groups: make(map[uint64]*Group),
querierBuilder: &fakeQuerier{},
notifiers: func() []notifier.Notifier { return []notifier.Notifier{&fakeNotifier{}} },
}
cfgInit := loadCfg(t, []string{tc.initPath}, true, true)
@@ -259,44 +253,18 @@ func TestManagerUpdate(t *testing.T) {
})
}
}
func compareGroups(t *testing.T, a, b *rule.Group) {
t.Helper()
if a.Name != b.Name {
t.Fatalf("expected group name %q; got %q", a.Name, b.Name)
}
if a.File != b.File {
t.Fatalf("expected group %q file name %q; got %q", a.Name, a.File, b.File)
}
if a.Interval != b.Interval {
t.Fatalf("expected group %q interval %v; got %v", a.Name, a.Interval, b.Interval)
}
if len(a.Rules) != len(b.Rules) {
t.Fatalf("expected group %s to have %d rules; got: %d",
a.Name, len(a.Rules), len(b.Rules))
}
for i, r := range a.Rules {
got, want := r, b.Rules[i]
if a.ID() != b.ID() {
t.Fatalf("expected to have rule %q; got %q", want.ID(), got.ID())
}
if err := rule.CompareRules(t, want, got); err != nil {
t.Fatalf("comparison error: %s", err)
}
}
}
func TestManagerUpdateNegative(t *testing.T) {
testCases := []struct {
notifiers []notifier.Notifier
rw remotewrite.RWClient
rw *remotewrite.Client
cfg config.Group
expErr string
}{
{
nil,
nil,
config.Group{
Name: "Recording rule only",
config.Group{Name: "Recording rule only",
Rules: []config.Rule{
{Record: "record", Expr: "max(up)"},
},
@@ -306,8 +274,7 @@ func TestManagerUpdateNegative(t *testing.T) {
{
nil,
nil,
config.Group{
Name: "Alerting rule only",
config.Group{Name: "Alerting rule only",
Rules: []config.Rule{
{Alert: "alert", Expr: "up > 0"},
},
@@ -315,10 +282,9 @@ func TestManagerUpdateNegative(t *testing.T) {
"contains alerting rules",
},
{
[]notifier.Notifier{&notifier.FakeNotifier{}},
[]notifier.Notifier{&fakeNotifier{}},
nil,
config.Group{
Name: "Recording and alerting rules",
config.Group{Name: "Recording and alerting rules",
Rules: []config.Rule{
{Alert: "alert1", Expr: "up > 0"},
{Alert: "alert2", Expr: "up > 0"},
@@ -330,8 +296,7 @@ func TestManagerUpdateNegative(t *testing.T) {
{
nil,
&remotewrite.Client{},
config.Group{
Name: "Recording and alerting rules",
config.Group{Name: "Recording and alerting rules",
Rules: []config.Rule{
{Record: "record1", Expr: "max(up)"},
{Record: "record2", Expr: "max(up)"},
@@ -345,8 +310,8 @@ func TestManagerUpdateNegative(t *testing.T) {
for _, tc := range testCases {
t.Run(tc.cfg.Name, func(t *testing.T) {
m := &manager{
groups: make(map[uint64]*rule.Group),
querierBuilder: &datasource.FakeQuerier{},
groups: make(map[uint64]*Group),
querierBuilder: &fakeQuerier{},
rw: tc.rw,
}
if tc.notifiers != nil {
@@ -375,3 +340,21 @@ func loadCfg(t *testing.T, path []string, validateAnnotations, validateExpressio
}
return cfg
}
func TestUrlValuesToStrings(t *testing.T) {
mapQueryParams := map[string][]string{
"param1": {"param1"},
"param2": {"anotherparam"},
}
expectedRes := []string{"param1=param1", "param2=anotherparam"}
res := urlValuesToStrings(mapQueryParams)
if len(res) != len(expectedRes) {
t.Errorf("Expected length %d, but got %d", len(expectedRes), len(res))
}
for ind, val := range expectedRes {
if val != res[ind] {
t.Errorf("Expected %v; but got %v", val, res[ind])
}
}
}

View File

@@ -6,7 +6,6 @@ import (
"fmt"
"io"
"net/http"
"net/url"
"strings"
"time"
@@ -18,7 +17,7 @@ import (
// AlertManager represents integration provider with Prometheus alert manager
// https://github.com/prometheus/alertmanager
type AlertManager struct {
addr *url.URL
addr string
argFunc AlertURLGenerator
client *http.Client
timeout time.Duration
@@ -49,12 +48,7 @@ func (am *AlertManager) Close() {
}
// Addr returns address where alerts are sent.
func (am AlertManager) Addr() string {
if *showNotifierURL {
return am.addr.String()
}
return am.addr.Redacted()
}
func (am AlertManager) Addr() string { return am.addr }
// Send an alert or resolve message
func (am *AlertManager) Send(ctx context.Context, alerts []Alert, headers map[string]string) error {
@@ -70,7 +64,7 @@ func (am *AlertManager) send(ctx context.Context, alerts []Alert, headers map[st
b := &bytes.Buffer{}
writeamRequest(b, alerts, am.argFunc, am.relabelConfigs)
req, err := http.NewRequest(http.MethodPost, am.addr.String(), b)
req, err := http.NewRequest(http.MethodPost, am.addr, b)
if err != nil {
return err
}
@@ -88,10 +82,7 @@ func (am *AlertManager) send(ctx context.Context, alerts []Alert, headers map[st
req = req.WithContext(ctx)
if am.authCfg != nil {
err = am.authCfg.SetHeaders(req, true)
if err != nil {
return err
}
am.authCfg.SetHeaders(req, true)
}
resp, err := am.client.Do(req)
if err != nil {
@@ -100,16 +91,12 @@ func (am *AlertManager) send(ctx context.Context, alerts []Alert, headers map[st
defer func() { _ = resp.Body.Close() }()
amURL := am.addr.Redacted()
if *showNotifierURL {
amURL = am.addr.String()
}
if resp.StatusCode != http.StatusOK {
body, err := io.ReadAll(resp.Body)
if err != nil {
return fmt.Errorf("failed to read response from %q: %w", amURL, err)
return fmt.Errorf("failed to read response from %q: %w", am.addr, err)
}
return fmt.Errorf("invalid SC %d from %q; response body: %s", resp.StatusCode, amURL, string(body))
return fmt.Errorf("invalid SC %d from %q; response body: %s", resp.StatusCode, am.addr, string(body))
}
return nil
}
@@ -149,15 +136,8 @@ func NewAlertManager(alertManagerURL string, fn AlertURLGenerator, authCfg proma
return nil, fmt.Errorf("failed to configure auth: %w", err)
}
amURL, err := url.Parse(alertManagerURL)
if err != nil {
return nil, fmt.Errorf("provided incorrect notifier url: %w", err)
}
if !*showNotifierURL {
alertManagerURL = amURL.Redacted()
}
return &AlertManager{
addr: amURL,
addr: alertManagerURL,
argFunc: fn,
authCfg: aCfg,
relabelConfigs: relabelCfg,

View File

@@ -14,6 +14,7 @@ import (
func TestConfigWatcherReload(t *testing.T) {
f, err := os.CreateTemp("", "")
defer os.Remove(f.Name())
if err != nil {
t.Fatal(err)
}
@@ -36,6 +37,7 @@ static_configs:
}
f2, err := os.CreateTemp("", "")
defer os.Remove(f2.Name())
if err != nil {
t.Fatal(err)
}
@@ -63,6 +65,7 @@ func TestConfigWatcherStart(t *testing.T) {
defer consulSDServer.Close()
consulSDFile, err := os.CreateTemp("", "")
defer os.Remove(consulSDFile.Name())
if err != nil {
t.Fatal(err)
}
@@ -109,6 +112,7 @@ func TestConfigWatcherReloadConcurrent(t *testing.T) {
defer consulSDServer2.Close()
consulSDFile, err := os.CreateTemp("", "")
defer os.Remove(consulSDFile.Name())
if err != nil {
t.Fatal(err)
}
@@ -125,6 +129,7 @@ consul_sd_configs:
`, consulSDServer1.URL, consulSDServer2.URL))
staticAndConsulSDFile, err := os.CreateTemp("", "")
defer os.Remove(staticAndConsulSDFile.Name())
if err != nil {
t.Fatal(err)
}

View File

@@ -1,59 +0,0 @@
package notifier
import (
"context"
"fmt"
"sync"
"time"
)
// FakeNotifier is a mock notifier
type FakeNotifier struct {
sync.Mutex
alerts []Alert
// records number of received alerts in total
counter int
}
// Close does nothing
func (*FakeNotifier) Close() {}
// Addr returns ""
func (*FakeNotifier) Addr() string { return "" }
// Send sets alerts and increases counter
func (fn *FakeNotifier) Send(_ context.Context, alerts []Alert, _ map[string]string) error {
fn.Lock()
defer fn.Unlock()
fn.counter += len(alerts)
fn.alerts = alerts
return nil
}
// GetCounter returns received alerts count
func (fn *FakeNotifier) GetCounter() int {
fn.Lock()
defer fn.Unlock()
return fn.counter
}
// GetAlerts returns stored alerts
func (fn *FakeNotifier) GetAlerts() []Alert {
fn.Lock()
defer fn.Unlock()
return fn.alerts
}
// FaultyNotifier is a mock notifier that Send() will return failed response
type FaultyNotifier struct {
FakeNotifier
}
// Send returns failed response
func (fn *FaultyNotifier) Send(ctx context.Context, _ []Alert, _ map[string]string) error {
d, ok := ctx.Deadline()
if ok {
time.Sleep(time.Until(d))
}
return fmt.Errorf("send failed")
}

View File

@@ -19,8 +19,6 @@ var (
addrs = flagutil.NewArrayString("notifier.url", "Prometheus Alertmanager URL, e.g. http://127.0.0.1:9093. "+
"List all Alertmanager URLs if it runs in the cluster mode to ensure high availability.")
showNotifierURL = flag.Bool("notifier.showURL", false, "Whether to avoid stripping sensitive information such as passwords from URL in log messages or UI for -notifier.url. "+
"It is hidden by default, since it can contain sensitive info such as auth key")
blackHole = flag.Bool("notifier.blackhole", false, "Whether to blackhole alerting notifications. "+
"Enable this flag if you want vmalert to evaluate alerting rules without sending any notifications to external receivers (eg. alertmanager). "+
"`-notifier.url`, `-notifier.config` and `-notifier.blackhole` are mutually exclusive.")
@@ -131,13 +129,6 @@ func Init(gen AlertURLGenerator, extLabels map[string]string, extURL string) (fu
return cw.notifiers, nil
}
// InitSecretFlags must be called after flag.Parse and before any logging
func InitSecretFlags() {
if !*showNotifierURL {
flagutil.RegisterSecretFlag("notifier.url")
}
}
func notifiersFromFlags(gen AlertURLGenerator) ([]Notifier, error) {
var notifiers []Notifier
for i, addr := range *addrs {

View File

@@ -1,4 +1,4 @@
package rule
package main
import (
"context"
@@ -49,8 +49,7 @@ func (rr *RecordingRule) ID() uint64 {
return rr.RuleID
}
// NewRecordingRule creates a new RecordingRule
func NewRecordingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rule) *RecordingRule {
func newRecordingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rule) *RecordingRule {
rr := &RecordingRule{
Type: group.Type,
RuleID: cfg.ID,
@@ -67,22 +66,17 @@ func NewRecordingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rul
}),
}
entrySize := *ruleUpdateEntriesLimit
if cfg.UpdateEntriesLimit != nil {
entrySize = *cfg.UpdateEntriesLimit
}
if entrySize < 1 {
entrySize = 1
}
rr.state = &ruleState{
entries: make([]StateEntry, entrySize),
rr.state = newRuleState(*cfg.UpdateEntriesLimit)
} else {
rr.state = newRuleState(*ruleUpdateEntriesLimit)
}
labels := fmt.Sprintf(`recording=%q, group=%q, id="%d"`, rr.Name, group.Name, rr.ID())
rr.metrics.errors = utils.GetOrCreateGauge(fmt.Sprintf(`vmalert_recording_rules_error{%s}`, labels),
func() float64 {
e := rr.state.getLast()
if e.Err == nil {
if e.err == nil {
return 0
}
return 1
@@ -90,21 +84,21 @@ func NewRecordingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rul
rr.metrics.samples = utils.GetOrCreateGauge(fmt.Sprintf(`vmalert_recording_rules_last_evaluation_samples{%s}`, labels),
func() float64 {
e := rr.state.getLast()
return float64(e.Samples)
return float64(e.samples)
})
return rr
}
// close unregisters rule metrics
func (rr *RecordingRule) close() {
// Close unregisters rule metrics
func (rr *RecordingRule) Close() {
rr.metrics.errors.Unregister()
rr.metrics.samples.Unregister()
}
// execRange executes recording rule on the given time range similarly to Exec.
// ExecRange executes recording rule on the given time range similarly to Exec.
// It doesn't update internal states of the Rule and meant to be used just
// to get time series for backfilling.
func (rr *RecordingRule) execRange(ctx context.Context, start, end time.Time) ([]prompbmarshal.TimeSeries, error) {
func (rr *RecordingRule) ExecRange(ctx context.Context, start, end time.Time) ([]prompbmarshal.TimeSeries, error) {
res, err := rr.q.QueryRange(ctx, rr.Expr, start, end)
if err != nil {
return nil, err
@@ -123,17 +117,17 @@ func (rr *RecordingRule) execRange(ctx context.Context, start, end time.Time) ([
return tss, nil
}
// exec executes RecordingRule expression via the given Querier.
func (rr *RecordingRule) exec(ctx context.Context, ts time.Time, limit int) ([]prompbmarshal.TimeSeries, error) {
// Exec executes RecordingRule expression via the given Querier.
func (rr *RecordingRule) Exec(ctx context.Context, ts time.Time, limit int) ([]prompbmarshal.TimeSeries, error) {
start := time.Now()
res, req, err := rr.q.Query(ctx, rr.Expr, ts)
curState := StateEntry{
Time: start,
At: ts,
Duration: time.Since(start),
Samples: len(res.Data),
SeriesFetched: res.SeriesFetched,
Curl: requestToCurl(req),
curState := ruleStateEntry{
time: start,
at: ts,
duration: time.Since(start),
samples: len(res.Data),
seriesFetched: res.SeriesFetched,
curl: requestToCurl(req),
}
defer func() {
@@ -141,15 +135,15 @@ func (rr *RecordingRule) exec(ctx context.Context, ts time.Time, limit int) ([]p
}()
if err != nil {
curState.Err = fmt.Errorf("failed to execute query %q: %w", rr.Expr, err)
return nil, curState.Err
curState.err = fmt.Errorf("failed to execute query %q: %w", rr.Expr, err)
return nil, curState.err
}
qMetrics := res.Data
numSeries := len(qMetrics)
if limit > 0 && numSeries > limit {
curState.Err = fmt.Errorf("exec exceeded limit of %d with %d series", limit, numSeries)
return nil, curState.Err
curState.err = fmt.Errorf("exec exceeded limit of %d with %d series", limit, numSeries)
return nil, curState.err
}
duplicates := make(map[string]struct{}, len(qMetrics))
@@ -158,8 +152,8 @@ func (rr *RecordingRule) exec(ctx context.Context, ts time.Time, limit int) ([]p
ts := rr.toTimeSeries(r)
key := stringifyLabels(ts)
if _, ok := duplicates[key]; ok {
curState.Err = fmt.Errorf("original metric %v; resulting labels %q: %w", r, key, errDuplicate)
return nil, curState.Err
curState.err = fmt.Errorf("original metric %v; resulting labels %q: %w", r, key, errDuplicate)
return nil, curState.err
}
duplicates[key] = struct{}{}
tss = append(tss, ts)
@@ -199,8 +193,8 @@ func (rr *RecordingRule) toTimeSeries(m datasource.Metric) prompbmarshal.TimeSer
return newTimeSeries(m.Values, m.Timestamps, labels)
}
// updateWith copies all significant fields.
func (rr *RecordingRule) updateWith(r Rule) error {
// UpdateWith copies all significant fields.
func (rr *RecordingRule) UpdateWith(r Rule) error {
nr, ok := r.(*RecordingRule)
if !ok {
return fmt.Errorf("BUG: attempt to update recroding rule with wrong type %#v", r)
@@ -210,3 +204,32 @@ func (rr *RecordingRule) updateWith(r Rule) error {
rr.q = nr.q
return nil
}
// ToAPI returns Rule's representation in form
// of APIRule
func (rr *RecordingRule) ToAPI() APIRule {
lastState := rr.state.getLast()
r := APIRule{
Type: "recording",
DatasourceType: rr.Type.String(),
Name: rr.Name,
Query: rr.Expr,
Labels: rr.Labels,
LastEvaluation: lastState.time,
EvaluationTime: lastState.duration.Seconds(),
Health: "ok",
LastSamples: lastState.samples,
LastSeriesFetched: lastState.seriesFetched,
MaxUpdates: rr.state.size(),
Updates: rr.state.getAll(),
// encode as strings to avoid rounding
ID: fmt.Sprintf("%d", rr.ID()),
GroupID: fmt.Sprintf("%d", rr.GroupID),
}
if lastState.err != nil {
r.LastError = lastState.err.Error()
r.Health = "err"
}
return r
}

View File

@@ -1,4 +1,4 @@
package rule
package main
import (
"context"
@@ -56,12 +56,10 @@ func TestRecordingRule_Exec(t *testing.T) {
Name: "job:foo",
Labels: map[string]string{
"source": "test",
},
},
}},
[]datasource.Metric{
metricWithValueAndLabels(t, 2, "__name__", "foo", "job", "foo"),
metricWithValueAndLabels(t, 1, "__name__", "bar", "job", "bar"),
},
metricWithValueAndLabels(t, 1, "__name__", "bar", "job", "bar")},
[]prompbmarshal.TimeSeries{
newTimeSeries([]float64{2}, []int64{timestamp.UnixNano()}, map[string]string{
"__name__": "job:foo",
@@ -78,11 +76,11 @@ func TestRecordingRule_Exec(t *testing.T) {
}
for _, tc := range testCases {
t.Run(tc.rule.Name, func(t *testing.T) {
fq := &datasource.FakeQuerier{}
fq.Add(tc.metrics...)
fq := &fakeQuerier{}
fq.add(tc.metrics...)
tc.rule.q = fq
tc.rule.state = &ruleState{entries: make([]StateEntry, 10)}
tss, err := tc.rule.exec(context.TODO(), time.Now(), 0)
tc.rule.state = newRuleState(10)
tss, err := tc.rule.Exec(context.TODO(), time.Now(), 0)
if err != nil {
t.Fatalf("unexpected Exec err: %s", err)
}
@@ -143,8 +141,7 @@ func TestRecordingRule_ExecRange(t *testing.T) {
}},
[]datasource.Metric{
metricWithValueAndLabels(t, 2, "__name__", "foo", "job", "foo"),
metricWithValueAndLabels(t, 1, "__name__", "bar", "job", "bar"),
},
metricWithValueAndLabels(t, 1, "__name__", "bar", "job", "bar")},
[]prompbmarshal.TimeSeries{
newTimeSeries([]float64{2}, []int64{timestamp.UnixNano()}, map[string]string{
"__name__": "job:foo",
@@ -161,10 +158,10 @@ func TestRecordingRule_ExecRange(t *testing.T) {
}
for _, tc := range testCases {
t.Run(tc.rule.Name, func(t *testing.T) {
fq := &datasource.FakeQuerier{}
fq.Add(tc.metrics...)
fq := &fakeQuerier{}
fq.add(tc.metrics...)
tc.rule.q = fq
tss, err := tc.rule.execRange(context.TODO(), time.Now(), time.Now())
tss, err := tc.rule.ExecRange(context.TODO(), time.Now(), time.Now())
if err != nil {
t.Fatalf("unexpected Exec err: %s", err)
}
@@ -201,15 +198,15 @@ func TestRecordingRuleLimit(t *testing.T) {
metricWithValuesAndLabels(t, []float64{2, 3}, "__name__", "bar", "job", "bar"),
metricWithValuesAndLabels(t, []float64{4, 5, 6}, "__name__", "baz", "job", "baz"),
}
rule := &RecordingRule{Name: "job:foo", state: &ruleState{entries: make([]StateEntry, 10)}, Labels: map[string]string{
rule := &RecordingRule{Name: "job:foo", state: newRuleState(10), Labels: map[string]string{
"source": "test_limit",
}}
var err error
for _, testCase := range testCases {
fq := &datasource.FakeQuerier{}
fq.Add(testMetrics...)
fq := &fakeQuerier{}
fq.add(testMetrics...)
rule.q = fq
_, err = rule.exec(context.TODO(), timestamp, testCase.limit)
_, err = rule.Exec(context.TODO(), timestamp, testCase.limit)
if err != nil && !strings.EqualFold(err.Error(), testCase.err) {
t.Fatal(err)
}
@@ -218,17 +215,18 @@ func TestRecordingRuleLimit(t *testing.T) {
func TestRecordingRule_ExecNegative(t *testing.T) {
rr := &RecordingRule{
Name: "job:foo",
Name: "job:foo",
state: newRuleState(10),
Labels: map[string]string{
"job": "test",
},
state: &ruleState{entries: make([]StateEntry, 10)},
}
fq := &datasource.FakeQuerier{}
fq := &fakeQuerier{}
expErr := "connection reset by peer"
fq.SetErr(errors.New(expErr))
fq.setErr(errors.New(expErr))
rr.q = fq
_, err := rr.exec(context.TODO(), time.Now(), 0)
_, err := rr.Exec(context.TODO(), time.Now(), 0)
if err == nil {
t.Fatalf("expected to get err; got nil")
}
@@ -236,14 +234,14 @@ func TestRecordingRule_ExecNegative(t *testing.T) {
t.Fatalf("expected to get err %q; got %q insterad", expErr, err)
}
fq.Reset()
fq.reset()
// add metrics which differs only by `job` label
// which will be overridden by rule
fq.Add(metricWithValueAndLabels(t, 1, "__name__", "foo", "job", "foo"))
fq.Add(metricWithValueAndLabels(t, 2, "__name__", "foo", "job", "bar"))
fq.add(metricWithValueAndLabels(t, 1, "__name__", "foo", "job", "foo"))
fq.add(metricWithValueAndLabels(t, 2, "__name__", "foo", "job", "bar"))
_, err = rr.exec(context.TODO(), time.Now(), 0)
_, err = rr.Exec(context.TODO(), time.Now(), 0)
if err == nil {
t.Fatalf("expected to get err; got nil")
}

View File

@@ -1,325 +0,0 @@
package remotewrite
import (
"bytes"
"context"
"flag"
"fmt"
"io"
"net/http"
"path"
"strings"
"sync"
"time"
"github.com/golang/snappy"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
"github.com/VictoriaMetrics/metrics"
)
const (
defaultConcurrency = 4
defaultMaxBatchSize = 1e3
defaultMaxQueueSize = 1e5
defaultFlushInterval = 5 * time.Second
defaultWriteTimeout = 30 * time.Second
)
var (
disablePathAppend = flag.Bool("remoteWrite.disablePathAppend", false, "Whether to disable automatic appending of '/api/v1/write' path to the configured -remoteWrite.url.")
sendTimeout = flag.Duration("remoteWrite.sendTimeout", 30*time.Second, "Timeout for sending data to the configured -remoteWrite.url.")
retryMinInterval = flag.Duration("remoteWrite.retryMinInterval", time.Second, "The minimum delay between retry attempts. Every next retry attempt will double the delay to prevent hammering of remote database. See also -remoteWrite.retryMaxInterval")
retryMaxTime = flag.Duration("remoteWrite.retryMaxTime", time.Second*30, "The max time spent on retry attempts for the failed remote-write request. Change this value if it is expected for remoteWrite.url to be unreachable for more than -remoteWrite.retryMaxTime. See also -remoteWrite.retryMinInterval")
)
// Client is an asynchronous HTTP client for writing
// timeseries via remote write protocol.
type Client struct {
addr string
c *http.Client
authCfg *promauth.Config
input chan prompbmarshal.TimeSeries
flushInterval time.Duration
maxBatchSize int
maxQueueSize int
wg sync.WaitGroup
doneCh chan struct{}
}
// Config is config for remote write client.
type Config struct {
// Addr of remote storage
Addr string
AuthCfg *promauth.Config
// Concurrency defines number of readers that
// concurrently read from the queue and flush data
Concurrency int
// MaxBatchSize defines max number of timeseries
// to be flushed at once
MaxBatchSize int
// MaxQueueSize defines max length of input queue
// populated by Push method.
// Push will be rejected once queue is full.
MaxQueueSize int
// FlushInterval defines time interval for flushing batches
FlushInterval time.Duration
// Transport will be used by the underlying http.Client
Transport *http.Transport
}
// NewClient returns asynchronous client for
// writing timeseries via remotewrite protocol.
func NewClient(ctx context.Context, cfg Config) (*Client, error) {
if cfg.Addr == "" {
return nil, fmt.Errorf("config.Addr can't be empty")
}
if cfg.MaxBatchSize == 0 {
cfg.MaxBatchSize = defaultMaxBatchSize
}
if cfg.MaxQueueSize == 0 {
cfg.MaxQueueSize = defaultMaxQueueSize
}
if cfg.FlushInterval == 0 {
cfg.FlushInterval = defaultFlushInterval
}
if cfg.Transport == nil {
cfg.Transport = http.DefaultTransport.(*http.Transport).Clone()
}
cc := defaultConcurrency
if cfg.Concurrency > 0 {
cc = cfg.Concurrency
}
c := &Client{
c: &http.Client{
Timeout: *sendTimeout,
Transport: cfg.Transport,
},
addr: strings.TrimSuffix(cfg.Addr, "/"),
authCfg: cfg.AuthCfg,
flushInterval: cfg.FlushInterval,
maxBatchSize: cfg.MaxBatchSize,
maxQueueSize: cfg.MaxQueueSize,
doneCh: make(chan struct{}),
input: make(chan prompbmarshal.TimeSeries, cfg.MaxQueueSize),
}
for i := 0; i < cc; i++ {
c.run(ctx)
}
return c, nil
}
// Push adds timeseries into queue for writing into remote storage.
// Push returns and error if client is stopped or if queue is full.
func (c *Client) Push(s prompbmarshal.TimeSeries) error {
select {
case <-c.doneCh:
return fmt.Errorf("client is closed")
case c.input <- s:
return nil
default:
return fmt.Errorf("failed to push timeseries - queue is full (%d entries). "+
"Queue size is controlled by -remoteWrite.maxQueueSize flag",
c.maxQueueSize)
}
}
// Close stops the client and waits for all goroutines
// to exit.
func (c *Client) Close() error {
if c.doneCh == nil {
return fmt.Errorf("client is already closed")
}
close(c.input)
close(c.doneCh)
c.wg.Wait()
return nil
}
func (c *Client) run(ctx context.Context) {
ticker := time.NewTicker(c.flushInterval)
wr := &prompbmarshal.WriteRequest{}
shutdown := func() {
for ts := range c.input {
wr.Timeseries = append(wr.Timeseries, ts)
}
lastCtx, cancel := context.WithTimeout(context.Background(), defaultWriteTimeout)
logger.Infof("shutting down remote write client and flushing remained %d series", len(wr.Timeseries))
c.flush(lastCtx, wr)
cancel()
}
c.wg.Add(1)
go func() {
defer c.wg.Done()
defer ticker.Stop()
for {
select {
case <-c.doneCh:
shutdown()
return
case <-ctx.Done():
shutdown()
return
case <-ticker.C:
c.flush(ctx, wr)
case ts, ok := <-c.input:
if !ok {
continue
}
wr.Timeseries = append(wr.Timeseries, ts)
if len(wr.Timeseries) >= c.maxBatchSize {
c.flush(ctx, wr)
}
}
}
}()
}
var (
sentRows = metrics.NewCounter(`vmalert_remotewrite_sent_rows_total`)
sentBytes = metrics.NewCounter(`vmalert_remotewrite_sent_bytes_total`)
sendDuration = metrics.NewFloatCounter(`vmalert_remotewrite_send_duration_seconds_total`)
droppedRows = metrics.NewCounter(`vmalert_remotewrite_dropped_rows_total`)
droppedBytes = metrics.NewCounter(`vmalert_remotewrite_dropped_bytes_total`)
bufferFlushDuration = metrics.NewHistogram(`vmalert_remotewrite_flush_duration_seconds`)
_ = metrics.NewGauge(`vmalert_remotewrite_concurrency`, func() float64 {
return float64(*concurrency)
})
)
// flush is a blocking function that marshals WriteRequest and sends
// it to remote-write endpoint. Flush performs limited amount of retries
// if request fails.
func (c *Client) flush(ctx context.Context, wr *prompbmarshal.WriteRequest) {
if len(wr.Timeseries) < 1 {
return
}
defer prompbmarshal.ResetWriteRequest(wr)
defer bufferFlushDuration.UpdateDuration(time.Now())
data, err := wr.Marshal()
if err != nil {
logger.Errorf("failed to marshal WriteRequest: %s", err)
return
}
b := snappy.Encode(nil, data)
retryInterval, maxRetryInterval := *retryMinInterval, *retryMaxTime
if retryInterval > maxRetryInterval {
retryInterval = maxRetryInterval
}
timeStart := time.Now()
defer func() {
sendDuration.Add(time.Since(timeStart).Seconds())
}()
L:
for attempts := 0; ; attempts++ {
err := c.send(ctx, b)
if err == nil {
sentRows.Add(len(wr.Timeseries))
sentBytes.Add(len(b))
return
}
_, isNotRetriable := err.(*nonRetriableError)
logger.Warnf("attempt %d to send request failed: %s (retriable: %v)", attempts+1, err, !isNotRetriable)
if isNotRetriable {
// exit fast if error isn't retriable
break
}
// check if request has been cancelled before backoff
select {
case <-ctx.Done():
logger.Errorf("interrupting retry attempt %d: context cancelled", attempts+1)
break L
default:
}
timeLeftForRetries := maxRetryInterval - time.Since(timeStart)
if timeLeftForRetries < 0 {
// the max retry time has passed, so we give up
break
}
if retryInterval > timeLeftForRetries {
retryInterval = timeLeftForRetries
}
// sleeping to prevent remote db hammering
time.Sleep(retryInterval)
retryInterval *= 2
}
droppedRows.Add(len(wr.Timeseries))
droppedBytes.Add(len(b))
logger.Errorf("attempts to send remote-write request failed - dropping %d time series",
len(wr.Timeseries))
}
func (c *Client) send(ctx context.Context, data []byte) error {
r := bytes.NewReader(data)
req, err := http.NewRequest(http.MethodPost, c.addr, r)
if err != nil {
return fmt.Errorf("failed to create new HTTP request: %w", err)
}
// RFC standard compliant headers
req.Header.Set("Content-Encoding", "snappy")
req.Header.Set("Content-Type", "application/x-protobuf")
// Prometheus compliant headers
req.Header.Set("X-Prometheus-Remote-Write-Version", "0.1.0")
if c.authCfg != nil {
err = c.authCfg.SetHeaders(req, true)
if err != nil {
return &nonRetriableError{err: err}
}
}
if !*disablePathAppend {
req.URL.Path = path.Join(req.URL.Path, "/api/v1/write")
}
resp, err := c.c.Do(req.WithContext(ctx))
if err != nil {
return fmt.Errorf("error while sending request to %s: %w; Data len %d(%d)",
req.URL.Redacted(), err, len(data), r.Size())
}
defer func() { _ = resp.Body.Close() }()
body, _ := io.ReadAll(resp.Body)
// according to https://prometheus.io/docs/concepts/remote_write_spec/
// Prometheus remote Write compatible receivers MUST
switch resp.StatusCode / 100 {
case 2:
// respond with a HTTP 2xx status code when the write is successful.
return nil
case 4:
if resp.StatusCode != http.StatusTooManyRequests {
// MUST NOT retry write requests on HTTP 4xx responses other than 429
return &nonRetriableError{fmt.Errorf("unexpected response code %d for %s. Response body %q",
resp.StatusCode, req.URL.Redacted(), body)}
}
fallthrough
default:
return fmt.Errorf("unexpected response code %d for %s. Response body %q",
resp.StatusCode, req.URL.Redacted(), body)
}
}
type nonRetriableError struct {
err error
}
func (e *nonRetriableError) Error() string {
return e.err.Error()
}

View File

@@ -1,97 +0,0 @@
package remotewrite
import (
"bytes"
"fmt"
"io"
"net/http"
"path"
"strings"
"sync"
"github.com/golang/snappy"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/utils"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
)
// DebugClient won't push series periodically, but will write data to remote endpoint
// immediately when Push() is called
type DebugClient struct {
addr string
c *http.Client
wg sync.WaitGroup
}
// NewDebugClient initiates and returns a new DebugClient
func NewDebugClient() (*DebugClient, error) {
if *addr == "" {
return nil, nil
}
t, err := utils.Transport(*addr, *tlsCertFile, *tlsKeyFile, *tlsCAFile, *tlsServerName, *tlsInsecureSkipVerify)
if err != nil {
return nil, fmt.Errorf("failed to create transport: %w", err)
}
c := &DebugClient{
c: &http.Client{
Timeout: *sendTimeout,
Transport: t,
},
addr: strings.TrimSuffix(*addr, "/"),
}
return c, nil
}
// Push sends the given timeseries to the remote storage.
func (c *DebugClient) Push(s prompbmarshal.TimeSeries) error {
c.wg.Add(1)
defer c.wg.Done()
wr := &prompbmarshal.WriteRequest{Timeseries: []prompbmarshal.TimeSeries{s}}
data, err := wr.Marshal()
if err != nil {
return fmt.Errorf("failed to marshal the given time series: %w", err)
}
return c.send(data)
}
// Close stops the DebugClient
func (c *DebugClient) Close() error {
c.wg.Wait()
return nil
}
func (c *DebugClient) send(data []byte) error {
b := snappy.Encode(nil, data)
r := bytes.NewReader(b)
req, err := http.NewRequest(http.MethodPost, c.addr, r)
if err != nil {
return fmt.Errorf("failed to create new HTTP request: %w", err)
}
// RFC standard compliant headers
req.Header.Set("Content-Encoding", "snappy")
req.Header.Set("Content-Type", "application/x-protobuf")
// Prometheus compliant headers
req.Header.Set("X-Prometheus-Remote-Write-Version", "0.1.0")
if !*disablePathAppend {
req.URL.Path = path.Join(req.URL.Path, "/api/v1/write")
}
resp, err := c.c.Do(req)
if err != nil {
return fmt.Errorf("error while sending request to %s: %w; Data len %d(%d)",
req.URL.Redacted(), err, len(data), r.Size())
}
defer func() { _ = resp.Body.Close() }()
if resp.StatusCode/100 == 2 {
return nil
}
body, _ := io.ReadAll(resp.Body)
return fmt.Errorf("unexpected response code %d for %s. Response body %q",
resp.StatusCode, req.URL.Redacted(), body)
}

View File

@@ -1,50 +0,0 @@
package remotewrite
import (
"testing"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
)
func TestDebugClient_Push(t *testing.T) {
testSrv := newRWServer()
oldAddr := *addr
*addr = testSrv.URL
defer func() {
*addr = oldAddr
}()
client, err := NewDebugClient()
if err != nil {
t.Fatalf("failed to create debug client: %s", err)
}
const rowsN = 100
var sent int
for i := 0; i < rowsN; i++ {
s := prompbmarshal.TimeSeries{
Samples: []prompbmarshal.Sample{{
Value: float64(i),
Timestamp: time.Now().Unix(),
}},
}
err := client.Push(s)
if err != nil {
t.Fatalf("unexpected err: %s", err)
}
if err == nil {
sent++
}
}
if sent == 0 {
t.Fatalf("0 series sent")
}
if err := client.Close(); err != nil {
t.Fatalf("failed to close client: %s", err)
}
got := testSrv.accepted()
if got != sent {
t.Fatalf("expected to have %d series; got %d", sent, got)
}
}

View File

@@ -1,13 +1,320 @@
package remotewrite
import (
"bytes"
"context"
"flag"
"fmt"
"io"
"net/http"
"path"
"strings"
"sync"
"time"
"github.com/golang/snappy"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
"github.com/VictoriaMetrics/metrics"
)
// RWClient represents an HTTP client for pushing data via remote write protocol
type RWClient interface {
// Push pushes the give time series to remote storage
Push(s prompbmarshal.TimeSeries) error
// Close stops the client. Client can't be reused after Close call.
Close() error
var (
disablePathAppend = flag.Bool("remoteWrite.disablePathAppend", false, "Whether to disable automatic appending of '/api/v1/write' path to the configured -remoteWrite.url.")
sendTimeout = flag.Duration("remoteWrite.sendTimeout", 30*time.Second, "Timeout for sending data to the configured -remoteWrite.url.")
retryMinInterval = flag.Duration("remoteWrite.retryMinInterval", time.Second, "The minimum delay between retry attempts. Every next retry attempt will double the delay to prevent hammering of remote database. See also -remoteWrite.retryMaxInterval")
retryMaxTime = flag.Duration("remoteWrite.retryMaxTime", time.Second*30, "The max time spent on retry attempts for the failed remote-write request. Change this value if it is expected for remoteWrite.url to be unreachable for more than -remoteWrite.retryMaxTime. See also -remoteWrite.retryMinInterval")
)
// Client is an asynchronous HTTP client for writing
// timeseries via remote write protocol.
type Client struct {
addr string
c *http.Client
authCfg *promauth.Config
input chan prompbmarshal.TimeSeries
flushInterval time.Duration
maxBatchSize int
maxQueueSize int
wg sync.WaitGroup
doneCh chan struct{}
}
// Config is config for remote write.
type Config struct {
// Addr of remote storage
Addr string
AuthCfg *promauth.Config
// Concurrency defines number of readers that
// concurrently read from the queue and flush data
Concurrency int
// MaxBatchSize defines max number of timeseries
// to be flushed at once
MaxBatchSize int
// MaxQueueSize defines max length of input queue
// populated by Push method.
// Push will be rejected once queue is full.
MaxQueueSize int
// FlushInterval defines time interval for flushing batches
FlushInterval time.Duration
// Transport will be used by the underlying http.Client
Transport *http.Transport
}
const (
defaultConcurrency = 4
defaultMaxBatchSize = 1e3
defaultMaxQueueSize = 1e5
defaultFlushInterval = 5 * time.Second
defaultWriteTimeout = 30 * time.Second
)
// NewClient returns asynchronous client for
// writing timeseries via remotewrite protocol.
func NewClient(ctx context.Context, cfg Config) (*Client, error) {
if cfg.Addr == "" {
return nil, fmt.Errorf("config.Addr can't be empty")
}
if cfg.MaxBatchSize == 0 {
cfg.MaxBatchSize = defaultMaxBatchSize
}
if cfg.MaxQueueSize == 0 {
cfg.MaxQueueSize = defaultMaxQueueSize
}
if cfg.FlushInterval == 0 {
cfg.FlushInterval = defaultFlushInterval
}
if cfg.Transport == nil {
cfg.Transport = http.DefaultTransport.(*http.Transport).Clone()
}
cc := defaultConcurrency
if cfg.Concurrency > 0 {
cc = cfg.Concurrency
}
c := &Client{
c: &http.Client{
Timeout: *sendTimeout,
Transport: cfg.Transport,
},
addr: strings.TrimSuffix(cfg.Addr, "/"),
authCfg: cfg.AuthCfg,
flushInterval: cfg.FlushInterval,
maxBatchSize: cfg.MaxBatchSize,
maxQueueSize: cfg.MaxQueueSize,
doneCh: make(chan struct{}),
input: make(chan prompbmarshal.TimeSeries, cfg.MaxQueueSize),
}
for i := 0; i < cc; i++ {
c.run(ctx)
}
return c, nil
}
// Push adds timeseries into queue for writing into remote storage.
// Push returns and error if client is stopped or if queue is full.
func (c *Client) Push(s prompbmarshal.TimeSeries) error {
select {
case <-c.doneCh:
return fmt.Errorf("client is closed")
case c.input <- s:
return nil
default:
return fmt.Errorf("failed to push timeseries - queue is full (%d entries). "+
"Queue size is controlled by -remoteWrite.maxQueueSize flag",
c.maxQueueSize)
}
}
// Close stops the client and waits for all goroutines
// to exit.
func (c *Client) Close() error {
if c.doneCh == nil {
return fmt.Errorf("client is already closed")
}
close(c.input)
close(c.doneCh)
c.wg.Wait()
return nil
}
func (c *Client) run(ctx context.Context) {
ticker := time.NewTicker(c.flushInterval)
wr := &prompbmarshal.WriteRequest{}
shutdown := func() {
for ts := range c.input {
wr.Timeseries = append(wr.Timeseries, ts)
}
lastCtx, cancel := context.WithTimeout(context.Background(), defaultWriteTimeout)
logger.Infof("shutting down remote write client and flushing remained %d series", len(wr.Timeseries))
c.flush(lastCtx, wr)
cancel()
}
c.wg.Add(1)
go func() {
defer c.wg.Done()
defer ticker.Stop()
for {
select {
case <-c.doneCh:
shutdown()
return
case <-ctx.Done():
shutdown()
return
case <-ticker.C:
c.flush(ctx, wr)
case ts, ok := <-c.input:
if !ok {
continue
}
wr.Timeseries = append(wr.Timeseries, ts)
if len(wr.Timeseries) >= c.maxBatchSize {
c.flush(ctx, wr)
}
}
}
}()
}
var (
sentRows = metrics.NewCounter(`vmalert_remotewrite_sent_rows_total`)
sentBytes = metrics.NewCounter(`vmalert_remotewrite_sent_bytes_total`)
sendDuration = metrics.NewFloatCounter(`vmalert_remotewrite_send_duration_seconds_total`)
droppedRows = metrics.NewCounter(`vmalert_remotewrite_dropped_rows_total`)
droppedBytes = metrics.NewCounter(`vmalert_remotewrite_dropped_bytes_total`)
bufferFlushDuration = metrics.NewHistogram(`vmalert_remotewrite_flush_duration_seconds`)
_ = metrics.NewGauge(`vmalert_remotewrite_concurrency`, func() float64 {
return float64(*concurrency)
})
)
// flush is a blocking function that marshals WriteRequest and sends
// it to remote-write endpoint. Flush performs limited amount of retries
// if request fails.
func (c *Client) flush(ctx context.Context, wr *prompbmarshal.WriteRequest) {
if len(wr.Timeseries) < 1 {
return
}
defer prompbmarshal.ResetWriteRequest(wr)
defer bufferFlushDuration.UpdateDuration(time.Now())
data, err := wr.Marshal()
if err != nil {
logger.Errorf("failed to marshal WriteRequest: %s", err)
return
}
b := snappy.Encode(nil, data)
retryInterval, maxRetryInterval := *retryMinInterval, *retryMaxTime
if retryInterval > maxRetryInterval {
retryInterval = maxRetryInterval
}
timeStart := time.Now()
defer sendDuration.Add(time.Since(timeStart).Seconds())
L:
for attempts := 0; ; attempts++ {
err := c.send(ctx, b)
if err == nil {
sentRows.Add(len(wr.Timeseries))
sentBytes.Add(len(b))
return
}
_, isNotRetriable := err.(*nonRetriableError)
logger.Warnf("attempt %d to send request failed: %s (retriable: %v)", attempts+1, err, !isNotRetriable)
if isNotRetriable {
// exit fast if error isn't retriable
break
}
// check if request has been cancelled before backoff
select {
case <-ctx.Done():
logger.Errorf("interrupting retry attempt %d: context cancelled", attempts+1)
break L
default:
}
timeLeftForRetries := maxRetryInterval - time.Since(timeStart)
if timeLeftForRetries < 0 {
// the max retry time has passed, so we give up
break
}
if retryInterval > timeLeftForRetries {
retryInterval = timeLeftForRetries
}
// sleeping to prevent remote db hammering
time.Sleep(retryInterval)
retryInterval *= 2
}
droppedRows.Add(len(wr.Timeseries))
droppedBytes.Add(len(b))
logger.Errorf("attempts to send remote-write request failed - dropping %d time series",
len(wr.Timeseries))
}
func (c *Client) send(ctx context.Context, data []byte) error {
r := bytes.NewReader(data)
req, err := http.NewRequest(http.MethodPost, c.addr, r)
if err != nil {
return fmt.Errorf("failed to create new HTTP request: %w", err)
}
// RFC standard compliant headers
req.Header.Set("Content-Encoding", "snappy")
req.Header.Set("Content-Type", "application/x-protobuf")
// Prometheus compliant headers
req.Header.Set("X-Prometheus-Remote-Write-Version", "0.1.0")
if c.authCfg != nil {
c.authCfg.SetHeaders(req, true)
}
if !*disablePathAppend {
req.URL.Path = path.Join(req.URL.Path, "/api/v1/write")
}
resp, err := c.c.Do(req.WithContext(ctx))
if err != nil {
return fmt.Errorf("error while sending request to %s: %w; Data len %d(%d)",
req.URL.Redacted(), err, len(data), r.Size())
}
defer func() { _ = resp.Body.Close() }()
body, _ := io.ReadAll(resp.Body)
// according to https://prometheus.io/docs/concepts/remote_write_spec/
// Prometheus remote Write compatible receivers MUST
switch resp.StatusCode / 100 {
case 2:
// respond with a HTTP 2xx status code when the write is successful.
return nil
case 4:
if resp.StatusCode != http.StatusTooManyRequests {
// MUST NOT retry write requests on HTTP 4xx responses other than 429
return &nonRetriableError{fmt.Errorf("unexpected response code %d for %s. Response body %q",
resp.StatusCode, req.URL.Redacted(), body)}
}
fallthrough
default:
return fmt.Errorf("unexpected response code %d for %s. Response body %q",
resp.StatusCode, req.URL.Redacted(), body)
}
}
type nonRetriableError struct {
err error
}
func (e *nonRetriableError) Error() string {
return e.err.Error()
}

View File

@@ -1,16 +1,19 @@
package main
import (
"context"
"flag"
"fmt"
"strings"
"time"
"github.com/cheggaaa/pb/v3"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/remotewrite"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/rule"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
)
var (
@@ -30,7 +33,7 @@ var (
"Progress bar rendering might be verbose or break the logs parsing, so it is recommended to be disabled when not used in interactive mode.")
)
func replay(groupsCfg []config.Group, qb datasource.QuerierBuilder, rw remotewrite.RWClient) error {
func replay(groupsCfg []config.Group, qb datasource.QuerierBuilder, rw *remotewrite.Client) error {
if *replayMaxDatapoints < 1 {
return fmt.Errorf("replay.maxDatapointsPerQuery can't be lower than 1")
}
@@ -65,8 +68,8 @@ func replay(groupsCfg []config.Group, qb datasource.QuerierBuilder, rw remotewri
var total int
for _, cfg := range groupsCfg {
ng := rule.NewGroup(cfg, qb, *evaluationInterval, labels)
total += ng.Replay(tFrom, tTo, rw, *replayMaxDatapoints, *replayRuleRetryAttempts, *replayRulesDelay, *disableProgressBar)
ng := newGroup(cfg, qb, *evaluationInterval, labels)
total += ng.replay(tFrom, tTo, rw)
}
logger.Infof("replay finished! Imported %d samples", total)
if rw != nil {
@@ -74,3 +77,97 @@ func replay(groupsCfg []config.Group, qb datasource.QuerierBuilder, rw remotewri
}
return nil
}
func (g *Group) replay(start, end time.Time, rw *remotewrite.Client) int {
var total int
step := g.Interval * time.Duration(*replayMaxDatapoints)
ri := rangeIterator{start: start, end: end, step: step}
iterations := int(end.Sub(start)/step) + 1
fmt.Printf("\nGroup %q"+
"\ninterval: \t%v"+
"\nrequests to make: \t%d"+
"\nmax range per request: \t%v\n",
g.Name, g.Interval, iterations, step)
if g.Limit > 0 {
fmt.Printf("\nPlease note, `limit: %d` param has no effect during replay.\n",
g.Limit)
}
for _, rule := range g.Rules {
fmt.Printf("> Rule %q (ID: %d)\n", rule, rule.ID())
var bar *pb.ProgressBar
if !*disableProgressBar {
bar = pb.StartNew(iterations)
}
ri.reset()
for ri.next() {
n, err := replayRule(rule, ri.s, ri.e, rw)
if err != nil {
logger.Fatalf("rule %q: %s", rule, err)
}
total += n
if bar != nil {
bar.Increment()
}
}
if bar != nil {
bar.Finish()
}
// sleep to let remote storage to flush data on-disk
// so chained rules could be calculated correctly
time.Sleep(*replayRulesDelay)
}
return total
}
func replayRule(rule Rule, start, end time.Time, rw *remotewrite.Client) (int, error) {
var err error
var tss []prompbmarshal.TimeSeries
for i := 0; i < *replayRuleRetryAttempts; i++ {
tss, err = rule.ExecRange(context.Background(), start, end)
if err == nil {
break
}
logger.Errorf("attempt %d to execute rule %q failed: %s", i+1, rule, err)
time.Sleep(time.Second)
}
if err != nil { // means all attempts failed
return 0, err
}
if len(tss) < 1 {
return 0, nil
}
var n int
for _, ts := range tss {
if err := rw.Push(ts); err != nil {
return n, fmt.Errorf("remote write failure: %s", err)
}
n += len(ts.Samples)
}
return n, nil
}
type rangeIterator struct {
step time.Duration
start, end time.Time
iter int
s, e time.Time
}
func (ri *rangeIterator) reset() {
ri.iter = 0
ri.s, ri.e = time.Time{}, time.Time{}
}
func (ri *rangeIterator) next() bool {
ri.s = ri.start.Add(ri.step * time.Duration(ri.iter))
if !ri.end.After(ri.s) {
return false
}
ri.e = ri.s.Add(ri.step)
if ri.e.After(ri.end) {
ri.e = ri.end
}
ri.iter++
return true
}

View File

@@ -12,7 +12,7 @@ import (
)
type fakeReplayQuerier struct {
datasource.FakeQuerier
fakeQuerier
registry map[string]map[string]struct{}
}
@@ -170,3 +170,81 @@ func TestReplay(t *testing.T) {
})
}
}
func TestRangeIterator(t *testing.T) {
testCases := []struct {
ri rangeIterator
result [][2]time.Time
}{
{
ri: rangeIterator{
start: parseTime(t, "2021-01-01T12:00:00.000Z"),
end: parseTime(t, "2021-01-01T12:30:00.000Z"),
step: 5 * time.Minute,
},
result: [][2]time.Time{
{parseTime(t, "2021-01-01T12:00:00.000Z"), parseTime(t, "2021-01-01T12:05:00.000Z")},
{parseTime(t, "2021-01-01T12:05:00.000Z"), parseTime(t, "2021-01-01T12:10:00.000Z")},
{parseTime(t, "2021-01-01T12:10:00.000Z"), parseTime(t, "2021-01-01T12:15:00.000Z")},
{parseTime(t, "2021-01-01T12:15:00.000Z"), parseTime(t, "2021-01-01T12:20:00.000Z")},
{parseTime(t, "2021-01-01T12:20:00.000Z"), parseTime(t, "2021-01-01T12:25:00.000Z")},
{parseTime(t, "2021-01-01T12:25:00.000Z"), parseTime(t, "2021-01-01T12:30:00.000Z")},
},
},
{
ri: rangeIterator{
start: parseTime(t, "2021-01-01T12:00:00.000Z"),
end: parseTime(t, "2021-01-01T12:30:00.000Z"),
step: 45 * time.Minute,
},
result: [][2]time.Time{
{parseTime(t, "2021-01-01T12:00:00.000Z"), parseTime(t, "2021-01-01T12:30:00.000Z")},
{parseTime(t, "2021-01-01T12:30:00.000Z"), parseTime(t, "2021-01-01T12:30:00.000Z")},
},
},
{
ri: rangeIterator{
start: parseTime(t, "2021-01-01T12:00:12.000Z"),
end: parseTime(t, "2021-01-01T12:00:17.000Z"),
step: time.Second,
},
result: [][2]time.Time{
{parseTime(t, "2021-01-01T12:00:12.000Z"), parseTime(t, "2021-01-01T12:00:13.000Z")},
{parseTime(t, "2021-01-01T12:00:13.000Z"), parseTime(t, "2021-01-01T12:00:14.000Z")},
{parseTime(t, "2021-01-01T12:00:14.000Z"), parseTime(t, "2021-01-01T12:00:15.000Z")},
{parseTime(t, "2021-01-01T12:00:15.000Z"), parseTime(t, "2021-01-01T12:00:16.000Z")},
{parseTime(t, "2021-01-01T12:00:16.000Z"), parseTime(t, "2021-01-01T12:00:17.000Z")},
},
},
}
for i, tc := range testCases {
t.Run(fmt.Sprintf("case %d", i), func(t *testing.T) {
var j int
for tc.ri.next() {
if len(tc.result) < j+1 {
t.Fatalf("unexpected result for iterator on step %d: %v - %v",
j, tc.ri.s, tc.ri.e)
}
s, e := tc.ri.s, tc.ri.e
expS, expE := tc.result[j][0], tc.result[j][1]
if s != expS {
t.Fatalf("expected to get start=%v; got %v", expS, s)
}
if e != expE {
t.Fatalf("expected to get end=%v; got %v", expE, e)
}
j++
}
})
}
}
func parseTime(t *testing.T, s string) time.Time {
t.Helper()
tt, err := time.Parse("2006-01-02T15:04:05.000Z", s)
if err != nil {
t.Fatal(err)
}
return tt
}

118
app/vmalert/rule.go Normal file
View File

@@ -0,0 +1,118 @@
package main
import (
"context"
"errors"
"sync"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
)
// Rule represents alerting or recording rule
// that has unique ID, can be Executed and
// updated with other Rule.
type Rule interface {
// ID returns unique ID that may be used for
// identifying this Rule among others.
ID() uint64
// Exec executes the rule with given context at the given timestamp and limit.
// returns an err if number of resulting time series exceeds the limit.
Exec(ctx context.Context, ts time.Time, limit int) ([]prompbmarshal.TimeSeries, error)
// ExecRange executes the rule on the given time range.
ExecRange(ctx context.Context, start, end time.Time) ([]prompbmarshal.TimeSeries, error)
// UpdateWith performs modification of current Rule
// with fields of the given Rule.
UpdateWith(Rule) error
// ToAPI converts Rule into APIRule
ToAPI() APIRule
// Close performs the shutdown procedures for rule
// such as metrics unregister
Close()
}
var errDuplicate = errors.New("result contains metrics with the same labelset after applying rule labels. See https://docs.victoriametrics.com/vmalert.html#series-with-the-same-labelset for details")
type ruleState struct {
sync.RWMutex
entries []ruleStateEntry
cur int
}
type ruleStateEntry struct {
// stores last moment of time rule.Exec was called
time time.Time
// stores the timesteamp with which rule.Exec was called
at time.Time
// stores the duration of the last rule.Exec call
duration time.Duration
// stores last error that happened in Exec func
// resets on every successful Exec
// may be used as Health ruleState
err error
// stores the number of samples returned during
// the last evaluation
samples int
// stores the number of time series fetched during
// the last evaluation.
// Is supported by VictoriaMetrics only, starting from v1.90.0
// If seriesFetched == nil, then this attribute was missing in
// datasource response (unsupported).
seriesFetched *int
// stores the curl command reflecting the HTTP request used during rule.Exec
curl string
}
func newRuleState(size int) *ruleState {
if size < 1 {
size = 1
}
return &ruleState{
entries: make([]ruleStateEntry, size),
}
}
func (s *ruleState) getLast() ruleStateEntry {
s.RLock()
defer s.RUnlock()
return s.entries[s.cur]
}
func (s *ruleState) size() int {
s.RLock()
defer s.RUnlock()
return len(s.entries)
}
func (s *ruleState) getAll() []ruleStateEntry {
entries := make([]ruleStateEntry, 0)
s.RLock()
defer s.RUnlock()
cur := s.cur
for {
e := s.entries[cur]
if !e.time.IsZero() || !e.at.IsZero() {
entries = append(entries, e)
}
cur--
if cur < 0 {
cur = cap(s.entries) - 1
}
if cur == s.cur {
return entries
}
}
}
func (s *ruleState) add(e ruleStateEntry) {
s.Lock()
defer s.Unlock()
s.cur++
if s.cur > cap(s.entries)-1 {
s.cur = 0
}
s.entries[s.cur] = e
}

View File

@@ -1,174 +0,0 @@
package rule
import (
"context"
"errors"
"fmt"
"sync"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/remotewrite"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
)
// Rule represents alerting or recording rule
// that has unique ID, can be Executed and
// updated with other Rule.
type Rule interface {
// ID returns unique ID that may be used for
// identifying this Rule among others.
ID() uint64
// exec executes the rule with given context at the given timestamp and limit.
// returns an err if number of resulting time series exceeds the limit.
exec(ctx context.Context, ts time.Time, limit int) ([]prompbmarshal.TimeSeries, error)
// execRange executes the rule on the given time range.
execRange(ctx context.Context, start, end time.Time) ([]prompbmarshal.TimeSeries, error)
// updateWith performs modification of current Rule
// with fields of the given Rule.
updateWith(Rule) error
// close performs the shutdown procedures for rule
// such as metrics unregister
close()
}
var errDuplicate = errors.New("result contains metrics with the same labelset after applying rule labels. See https://docs.victoriametrics.com/vmalert.html#series-with-the-same-labelset for details")
type ruleState struct {
sync.RWMutex
entries []StateEntry
cur int
}
// StateEntry stores rule's execution states
type StateEntry struct {
// stores last moment of time rule.Exec was called
Time time.Time
// stores the timesteamp with which rule.Exec was called
At time.Time
// stores the duration of the last rule.Exec call
Duration time.Duration
// stores last error that happened in Exec func
// resets on every successful Exec
// may be used as Health ruleState
Err error
// stores the number of samples returned during
// the last evaluation
Samples int
// stores the number of time series fetched during
// the last evaluation.
// Is supported by VictoriaMetrics only, starting from v1.90.0
// If seriesFetched == nil, then this attribute was missing in
// datasource response (unsupported).
SeriesFetched *int
// stores the curl command reflecting the HTTP request used during rule.Exec
Curl string
}
// GetLastEntry returns latest stateEntry of rule
func GetLastEntry(r Rule) StateEntry {
if rule, ok := r.(*AlertingRule); ok {
return rule.state.getLast()
}
if rule, ok := r.(*RecordingRule); ok {
return rule.state.getLast()
}
return StateEntry{}
}
// GetRuleStateSize returns size of rule stateEntry
func GetRuleStateSize(r Rule) int {
if rule, ok := r.(*AlertingRule); ok {
return rule.state.size()
}
if rule, ok := r.(*RecordingRule); ok {
return rule.state.size()
}
return 0
}
// GetAllRuleState returns rule entire stateEntries
func GetAllRuleState(r Rule) []StateEntry {
if rule, ok := r.(*AlertingRule); ok {
return rule.state.getAll()
}
if rule, ok := r.(*RecordingRule); ok {
return rule.state.getAll()
}
return []StateEntry{}
}
func (s *ruleState) size() int {
s.RLock()
defer s.RUnlock()
return len(s.entries)
}
func (s *ruleState) getLast() StateEntry {
s.RLock()
defer s.RUnlock()
if len(s.entries) == 0 {
return StateEntry{}
}
return s.entries[s.cur]
}
func (s *ruleState) getAll() []StateEntry {
entries := make([]StateEntry, 0)
s.RLock()
defer s.RUnlock()
cur := s.cur
for {
e := s.entries[cur]
if !e.Time.IsZero() || !e.At.IsZero() {
entries = append(entries, e)
}
cur--
if cur < 0 {
cur = cap(s.entries) - 1
}
if cur == s.cur {
return entries
}
}
}
func (s *ruleState) add(e StateEntry) {
s.Lock()
defer s.Unlock()
s.cur++
if s.cur > cap(s.entries)-1 {
s.cur = 0
}
s.entries[s.cur] = e
}
func replayRule(r Rule, start, end time.Time, rw remotewrite.RWClient, replayRuleRetryAttempts int) (int, error) {
var err error
var tss []prompbmarshal.TimeSeries
for i := 0; i < replayRuleRetryAttempts; i++ {
tss, err = r.execRange(context.Background(), start, end)
if err == nil {
break
}
logger.Errorf("attempt %d to execute rule %q failed: %s", i+1, r, err)
time.Sleep(time.Second)
}
if err != nil { // means all attempts failed
return 0, err
}
if len(tss) < 1 {
return 0, nil
}
var n int
for _, ts := range tss {
if err := rw.Push(ts); err != nil {
return n, fmt.Errorf("remote write failure: %s", err)
}
n += len(ts.Samples)
}
return n, nil
}

View File

@@ -1,81 +0,0 @@
package rule
import (
"sync"
"testing"
"time"
)
func TestRule_state(t *testing.T) {
stateEntriesN := 20
r := &AlertingRule{state: &ruleState{entries: make([]StateEntry, stateEntriesN)}}
e := r.state.getLast()
if !e.At.IsZero() {
t.Fatalf("expected entry to be zero")
}
now := time.Now()
r.state.add(StateEntry{At: now})
e = r.state.getLast()
if e.At != now {
t.Fatalf("expected entry at %v to be equal to %v",
e.At, now)
}
time.Sleep(time.Millisecond)
now2 := time.Now()
r.state.add(StateEntry{At: now2})
e = r.state.getLast()
if e.At != now2 {
t.Fatalf("expected entry at %v to be equal to %v",
e.At, now2)
}
if len(r.state.getAll()) != 2 {
t.Fatalf("expected for state to have 2 entries only; got %d",
len(r.state.getAll()),
)
}
var last time.Time
for i := 0; i < stateEntriesN*2; i++ {
last = time.Now()
r.state.add(StateEntry{At: last})
}
e = r.state.getLast()
if e.At != last {
t.Fatalf("expected entry at %v to be equal to %v",
e.At, last)
}
if len(r.state.getAll()) != stateEntriesN {
t.Fatalf("expected for state to have %d entries only; got %d",
stateEntriesN, len(r.state.getAll()),
)
}
}
// TestRule_stateConcurrent supposed to test concurrent
// execution of state updates.
// Should be executed with -race flag
func TestRule_stateConcurrent(_ *testing.T) {
r := &AlertingRule{state: &ruleState{entries: make([]StateEntry, 20)}}
const workers = 50
const iterations = 100
wg := sync.WaitGroup{}
wg.Add(workers)
for i := 0; i < workers; i++ {
go func() {
defer wg.Done()
for i := 0; i < iterations; i++ {
r.state.add(StateEntry{At: time.Now()})
r.state.getAll()
r.state.getLast()
}
}()
}
wg.Wait()
}

View File

@@ -1,75 +0,0 @@
package rule
import (
"net/http"
"testing"
)
func TestRequestToCurl(t *testing.T) {
f := func(req *http.Request, exp string) {
t.Helper()
got := requestToCurl(req)
if got != exp {
t.Fatalf("expected to have %q; got %q instead", exp, got)
}
}
newReq := func(url string, queryParams ...string) *http.Request {
t.Helper()
r, err := http.NewRequest(http.MethodPost, url, nil)
if err != nil {
t.Fatal(err)
}
params := r.URL.Query()
for i := 0; i < len(queryParams); i += 2 {
params.Add(queryParams[i], queryParams[i+1])
}
r.URL.RawQuery = params.Encode()
return r
}
req := newReq("foo.com")
f(req, "curl -X POST 'http://foo.com'")
req = newReq("foo.com")
req.Header.Set("foo", "bar")
req.Header.Set("baz", "qux")
f(req, "curl -X POST -H 'Baz: qux' -H 'Foo: bar' 'http://foo.com'")
req = newReq("foo.com", "query", "up", "step", "10")
f(req, "curl -X POST 'http://foo.com?query=up&step=10'")
req = newReq("http://foo.com", "query", "up", "step", "10")
f(req, "curl -X POST 'http://foo.com?query=up&step=10'")
req = newReq("https://foo.com", "query", "up", "step", "10")
f(req, "curl -k -X POST 'https://foo.com?query=up&step=10'")
req = newReq("https://user:pass@foo.com", "query", "up", "step", "10")
f(req, "curl -k -X POST 'https://user:xxxxx@foo.com?query=up&step=10'")
req = newReq("https://user:pass@foo.com")
req.Header.Set("Authorisation", "Bearer 123456")
f(req, "curl -k -X POST -H 'Authorisation: <secret>' 'https://user:xxxxx@foo.com'")
req = newReq("https://user:pass@foo.com")
req.Header.Set("Authorisation", "Basic 123456")
f(req, "curl -k -X POST -H 'Authorisation: <secret>' 'https://user:xxxxx@foo.com'")
req = newReq("https://foo.com")
req.Header.Set("My-Password", "mypassword")
f(req, "curl -k -X POST -H 'My-Password: <secret>' 'https://foo.com'")
req = newReq("https://foo.com")
req.Header.Set("key-for", "my-new-key")
f(req, "curl -k -X POST -H 'Key-For: <secret>' 'https://foo.com'")
req = newReq("https://foo.com")
req.Header.Set("My-Secret-Org", "secret-organisation")
f(req, "curl -k -X POST -H 'My-Secret-Org: <secret>' 'https://foo.com'")
req = newReq("https://foo.com")
req.Header.Set("Token", "secret-token")
f(req, "curl -k -X POST -H 'Token: <secret>' 'https://foo.com'")
}

100
app/vmalert/rule_test.go Normal file
View File

@@ -0,0 +1,100 @@
package main
import (
"sync"
"testing"
"time"
)
func TestRule_stateDisabled(t *testing.T) {
state := newRuleState(-1)
e := state.getLast()
if !e.at.IsZero() {
t.Fatalf("expected entry to be zero")
}
state.add(ruleStateEntry{at: time.Now()})
state.add(ruleStateEntry{at: time.Now()})
state.add(ruleStateEntry{at: time.Now()})
if len(state.getAll()) != 1 {
// state should store at least one update at any circumstances
t.Fatalf("expected for state to have %d entries; got %d",
1, len(state.getAll()),
)
}
}
func TestRule_state(t *testing.T) {
stateEntriesN := 20
state := newRuleState(stateEntriesN)
e := state.getLast()
if !e.at.IsZero() {
t.Fatalf("expected entry to be zero")
}
now := time.Now()
state.add(ruleStateEntry{at: now})
e = state.getLast()
if e.at != now {
t.Fatalf("expected entry at %v to be equal to %v",
e.at, now)
}
time.Sleep(time.Millisecond)
now2 := time.Now()
state.add(ruleStateEntry{at: now2})
e = state.getLast()
if e.at != now2 {
t.Fatalf("expected entry at %v to be equal to %v",
e.at, now2)
}
if len(state.getAll()) != 2 {
t.Fatalf("expected for state to have 2 entries only; got %d",
len(state.getAll()),
)
}
var last time.Time
for i := 0; i < stateEntriesN*2; i++ {
last = time.Now()
state.add(ruleStateEntry{at: last})
}
e = state.getLast()
if e.at != last {
t.Fatalf("expected entry at %v to be equal to %v",
e.at, last)
}
if len(state.getAll()) != stateEntriesN {
t.Fatalf("expected for state to have %d entries only; got %d",
stateEntriesN, len(state.getAll()),
)
}
}
// TestRule_stateConcurrent supposed to test concurrent
// execution of state updates.
// Should be executed with -race flag
func TestRule_stateConcurrent(t *testing.T) {
state := newRuleState(20)
const workers = 50
const iterations = 100
wg := sync.WaitGroup{}
wg.Add(workers)
for i := 0; i < workers; i++ {
go func() {
defer wg.Done()
for i := 0; i < iterations; i++ {
state.add(ruleStateEntry{at: time.Now()})
state.getAll()
state.getLast()
}
}()
}
wg.Wait()
}

View File

@@ -1,4 +1,4 @@
package rule
package main
import (
"fmt"
@@ -7,7 +7,6 @@ import (
"strings"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
)
@@ -81,9 +80,6 @@ func requestToCurl(req *http.Request) string {
schema := req.URL.Scheme
requestURL := req.URL.String()
if !datasource.ShowDatasourceURL() {
requestURL = req.URL.Redacted()
}
if schema == "" {
schema = "http"
if req.TLS != nil {
@@ -107,25 +103,9 @@ func requestToCurl(req *http.Request) string {
for _, k := range keys {
cw.add("-H")
if !datasource.ShowDatasourceURL() && isSecreteHeader(k) {
cw.addWithEsc(fmt.Sprintf("%s: <secret>", k))
continue
}
cw.addWithEsc(fmt.Sprintf("%s: %s", k, strings.Join(req.Header[k], " ")))
}
cw.addWithEsc(requestURL)
return cw.string()
}
var secretWords = []string{"auth", "pass", "key", "secret", "token"}
func isSecreteHeader(str string) bool {
s := strings.ToLower(str)
for _, secret := range secretWords {
if strings.Contains(s, secret) {
return true
}
}
return false
}

View File

@@ -41,7 +41,7 @@ func TestErrGroup(t *testing.T) {
// TestErrGroupConcurrent supposed to test concurrent
// use of error group.
// Should be executed with -race flag
func TestErrGroupConcurrent(_ *testing.T) {
func TestErrGroupConcurrent(t *testing.T) {
eg := new(ErrGroup)
const writersN = 4

47
app/vmalert/utils_test.go Normal file
View File

@@ -0,0 +1,47 @@
package main
import (
"net/http"
"testing"
)
func TestRequestToCurl(t *testing.T) {
f := func(req *http.Request, exp string) {
got := requestToCurl(req)
if got != exp {
t.Fatalf("expected to have %q; got %q instead", exp, got)
}
}
req, _ := http.NewRequest(http.MethodPost, "foo.com", nil)
f(req, "curl -X POST 'http://foo.com'")
req, _ = http.NewRequest(http.MethodGet, "https://foo.com", nil)
f(req, "curl -k -X GET 'https://foo.com'")
req, _ = http.NewRequest(http.MethodPost, "foo.com", nil)
req.Header.Set("foo", "bar")
req.Header.Set("baz", "qux")
f(req, "curl -X POST -H 'Baz: qux' -H 'Foo: bar' 'http://foo.com'")
req, _ = http.NewRequest(http.MethodPost, "foo.com", nil)
params := req.URL.Query()
params.Add("query", "up")
params.Add("step", "10")
req.URL.RawQuery = params.Encode()
f(req, "curl -X POST 'http://foo.com?query=up&step=10'")
req, _ = http.NewRequest(http.MethodPost, "http://foo.com", nil)
params = req.URL.Query()
params.Add("query", "up")
params.Add("step", "10")
req.URL.RawQuery = params.Encode()
f(req, "curl -X POST 'http://foo.com?query=up&step=10'")
req, _ = http.NewRequest(http.MethodPost, "https://foo.com", nil)
params = req.URL.Query()
params.Add("query", "up")
params.Add("step", "10")
req.URL.RawQuery = params.Encode()
f(req, "curl -k -X POST 'https://foo.com?query=up&step=10'")
}

View File

@@ -10,7 +10,6 @@ import (
"strings"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/rule"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/tpl"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
@@ -144,32 +143,38 @@ func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
}
}
func (rh *requestHandler) getRule(r *http.Request) (apiRule, error) {
groupID, err := strconv.ParseUint(r.FormValue(paramGroupID), 10, 64)
const (
paramGroupID = "group_id"
paramAlertID = "alert_id"
paramRuleID = "rule_id"
)
func (rh *requestHandler) getRule(r *http.Request) (APIRule, error) {
groupID, err := strconv.ParseUint(r.FormValue(paramGroupID), 10, 0)
if err != nil {
return apiRule{}, fmt.Errorf("failed to read %q param: %s", paramGroupID, err)
return APIRule{}, fmt.Errorf("failed to read %q param: %s", paramGroupID, err)
}
ruleID, err := strconv.ParseUint(r.FormValue(paramRuleID), 10, 64)
ruleID, err := strconv.ParseUint(r.FormValue(paramRuleID), 10, 0)
if err != nil {
return apiRule{}, fmt.Errorf("failed to read %q param: %s", paramRuleID, err)
return APIRule{}, fmt.Errorf("failed to read %q param: %s", paramRuleID, err)
}
obj, err := rh.m.ruleAPI(groupID, ruleID)
rule, err := rh.m.RuleAPI(groupID, ruleID)
if err != nil {
return apiRule{}, errResponse(err, http.StatusNotFound)
return APIRule{}, errResponse(err, http.StatusNotFound)
}
return obj, nil
return rule, nil
}
func (rh *requestHandler) getAlert(r *http.Request) (*apiAlert, error) {
groupID, err := strconv.ParseUint(r.FormValue(paramGroupID), 10, 64)
func (rh *requestHandler) getAlert(r *http.Request) (*APIAlert, error) {
groupID, err := strconv.ParseUint(r.FormValue(paramGroupID), 10, 0)
if err != nil {
return nil, fmt.Errorf("failed to read %q param: %s", paramGroupID, err)
}
alertID, err := strconv.ParseUint(r.FormValue(paramAlertID), 10, 64)
alertID, err := strconv.ParseUint(r.FormValue(paramAlertID), 10, 0)
if err != nil {
return nil, fmt.Errorf("failed to read %q param: %s", paramAlertID, err)
}
a, err := rh.m.alertAPI(groupID, alertID)
a, err := rh.m.AlertAPI(groupID, alertID)
if err != nil {
return nil, errResponse(err, http.StatusNotFound)
}
@@ -179,17 +184,17 @@ func (rh *requestHandler) getAlert(r *http.Request) (*apiAlert, error) {
type listGroupsResponse struct {
Status string `json:"status"`
Data struct {
Groups []apiGroup `json:"groups"`
Groups []APIGroup `json:"groups"`
} `json:"data"`
}
func (rh *requestHandler) groups() []apiGroup {
func (rh *requestHandler) groups() []APIGroup {
rh.m.groupsMu.RLock()
defer rh.m.groupsMu.RUnlock()
groups := make([]apiGroup, 0)
groups := make([]APIGroup, 0)
for _, g := range rh.m.groups {
groups = append(groups, groupToAPI(g))
groups = append(groups, g.toAPI())
}
// sort list of alerts for deterministic output
@@ -216,35 +221,35 @@ func (rh *requestHandler) listGroups() ([]byte, error) {
type listAlertsResponse struct {
Status string `json:"status"`
Data struct {
Alerts []*apiAlert `json:"alerts"`
Alerts []*APIAlert `json:"alerts"`
} `json:"data"`
}
func (rh *requestHandler) groupAlerts() []groupAlerts {
func (rh *requestHandler) groupAlerts() []GroupAlerts {
rh.m.groupsMu.RLock()
defer rh.m.groupsMu.RUnlock()
var gAlerts []groupAlerts
var groupAlerts []GroupAlerts
for _, g := range rh.m.groups {
var alerts []*apiAlert
var alerts []*APIAlert
for _, r := range g.Rules {
a, ok := r.(*rule.AlertingRule)
a, ok := r.(*AlertingRule)
if !ok {
continue
}
alerts = append(alerts, ruleToAPIAlert(a)...)
alerts = append(alerts, a.AlertsToAPI()...)
}
if len(alerts) > 0 {
gAlerts = append(gAlerts, groupAlerts{
Group: groupToAPI(g),
groupAlerts = append(groupAlerts, GroupAlerts{
Group: g.toAPI(),
Alerts: alerts,
})
}
}
sort.Slice(gAlerts, func(i, j int) bool {
return gAlerts[i].Group.Name < gAlerts[j].Group.Name
sort.Slice(groupAlerts, func(i, j int) bool {
return groupAlerts[i].Group.Name < groupAlerts[j].Group.Name
})
return gAlerts
return groupAlerts
}
func (rh *requestHandler) listAlerts() ([]byte, error) {
@@ -252,14 +257,14 @@ func (rh *requestHandler) listAlerts() ([]byte, error) {
defer rh.m.groupsMu.RUnlock()
lr := listAlertsResponse{Status: "success"}
lr.Data.Alerts = make([]*apiAlert, 0)
lr.Data.Alerts = make([]*APIAlert, 0)
for _, g := range rh.m.groups {
for _, r := range g.Rules {
a, ok := r.(*rule.AlertingRule)
a, ok := r.(*AlertingRule)
if !ok {
continue
}
lr.Data.Alerts = append(lr.Data.Alerts, ruleToAPIAlert(a)...)
lr.Data.Alerts = append(lr.Data.Alerts, a.AlertsToAPI()...)
}
}

View File

@@ -38,7 +38,7 @@ btn-primary
{% endif %}
{% endfunc %}
{% func ListGroups(r *http.Request, originGroups []apiGroup) %}
{% func ListGroups(r *http.Request, originGroups []APIGroup) %}
{%code prefix := utils.Prefix(r.URL.Path) %}
{%= tpl.Header(r, navItems, "Groups", getLastConfigError()) %}
{%code
@@ -46,9 +46,9 @@ btn-primary
rOk := make(map[string]int)
rNotOk := make(map[string]int)
rNoMatch := make(map[string]int)
var groups []apiGroup
var groups []APIGroup
for _, g := range originGroups {
var rules []apiRule
var rules []APIRule
for _, r := range g.Rules {
if r.LastError != "" {
rNotOk[g.ID]++
@@ -166,7 +166,7 @@ btn-primary
{% endfunc %}
{% func ListAlerts(r *http.Request, groupAlerts []groupAlerts) %}
{% func ListAlerts(r *http.Request, groupAlerts []GroupAlerts) %}
{%code prefix := utils.Prefix(r.URL.Path) %}
{%= tpl.Header(r, navItems, "Alerts", getLastConfigError()) %}
{% if len(groupAlerts) > 0 %}
@@ -183,7 +183,7 @@ btn-primary
</div>
{%code
var keys []string
alertsByRule := make(map[string][]*apiAlert)
alertsByRule := make(map[string][]*APIAlert)
for _, alert := range ga.Alerts {
if len(alertsByRule[alert.RuleID]) < 1 {
keys = append(keys, alert.RuleID)
@@ -310,7 +310,7 @@ btn-primary
{% endfunc %}
{% func Alert(r *http.Request, alert *apiAlert) %}
{% func Alert(r *http.Request, alert *APIAlert) %}
{%code prefix := utils.Prefix(r.URL.Path) %}
{%= tpl.Header(r, navItems, "", getLastConfigError()) %}
{%code
@@ -397,7 +397,7 @@ btn-primary
{% endfunc %}
{% func RuleDetails(r *http.Request, rule apiRule) %}
{% func RuleDetails(r *http.Request, rule APIRule) %}
{%code prefix := utils.Prefix(r.URL.Path) %}
{%= tpl.Header(r, navItems, "", getLastConfigError()) %}
{%code
@@ -416,9 +416,9 @@ btn-primary
var seriesFetchedEnabled bool
var seriesFetchedWarning bool
for _, u := range rule.Updates {
if u.SeriesFetched != nil {
if u.seriesFetched != nil {
seriesFetchedEnabled = true
if *u.SeriesFetched == 0 && u.Samples == 0{
if *u.seriesFetched == 0 && u.samples == 0{
seriesFetchedWarning = true
}
}
@@ -537,23 +537,23 @@ btn-primary
<tbody>
{% for _, u := range rule.Updates %}
<tr{% if u.Err != nil %} class="alert-danger"{% endif %}>
<tr{% if u.err != nil %} class="alert-danger"{% endif %}>
<td>
<span class="badge bg-primary rounded-pill me-3" title="Updated at">{%s u.Time.Format(time.RFC3339) %}</span>
<span class="badge bg-primary rounded-pill me-3" title="Updated at">{%s u.time.Format(time.RFC3339) %}</span>
</td>
<td class="text-center">{%d u.Samples %}</td>
{% if seriesFetchedEnabled %}<td class="text-center">{% if u.SeriesFetched != nil %}{%d *u.SeriesFetched %}{% endif %}</td>{% endif %}
<td class="text-center">{%f.3 u.Duration.Seconds() %}s</td>
<td class="text-center">{%s u.At.Format(time.RFC3339) %}</td>
<td class="text-center">{%d u.samples %}</td>
{% if seriesFetchedEnabled %}<td class="text-center">{% if u.seriesFetched != nil %}{%d *u.seriesFetched %}{% endif %}</td>{% endif %}
<td class="text-center">{%f.3 u.duration.Seconds() %}s</td>
<td class="text-center">{%s u.at.Format(time.RFC3339) %}</td>
<td>
<textarea class="curl-area" rows="1" onclick="this.focus();this.select()">{%s u.Curl %}</textarea>
<textarea class="curl-area" rows="1" onclick="this.focus();this.select()">{%s u.curl %}</textarea>
</td>
</tr>
</li>
{% if u.Err != nil %}
<tr{% if u.Err != nil %} class="alert-danger"{% endif %}>
{% if u.err != nil %}
<tr{% if u.err != nil %} class="alert-danger"{% endif %}>
<td colspan="{% if seriesFetchedEnabled %}6{%else%}5{%endif%}">
<span class="alert-danger">{%v u.Err %}</span>
<span class="alert-danger">{%v u.err %}</span>
</td>
</tr>
{% endif %}
@@ -582,7 +582,7 @@ btn-primary
<span class="badge bg-warning text-dark" title="This firing state is kept because of `keep_firing_for`">stabilizing</span>
{% endfunc %}
{% func seriesFetchedWarn(r apiRule) %}
{% func seriesFetchedWarn(r APIRule) %}
{% if isNoMatch(r) %}
<svg xmlns="http://www.w3.org/2000/svg"
data-bs-toggle="tooltip"
@@ -596,7 +596,7 @@ btn-primary
{% endfunc %}
{%code
func isNoMatch (r apiRule) bool {
func isNoMatch (r APIRule) bool {
return r.LastSamples == 0 && r.LastSeriesFetched != nil && *r.LastSeriesFetched == 0
}
%}

View File

@@ -196,7 +196,7 @@ func buttonActive(filter, expValue string) string {
}
//line app/vmalert/web.qtpl:41
func StreamListGroups(qw422016 *qt422016.Writer, r *http.Request, originGroups []apiGroup) {
func StreamListGroups(qw422016 *qt422016.Writer, r *http.Request, originGroups []APIGroup) {
//line app/vmalert/web.qtpl:41
qw422016.N().S(`
`)
@@ -216,9 +216,9 @@ func StreamListGroups(qw422016 *qt422016.Writer, r *http.Request, originGroups [
rOk := make(map[string]int)
rNotOk := make(map[string]int)
rNoMatch := make(map[string]int)
var groups []apiGroup
var groups []APIGroup
for _, g := range originGroups {
var rules []apiRule
var rules []APIRule
for _, r := range g.Rules {
if r.LastError != "" {
rNotOk[g.ID]++
@@ -610,7 +610,7 @@ func StreamListGroups(qw422016 *qt422016.Writer, r *http.Request, originGroups [
}
//line app/vmalert/web.qtpl:166
func WriteListGroups(qq422016 qtio422016.Writer, r *http.Request, originGroups []apiGroup) {
func WriteListGroups(qq422016 qtio422016.Writer, r *http.Request, originGroups []APIGroup) {
//line app/vmalert/web.qtpl:166
qw422016 := qt422016.AcquireWriter(qq422016)
//line app/vmalert/web.qtpl:166
@@ -621,7 +621,7 @@ func WriteListGroups(qq422016 qtio422016.Writer, r *http.Request, originGroups [
}
//line app/vmalert/web.qtpl:166
func ListGroups(r *http.Request, originGroups []apiGroup) string {
func ListGroups(r *http.Request, originGroups []APIGroup) string {
//line app/vmalert/web.qtpl:166
qb422016 := qt422016.AcquireByteBuffer()
//line app/vmalert/web.qtpl:166
@@ -636,7 +636,7 @@ func ListGroups(r *http.Request, originGroups []apiGroup) string {
}
//line app/vmalert/web.qtpl:169
func StreamListAlerts(qw422016 *qt422016.Writer, r *http.Request, groupAlerts []groupAlerts) {
func StreamListAlerts(qw422016 *qt422016.Writer, r *http.Request, groupAlerts []GroupAlerts) {
//line app/vmalert/web.qtpl:169
qw422016.N().S(`
`)
@@ -712,7 +712,7 @@ func StreamListAlerts(qw422016 *qt422016.Writer, r *http.Request, groupAlerts []
`)
//line app/vmalert/web.qtpl:185
var keys []string
alertsByRule := make(map[string][]*apiAlert)
alertsByRule := make(map[string][]*APIAlert)
for _, alert := range ga.Alerts {
if len(alertsByRule[alert.RuleID]) < 1 {
keys = append(keys, alert.RuleID)
@@ -891,7 +891,7 @@ func StreamListAlerts(qw422016 *qt422016.Writer, r *http.Request, groupAlerts []
}
//line app/vmalert/web.qtpl:255
func WriteListAlerts(qq422016 qtio422016.Writer, r *http.Request, groupAlerts []groupAlerts) {
func WriteListAlerts(qq422016 qtio422016.Writer, r *http.Request, groupAlerts []GroupAlerts) {
//line app/vmalert/web.qtpl:255
qw422016 := qt422016.AcquireWriter(qq422016)
//line app/vmalert/web.qtpl:255
@@ -902,7 +902,7 @@ func WriteListAlerts(qq422016 qtio422016.Writer, r *http.Request, groupAlerts []
}
//line app/vmalert/web.qtpl:255
func ListAlerts(r *http.Request, groupAlerts []groupAlerts) string {
func ListAlerts(r *http.Request, groupAlerts []GroupAlerts) string {
//line app/vmalert/web.qtpl:255
qb422016 := qt422016.AcquireByteBuffer()
//line app/vmalert/web.qtpl:255
@@ -1091,7 +1091,7 @@ func ListTargets(r *http.Request, targets map[notifier.TargetType][]notifier.Tar
}
//line app/vmalert/web.qtpl:313
func StreamAlert(qw422016 *qt422016.Writer, r *http.Request, alert *apiAlert) {
func StreamAlert(qw422016 *qt422016.Writer, r *http.Request, alert *APIAlert) {
//line app/vmalert/web.qtpl:313
qw422016.N().S(`
`)
@@ -1274,7 +1274,7 @@ func StreamAlert(qw422016 *qt422016.Writer, r *http.Request, alert *apiAlert) {
}
//line app/vmalert/web.qtpl:397
func WriteAlert(qq422016 qtio422016.Writer, r *http.Request, alert *apiAlert) {
func WriteAlert(qq422016 qtio422016.Writer, r *http.Request, alert *APIAlert) {
//line app/vmalert/web.qtpl:397
qw422016 := qt422016.AcquireWriter(qq422016)
//line app/vmalert/web.qtpl:397
@@ -1285,7 +1285,7 @@ func WriteAlert(qq422016 qtio422016.Writer, r *http.Request, alert *apiAlert) {
}
//line app/vmalert/web.qtpl:397
func Alert(r *http.Request, alert *apiAlert) string {
func Alert(r *http.Request, alert *APIAlert) string {
//line app/vmalert/web.qtpl:397
qb422016 := qt422016.AcquireByteBuffer()
//line app/vmalert/web.qtpl:397
@@ -1300,7 +1300,7 @@ func Alert(r *http.Request, alert *apiAlert) string {
}
//line app/vmalert/web.qtpl:400
func StreamRuleDetails(qw422016 *qt422016.Writer, r *http.Request, rule apiRule) {
func StreamRuleDetails(qw422016 *qt422016.Writer, r *http.Request, rule APIRule) {
//line app/vmalert/web.qtpl:400
qw422016.N().S(`
`)
@@ -1331,9 +1331,9 @@ func StreamRuleDetails(qw422016 *qt422016.Writer, r *http.Request, rule apiRule)
var seriesFetchedEnabled bool
var seriesFetchedWarning bool
for _, u := range rule.Updates {
if u.SeriesFetched != nil {
if u.seriesFetched != nil {
seriesFetchedEnabled = true
if *u.SeriesFetched == 0 && u.Samples == 0 {
if *u.seriesFetched == 0 && u.samples == 0 {
seriesFetchedWarning = true
}
}
@@ -1587,7 +1587,7 @@ func StreamRuleDetails(qw422016 *qt422016.Writer, r *http.Request, rule apiRule)
qw422016.N().S(`
<tr`)
//line app/vmalert/web.qtpl:540
if u.Err != nil {
if u.err != nil {
//line app/vmalert/web.qtpl:540
qw422016.N().S(` class="alert-danger"`)
//line app/vmalert/web.qtpl:540
@@ -1597,13 +1597,13 @@ func StreamRuleDetails(qw422016 *qt422016.Writer, r *http.Request, rule apiRule)
<td>
<span class="badge bg-primary rounded-pill me-3" title="Updated at">`)
//line app/vmalert/web.qtpl:542
qw422016.E().S(u.Time.Format(time.RFC3339))
qw422016.E().S(u.time.Format(time.RFC3339))
//line app/vmalert/web.qtpl:542
qw422016.N().S(`</span>
</td>
<td class="text-center">`)
//line app/vmalert/web.qtpl:544
qw422016.N().D(u.Samples)
qw422016.N().D(u.samples)
//line app/vmalert/web.qtpl:544
qw422016.N().S(`</td>
`)
@@ -1612,9 +1612,9 @@ func StreamRuleDetails(qw422016 *qt422016.Writer, r *http.Request, rule apiRule)
//line app/vmalert/web.qtpl:545
qw422016.N().S(`<td class="text-center">`)
//line app/vmalert/web.qtpl:545
if u.SeriesFetched != nil {
if u.seriesFetched != nil {
//line app/vmalert/web.qtpl:545
qw422016.N().D(*u.SeriesFetched)
qw422016.N().D(*u.seriesFetched)
//line app/vmalert/web.qtpl:545
}
//line app/vmalert/web.qtpl:545
@@ -1625,18 +1625,18 @@ func StreamRuleDetails(qw422016 *qt422016.Writer, r *http.Request, rule apiRule)
qw422016.N().S(`
<td class="text-center">`)
//line app/vmalert/web.qtpl:546
qw422016.N().FPrec(u.Duration.Seconds(), 3)
qw422016.N().FPrec(u.duration.Seconds(), 3)
//line app/vmalert/web.qtpl:546
qw422016.N().S(`s</td>
<td class="text-center">`)
//line app/vmalert/web.qtpl:547
qw422016.E().S(u.At.Format(time.RFC3339))
qw422016.E().S(u.at.Format(time.RFC3339))
//line app/vmalert/web.qtpl:547
qw422016.N().S(`</td>
<td>
<textarea class="curl-area" rows="1" onclick="this.focus();this.select()">`)
//line app/vmalert/web.qtpl:549
qw422016.E().S(u.Curl)
qw422016.E().S(u.curl)
//line app/vmalert/web.qtpl:549
qw422016.N().S(`</textarea>
</td>
@@ -1644,12 +1644,12 @@ func StreamRuleDetails(qw422016 *qt422016.Writer, r *http.Request, rule apiRule)
</li>
`)
//line app/vmalert/web.qtpl:553
if u.Err != nil {
if u.err != nil {
//line app/vmalert/web.qtpl:553
qw422016.N().S(`
<tr`)
//line app/vmalert/web.qtpl:554
if u.Err != nil {
if u.err != nil {
//line app/vmalert/web.qtpl:554
qw422016.N().S(` class="alert-danger"`)
//line app/vmalert/web.qtpl:554
@@ -1671,7 +1671,7 @@ func StreamRuleDetails(qw422016 *qt422016.Writer, r *http.Request, rule apiRule)
qw422016.N().S(`">
<span class="alert-danger">`)
//line app/vmalert/web.qtpl:556
qw422016.E().V(u.Err)
qw422016.E().V(u.err)
//line app/vmalert/web.qtpl:556
qw422016.N().S(`</span>
</td>
@@ -1697,7 +1697,7 @@ func StreamRuleDetails(qw422016 *qt422016.Writer, r *http.Request, rule apiRule)
}
//line app/vmalert/web.qtpl:563
func WriteRuleDetails(qq422016 qtio422016.Writer, r *http.Request, rule apiRule) {
func WriteRuleDetails(qq422016 qtio422016.Writer, r *http.Request, rule APIRule) {
//line app/vmalert/web.qtpl:563
qw422016 := qt422016.AcquireWriter(qq422016)
//line app/vmalert/web.qtpl:563
@@ -1708,7 +1708,7 @@ func WriteRuleDetails(qq422016 qtio422016.Writer, r *http.Request, rule apiRule)
}
//line app/vmalert/web.qtpl:563
func RuleDetails(r *http.Request, rule apiRule) string {
func RuleDetails(r *http.Request, rule APIRule) string {
//line app/vmalert/web.qtpl:563
qb422016 := qt422016.AcquireByteBuffer()
//line app/vmalert/web.qtpl:563
@@ -1853,7 +1853,7 @@ func badgeStabilizing() string {
}
//line app/vmalert/web.qtpl:585
func streamseriesFetchedWarn(qw422016 *qt422016.Writer, r apiRule) {
func streamseriesFetchedWarn(qw422016 *qt422016.Writer, r APIRule) {
//line app/vmalert/web.qtpl:585
qw422016.N().S(`
`)
@@ -1879,7 +1879,7 @@ func streamseriesFetchedWarn(qw422016 *qt422016.Writer, r apiRule) {
}
//line app/vmalert/web.qtpl:596
func writeseriesFetchedWarn(qq422016 qtio422016.Writer, r apiRule) {
func writeseriesFetchedWarn(qq422016 qtio422016.Writer, r APIRule) {
//line app/vmalert/web.qtpl:596
qw422016 := qt422016.AcquireWriter(qq422016)
//line app/vmalert/web.qtpl:596
@@ -1890,7 +1890,7 @@ func writeseriesFetchedWarn(qq422016 qtio422016.Writer, r apiRule) {
}
//line app/vmalert/web.qtpl:596
func seriesFetchedWarn(r apiRule) string {
func seriesFetchedWarn(r APIRule) string {
//line app/vmalert/web.qtpl:596
qb422016 := qt422016.AcquireByteBuffer()
//line app/vmalert/web.qtpl:596
@@ -1905,6 +1905,6 @@ func seriesFetchedWarn(r apiRule) string {
}
//line app/vmalert/web.qtpl:599
func isNoMatch(r apiRule) bool {
func isNoMatch(r APIRule) bool {
return r.LastSamples == 0 && r.LastSeriesFetched != nil && *r.LastSeriesFetched == 0
}

Some files were not shown because too many files have changed in this diff Show More