Compare commits

..

1 Commits

Author SHA1 Message Date
Max Kotliar
e9261be945 lib/promutil: Weak pointer based labels compressor 2025-08-20 20:02:11 +03:00
4191 changed files with 241867 additions and 388626 deletions

View File

@@ -5,7 +5,7 @@ body:
- type: textarea
id: describe-the-component
attributes:
label: Is your question related to a specific component?
label: Is your question request related to a specific component?
placeholder: |
VictoriaMetrics, vmagent, vmalert, vmui, etc...
validations:

23
.github/copilot-instructions.md vendored Normal file
View File

@@ -0,0 +1,23 @@
# Project Overview
VictoriaMetrics is a fast, cost-saving, and scalable solution for monitoring and managing time series data. It delivers high performance and reliability, making it an ideal choice for businesses of all sizes.
## Folder Structure
- `/app`: Contains the compilable binaries.
- `/lib`: Contains the golang reusable libraries
- `/docs/victoriametrics`: Contains documentation for the project.
- `/apptest/tests`: Contains integration tests.
## Libraries and Frameworks
- Backend: Golang, no framework. Use third-party libraries sparingly.
- Frontend: React.
## Code review guidelines
Ensure the feature or bugfix includes a changelog entry in /docs/victoriametrics/changelog/CHANGELOG.md.
Verify the entry is under the ## tip section and matches the structure and style of existing entries.
Chore-only changes may be omitted from the changelog.

View File

@@ -4,8 +4,6 @@ updates:
directory: "/"
schedule:
interval: "daily"
cooldown:
default-days: 21
- package-ecosystem: "gomod"
directory: "/"
schedule:
@@ -25,8 +23,6 @@ updates:
directory: "/"
schedule:
interval: "daily"
cooldown:
default-days: 21
- package-ecosystem: "npm"
directory: "/app/vmui/packages/vmui"
schedule:

View File

@@ -1,48 +0,0 @@
#!/usr/bin/env sh
set -e
CHANGELOG_FILE="docs/victoriametrics/changelog/CHANGELOG.md"
GITHUB_BASE_REF=${GITHUB_BASE_REF:-"master"}
GIT_REMOTE=${GIT_REMOTE:-"origin"}
git diff "${GIT_REMOTE}/${GITHUB_BASE_REF}"...HEAD -- $CHANGELOG_FILE > diff.txt
if ! grep -q "^+" diff.txt; then
echo "No additions in CHANGELOG.md"
exit 0
fi
ADDED_LINES=$(grep "^+\S" diff.txt | sed 's/^+//')
START_TIP=$(grep -n "^## tip" "$CHANGELOG_FILE" | head -1 | cut -d: -f1)
if [ -z "$START_TIP" ]; then
echo "ERROR: ${CHANGELOG_FILE} does not contain a ## tip section"
exit 1
fi
END_TIP=$(awk "NR>$START_TIP && /^## / {print NR; exit}" "${CHANGELOG_FILE}")
if [ -z "$END_TIP" ]; then
END_TIP=$(wc -l < "$CHANGELOG_FILE")
fi
BAD=0
while IFS= read -r line; do
# Grep exact line inside the file and get line numbers
MATCHES=$(grep -n -F "$line" "$CHANGELOG_FILE" | cut -d: -f1)
for m in $MATCHES; do
if [ "$m" -lt "$START_TIP" ] || [ "$m" -gt "$END_TIP" ]; then
echo "'$line' on line ${m} is outside ## tip section (lines ${START_TIP}-${END_TIP})"
BAD=1
fi
done
done << EOF
$ADDED_LINES
EOF
if [ "$BAD" -ne 0 ]; then
echo "CHANGELOG modifications must be placed inside the ## tip section."
exit 1
fi
echo "CHANGELOG modifications are valid."

View File

@@ -47,8 +47,6 @@ jobs:
arch: arm
- os: linux
arch: ppc64le
- os: linux
arch: s390x
- os: darwin
arch: amd64
- os: darwin
@@ -61,18 +59,17 @@ jobs:
arch: amd64
steps:
- name: Code checkout
uses: actions/checkout@v6
uses: actions/checkout@v5
- name: Setup Go
id: go
uses: actions/setup-go@v6
uses: actions/setup-go@v5
with:
cache-dependency-path: |
go.sum
Makefile
app/**/Makefile
go-version-file: 'go.mod'
- run: go version
go-version: stable
- name: Build victoria-metrics for ${{ matrix.os }}-${{ matrix.arch }}
run: make victoria-metrics-${{ matrix.os }}-${{ matrix.arch }}

View File

@@ -1,19 +0,0 @@
name: 'changelog-linter'
on:
pull_request:
paths:
- "docs/victoriametrics/changelog/CHANGELOG.md"
jobs:
tip-lint:
runs-on: 'ubuntu-latest'
steps:
- uses: 'actions/checkout@v6'
with:
# needed for proper diff
fetch-depth: 0
- name: 'Validate that changelog changes are under ## tip'
run: |
GITHUB_BASE_REF=${{ github.base_ref }} ./.github/scripts/lint-changelog-tip.sh

View File

@@ -1,47 +0,0 @@
name: check-commit-signed
on:
pull_request:
jobs:
check-commit-signed:
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v6
with:
fetch-depth: 0 # we need full history for commit verification
- name: Check commit signatures
run: |
if [ "${{ github.event_name }}" != "pull_request" ]; then
echo "Not a PR event, skipping signature check"
exit 0
fi
RANGE="${{ github.event.pull_request.base.sha }}..${{ github.event.pull_request.head.sha }}"
echo "Checking commits in PR range: $RANGE"
if [ -z "$(git rev-list $RANGE)" ]; then
echo "No new commits in this PR, skipping signature check"
exit 0
fi
# Check raw commit objects for a "gpgsig" header as a fast early signal for
# contributors. Both GPG and SSH signatures use this header.
# This avoids relying on %G? which returns N for SSH commits.
# This check is not a security enforcement — unsigned commits cannot be merged
# anyway due to the GitHub repository merge policy.
unsigned=""
for sha in $(git rev-list $RANGE); do
if ! git cat-file commit "$sha" | grep -q "^gpgsig"; then
unsigned="$unsigned $sha"
fi
done
if [ -n "$unsigned" ]; then
echo "Found unsigned commits:"
echo "$unsigned"
exit 1
fi
echo "All commits in PR are signed (GPG or SSH)"

View File

@@ -19,13 +19,11 @@ jobs:
- name: Setup Go
id: go
uses: actions/setup-go@v6
uses: actions/setup-go@v5
with:
go-version-file: 'go.mod'
go-version: stable
cache: false
- run: go version
- name: Cache Go artifacts
uses: actions/cache@v4
with:
@@ -34,7 +32,7 @@ jobs:
~/go/pkg/mod
~/go/bin
key: go-artifacts-${{ runner.os }}-check-licenses-${{ steps.go.outputs.go-version }}-${{ hashFiles('go.sum', 'Makefile', 'app/**/Makefile') }}
restore-keys: go-artifacts-${{ runner.os }}-check-licenses-${{ steps.go.outputs.go-version }}-
restore-keys: go-artifacts-${{ runner.os }}-check-licenses-
- name: Check License
run: make check-licenses

View File

@@ -29,15 +29,14 @@ jobs:
steps:
- name: Checkout repository
uses: actions/checkout@v6
uses: actions/checkout@v5
- name: Set up Go
id: go
uses: actions/setup-go@v6
uses: actions/setup-go@v5
with:
cache: false
go-version-file: 'go.mod'
- run: go version
go-version: stable
- name: Cache Go artifacts
uses: actions/cache@v4
@@ -47,17 +46,17 @@ jobs:
~/go/bin
~/go/pkg/mod
key: go-artifacts-${{ runner.os }}-codeql-analyze-${{ steps.go.outputs.go-version }}-${{ hashFiles('go.sum', 'Makefile', 'app/**/Makefile') }}
restore-keys: go-artifacts-${{ runner.os }}-codeql-analyze-${{ steps.go.outputs.go-version }}-
restore-keys: go-artifacts-${{ runner.os }}-codeql-analyze-
- name: Initialize CodeQL
uses: github/codeql-action/init@v4
uses: github/codeql-action/init@v3
with:
languages: go
- name: Autobuild
uses: github/codeql-action/autobuild@v4
uses: github/codeql-action/autobuild@v3
- name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@v4
uses: github/codeql-action/analyze@v3
with:
category: 'language:go'

View File

@@ -16,19 +16,19 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Code checkout
uses: actions/checkout@v6
uses: actions/checkout@v5
with:
path: __vm
- name: Checkout private code
uses: actions/checkout@v6
uses: actions/checkout@v5
with:
repository: VictoriaMetrics/vmdocs
token: ${{ secrets.VM_BOT_GH_TOKEN }}
path: __vm-docs
- name: Import GPG key
uses: crazy-max/ghaction-import-gpg@v7
uses: crazy-max/ghaction-import-gpg@v6
id: import-gpg
with:
gpg_private_key: ${{ secrets.VM_BOT_GPG_PRIVATE_KEY }}

View File

@@ -32,19 +32,18 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Code checkout
uses: actions/checkout@v6
uses: actions/checkout@v5
- name: Setup Go
id: go
uses: actions/setup-go@v6
uses: actions/setup-go@v5
with:
cache-dependency-path: |
go.sum
Makefile
app/**/Makefile
go-version-file: 'go.mod'
go-version: stable
- run: go version
- name: Cache golangci-lint
uses: actions/cache@v4
@@ -52,7 +51,7 @@ jobs:
path: |
~/.cache/golangci-lint
~/go/bin
key: golangci-lint-${{ runner.os }}-${{ steps.go.outputs.go-version }}-${{ hashFiles('.golangci.yml') }}
key: golangci-lint-${{ runner.os }}-${{ hashFiles('.golangci.yml') }}
- name: Run check-all
run: |
@@ -66,46 +65,49 @@ jobs:
strategy:
matrix:
scenario:
- 'test'
- 'test-386'
- 'test-full'
- 'test-full-386'
- 'test-pure'
steps:
- name: Code checkout
uses: actions/checkout@v6
uses: actions/checkout@v5
- name: Setup Go
id: go
uses: actions/setup-go@v6
uses: actions/setup-go@v5
with:
cache-dependency-path: |
go.sum
Makefile
app/**/Makefile
go-version-file: 'go.mod'
- run: go version
go-version: stable
- name: Run tests
run: make ${{ matrix.scenario}}
run: GOGC=10 make ${{ matrix.scenario}}
apptest:
name: apptest
runs-on: apptest
- name: Publish coverage
uses: codecov/codecov-action@v5
with:
files: ./coverage.txt
integration:
name: integration
runs-on: ubuntu-latest
steps:
- name: Code checkout
uses: actions/checkout@v6
uses: actions/checkout@v5
- name: Setup Go
id: go
uses: actions/setup-go@v6
uses: actions/setup-go@v5
with:
cache-dependency-path: |
go.sum
Makefile
app/**/Makefile
go-version-file: 'go.mod'
- run: go version
go-version: stable
- name: Run app tests
run: make apptest
- name: Run integration tests
run: make integration-test

View File

@@ -32,41 +32,35 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Code checkout
uses: actions/checkout@v6
uses: actions/checkout@v5
- name: Cache node_modules
id: cache
uses: actions/cache@v5
- name: Setup Node
uses: actions/setup-node@v4
with:
path: app/vmui/packages/vmui/node_modules
key: vmui-deps-${{ runner.os }}-${{ hashFiles('app/vmui/packages/vmui/package-lock.json', 'app/vmui/Dockerfile-build') }}
restore-keys: |
vmui-deps-${{ runner.os }}-
node-version: '24.x'
- name: Install dependencies
if: steps.cache.outputs.cache-hit != 'true'
run: make vmui-install
- name: Cache node-modules
uses: actions/cache@v4
with:
path: |
app/vmui/packages/vmui/node_modules
key: vmui-artifacts-${{ runner.os }}-${{ hashFiles('package-lock.json') }}
restore-keys: vmui-artifacts-${{ runner.os }}-
- name: Run lint
id: lint
run: make vmui-lint
continue-on-error: true
env:
VMUI_SKIP_INSTALL: true
- name: Run tests
id: test
run: make vmui-test
continue-on-error: true
env:
VMUI_SKIP_INSTALL: true
- name: Run typecheck
id: typecheck
run: make vmui-typecheck
continue-on-error: true
env:
VMUI_SKIP_INSTALL: true
- name: Annotate Code Linting Results
uses: ataylorme/eslint-annotate-action@v3

View File

@@ -175,7 +175,7 @@
END OF TERMS AND CONDITIONS
Copyright 2019-2026 VictoriaMetrics, Inc.
Copyright 2019-2025 VictoriaMetrics, Inc.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.

View File

@@ -17,7 +17,7 @@ EXTRA_GO_BUILD_TAGS ?=
GO_BUILDINFO = -X '$(PKG_PREFIX)/lib/buildinfo.Version=$(APP_NAME)-$(DATEINFO_TAG)-$(BUILDINFO_TAG)'
TAR_OWNERSHIP ?= --owner=1000 --group=1000
GOLANGCI_LINT_VERSION := 2.9.0
GOLANGCI_LINT_VERSION := 2.4.0
.PHONY: $(MAKECMDGOALS)
@@ -125,15 +125,6 @@ vmutils-linux-ppc64le: \
vmrestore-linux-ppc64le \
vmctl-linux-ppc64le
vmutils-linux-s390x: \
vmagent-linux-s390x \
vmalert-linux-s390x \
vmalert-tool-linux-s390x \
vmauth-linux-s390x \
vmbackup-linux-s390x \
vmrestore-linux-s390x \
vmctl-linux-s390x
vmutils-darwin-amd64: \
vmagent-darwin-amd64 \
vmalert-darwin-amd64 \
@@ -266,7 +257,6 @@ release-victoria-metrics: \
release-victoria-metrics-linux-amd64 \
release-victoria-metrics-linux-arm \
release-victoria-metrics-linux-arm64 \
release-victoria-metrics-linux-s390x \
release-victoria-metrics-darwin-amd64 \
release-victoria-metrics-darwin-arm64 \
release-victoria-metrics-freebsd-amd64 \
@@ -285,9 +275,6 @@ release-victoria-metrics-linux-arm:
release-victoria-metrics-linux-arm64:
GOOS=linux GOARCH=arm64 $(MAKE) release-victoria-metrics-goos-goarch
release-victoria-metrics-linux-s390x:
GOOS=linux GOARCH=s390x $(MAKE) release-victoria-metrics-goos-goarch
release-victoria-metrics-darwin-amd64:
GOOS=darwin GOARCH=amd64 $(MAKE) release-victoria-metrics-goos-goarch
@@ -327,7 +314,6 @@ release-vmutils: \
release-vmutils-linux-amd64 \
release-vmutils-linux-arm64 \
release-vmutils-linux-arm \
release-vmutils-linux-s390x \
release-vmutils-darwin-amd64 \
release-vmutils-darwin-arm64 \
release-vmutils-freebsd-amd64 \
@@ -346,9 +332,6 @@ release-vmutils-linux-arm64:
release-vmutils-linux-arm:
GOOS=linux GOARCH=arm $(MAKE) release-vmutils-goos-goarch
release-vmutils-linux-s390x:
GOOS=linux GOARCH=s390x $(MAKE) release-vmutils-goos-goarch
release-vmutils-darwin-amd64:
GOOS=darwin GOARCH=amd64 $(MAKE) release-vmutils-goos-goarch
@@ -435,7 +418,7 @@ release-vmutils-windows-goarch: \
vmctl-windows-$(GOARCH)-prod.exe
pprof-cpu:
go tool pprof -trim_path=github.com/VictoriaMetrics/VictoriaMetrics $(PPROF_FILE)
go tool pprof -trim_path=github.com/VictoriaMetrics/VictoriaMetrics@ $(PPROF_FILE)
fmt:
gofmt -l -w -s ./lib
@@ -443,7 +426,7 @@ fmt:
gofmt -l -w -s ./apptest
vet:
go vet -tags 'synctest' ./lib/...
GOEXPERIMENT=synctest go vet ./lib/...
go vet ./app/...
go vet ./apptest/...
@@ -452,55 +435,39 @@ check-all: fmt vet golangci-lint govulncheck
clean-checkers: remove-golangci-lint remove-govulncheck
test:
go test -tags 'synctest' ./lib/... ./app/...
GOEXPERIMENT=synctest go test ./lib/... ./app/...
test-race:
go test -tags 'synctest' -race ./lib/... ./app/...
test-386:
GOARCH=386 go test -tags 'synctest' ./lib/... ./app/...
GOEXPERIMENT=synctest go test -race ./lib/... ./app/...
test-pure:
CGO_ENABLED=0 go test -tags 'synctest' ./lib/... ./app/...
GOEXPERIMENT=synctest CGO_ENABLED=0 go test ./lib/... ./app/...
test-full:
go test -tags 'synctest' -coverprofile=coverage.txt -covermode=atomic ./lib/... ./app/...
GOEXPERIMENT=synctest go test -coverprofile=coverage.txt -covermode=atomic ./lib/... ./app/...
test-full-386:
GOARCH=386 go test -tags 'synctest' -coverprofile=coverage.txt -covermode=atomic ./lib/... ./app/...
GOEXPERIMENT=synctest GOARCH=386 go test -coverprofile=coverage.txt -covermode=atomic ./lib/... ./app/...
integration-test:
$(MAKE) apptest
apptest:
$(MAKE) victoria-metrics-race vmagent-race vmalert-race vmauth-race vmctl-race vmbackup-race vmrestore-race
go test ./apptest/... -skip="^Test(Cluster|Legacy).*"
apptest-legacy: victoria-metrics-race vmbackup-race vmrestore-race
OS=$$(uname | tr '[:upper:]' '[:lower:]'); \
ARCH=$$(uname -m | tr '[:upper:]' '[:lower:]' | sed 's/x86_64/amd64/'); \
VERSION=v1.132.0; \
VMSINGLE=victoria-metrics-$${OS}-$${ARCH}-$${VERSION}.tar.gz; \
VMCLUSTER=victoria-metrics-$${OS}-$${ARCH}-$${VERSION}-cluster.tar.gz; \
URL=https://github.com/VictoriaMetrics/VictoriaMetrics/releases/download/$${VERSION}; \
DIR=/tmp/$${VERSION}; \
test -d $${DIR} || (mkdir $${DIR} && \
curl --output-dir /tmp -LO $${URL}/$${VMSINGLE} && tar xzf /tmp/$${VMSINGLE} -C $${DIR} && \
curl --output-dir /tmp -LO $${URL}/$${VMCLUSTER} && tar xzf /tmp/$${VMCLUSTER} -C $${DIR} \
); \
VM_LEGACY_VMSINGLE_PATH=$${DIR}/victoria-metrics-prod \
VM_LEGACY_VMSTORAGE_PATH=$${DIR}/vmstorage-prod \
go test ./apptest/tests -run="^TestLegacySingle.*"
$(MAKE) victoria-metrics vmagent vmalert vmauth vmctl vmbackup vmrestore
go test ./apptest/... -skip="^TestCluster.*"
benchmark:
go test -run=NO_TESTS -bench=. ./lib/...
go test -run=NO_TESTS -bench=. ./app/...
GOEXPERIMENT=synctest go test -bench=. ./lib/...
go test -bench=. ./app/...
benchmark-pure:
CGO_ENABLED=0 go test -run=NO_TESTS -bench=. ./lib/...
CGO_ENABLED=0 go test -run=NO_TESTS -bench=. ./app/...
GOEXPERIMENT=synctest CGO_ENABLED=0 go test -bench=. ./lib/...
CGO_ENABLED=0 go test -bench=. ./app/...
vendor-update:
go get -u ./lib/...
go get -u ./app/...
go mod tidy -compat=1.26
go mod tidy -compat=1.24
go mod vendor
app-local:
@@ -516,15 +483,14 @@ app-local-windows-goarch:
CGO_ENABLED=0 GOOS=windows GOARCH=$(GOARCH) go build $(RACE) -ldflags "$(GO_BUILDINFO)" -tags "$(EXTRA_GO_BUILD_TAGS)" -o bin/$(APP_NAME)-windows-$(GOARCH)$(RACE).exe $(PKG_PREFIX)/app/$(APP_NAME)
quicktemplate-gen: install-qtc
qtc -dir=lib
qtc -dir=app
qtc
install-qtc:
which qtc || go install github.com/valyala/quicktemplate/qtc@latest
golangci-lint: install-golangci-lint
golangci-lint run --build-tags 'synctest'
GOEXPERIMENT=synctest golangci-lint run
install-golangci-lint:
which golangci-lint && (golangci-lint --version | grep -q $(GOLANGCI_LINT_VERSION)) || curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(shell go env GOPATH)/bin v$(GOLANGCI_LINT_VERSION)

View File

@@ -1,11 +1,12 @@
# VictoriaMetrics
[![Latest Release](https://img.shields.io/github/v/release/VictoriaMetrics/VictoriaMetrics?sort=semver&label=&filter=!*-victorialogs&logo=github&labelColor=gray&color=gray&link=https%3A%2F%2Fgithub.com%2FVictoriaMetrics%2FVictoriaMetrics%2Freleases%2Flatest)](https://github.com/VictoriaMetrics/VictoriaMetrics/releases)
[![Docker Pulls](https://img.shields.io/docker/pulls/victoriametrics/victoria-metrics?label=&logo=docker&logoColor=white&labelColor=2496ED&color=2496ED&link=https%3A%2F%2Fhub.docker.com%2Fr%2Fvictoriametrics%2Fvictoria-metrics)](https://hub.docker.com/u/victoriametrics)
![Docker Pulls](https://img.shields.io/docker/pulls/victoriametrics/victoria-metrics?label=&logo=docker&logoColor=white&labelColor=2496ED&color=2496ED&link=https%3A%2F%2Fhub.docker.com%2Fr%2Fvictoriametrics%2Fvictoria-metrics)
[![Go Report](https://goreportcard.com/badge/github.com/VictoriaMetrics/VictoriaMetrics?link=https%3A%2F%2Fgoreportcard.com%2Freport%2Fgithub.com%2FVictoriaMetrics%2FVictoriaMetrics)](https://goreportcard.com/report/github.com/VictoriaMetrics/VictoriaMetrics)
[![Build Status](https://github.com/VictoriaMetrics/VictoriaMetrics/actions/workflows/build.yml/badge.svg?branch=master&link=https%3A%2F%2Fgithub.com%2FVictoriaMetrics%2FVictoriaMetrics%2Factions)](https://github.com/VictoriaMetrics/VictoriaMetrics/actions/workflows/build.yml)
[![Build Status](https://github.com/VictoriaMetrics/VictoriaMetrics/actions/workflows/main.yml/badge.svg?branch=master&link=https%3A%2F%2Fgithub.com%2FVictoriaMetrics%2FVictoriaMetrics%2Factions)](https://github.com/VictoriaMetrics/VictoriaMetrics/actions/workflows/main.yml)
[![codecov](https://codecov.io/gh/VictoriaMetrics/VictoriaMetrics/branch/master/graph/badge.svg?link=https%3A%2F%2Fcodecov.io%2Fgh%2FVictoriaMetrics%2FVictoriaMetrics)](https://app.codecov.io/gh/VictoriaMetrics/VictoriaMetrics)
[![License](https://img.shields.io/github/license/VictoriaMetrics/VictoriaMetrics?labelColor=green&label=&link=https%3A%2F%2Fgithub.com%2FVictoriaMetrics%2FVictoriaMetrics%2Fblob%2Fmaster%2FLICENSE)](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/LICENSE)
[![Join Slack](https://img.shields.io/badge/Join%20Slack-4A154B?logo=slack)](https://slack.victoriametrics.com)
![Slack](https://img.shields.io/badge/Join-4A154B?logo=slack&link=https%3A%2F%2Fslack.victoriametrics.com)
[![X](https://img.shields.io/twitter/follow/VictoriaMetrics?style=flat&label=Follow&color=black&logo=x&labelColor=black&link=https%3A%2F%2Fx.com%2FVictoriaMetrics)](https://x.com/VictoriaMetrics/)
[![Reddit](https://img.shields.io/reddit/subreddit-subscribers/VictoriaMetrics?style=flat&label=Join&labelColor=red&logoColor=white&logo=reddit&link=https%3A%2F%2Fwww.reddit.com%2Fr%2FVictoriaMetrics)](https://www.reddit.com/r/VictoriaMetrics/)
@@ -15,21 +16,16 @@
<img src="docs/victoriametrics/logo.webp" width="300" alt="VictoriaMetrics logo">
</picture>
VictoriaMetrics is a fast, cost-effective, and scalable solution for monitoring and managing time series data. It delivers high performance and reliability, making it an ideal choice for businesses of all sizes.
VictoriaMetrics is a fast, cost-saving, and scalable solution for monitoring and managing time series data. It delivers high performance and reliability, making it an ideal choice for businesses of all sizes.
Here are some resources and information about VictoriaMetrics:
- **Case studies**: [Grammarly, Roblox, Wix, Spotify,...](https://docs.victoriametrics.com/victoriametrics/casestudies/).
- **Available**: [Binary releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest), Docker images on [Docker Hub](https://hub.docker.com/r/victoriametrics/victoria-metrics/) and [Quay](https://quay.io/repository/victoriametrics/victoria-metrics), [Source code](https://github.com/VictoriaMetrics/VictoriaMetrics).
- **Deployment types**: [Single-node version](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) and [Cluster version](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/) under [Apache License 2.0](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/LICENSE).
- **Getting started:** Read [key concepts](https://docs.victoriametrics.com/victoriametrics/keyconcepts/) and follow the
[quick start guide](https://docs.victoriametrics.com/victoriametrics/quick-start/).
- **Community**: [Slack](https://slack.victoriametrics.com/) (join via [Slack Inviter](https://slack.victoriametrics.com/)), [X (Twitter)](https://x.com/VictoriaMetrics), [YouTube](https://www.youtube.com/@VictoriaMetrics). See full list [here](https://docs.victoriametrics.com/victoriametrics/#community-and-contributions).
- **Changelog**: Project evolves fast - check the [CHANGELOG](https://docs.victoriametrics.com/victoriametrics/changelog/), and [How to upgrade](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-upgrade-victoriametrics).
- **Enterprise support:** [Contact us](mailto:info@victoriametrics.com) for commercial support with additional [enterprise features](https://docs.victoriametrics.com/victoriametrics/enterprise/).
- **Enterprise releases:** Enterprise and [long-term support releases (LTS)](https://docs.victoriametrics.com/victoriametrics/lts-releases/) are publicly available and can be evaluated for free
using a [free trial license](https://victoriametrics.com/products/enterprise/trial/).
- **Security:** we achieved [security certifications](https://victoriametrics.com/security/) for Database Software Development and Software-Based Monitoring Services.
- Documentation: [docs.victoriametrics.com](https://docs.victoriametrics.com)
- Case studies: [Grammarly, Roblox, Wix,...](https://docs.victoriametrics.com/victoriametrics/casestudies/).
- Available: [Binary releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest), docker images [Docker Hub](https://hub.docker.com/r/victoriametrics/victoria-metrics/) and [Quay](https://quay.io/repository/victoriametrics/victoria-metrics), [Source code](https://github.com/VictoriaMetrics/VictoriaMetrics)
- Deployment types: [Single-node version](https://docs.victoriametrics.com/), [Cluster version](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/), and [Enterprise version](https://docs.victoriametrics.com/victoriametrics/enterprise/)
- Changelog: [CHANGELOG](https://docs.victoriametrics.com/victoriametrics/changelog/), and [How to upgrade](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-upgrade-victoriametrics)
- Community: [Slack](https://slack.victoriametrics.com/), [X (Twitter)](https://x.com/VictoriaMetrics), [LinkedIn](https://www.linkedin.com/company/victoriametrics/), [YouTube](https://www.youtube.com/@VictoriaMetrics)
Yes, we open-source both the single-node VictoriaMetrics and the cluster version.

View File

@@ -4,39 +4,15 @@
The following versions of VictoriaMetrics receive regular security fixes:
| Version | Supported |
|--------------------------------------------------------------------------------|--------------------|
| [Latest release](https://docs.victoriametrics.com/victoriametrics/changelog/) | :white_check_mark: |
| [LTS releases](https://docs.victoriametrics.com/victoriametrics/lts-releases/) | :white_check_mark: |
| other releases | :x: |
| Version | Supported |
|---------|--------------------|
| [latest release](https://docs.victoriametrics.com/victoriametrics/changelog/) | :white_check_mark: |
| v1.102.x [LTS line](https://docs.victoriametrics.com/victoriametrics/lts-releases/) | :white_check_mark: |
| v1.110.x [LTS line](https://docs.victoriametrics.com/victoriametrics/lts-releases/) | :white_check_mark: |
| other releases | :x: |
See [this page](https://victoriametrics.com/security/) for more details.
## Software Bill of Materials (SBOM)
Every VictoriaMetrics container{{% available_from "#" %}} image published to
[Docker Hub](https://hub.docker.com/u/victoriametrics)
and [Quay.io](https://quay.io/organization/victoriametrics)
includes an [SPDX](https://spdx.dev/) SBOM attestation
generated automatically by BuildKit during
`docker buildx build`.
To inspect the SBOM for an image:
```sh
docker buildx imagetools inspect \
docker.io/victoriametrics/victoria-metrics:latest \
--format "{{ json .SBOM }}"
```
To scan an image using its SBOM attestation with
[Trivy](https://github.com/aquasecurity/trivy):
```sh
trivy image --sbom-sources oci \
docker.io/victoriametrics/victoria-metrics:latest
```
## Reporting a Vulnerability
Please report any security issues to <security@victoriametrics.com>

View File

@@ -27,9 +27,6 @@ victoria-metrics-linux-ppc64le-prod:
victoria-metrics-linux-386-prod:
APP_NAME=victoria-metrics $(MAKE) app-via-docker-linux-386
victoria-metrics-linux-s390x-prod:
APP_NAME=victoria-metrics $(MAKE) app-via-docker-linux-s390x
victoria-metrics-darwin-amd64-prod:
APP_NAME=victoria-metrics $(MAKE) app-via-docker-darwin-amd64

View File

@@ -134,7 +134,6 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
}
w.Header().Add("Content-Type", "text/html; charset=utf-8")
fmt.Fprintf(w, "<h2>Single-node VictoriaMetrics</h2></br>")
fmt.Fprintf(w, "Version %s<br>", buildinfo.Version)
fmt.Fprintf(w, "See docs at <a href='https://docs.victoriametrics.com/'>https://docs.victoriametrics.com/</a></br>")
fmt.Fprintf(w, "Useful endpoints:</br>")
httpserver.WriteAPIHelp(w, [][2]string{
@@ -170,7 +169,7 @@ func usage() {
const s = `
victoria-metrics is a time series database and monitoring solution.
See the docs at https://docs.victoriametrics.com/victoriametrics/
See the docs at https://docs.victoriametrics.com/
`
flagutil.Usage(s)
}

View File

@@ -10,11 +10,9 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/decimal"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prommetadata"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/prometheus"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage/metricsmetadata"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timeserieslimits"
)
@@ -29,9 +27,11 @@ var selfScraperWG sync.WaitGroup
func startSelfScraper() {
selfScraperStopCh = make(chan struct{})
selfScraperWG.Go(func() {
selfScraperWG.Add(1)
go func() {
defer selfScraperWG.Done()
selfScraper(*selfScrapeInterval)
})
}()
}
func stopSelfScraper() {
@@ -48,7 +48,6 @@ func selfScraper(scrapeInterval time.Duration) {
var bb bytesutil.ByteBuffer
var rows prometheus.Rows
var metadataRows prometheus.MetadataRows
var mrs []storage.MetricRow
var labels []prompb.Label
t := time.NewTicker(scrapeInterval)
@@ -58,12 +57,8 @@ func selfScraper(scrapeInterval time.Duration) {
appmetrics.WritePrometheusMetrics(&bb)
s := bytesutil.ToUnsafeString(bb.B)
rows.Reset()
// Parse metrics and optionally metadata when enabled
if prommetadata.IsEnabled() {
rows, metadataRows = prometheus.UnmarshalWithMetadata(rows, metadataRows, s, nil)
} else {
rows.UnmarshalWithErrLogger(s, nil)
}
// VictoriaMetrics components don't expose metadata yet, only need to parse samples
rows.UnmarshalWithErrLogger(s, nil)
mrs = mrs[:0]
for i := range rows.Rows {
r := &rows.Rows[i]
@@ -96,19 +91,6 @@ func selfScraper(scrapeInterval time.Duration) {
if err := vmstorage.AddRows(mrs); err != nil {
logger.Errorf("cannot store self-scraped metrics: %s", err)
}
if len(metadataRows.Rows) > 0 {
mms := make([]metricsmetadata.Row, 0, len(metadataRows.Rows))
for _, mm := range metadataRows.Rows {
mms = append(mms, metricsmetadata.Row{
MetricFamilyName: bytesutil.ToUnsafeBytes(mm.Metric),
Help: bytesutil.ToUnsafeBytes(mm.Help),
Type: mm.Type,
})
}
if err := vmstorage.AddMetadataRows(mms); err != nil {
logger.Errorf("cannot store self-scraped metrics metadata: %s", err)
}
}
}
for {
select {

View File

@@ -33,13 +33,13 @@ func PopulateTimeTpl(b []byte, tGlobal time.Time) []byte {
}
switch strings.TrimSpace(parts[0]) {
case `TIME_S`:
return fmt.Appendf(nil, "%d", t.Unix())
return []byte(fmt.Sprintf("%d", t.Unix()))
case `TIME_MSZ`:
return fmt.Appendf(nil, "%d", t.Unix()*1e3)
return []byte(fmt.Sprintf("%d", t.Unix()*1e3))
case `TIME_MS`:
return fmt.Appendf(nil, "%d", timeToMillis(t))
return []byte(fmt.Sprintf("%d", timeToMillis(t)))
case `TIME_NS`:
return fmt.Appendf(nil, "%d", t.UnixNano())
return []byte(fmt.Sprintf("%d", t.UnixNano()))
default:
log.Fatalf("unknown time pattern %s in %s", parts[0], repl)
}

View File

@@ -27,9 +27,6 @@ vmagent-linux-ppc64le-prod:
vmagent-linux-386-prod:
APP_NAME=vmagent $(MAKE) app-via-docker-linux-386
vmagent-linux-s390x-prod:
APP_NAME=vmagent $(MAKE) app-via-docker-linux-s390x
vmagent-darwin-amd64-prod:
APP_NAME=vmagent $(MAKE) app-via-docker-darwin-amd64

View File

@@ -49,11 +49,6 @@ func insertRows(at *auth.Token, sketches []*datadogsketches.Sketch, extraLabels
Name: "__name__",
Value: m.Name,
})
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10557
labels = append(labels, prompb.Label{
Name: "host",
Value: sketch.Host,
})
for _, label := range m.Labels {
labels = append(labels, prompb.Label{
Name: label.Name,
@@ -62,6 +57,9 @@ func insertRows(at *auth.Token, sketches []*datadogsketches.Sketch, extraLabels
}
for _, tag := range sketch.Tags {
name, value := datadogutil.SplitTag(tag)
if name == "host" {
name = "exported_host"
}
labels = append(labels, prompb.Label{
Name: name,
Value: value,

View File

@@ -27,7 +27,6 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/promremotewrite"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/vmimport"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/zabbixconnector"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
@@ -75,7 +74,7 @@ var (
"See also -opentsdbHTTPListenAddr.useProxyProtocol")
opentsdbHTTPUseProxyProtocol = flag.Bool("opentsdbHTTPListenAddr.useProxyProtocol", false, "Whether to use proxy protocol for connections accepted "+
"at -opentsdbHTTPListenAddr . See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt")
configAuthKey = flagutil.NewPassword("configAuthKey", "Authorization key for accessing /config and /remotewrite-.*-config pages. It must be passed via authKey query arg. It overrides -httpAuth.*")
configAuthKey = flagutil.NewPassword("configAuthKey", "Authorization key for accessing /config page. It must be passed via authKey query arg. It overrides -httpAuth.*")
reloadAuthKey = flagutil.NewPassword("reloadAuthKey", "Auth key for /-/reload http endpoint. It must be passed via authKey query arg. It overrides -httpAuth.*")
dryRun = flag.Bool("dryRun", false, "Whether to check config files without running vmagent. The following files are checked: "+
"-promscrape.config, -remoteWrite.relabelConfig, -remoteWrite.urlRelabelConfig, -remoteWrite.streamAggr.config . "+
@@ -245,7 +244,6 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
}
w.Header().Add("Content-Type", "text/html; charset=utf-8")
fmt.Fprintf(w, "<h2>vmagent</h2>")
fmt.Fprintf(w, "Version %s<br>", buildinfo.Version)
fmt.Fprintf(w, "See docs at <a href='https://docs.victoriametrics.com/victoriametrics/vmagent/'>https://docs.victoriametrics.com/victoriametrics/vmagent/</a></br>")
fmt.Fprintf(w, "Useful endpoints:</br>")
httpserver.WriteAPIHelp(w, [][2]string{
@@ -254,8 +252,6 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
{"metric-relabel-debug", "debug metric relabeling"},
{"api/v1/targets", "advanced information about discovered targets in JSON format"},
{"config", "-promscrape.config contents"},
{"remotewrite-relabel-config", "-remoteWrite.relabelConfig contents"},
{"remotewrite-url-relabel-config", "-remoteWrite.urlRelabelConfig contents"},
{"metrics", "available service metrics"},
{"flags", "command-line flags"},
{"-/reload", "reload configuration"},
@@ -352,17 +348,6 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
}
firehose.WriteSuccessResponse(w, r)
return true
case "/zabbixconnector/api/v1/history":
zabbixconnectorHistoryRequests.Inc()
if err := zabbixconnector.InsertHandlerForHTTP(nil, r); err != nil {
zabbixconnectorHistoryErrors.Inc()
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(http.StatusBadRequest)
fmt.Fprintf(w, `{"error":%q}`, err.Error())
return true
}
w.WriteHeader(http.StatusOK)
return true
case "/newrelic":
newrelicCheckRequest.Inc()
w.Header().Set("Content-Type", "application/json")
@@ -492,42 +477,6 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
promscrape.WriteConfigData(&bb)
fmt.Fprintf(w, `{"status":"success","data":{"yaml":%s}}`, stringsutil.JSONString(string(bb.B)))
return true
case "/remotewrite-relabel-config":
if !httpserver.CheckAuthFlag(w, r, configAuthKey) {
return true
}
remoteWriteRelabelConfigRequests.Inc()
w.Header().Set("Content-Type", "text/plain; charset=utf-8")
remotewrite.WriteRelabelConfigData(w)
return true
case "/api/v1/status/remotewrite-relabel-config":
if !httpserver.CheckAuthFlag(w, r, configAuthKey) {
return true
}
remoteWriteStatusRelabelConfigRequests.Inc()
w.Header().Set("Content-Type", "application/json")
var bb bytesutil.ByteBuffer
remotewrite.WriteRelabelConfigData(&bb)
fmt.Fprintf(w, `{"status":"success","data":{"yaml":%s}}`, stringsutil.JSONString(string(bb.B)))
return true
case "/remotewrite-url-relabel-config":
if !httpserver.CheckAuthFlag(w, r, configAuthKey) {
return true
}
remoteWriteURLRelabelConfigRequests.Inc()
w.Header().Set("Content-Type", "text/plain; charset=utf-8")
remotewrite.WriteURLRelabelConfigData(w)
return true
case "/api/v1/status/remotewrite-url-relabel-config":
if !httpserver.CheckAuthFlag(w, r, configAuthKey) {
return true
}
remoteWriteStatusURLRelabelConfigRequests.Inc()
w.Header().Set("Content-Type", "application/json")
var bb bytesutil.ByteBuffer
remotewrite.WriteURLRelabelConfigData(&bb)
fmt.Fprintf(w, `{"status":"success","data":{"yaml":%s}}`, stringsutil.JSONString(string(bb.B)))
return true
case "/prometheus/-/reload", "/-/reload":
if !httpserver.CheckAuthFlag(w, r, reloadAuthKey) {
return true
@@ -657,17 +606,6 @@ func processMultitenantRequest(w http.ResponseWriter, r *http.Request, path stri
}
firehose.WriteSuccessResponse(w, r)
return true
case "zabbixconnector/api/v1/history":
zabbixconnectorHistoryRequests.Inc()
if err := zabbixconnector.InsertHandlerForHTTP(at, r); err != nil {
zabbixconnectorHistoryErrors.Inc()
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(http.StatusBadRequest)
fmt.Fprintf(w, `{"error":%q}`, err.Error())
return true
}
w.WriteHeader(http.StatusOK)
return true
case "newrelic":
newrelicCheckRequest.Inc()
w.Header().Set("Content-Type", "application/json")
@@ -789,9 +727,6 @@ var (
opentelemetryPushRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/opentelemetry/v1/metrics", protocol="opentelemetry"}`)
opentelemetryPushErrors = metrics.NewCounter(`vmagent_http_request_errors_total{path="/opentelemetry/v1/metrics", protocol="opentelemetry"}`)
zabbixconnectorHistoryRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/zabbixconnector/api/v1/history", protocol="zabbixconnector"}`)
zabbixconnectorHistoryErrors = metrics.NewCounter(`vmagent_http_request_errors_total{path="/zabbixconnector/api/v1/history", protocol="zabbixconnector"}`)
newrelicWriteRequests = metrics.NewCounter(`vm_http_requests_total{path="/newrelic/infra/v2/metrics/events/bulk", protocol="newrelic"}`)
newrelicWriteErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/newrelic/infra/v2/metrics/events/bulk", protocol="newrelic"}`)
@@ -812,12 +747,6 @@ var (
promscrapeConfigRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/config"}`)
promscrapeStatusConfigRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/api/v1/status/config"}`)
remoteWriteRelabelConfigRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/remotewrite-relabel-config"}`)
remoteWriteStatusRelabelConfigRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/api/v1/status/remotewrite-relabel-config"}`)
remoteWriteURLRelabelConfigRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/remotewrite-url-relabel-config"}`)
remoteWriteStatusURLRelabelConfigRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/api/v1/status/remotewrite-url-relabel-config"}`)
promscrapeConfigReloadRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/-/reload"}`)
)

View File

@@ -78,7 +78,7 @@ func insertRows(at *auth.Token, rows []newrelic.Row, extraLabels []prompb.Label)
if !remotewrite.TryPush(at, &ctx.WriteRequest) {
return remotewrite.ErrQueueFullHTTPRetry
}
rowsInserted.Add(samplesCount)
rowsInserted.Add(len(rows))
if at != nil {
rowsTenantInserted.Get(at).Add(samplesCount)
}

View File

@@ -2,14 +2,13 @@ package opentelemetry
import (
"fmt"
"io"
"net/http"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prommetadata"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/opentelemetry/firehose"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/opentelemetry/stream"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/protoparserutil"
@@ -25,13 +24,6 @@ var (
rowsPerInsert = metrics.NewHistogram(`vmagent_rows_per_insert{type="opentelemetry"}`)
)
// InsertHandlerForReader processes metrics from given reader.
func InsertHandlerForReader(at *auth.Token, r io.Reader, encoding string) error {
return stream.ParseStream(r, encoding, nil, func(tss []prompb.TimeSeries, mms []prompb.MetricMetadata) error {
return insertRows(at, tss, mms, nil)
})
}
// InsertHandler processes opentelemetry metrics.
func InsertHandler(at *auth.Token, req *http.Request) error {
extraLabels, err := protoparserutil.GetExtraLabels(req)
@@ -76,7 +68,7 @@ func insertRows(at *auth.Token, tss []prompb.TimeSeries, mms []prompb.MetricMeta
ctx.WriteRequest.Timeseries = tssDst
var metadataTotal int
if prommetadata.IsEnabled() {
if promscrape.IsMetadataEnabled() {
var accountID, projectID uint32
if at != nil {
accountID = at.AccountID

View File

@@ -7,8 +7,8 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prommetadata"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/prometheus"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/prometheus/stream"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/protoparserutil"
@@ -36,7 +36,7 @@ func InsertHandler(at *auth.Token, req *http.Request) error {
return err
}
encoding := req.Header.Get("Content-Encoding")
return stream.Parse(req.Body, defaultTimestamp, encoding, true, prommetadata.IsEnabled(), func(rows []prometheus.Row, mms []prometheus.Metadata) error {
return stream.Parse(req.Body, defaultTimestamp, encoding, true, promscrape.IsMetadataEnabled(), func(rows []prometheus.Row, mms []prometheus.Metadata) error {
return insertRows(at, rows, mms, extraLabels)
}, func(s string) {
httpserver.LogError(req, s)

View File

@@ -6,8 +6,8 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prommetadata"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/promremotewrite/stream"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/protoparserutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
@@ -71,7 +71,7 @@ func insertRows(at *auth.Token, timeseries []prompb.TimeSeries, mms []prompb.Met
ctx.WriteRequest.Timeseries = tssDst
var metadataTotal int
if prommetadata.IsEnabled() {
if promscrape.IsMetadataEnabled() {
var accountID, projectID uint32
if at != nil {
accountID = at.AccountID

View File

@@ -13,18 +13,19 @@ import (
"sync/atomic"
"time"
"github.com/VictoriaMetrics/metrics"
"github.com/golang/snappy"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/awsapi"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding/zstd"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httputil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/persistentqueue"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/ratelimiter"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timerpool"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timeutil"
"github.com/VictoriaMetrics/metrics"
"github.com/golang/snappy"
)
var (
@@ -202,10 +203,14 @@ func (c *client) init(argIdx, concurrency int, sanitizedURL string) {
c.retriesCount = metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_retries_count_total{url=%q}`, c.sanitizedURL))
c.sendDuration = metrics.GetOrCreateFloatCounter(fmt.Sprintf(`vmagent_remotewrite_send_duration_seconds_total{url=%q}`, c.sanitizedURL))
metrics.GetOrCreateGauge(fmt.Sprintf(`vmagent_remotewrite_queues{url=%q}`, c.sanitizedURL), func() float64 {
return float64(concurrency)
return float64(*queues)
})
for range concurrency {
c.wg.Go(c.runWorker)
for i := 0; i < concurrency; i++ {
c.wg.Add(1)
go func() {
defer c.wg.Done()
c.runWorker()
}()
}
logger.Infof("initialized client for -remoteWrite.url=%q", c.sanitizedURL)
}
@@ -290,7 +295,7 @@ func getAWSAPIConfig(argIdx int) (*awsapi.Config, error) {
accessKey := awsAccessKey.GetOptionalArg(argIdx)
secretKey := awsSecretKey.GetOptionalArg(argIdx)
service := awsService.GetOptionalArg(argIdx)
cfg, err := awsapi.NewConfig(ec2Endpoint, stsEndpoint, region, roleARN, accessKey, secretKey, service, "")
cfg, err := awsapi.NewConfig(ec2Endpoint, stsEndpoint, region, roleARN, accessKey, secretKey, service)
if err != nil {
return nil, err
}
@@ -405,7 +410,8 @@ func (c *client) newRequest(url string, body []byte) (*http.Request, error) {
// Otherwise, it tries sending the block to remote storage indefinitely.
func (c *client) sendBlockHTTP(block []byte) bool {
c.rl.Register(len(block))
bt := timeutil.NewBackoffTimer(c.retryMinInterval, c.retryMaxInterval)
maxRetryDuration := timeutil.AddJitterToDuration(c.retryMaxInterval)
retryDuration := timeutil.AddJitterToDuration(c.retryMinInterval)
retriesCount := 0
again:
@@ -414,10 +420,19 @@ again:
c.requestDuration.UpdateDuration(startTime)
if err != nil {
c.errorsCount.Inc()
remoteWriteRetryLogger.Warnf("couldn't send a block with size %d bytes to %q: %s; re-sending the block in %s",
len(block), c.sanitizedURL, err, bt.CurrentDelay())
if !bt.Wait(c.stopCh) {
retryDuration *= 2
if retryDuration > maxRetryDuration {
retryDuration = maxRetryDuration
}
remoteWriteRetryLogger.Warnf("couldn't send a block with size %d bytes to %q: %s; re-sending the block in %.3f seconds",
len(block), c.sanitizedURL, err, retryDuration.Seconds())
t := timerpool.Get(retryDuration)
select {
case <-c.stopCh:
timerpool.Put(t)
return false
case <-t.C:
timerpool.Put(t)
}
c.retriesCount.Inc()
goto again
@@ -448,6 +463,12 @@ again:
// - Real-world implementations of v1 use both 400 and 415 status codes.
// See more in research: https://github.com/VictoriaMetrics/VictoriaMetrics/pull/8462#issuecomment-2786918054
case 415, 400:
if c.canDowngradeVMProto.Swap(false) {
logger.Infof("received unsupported media type or bad request from remote storage at %q. Downgrading protocol from VictoriaMetrics to Prometheus remote write for all future requests. "+
"See https://docs.victoriametrics.com/victoriametrics/vmagent/#victoriametrics-remote-write-protocol", c.sanitizedURL)
c.useVMProto.Store(false)
}
if encoding.IsZstd(block) {
logger.Infof("received unsupported media type or bad request from remote storage at %q. Re-packing the block to Prometheus remote write and retrying."+
"See https://docs.victoriametrics.com/victoriametrics/vmagent/#victoriametrics-remote-write-protocol", c.sanitizedURL)
@@ -483,10 +504,7 @@ again:
// Unexpected status code returned
retriesCount++
retryAfterHeader := parseRetryAfterHeader(resp.Header.Get("Retry-After"))
// retryAfterDuration has the highest priority duration
if retryAfterHeader > 0 {
bt.SetDelay(retryAfterHeader)
}
retryDuration = getRetryDuration(retryAfterHeader, retryDuration, maxRetryDuration)
// Handle response
body, err := io.ReadAll(resp.Body)
@@ -495,10 +513,15 @@ again:
logger.Errorf("cannot read response body from %q during retry #%d: %s", c.sanitizedURL, retriesCount, err)
} else {
logger.Errorf("unexpected status code received after sending a block with size %d bytes to %q during retry #%d: %d; response body=%q; "+
"re-sending the block in %s", len(block), c.sanitizedURL, retriesCount, statusCode, body, bt.CurrentDelay())
"re-sending the block in %.3f seconds", len(block), c.sanitizedURL, retriesCount, statusCode, body, retryDuration.Seconds())
}
if !bt.Wait(c.stopCh) {
t := timerpool.Get(retryDuration)
select {
case <-c.stopCh:
timerpool.Put(t)
return false
case <-t.C:
timerpool.Put(t)
}
c.retriesCount.Inc()
goto again
@@ -507,6 +530,27 @@ again:
var remoteWriteRejectedLogger = logger.WithThrottler("remoteWriteRejected", 5*time.Second)
var remoteWriteRetryLogger = logger.WithThrottler("remoteWriteRetry", 5*time.Second)
// getRetryDuration returns retry duration.
// retryAfterDuration has the highest priority.
// If retryAfterDuration is not specified, retryDuration gets doubled.
// retryDuration can't exceed maxRetryDuration.
//
// Also see: https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6097
func getRetryDuration(retryAfterDuration, retryDuration, maxRetryDuration time.Duration) time.Duration {
// retryAfterDuration has the highest priority duration
if retryAfterDuration > 0 {
return timeutil.AddJitterToDuration(retryAfterDuration)
}
// default backoff retry policy
retryDuration *= 2
if retryDuration > maxRetryDuration {
retryDuration = maxRetryDuration
}
return retryDuration
}
// repackBlockFromZstdToSnappy repacks the given zstd-compressed block to snappy-compressed block.
//
// The input block may be corrupted, for example, if vmagent was shut down ungracefully and
@@ -516,9 +560,9 @@ var remoteWriteRetryLogger = logger.WithThrottler("remoteWriteRetry", 5*time.Sec
// For more details, see: https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9417
func repackBlockFromZstdToSnappy(zstdBlock []byte) ([]byte, error) {
plainBlock := make([]byte, 0, len(zstdBlock)*2)
plainBlock, err := encoding.DecompressZSTD(plainBlock, zstdBlock)
plainBlock, err := zstd.Decompress(plainBlock, zstdBlock)
if err != nil {
return nil, err
return nil, fmt.Errorf("zstd: decompress: %s", err)
}
return snappy.Encode(nil, plainBlock), nil
@@ -537,20 +581,24 @@ func logBlockRejected(block []byte, sanitizedURL string, resp *http.Response) {
}
// parseRetryAfterHeader parses `Retry-After` value retrieved from HTTP response header.
//
// s should be in either HTTP-date or a number of seconds.
// It returns time.Duration(0) if s does not follow RFC 7231.
func parseRetryAfterHeader(s string) time.Duration {
if s == "" {
return 0
// retryAfterString should be in either HTTP-date or a number of seconds.
// It will return time.Duration(0) if `retryAfterString` does not follow RFC 7231.
func parseRetryAfterHeader(retryAfterString string) (retryAfterDuration time.Duration) {
if retryAfterString == "" {
return retryAfterDuration
}
defer func() {
v := retryAfterDuration.Seconds()
logger.Infof("'Retry-After: %s' parsed into %.2f second(s)", retryAfterString, v)
}()
// Retry-After could be in "Mon, 02 Jan 2006 15:04:05 GMT" format.
if parsedTime, err := time.Parse(http.TimeFormat, s); err == nil {
if parsedTime, err := time.Parse(http.TimeFormat, retryAfterString); err == nil {
return time.Duration(time.Until(parsedTime).Seconds()) * time.Second
}
// Retry-After could be in seconds.
if seconds, err := strconv.Atoi(s); err == nil {
if seconds, err := strconv.Atoi(retryAfterString); err == nil {
return time.Duration(seconds) * time.Second
}

View File

@@ -6,11 +6,66 @@ import (
"testing"
"time"
"github.com/golang/snappy"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
"github.com/golang/snappy"
)
func TestCalculateRetryDuration(t *testing.T) {
// `testFunc` call `calculateRetryDuration` for `n` times
// and evaluate if the result of `calculateRetryDuration` is
// 1. >= expectMinDuration
// 2. <= expectMinDuration + 10% (see timeutil.AddJitterToDuration)
f := func(retryAfterDuration, retryDuration time.Duration, n int, expectMinDuration time.Duration) {
t.Helper()
for i := 0; i < n; i++ {
retryDuration = getRetryDuration(retryAfterDuration, retryDuration, time.Minute)
}
expectMaxDuration := helper(expectMinDuration)
expectMinDuration = expectMinDuration - (1000 * time.Millisecond) // Avoid edge case when calculating time.Until(now)
if retryDuration < expectMinDuration || retryDuration > expectMaxDuration {
t.Fatalf(
"incorrect retry duration, want (ms): [%d, %d], got (ms): %d",
expectMinDuration.Milliseconds(), expectMaxDuration.Milliseconds(),
retryDuration.Milliseconds(),
)
}
}
// Call calculateRetryDuration for 1 time.
{
// default backoff policy
f(0, time.Second, 1, 2*time.Second)
// default backoff policy exceed max limit"
f(0, 10*time.Minute, 1, time.Minute)
// retry after > default backoff policy
f(10*time.Second, 1*time.Second, 1, 10*time.Second)
// retry after < default backoff policy
f(1*time.Second, 10*time.Second, 1, 1*time.Second)
// retry after invalid and < default backoff policy
f(0, time.Second, 1, 2*time.Second)
}
// Call calculateRetryDuration for multiple times.
{
// default backoff policy 2 times
f(0, time.Second, 2, 4*time.Second)
// default backoff policy 3 times
f(0, time.Second, 3, 8*time.Second)
// default backoff policy N times exceed max limit
f(0, time.Second, 10, time.Minute)
// retry after 120s 1 times
f(120*time.Second, time.Second, 1, 120*time.Second)
// retry after 120s 2 times
f(120*time.Second, time.Second, 2, 120*time.Second)
}
}
func TestParseRetryAfterHeader(t *testing.T) {
f := func(retryAfterString string, expectResult time.Duration) {
t.Helper()
@@ -36,6 +91,16 @@ func TestParseRetryAfterHeader(t *testing.T) {
f(time.Now().Add(10*time.Second).Format("Mon, 02 Jan 2006 15:04:05 FAKETZ"), 0)
}
// helper calculate the max possible time duration calculated by timeutil.AddJitterToDuration.
func helper(d time.Duration) time.Duration {
dv := d / 10
if dv > 10*time.Second {
dv = 10 * time.Second
}
return d + dv
}
func TestRepackBlockFromZstdToSnappy(t *testing.T) {
expectedPlainBlock := []byte(`foobar`)

View File

@@ -48,7 +48,11 @@ func newPendingSeries(fq *persistentqueue.FastQueue, isVMRemoteWrite *atomic.Boo
ps.wr.significantFigures = significantFigures
ps.wr.roundDigits = roundDigits
ps.stopCh = make(chan struct{})
ps.periodicFlusherWG.Go(ps.periodicFlusher)
ps.periodicFlusherWG.Add(1)
go func() {
defer ps.periodicFlusherWG.Done()
ps.periodicFlusher()
}()
return &ps
}

View File

@@ -51,9 +51,9 @@ func testPushWriteRequest(t *testing.T, rowsCount, expectedBlockLenProm, expecte
func newTestWriteRequest(seriesCount, labelsCount int) *prompb.WriteRequest {
var wr prompb.WriteRequest
for i := range seriesCount {
for i := 0; i < seriesCount; i++ {
var labels []prompb.Label
for j := range labelsCount {
for j := 0; j < labelsCount; j++ {
labels = append(labels, prompb.Label{
Name: fmt.Sprintf("label_%d_%d", i, j),
Value: fmt.Sprintf("value_%d_%d", i, j),

View File

@@ -3,24 +3,22 @@ package remotewrite
import (
"flag"
"fmt"
"io"
"strconv"
"strings"
"sync"
"sync/atomic"
"github.com/VictoriaMetrics/metrics"
"gopkg.in/yaml.v2"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
"github.com/VictoriaMetrics/metrics"
)
var (
unparsedLabelsGlobal = flagutil.NewArrayString("remoteWrite.label", "Optional label in the form 'name=value' to add to all the metrics before sending them to all -remoteWrite.url.")
unparsedLabelsGlobal = flagutil.NewArrayString("remoteWrite.label", "Optional label in the form 'name=value' to add to all the metrics before sending them to -remoteWrite.url. "+
"Pass multiple -remoteWrite.label flags in order to add multiple labels to metrics before sending them to remote storage")
relabelConfigPathGlobal = flag.String("remoteWrite.relabelConfig", "", "Optional path to file with relabeling configs, which are applied "+
"to all the metrics before sending them to -remoteWrite.url. See also -remoteWrite.urlRelabelConfig. "+
"The path can point either to local file or to http url. "+
@@ -34,12 +32,9 @@ var (
"See https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels")
)
var labelsGlobal []prompb.Label
var (
labelsGlobal []prompb.Label
remoteWriteRelabelConfigData atomic.Pointer[[]byte]
remoteWriteURLRelabelConfigData atomic.Pointer[[]any]
relabelConfigReloads *metrics.Counter
relabelConfigReloadErrors *metrics.Counter
relabelConfigSuccess *metrics.Gauge
@@ -72,42 +67,6 @@ func initRelabelConfigs() {
}
}
// WriteRelabelConfigData writes -remoteWrite.relabelConfig contents to w
func WriteRelabelConfigData(w io.Writer) {
p := remoteWriteRelabelConfigData.Load()
if p == nil {
// Nothing to write to w
return
}
_, _ = w.Write(*p)
}
// WriteURLRelabelConfigData writes -remoteWrite.urlRelabelConfig contents to w
func WriteURLRelabelConfigData(w io.Writer) {
p := remoteWriteURLRelabelConfigData.Load()
if p == nil {
// Nothing to write to w
return
}
type urlRelabelCfg struct {
Url string `yaml:"url"`
RelabelConfig any `yaml:"relabel_config"`
}
var cs []urlRelabelCfg
for i, url := range *remoteWriteURLs {
cfgData := (*p)[i]
if !*showRemoteWriteURL {
url = fmt.Sprintf("%d:secret-url", i+1)
}
cs = append(cs, urlRelabelCfg{
Url: url,
RelabelConfig: cfgData,
})
}
d, _ := yaml.Marshal(cs)
_, _ = w.Write(d)
}
func reloadRelabelConfigs() {
rcs := allRelabelConfigs.Load()
if !rcs.isSet() {
@@ -131,43 +90,28 @@ func reloadRelabelConfigs() {
func loadRelabelConfigs() (*relabelConfigs, error) {
var rcs relabelConfigs
if *relabelConfigPathGlobal != "" {
global, rawCfg, err := promrelabel.LoadRelabelConfigs(*relabelConfigPathGlobal)
global, err := promrelabel.LoadRelabelConfigs(*relabelConfigPathGlobal)
if err != nil {
return nil, fmt.Errorf("cannot load -remoteWrite.relabelConfig=%q: %w", *relabelConfigPathGlobal, err)
}
remoteWriteRelabelConfigData.Store(&rawCfg)
rcs.global = global
}
if len(*relabelConfigPaths) > len(*remoteWriteURLs) {
return nil, fmt.Errorf("too many -remoteWrite.urlRelabelConfig args: %d; it mustn't exceed the number of -remoteWrite.url args: %d",
len(*relabelConfigPaths), (len(*remoteWriteURLs)))
}
var urlRelabelCfgs []any
rcs.perURL = make([]*promrelabel.ParsedConfigs, len(*remoteWriteURLs))
for i, path := range *relabelConfigPaths {
if len(path) == 0 {
urlRelabelCfgs = append(urlRelabelCfgs, nil)
// Skip empty relabel config.
continue
}
prc, rawCfg, err := promrelabel.LoadRelabelConfigs(path)
prc, err := promrelabel.LoadRelabelConfigs(path)
if err != nil {
return nil, fmt.Errorf("cannot load relabel configs from -remoteWrite.urlRelabelConfig=%q: %w", path, err)
}
rcs.perURL[i] = prc
var parsedCfg any
_ = yaml.Unmarshal(rawCfg, &parsedCfg)
urlRelabelCfgs = append(urlRelabelCfgs, parsedCfg)
}
if len(*remoteWriteURLs) > len(*relabelConfigPaths) {
// fill the urlRelabelCfgs with empty relabel configs if not set
for i := len(*relabelConfigPaths); i < len(*remoteWriteURLs); i++ {
urlRelabelCfgs = append(urlRelabelCfgs, nil)
}
}
remoteWriteURLRelabelConfigData.Store(&urlRelabelCfgs)
return &rcs, nil
}
@@ -176,9 +120,19 @@ type relabelConfigs struct {
perURL []*promrelabel.ParsedConfigs
}
// isSet indicates whether (global or per-URL) command-line flags is set
func (rcs *relabelConfigs) isSet() bool {
return *relabelConfigPathGlobal != "" || len(*relabelConfigPaths) > 0
if rcs == nil {
return false
}
if rcs.global.Len() > 0 {
return true
}
for _, pc := range rcs.perURL {
if pc.Len() > 0 {
return true
}
}
return false
}
// initLabelsGlobal must be called after parsing command-line flags.

View File

@@ -3,7 +3,6 @@ package remotewrite
import (
"flag"
"fmt"
"math"
"net/http"
"net/url"
"path/filepath"
@@ -12,10 +11,6 @@ import (
"sync/atomic"
"time"
"github.com/cespare/xxhash/v2"
"github.com/VictoriaMetrics/metrics"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bloomfilter"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
@@ -28,14 +23,14 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/memory"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/persistentqueue"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prommetadata"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/ratelimiter"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/slicesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/streamaggr"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timeserieslimits"
"github.com/VictoriaMetrics/metrics"
"github.com/cespare/xxhash/v2"
)
var (
@@ -63,7 +58,7 @@ var (
"See also -remoteWrite.maxDiskUsagePerURL and -remoteWrite.disableOnDiskQueue")
keepDanglingQueues = flag.Bool("remoteWrite.keepDanglingQueues", false, "Keep persistent queues contents at -remoteWrite.tmpDataPath in case there are no matching -remoteWrite.url. "+
"Useful when -remoteWrite.url is changed temporarily and persistent queue files will be needed later on.")
queues = flagutil.NewArrayInt("remoteWrite.queues", cgroup.AvailableCPUs()*2, "The number of concurrent queues to each -remoteWrite.url. Set more queues if default number of queues "+
queues = flag.Int("remoteWrite.queues", cgroup.AvailableCPUs()*2, "The number of concurrent queues to each -remoteWrite.url. Set more queues if default number of queues "+
"isn't enough for sending high volume of collected data to remote storage. "+
"Default value depends on the number of available CPU cores. It should work fine in most cases since it minimizes resource usage")
showRemoteWriteURL = flag.Bool("remoteWrite.showURL", false, "Whether to show -remoteWrite.url in the exported metrics. "+
@@ -84,14 +79,10 @@ var (
`This may be needed for reducing memory usage at remote storage when the order of labels in incoming samples is random. `+
`For example, if m{k1="v1",k2="v2"} may be sent as m{k2="v2",k1="v1"}`+
`Enabled sorting for labels can slow down ingestion performance a bit`)
maxHourlySeries = flag.Int64("remoteWrite.maxHourlySeries", 0, "The maximum number of unique series vmagent can send to remote storage systems during the last hour. "+
"Excess series are logged and dropped. This can be useful for limiting series cardinality. "+
fmt.Sprintf("Setting this flag to '-1' sets limit to maximum possible value (%d) which is useful in order to enable series tracking without enforcing limits. ", math.MaxInt32)+
"See https://docs.victoriametrics.com/victoriametrics/vmagent/#cardinality-limiter")
maxDailySeries = flag.Int64("remoteWrite.maxDailySeries", 0, "The maximum number of unique series vmagent can send to remote storage systems during the last 24 hours. "+
"Excess series are logged and dropped. This can be useful for limiting series churn rate. "+
fmt.Sprintf("Setting this flag to '-1' sets limit to maximum possible value (%d) which is useful in order to enable series tracking without enforcing limits. ", math.MaxInt32)+
"See https://docs.victoriametrics.com/victoriametrics/vmagent/#cardinality-limiter")
maxHourlySeries = flag.Int("remoteWrite.maxHourlySeries", 0, "The maximum number of unique series vmagent can send to remote storage systems during the last hour. "+
"Excess series are logged and dropped. This can be useful for limiting series cardinality. See https://docs.victoriametrics.com/victoriametrics/vmagent/#cardinality-limiter")
maxDailySeries = flag.Int("remoteWrite.maxDailySeries", 0, "The maximum number of unique series vmagent can send to remote storage systems during the last 24 hours. "+
"Excess series are logged and dropped. This can be useful for limiting series churn rate. See https://docs.victoriametrics.com/victoriametrics/vmagent/#cardinality-limiter")
maxIngestionRate = flag.Int("maxIngestionRate", 0, "The maximum number of samples vmagent can receive per second. Data ingestion is paused when the limit is exceeded. "+
"By default there are no limits on samples ingestion rate. See also -remoteWrite.rateLimit")
@@ -100,8 +91,6 @@ var (
"See https://docs.victoriametrics.com/victoriametrics/vmagent/#disabling-on-disk-persistence . See also -remoteWrite.dropSamplesOnOverload")
dropSamplesOnOverload = flag.Bool("remoteWrite.dropSamplesOnOverload", false, "Whether to drop samples when -remoteWrite.disableOnDiskQueue is set and if the samples "+
"cannot be pushed into the configured -remoteWrite.url systems in a timely manner. See https://docs.victoriametrics.com/victoriametrics/vmagent/#disabling-on-disk-persistence")
disableMetadataPerURL = flagutil.NewArrayBool("remoteWrite.disableMetadata", "Whether to disable sending metadata to the corresponding -remoteWrite.url. "+
"By default, metadata sending is controlled by the global -enableMetadata flag")
)
var (
@@ -167,8 +156,8 @@ func Init() {
if len(*remoteWriteURLs) == 0 {
logger.Fatalf("at least one `-remoteWrite.url` command-line flag must be set")
}
if limit := getMaxHourlySeries(); limit > 0 {
hourlySeriesLimiter = bloomfilter.NewLimiter(limit, time.Hour)
if *maxHourlySeries > 0 {
hourlySeriesLimiter = bloomfilter.NewLimiter(*maxHourlySeries, time.Hour)
_ = metrics.NewGauge(`vmagent_hourly_series_limit_max_series`, func() float64 {
return float64(hourlySeriesLimiter.MaxItems())
})
@@ -176,8 +165,8 @@ func Init() {
return float64(hourlySeriesLimiter.CurrentItems())
})
}
if limit := getMaxDailySeries(); limit > 0 {
dailySeriesLimiter = bloomfilter.NewLimiter(limit, 24*time.Hour)
if *maxDailySeries > 0 {
dailySeriesLimiter = bloomfilter.NewLimiter(*maxDailySeries, 24*time.Hour)
_ = metrics.NewGauge(`vmagent_daily_series_limit_max_series`, func() float64 {
return float64(dailySeriesLimiter.MaxItems())
})
@@ -186,6 +175,13 @@ func Init() {
})
}
if *queues > maxQueues {
*queues = maxQueues
}
if *queues <= 0 {
*queues = 1
}
if len(*shardByURLLabels) > 0 && len(*shardByURLIgnoreLabels) > 0 {
logger.Fatalf("-remoteWrite.shardByURL.labels and -remoteWrite.shardByURL.ignoreLabels cannot be set simultaneously; " +
"see https://docs.victoriametrics.com/victoriametrics/vmagent/#sharding-among-remote-storages")
@@ -218,7 +214,9 @@ func Init() {
dropDanglingQueues()
// Start config reloader.
configReloaderWG.Go(func() {
configReloaderWG.Add(1)
go func() {
defer configReloaderWG.Done()
for {
select {
case <-configReloaderStopCh:
@@ -228,7 +226,7 @@ func Init() {
reloadRelabelConfigs()
reloadStreamAggrConfigs()
}
})
}()
}
func dropDanglingQueues() {
@@ -268,6 +266,17 @@ func initRemoteWriteCtxs(urls []string) {
if len(urls) == 0 {
logger.Panicf("BUG: urls must be non-empty")
}
maxInmemoryBlocks := memory.Allowed() / len(urls) / *maxRowsPerBlock / 100
if maxInmemoryBlocks / *queues > 100 {
// There is no much sense in keeping higher number of blocks in memory,
// since this means that the producer outperforms consumer and the queue
// will continue growing. It is better storing the queue to file.
maxInmemoryBlocks = 100 * *queues
}
if maxInmemoryBlocks < 2 {
maxInmemoryBlocks = 2
}
rwctxs := make([]*remoteWriteCtx, len(urls))
rwctxIdx := make([]int, len(urls))
if retryMaxTime.String() != "" {
@@ -282,7 +291,7 @@ func initRemoteWriteCtxs(urls []string) {
if *showRemoteWriteURL {
sanitizedURL = fmt.Sprintf("%d:%s", i+1, remoteWriteURL)
}
rwctxs[i] = newRemoteWriteCtx(i, remoteWriteURL, sanitizedURL)
rwctxs[i] = newRemoteWriteCtx(i, remoteWriteURL, maxInmemoryBlocks, sanitizedURL)
rwctxIdx[i] = i
}
@@ -476,9 +485,6 @@ func tryPush(at *auth.Token, wr *prompb.WriteRequest, forceDropSamplesOnFailure
matchIdxs.B = sas.Push(tssBlock, matchIdxs.B)
if !*streamAggrGlobalKeepInput {
tssBlock = dropAggregatedSeries(tssBlock, matchIdxs.B, *streamAggrGlobalDropInput)
} else if *streamAggrGlobalDropInput {
// if both keep_input and drop_input are true, we keep only the aggregated series
tssBlock = dropUnaggregatedSeries(tssBlock, matchIdxs.B)
}
matchIdxsPool.Put(matchIdxs)
}
@@ -548,13 +554,11 @@ func tryPushMetadataToRemoteStorages(rwctxs []*remoteWriteCtx, mms []prompb.Metr
// Push metadata to remote storage systems in parallel to reduce
// the time needed for sending the data to multiple remote storage systems.
var wg sync.WaitGroup
wg.Add(len(rwctxs))
var anyPushFailed atomic.Bool
for _, rwctx := range rwctxs {
if !rwctx.enableMetadata {
// Skip remote storage with disabled metadata
continue
}
wg.Go(func() {
go func(rwctx *remoteWriteCtx) {
defer wg.Done()
if !rwctx.tryPushMetadataInternal(mms) {
rwctx.pushFailures.Inc()
if forceDropSamplesOnFailure {
@@ -563,7 +567,7 @@ func tryPushMetadataToRemoteStorages(rwctxs []*remoteWriteCtx, mms []prompb.Metr
}
anyPushFailed.Store(true)
}
})
}(rwctx)
}
wg.Wait()
return !anyPushFailed.Load()
@@ -595,13 +599,15 @@ func tryPushTimeSeriesToRemoteStorages(rwctxs []*remoteWriteCtx, tssBlock []prom
// Push tssBlock to remote storage systems in parallel to reduce
// the time needed for sending the data to multiple remote storage systems.
var wg sync.WaitGroup
wg.Add(len(rwctxs))
var anyPushFailed atomic.Bool
for _, rwctx := range rwctxs {
wg.Go(func() {
go func(rwctx *remoteWriteCtx) {
defer wg.Done()
if !rwctx.TryPushTimeSeries(tssBlock, forceDropSamplesOnFailure) {
anyPushFailed.Store(true)
}
})
}(rwctx)
}
wg.Wait()
return !anyPushFailed.Load()
@@ -623,11 +629,13 @@ func tryShardingTimeSeriesAmongRemoteStorages(rwctxs []*remoteWriteCtx, tssBlock
if len(shard) == 0 {
continue
}
wg.Go(func() {
if !rwctx.TryPushTimeSeries(shard, forceDropSamplesOnFailure) {
wg.Add(1)
go func(rwctx *remoteWriteCtx, tss []prompb.TimeSeries) {
defer wg.Done()
if !rwctx.TryPushTimeSeries(tss, forceDropSamplesOnFailure) {
anyPushFailed.Store(true)
}
})
}(rwctx, shard)
}
wg.Wait()
return !anyPushFailed.Load()
@@ -825,11 +833,6 @@ type remoteWriteCtx struct {
streamAggrKeepInput bool
streamAggrDropInput bool
// enableMetadata indicates whether metadata should be sent to this remote storage.
// It is determined by -remoteWrite.enableMetadata per-URL flag if set,
// otherwise by the global -enableMetadata flag.
enableMetadata bool
pss []*pendingSeries
pssNextIdx atomic.Uint64
@@ -841,19 +844,7 @@ type remoteWriteCtx struct {
rowsDroppedOnPushFailure *metrics.Counter
}
// isMetadataEnabledForURL returns true if metadata should be sent to the remote storage at argIdx.
// It checks the per-URL -remoteWrite.disableMetadata flag first.
// If not set, it falls back to the global -enableMetadata flag.
func isMetadataEnabledForURL(argIdx int) bool {
if disableMetadataPerURL.GetOptionalArg(argIdx) {
// Metadata is explicitly disabled for this URL
return false
}
// Use global -enableMetadata value
return prommetadata.IsEnabled()
}
func newRemoteWriteCtx(argIdx int, remoteWriteURL *url.URL, sanitizedURL string) *remoteWriteCtx {
func newRemoteWriteCtx(argIdx int, remoteWriteURL *url.URL, maxInmemoryBlocks int, sanitizedURL string) *remoteWriteCtx {
// strip query params, otherwise changing params resets pq
pqURL := *remoteWriteURL
pqURL.RawQuery = ""
@@ -868,23 +859,6 @@ func newRemoteWriteCtx(argIdx int, remoteWriteURL *url.URL, sanitizedURL string)
}
isPQDisabled := disableOnDiskQueue.GetOptionalArg(argIdx)
queuesSize := queues.GetOptionalArg(argIdx)
if queuesSize > maxQueues {
queuesSize = maxQueues
} else if queuesSize <= 0 {
queuesSize = 1
}
maxInmemoryBlocks := memory.Allowed() / len(*remoteWriteURLs) / *maxRowsPerBlock / 100
if maxInmemoryBlocks/queuesSize > 100 {
// There is no much sense in keeping higher number of blocks in memory,
// since this means that the producer outperforms consumer and the queue
// will continue growing. It is better storing the queue to file.
maxInmemoryBlocks = 100 * queuesSize
}
if maxInmemoryBlocks < 2 {
maxInmemoryBlocks = 2
}
fq := persistentqueue.MustOpenFastQueue(queuePath, sanitizedURL, maxInmemoryBlocks, maxPendingBytes, isPQDisabled)
_ = metrics.GetOrCreateGauge(fmt.Sprintf(`vmagent_remotewrite_pending_data_bytes{path=%q, url=%q}`, queuePath, sanitizedURL), func() float64 {
return float64(fq.GetPendingBytes())
@@ -902,16 +876,16 @@ func newRemoteWriteCtx(argIdx int, remoteWriteURL *url.URL, sanitizedURL string)
var c *client
switch remoteWriteURL.Scheme {
case "http", "https":
c = newHTTPClient(argIdx, remoteWriteURL.String(), sanitizedURL, fq, queuesSize)
c = newHTTPClient(argIdx, remoteWriteURL.String(), sanitizedURL, fq, *queues)
default:
logger.Fatalf("unsupported scheme: %s for remoteWriteURL: %s, want `http`, `https`", remoteWriteURL.Scheme, sanitizedURL)
}
c.init(argIdx, queuesSize, sanitizedURL)
c.init(argIdx, *queues, sanitizedURL)
// Initialize pss
sf := significantFigures.GetOptionalArg(argIdx)
rd := roundDigits.GetOptionalArg(argIdx)
pssLen := queuesSize
pssLen := *queues
if n := cgroup.AvailableCPUs(); pssLen > n {
// There is no sense in running more than availableCPUs concurrent pendingSeries,
// since every pendingSeries can saturate up to a single CPU.
@@ -923,11 +897,10 @@ func newRemoteWriteCtx(argIdx int, remoteWriteURL *url.URL, sanitizedURL string)
}
rwctx := &remoteWriteCtx{
idx: argIdx,
fq: fq,
c: c,
pss: pss,
enableMetadata: isMetadataEnabledForURL(argIdx),
idx: argIdx,
fq: fq,
c: c,
pss: pss,
rowsPushedAfterRelabel: metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_rows_pushed_after_relabel_total{path=%q,url=%q}`, queuePath, sanitizedURL)),
rowsDroppedByRelabel: metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_relabel_metrics_dropped_total{path=%q,url=%q}`, queuePath, sanitizedURL)),
@@ -1015,17 +988,7 @@ func (rwctx *remoteWriteCtx) TryPushTimeSeries(tss []prompb.TimeSeries, forceDro
tss = append(*v, tss...)
}
tss = dropAggregatedSeries(tss, matchIdxs.B, rwctx.streamAggrDropInput)
} else if rwctx.streamAggrDropInput {
// if both keep_input and drop_input are true, we keep only the aggregated series
if rctx == nil {
rctx = getRelabelCtx()
// Make a copy of tss before dropping aggregated series
v = tssPool.Get().(*[]prompb.TimeSeries)
tss = append(*v, tss...)
}
tss = dropUnaggregatedSeries(tss, matchIdxs.B)
}
matchIdxsPool.Put(matchIdxs)
}
if rwctx.deduplicator != nil {
@@ -1048,10 +1011,9 @@ func (rwctx *remoteWriteCtx) TryPushTimeSeries(tss []prompb.TimeSeries, forceDro
return false
}
var matchIdxsPool slicesutil.BufferPool[uint32]
var matchIdxsPool bytesutil.ByteBufferPool
// dropAggregatedSeries drops matched series, also the unmatched if dropInput is true.
func dropAggregatedSeries(src []prompb.TimeSeries, matchIdxs []uint32, dropInput bool) []prompb.TimeSeries {
func dropAggregatedSeries(src []prompb.TimeSeries, matchIdxs []byte, dropInput bool) []prompb.TimeSeries {
dst := src[:0]
if !dropInput {
for i, match := range matchIdxs {
@@ -1066,20 +1028,6 @@ func dropAggregatedSeries(src []prompb.TimeSeries, matchIdxs []uint32, dropInput
return dst
}
// dropUnaggregatedSeries drops unmatched series.
func dropUnaggregatedSeries(src []prompb.TimeSeries, matchIdxs []uint32) []prompb.TimeSeries {
dst := src[:0]
for i, match := range matchIdxs {
if match == 0 {
continue
}
dst = append(dst, src[i])
}
tail := src[len(dst):]
clear(tail)
return dst
}
func (rwctx *remoteWriteCtx) pushInternalTrackDropped(tss []prompb.TimeSeries) {
if rwctx.tryPushTimeSeriesInternal(tss) {
return
@@ -1112,7 +1060,7 @@ func (rwctx *remoteWriteCtx) tryPushTimeSeriesInternal(tss []prompb.TimeSeries)
}()
if len(labelsGlobal) > 0 {
// Make a copy of tss before adding extra labels to prevent
// Make a copy of tss before adding extra labels in order to prevent
// from affecting time series for other remoteWrite.url configs.
rctx = getRelabelCtx()
v = tssPool.Get().(*[]prompb.TimeSeries)
@@ -1148,21 +1096,3 @@ func newMapFromStrings(a []string) map[string]struct{} {
}
return m
}
func getMaxHourlySeries() int {
limit := *maxHourlySeries
if limit == -1 || limit > math.MaxInt32 {
return math.MaxInt32
}
return int(limit)
}
func getMaxDailySeries() int {
limit := *maxDailySeries
if limit == -1 || limit > math.MaxInt32 {
return math.MaxInt32
}
return int(limit)
}

View File

@@ -10,8 +10,6 @@ import (
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/consistenthash"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/persistentqueue"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/prometheus"
@@ -28,12 +26,12 @@ func TestGetLabelsHash_Distribution(t *testing.T) {
itemsCount := 1_000 * bucketsCount
m := make([]int, bucketsCount)
var labels []prompb.Label
for i := range itemsCount {
for i := 0; i < itemsCount; i++ {
labels = append(labels[:0], prompb.Label{
Name: "__name__",
Value: fmt.Sprintf("some_name_%d", i),
})
for j := range 10 {
for j := 0; j < 10; j++ {
labels = append(labels, prompb.Label{
Name: fmt.Sprintf("label_%d", j),
Value: fmt.Sprintf("value_%d_%d", i, j),
@@ -59,8 +57,8 @@ func TestGetLabelsHash_Distribution(t *testing.T) {
f(10)
}
func TestRemoteWriteContext_TryPushTimeSeries(t *testing.T) {
f := func(streamAggrConfig, relabelConfig string, enableWindows bool, dedupInterval time.Duration, keepInput, dropInput bool, input string, expectedRowsPushedAfterRelabel, expectedPushedSample int) {
func TestRemoteWriteContext_TryPush_ImmutableTimeseries(t *testing.T) {
f := func(streamAggrConfig, relabelConfig string, enableWindows bool, dedupInterval time.Duration, keepInput, dropInput bool, input string) {
t.Helper()
perURLRelabel, err := promrelabel.ParseRelabelConfigsData([]byte(relabelConfig))
if err != nil {
@@ -73,16 +71,10 @@ func TestRemoteWriteContext_TryPushTimeSeries(t *testing.T) {
}
allRelabelConfigs.Store(rcs)
path := "fast-queue-write-test"
fs.MustRemoveDir(path)
fq := persistentqueue.MustOpenFastQueue(path, "test", 100, 0, false)
defer fs.MustRemoveDir(path)
defer fq.MustClose()
pss := make([]*pendingSeries, 1)
isVMProto := &atomic.Bool{}
isVMProto.Store(true)
pss[0] = newPendingSeries(fq, isVMProto, 0, 100)
pss[0] = newPendingSeries(nil, isVMProto, 0, 100)
rwctx := &remoteWriteCtx{
idx: 0,
streamAggrKeepInput: keepInput,
@@ -91,8 +83,6 @@ func TestRemoteWriteContext_TryPushTimeSeries(t *testing.T) {
rowsPushedAfterRelabel: metrics.GetOrCreateCounter(`foo`),
rowsDroppedByRelabel: metrics.GetOrCreateCounter(`bar`),
}
defer metrics.UnregisterAllMetrics()
if dedupInterval > 0 {
rwctx.deduplicator = streamaggr.NewDeduplicator(nil, enableWindows, dedupInterval, nil, "dedup-global")
}
@@ -114,27 +104,23 @@ func TestRemoteWriteContext_TryPushTimeSeries(t *testing.T) {
inputTss := prometheus.MustParsePromMetrics(input, offsetMsecs)
expectedTss := make([]prompb.TimeSeries, len(inputTss))
// check inputTss is not modified after TryPushTimeSeries
// copy inputTss to make sure it is not mutated during TryPush call
copy(expectedTss, inputTss)
if !rwctx.TryPushTimeSeries(inputTss, false) {
t.Fatalf("cannot push samples to rwctx")
}
if int(rwctx.rowsPushedAfterRelabel.Get()) != expectedRowsPushedAfterRelabel {
t.Fatalf("unexpected number of rows after relabel; got %d; want %d", rwctx.rowsPushedAfterRelabel.Get(), expectedRowsPushedAfterRelabel)
}
if len(pss[0].wr.tss) != expectedPushedSample {
t.Fatalf("unexpected number of pushed samples; got %d; want %d", len(pss[0].wr.tss), expectedPushedSample)
}
if !reflect.DeepEqual(expectedTss, inputTss) {
t.Fatalf("unexpected samples;\ngot\n%v\nwant\n%v", inputTss, expectedTss)
}
}
// relabeling
f(``, `
f(`
- interval: 1m
outputs: [sum_samples]
- interval: 2m
outputs: [count_series]
`, `
- action: keep
source_labels: [env]
regex: "dev"
@@ -143,66 +129,53 @@ metric{env="dev"} 10
metric{env="bar"} 20
metric{env="dev"} 15
metric{env="bar"} 25
`, 2, 2)
// relabeling + aggregation
f(`
- match: '{env="dev"}'
interval: 1m
outputs: [sum_samples]
`, `
- action: keep
source_labels: [env]
regex: ".*"
`, false, 0, false, false, `
metric{env="dev"} 10
metric{env="bar"} 20
metric{env="dev"} 15
metric{env="bar"} 25
`, 4, 2)
// aggregation + keepInput
f(`
- match: '{env="dev"}'
interval: 1m
outputs: [sum_samples]
`, ``, false, 0, true, false, `
metric{env="dev"} 10
metric{env="bar"} 20
metric{env="dev"} 15
metric{env="bar"} 25
`, 4, 4)
// aggregation + dropInput
f(`
- match: '{env="dev"}'
interval: 1m
outputs: [sum_samples]
`, ``, false, 0, false, true, `
metric{env="dev"} 10
metric{env="bar"} 20
metric{env="dev"} 15
metric{env="bar"} 25
`, 4, 0)
// aggregation + keepInput + dropInput
f(`
- match: '{env="dev"}'
interval: 1m
outputs: [sum_samples]
`, ``, false, 0, true, true, `
metric{env="dev"} 10
metric{env="bar"} 20
metric{env="bar"} 25
`, 3, 1)
// aggregation + deduplication
`)
f(``, ``, true, time.Hour, false, false, `
metric{env="dev"} 10
metric{env="foo"} 20
metric{env="dev"} 15
metric{env="foo"} 25
`, 4, 0)
`)
f(``, `
- action: keep
source_labels: [env]
regex: "dev"
`, true, time.Hour, false, false, `
metric{env="dev"} 10
metric{env="bar"} 20
metric{env="dev"} 15
metric{env="bar"} 25
`)
f(``, `
- action: keep
source_labels: [env]
regex: "dev"
`, true, time.Hour, true, false, `
metric{env="test"} 10
metric{env="dev"} 20
metric{env="foo"} 15
metric{env="dev"} 25
`)
f(``, `
- action: keep
source_labels: [env]
regex: "dev"
`, true, time.Hour, false, true, `
metric{env="foo"} 10
metric{env="dev"} 20
metric{env="foo"} 15
metric{env="dev"} 25
`)
f(``, `
- action: keep
source_labels: [env]
regex: "dev"
`, true, time.Hour, true, true, `
metric{env="dev"} 10
metric{env="test"} 20
metric{env="dev"} 15
metric{env="bar"} 25
`)
}
func TestShardAmountRemoteWriteCtx(t *testing.T) {
@@ -248,7 +221,7 @@ func TestShardAmountRemoteWriteCtx(t *testing.T) {
seriesCount := 100000
// build 1000000 series
tssBlock := make([]prompb.TimeSeries, 0, seriesCount)
for i := range seriesCount {
for i := 0; i < seriesCount; i++ {
tssBlock = append(tssBlock, prompb.TimeSeries{
Labels: []prompb.Label{
{
@@ -269,7 +242,7 @@ func TestShardAmountRemoteWriteCtx(t *testing.T) {
// build active time series set
nodes := make([]string, 0, remoteWriteCount)
activeTimeSeriesByNodes := make([]map[string]struct{}, remoteWriteCount)
for i := range remoteWriteCount {
for i := 0; i < remoteWriteCount; i++ {
nodes = append(nodes, fmt.Sprintf("node%d", i))
activeTimeSeriesByNodes[i] = make(map[string]struct{})
}

View File

@@ -18,12 +18,12 @@ var (
streamAggrGlobalConfig = flag.String("streamAggr.config", "", "Optional path to file with stream aggregation config. "+
"See https://docs.victoriametrics.com/victoriametrics/stream-aggregation/ . "+
"See also -streamAggr.keepInput, -streamAggr.dropInput and -streamAggr.dedupInterval")
streamAggrGlobalKeepInput = flag.Bool("streamAggr.keepInput", false, "Whether to keep input samples that match any rule in "+
"-streamAggr.config. By default, matched raw samples are aggregated and dropped, while unmatched samples "+
"are written to the remote storage. See also -streamAggr.dropInput and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/")
streamAggrGlobalDropInput = flag.Bool("streamAggr.dropInput", false, "Whether to drop input samples that not matching any rule in "+
"-streamAggr.config. By default, only matched raw samples are dropped, while unmatched samples "+
"are written to the remote storage. See also -streamAggr.keepInput and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/")
streamAggrGlobalKeepInput = flag.Bool("streamAggr.keepInput", false, "Whether to keep all the input samples after the aggregation "+
"with -streamAggr.config. By default, only aggregates samples are dropped, while the remaining samples "+
"are written to remote storages write. See also -streamAggr.dropInput and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/")
streamAggrGlobalDropInput = flag.Bool("streamAggr.dropInput", false, "Whether to drop all the input samples after the aggregation "+
"with -remoteWrite.streamAggr.config. By default, only aggregates samples are dropped, while the remaining samples "+
"are written to remote storages write. See also -streamAggr.keepInput and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/")
streamAggrGlobalDedupInterval = flag.Duration("streamAggr.dedupInterval", 0, "Input samples are de-duplicated with this interval on "+
"aggregator before optional aggregation with -streamAggr.config . "+
"See also -dedup.minScrapeInterval and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/#deduplication")
@@ -43,11 +43,11 @@ var (
streamAggrConfig = flagutil.NewArrayString("remoteWrite.streamAggr.config", "Optional path to file with stream aggregation config for the corresponding -remoteWrite.url. "+
"See https://docs.victoriametrics.com/victoriametrics/stream-aggregation/ . "+
"See also -remoteWrite.streamAggr.keepInput, -remoteWrite.streamAggr.dropInput and -remoteWrite.streamAggr.dedupInterval")
streamAggrDropInput = flagutil.NewArrayBool("remoteWrite.streamAggr.dropInput", "Whether to drop input samples that not matching any rule in "+
"the corresponding -remoteWrite.streamAggr.config. By default, only matched raw samples are dropped, while unmatched samples "+
streamAggrDropInput = flagutil.NewArrayBool("remoteWrite.streamAggr.dropInput", "Whether to drop all the input samples after the aggregation "+
"with -remoteWrite.streamAggr.config at the corresponding -remoteWrite.url. By default, only aggregates samples are dropped, while the remaining samples "+
"are written to the corresponding -remoteWrite.url . See also -remoteWrite.streamAggr.keepInput and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/")
streamAggrKeepInput = flagutil.NewArrayBool("remoteWrite.streamAggr.keepInput", "Whether to keep input samples that match any rule in "+
"the corresponding -remoteWrite.streamAggr.config. By default, matched raw samples are aggregated and dropped, while unmatched samples "+
streamAggrKeepInput = flagutil.NewArrayBool("remoteWrite.streamAggr.keepInput", "Whether to keep all the input samples after the aggregation "+
"with -remoteWrite.streamAggr.config at the corresponding -remoteWrite.url. By default, only aggregates samples are dropped, while the remaining samples "+
"are written to the corresponding -remoteWrite.url . See also -remoteWrite.streamAggr.dropInput and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/")
streamAggrDedupInterval = flagutil.NewArrayDuration("remoteWrite.streamAggr.dedupInterval", 0, "Input samples are de-duplicated with this interval before optional aggregation "+
"with -remoteWrite.streamAggr.config at the corresponding -remoteWrite.url. See also -dedup.minScrapeInterval and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/#deduplication")

View File

@@ -1,80 +0,0 @@
package zabbixconnector
import (
"net/http"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/protoparserutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/zabbixconnector"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/zabbixconnector/stream"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
"github.com/VictoriaMetrics/metrics"
)
var (
rowsInserted = metrics.NewCounter(`vmagent_rows_inserted_total{type="zabbixconnector"}`)
rowsTenantInserted = tenantmetrics.NewCounterMap(`vmagent_tenant_inserted_rows_total{type="zabbixconnector"}`)
rowsPerInsert = metrics.NewHistogram(`vmagent_rows_per_insert{type="zabbixconnector"}`)
)
// InsertHandlerForHTTP processes remote write for ZabbixConnector POST /zabbixconnector/v1/history request.
func InsertHandlerForHTTP(at *auth.Token, req *http.Request) error {
extraLabels, err := protoparserutil.GetExtraLabels(req)
if err != nil {
return err
}
encoding := req.Header.Get("Content-Encoding")
return stream.Parse(req.Body, encoding, func(rows []zabbixconnector.Row) error {
return insertRows(at, rows, extraLabels)
})
}
func insertRows(at *auth.Token, rows []zabbixconnector.Row, extraLabels []prompb.Label) error {
ctx := common.GetPushCtx()
defer common.PutPushCtx(ctx)
rowsTotal := len(rows)
tssDst := ctx.WriteRequest.Timeseries[:0]
labels := ctx.Labels[:0]
samples := ctx.Samples[:0]
for i := range rows {
r := &rows[i]
labelsLen := len(labels)
for j := range r.Tags {
tag := &r.Tags[j]
labels = append(labels, prompb.Label{
Name: bytesutil.ToUnsafeString(tag.Key),
Value: bytesutil.ToUnsafeString(tag.Value),
})
}
labels = append(labels, extraLabels...)
samplesLen := len(samples)
samples = append(samples, prompb.Sample{
Value: r.Value,
Timestamp: r.Timestamp,
})
tssDst = append(tssDst, prompb.TimeSeries{
Labels: labels[labelsLen:],
Samples: samples[samplesLen:],
})
}
ctx.WriteRequest.Timeseries = tssDst
ctx.Labels = labels
ctx.Samples = samples
if !remotewrite.TryPush(at, &ctx.WriteRequest) {
return remotewrite.ErrQueueFullHTTPRetry
}
rowsInserted.Add(rowsTotal)
if at != nil {
rowsTenantInserted.Get(at).Add(rowsTotal)
}
rowsPerInsert.Update(float64(rowsTotal))
return nil
}

View File

@@ -27,9 +27,6 @@ vmalert-tool-linux-ppc64le-prod:
vmalert-tool-linux-386-prod:
APP_NAME=vmalert-tool $(MAKE) app-via-docker-linux-386
vmalert-tool-linux-s390x-prod:
APP_NAME=vmalert-tool $(MAKE) app-via-docker-linux-s390x
vmalert-tool-darwin-amd64-prod:
APP_NAME=vmalert-tool $(MAKE) app-via-docker-darwin-amd64

View File

@@ -41,7 +41,7 @@ func TestParseInputValue_Success(t *testing.T) {
if len(outputExpected) != len(output) {
t.Fatalf("unexpected output length; got %d; want %d", len(outputExpected), len(output))
}
for i := range outputExpected {
for i := 0; i < len(outputExpected); i++ {
if outputExpected[i].Omitted != output[i].Omitted {
t.Fatalf("unexpected Omitted field in the output\ngot\n%v\nwant\n%v", output, outputExpected)
}

View File

@@ -4,7 +4,6 @@ import (
"context"
"flag"
"fmt"
"maps"
"net"
"net/http"
"net/http/httptest"
@@ -13,7 +12,6 @@ import (
"os/signal"
"path/filepath"
"reflect"
"slices"
"sort"
"strings"
"syscall"
@@ -36,7 +34,6 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutil"
"github.com/VictoriaMetrics/metrics"
)
@@ -87,8 +84,7 @@ func UnitTest(files []string, disableGroupLabel bool, externalLabels []string, e
defer server.Close()
} else {
httpListenAddr = httpListenPort
ln, err := net.Listen(netutil.GetTCPNetwork(), fmt.Sprintf(":%s", httpListenPort))
ln, err := net.Listen("tcp", fmt.Sprintf(":%s", httpListenPort))
if err != nil {
logger.Fatalf("cannot listen on port %s: %v", httpListenPort, err)
}
@@ -134,7 +130,7 @@ func UnitTest(files []string, disableGroupLabel bool, externalLabels []string, e
}
labels[s[:n]] = s[n+1:]
}
err = notifier.Init(labels, externalURL)
_, err = notifier.Init(nil, labels, externalURL)
if err != nil {
logger.Fatalf("failed to init notifier: %v", err)
}
@@ -350,7 +346,9 @@ func (tg *testGroup) test(evalInterval time.Duration, groupOrderMap map[string]i
for k := range alertEvalTimesMap {
alertEvalTimes = append(alertEvalTimes, k)
}
slices.Sort(alertEvalTimes)
sort.Slice(alertEvalTimes, func(i, j int) bool {
return alertEvalTimes[i] < alertEvalTimes[j]
})
// sort group eval order according to the given "group_eval_order".
sort.Slice(testGroups, func(i, j int) bool {
@@ -361,8 +359,12 @@ func (tg *testGroup) test(evalInterval time.Duration, groupOrderMap map[string]i
var groups []*rule.Group
for _, group := range testGroups {
mergedExternalLabels := make(map[string]string)
maps.Copy(mergedExternalLabels, tg.ExternalLabels)
maps.Copy(mergedExternalLabels, externalLabels)
for k, v := range tg.ExternalLabels {
mergedExternalLabels[k] = v
}
for k, v := range externalLabels {
mergedExternalLabels[k] = v
}
ng := rule.NewGroup(group, q, time.Minute, mergedExternalLabels)
ng.Init()
groups = append(groups, ng)
@@ -375,7 +377,7 @@ func (tg *testGroup) test(evalInterval time.Duration, groupOrderMap map[string]i
if len(g.Rules) == 0 {
continue
}
errs := g.ExecOnce(context.Background(), rw, ts)
errs := g.ExecOnce(context.Background(), func() []notifier.Notifier { return nil }, rw, ts)
for err := range errs {
if err != nil {
checkErrs = append(checkErrs, fmt.Errorf("\nfailed to exec group: %q, time: %s, err: %w", g.Name,

View File

@@ -27,9 +27,6 @@ vmalert-linux-ppc64le-prod:
vmalert-linux-386-prod:
APP_NAME=vmalert $(MAKE) app-via-docker-linux-386
vmalert-linux-s390x-prod:
APP_NAME=vmalert $(MAKE) app-via-docker-linux-s390x
vmalert-darwin-amd64-prod:
APP_NAME=vmalert $(MAKE) app-via-docker-darwin-amd64

View File

@@ -31,7 +31,7 @@ type Group struct {
// EvalDelay will adjust the `time` parameter of rule evaluation requests to compensate intentional query delay from datasource.
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5155
EvalDelay *promutil.Duration `yaml:"eval_delay,omitempty"`
Limit *int `yaml:"limit,omitempty"`
Limit int `yaml:"limit,omitempty"`
Rules []Rule `yaml:"rules"`
Concurrency int `yaml:"concurrency"`
// Labels is a set of label value pairs, that will be added to every rule.
@@ -81,15 +81,18 @@ func (g *Group) Validate(validateTplFn ValidateTplFn, validateExpressions bool)
if g.Interval.Duration() < 0 {
return fmt.Errorf("interval shouldn't be lower than 0")
}
// if `eval_offset` is set, the group interval must be specified explicitly(instead of inherited from global evaluationInterval flag) and must bigger than offset.
if g.EvalOffset.Duration().Abs() > g.Interval.Duration() {
return fmt.Errorf("the abs value of eval_offset should be smaller than interval; now eval_offset: %v, interval: %v", g.EvalOffset.Duration(), g.Interval.Duration())
if g.EvalOffset.Duration() < 0 {
return fmt.Errorf("eval_offset shouldn't be lower than 0")
}
// if `eval_offset` is set, interval won't use global evaluationInterval flag and must bigger than offset.
if g.EvalOffset.Duration() > g.Interval.Duration() {
return fmt.Errorf("eval_offset should be smaller than interval; now eval_offset: %v, interval: %v", g.EvalOffset.Duration(), g.Interval.Duration())
}
if g.EvalOffset != nil && g.EvalDelay != nil {
return fmt.Errorf("eval_offset cannot be used with eval_delay")
}
if g.Limit != nil && *g.Limit < 0 {
return fmt.Errorf("invalid limit %d, shouldn't be less than 0", *g.Limit)
if g.Limit < 0 {
return fmt.Errorf("invalid limit %d, shouldn't be less than 0", g.Limit)
}
if g.Concurrency < 0 {
return fmt.Errorf("invalid concurrency %d, shouldn't be less than 0", g.Concurrency)

View File

@@ -116,7 +116,7 @@ func TestParse_Failure(t *testing.T) {
f([]string{"testdata/rules/rules_interval_bad.rules"}, "eval_offset should be smaller than interval")
f([]string{"testdata/rules/rules0-bad.rules"}, "unexpected token")
f([]string{"testdata/dir/rules0-bad.rules"}, "invalid annotations")
f([]string{"testdata/dir/rules0-bad.rules"}, "error parsing annotation")
f([]string{"testdata/dir/rules1-bad.rules"}, "duplicate in file")
f([]string{"testdata/dir/rules2-bad.rules"}, "function \"unknown\" not defined")
f([]string{"testdata/dir/rules3-bad.rules"}, "either `record` or `alert` must be set")
@@ -176,21 +176,14 @@ func TestGroupValidate_Failure(t *testing.T) {
}, false, "interval shouldn't be lower than 0")
f(&Group{
Name: "too big eval_offset",
Name: "wrong eval_offset",
Interval: promutil.NewDuration(time.Minute),
EvalOffset: promutil.NewDuration(2 * time.Minute),
}, false, "eval_offset should be smaller than interval")
f(&Group{
Name: "too big negative eval_offset",
Interval: promutil.NewDuration(time.Minute),
EvalOffset: promutil.NewDuration(-2 * time.Minute),
}, false, "eval_offset should be smaller than interval")
limit := -1
f(&Group{
Name: "wrong limit",
Limit: &limit,
Limit: -1,
}, false, "invalid limit")
f(&Group{
@@ -349,6 +342,7 @@ func TestGroupValidate_Failure(t *testing.T) {
},
},
}, true, "bad prometheus expr")
}
func TestGroupValidate_Success(t *testing.T) {

View File

@@ -2,7 +2,6 @@ package config
import (
"fmt"
"slices"
"strings"
"github.com/VictoriaMetrics/VictoriaLogs/lib/logstorage"
@@ -77,12 +76,13 @@ func (t *Type) ValidateExpr(expr string) error {
if err != nil {
return fmt.Errorf("bad LogsQL expr: %q, err: %w", expr, err)
}
labels, err := q.GetStatsLabels()
if err != nil {
return fmt.Errorf("cannot obtain labels from LogsQL expr: %q, err: %w", expr, err)
}
if slices.Contains(labels, "_time") {
return fmt.Errorf("bad LogsQL expr: %q, err: cannot contain time buckets stats pipe `stats by (_time:step)`", expr)
fields, _ := q.GetStatsByFields()
for i := range fields {
// VictoriaLogs inserts `_time` field as a label in result when query with `stats by (_time:step)`,
// making the result meaningless and may lead to cardinality issues.
if fields[i] == "_time" {
return fmt.Errorf("bad LogsQL expr: %q, err: cannot contain time buckets stats pipe `stats by (_time:step)`", expr)
}
}
default:
return fmt.Errorf("unknown datasource type=%q", t.Name)

View File

@@ -5,7 +5,6 @@ import (
"errors"
"fmt"
"io"
"maps"
"net/http"
"net/url"
"strings"
@@ -92,7 +91,9 @@ func (c *Client) Clone() *Client {
ns.extraHeaders = make([]keyValue, len(c.extraHeaders))
copy(ns.extraHeaders, c.extraHeaders)
}
maps.Copy(ns.extraParams, c.extraParams)
for k, v := range c.extraParams {
ns.extraParams[k] = v
}
return ns
}
@@ -172,26 +173,22 @@ func (c *Client) Query(ctx context.Context, query string, ts time.Time) (Result,
return Result{}, nil, fmt.Errorf("second attempt: %w", err)
}
}
defer func() { _ = resp.Body.Close() }()
// Process the received response.
var parseFn func(resp *http.Response) (Result, error)
var parseFn func(req *http.Request, resp *http.Response) (Result, error)
switch c.dataSourceType {
case datasourcePrometheus:
parseFn = parsePrometheusInstantResponse
parseFn = parsePrometheusResponse
case datasourceGraphite:
parseFn = parseGraphiteResponse
case datasourceVLogs:
parseFn = parseVLogsInstantResponse
parseFn = parseVLogsResponse
default:
logger.Panicf("BUG: unsupported datasource type %q to parse query response", c.dataSourceType)
}
result, err := parseFn(resp)
if err != nil {
return Result{}, nil, fmt.Errorf("error parsing response from %q: %w", req.URL.Redacted(), err)
}
return result, req, nil
result, err := parseFn(req, resp)
_ = resp.Body.Close()
return result, req, err
}
// QueryRange executes the given query on the given time range.
@@ -232,23 +229,19 @@ func (c *Client) QueryRange(ctx context.Context, query string, start, end time.T
return res, fmt.Errorf("second attempt: %w", err)
}
}
defer func() { _ = resp.Body.Close() }()
// Process the received response.
var parseFn func(resp *http.Response) (Result, error)
var parseFn func(req *http.Request, resp *http.Response) (Result, error)
switch c.dataSourceType {
case datasourcePrometheus:
parseFn = parsePrometheusRangeResponse
parseFn = parsePrometheusResponse
case datasourceVLogs:
parseFn = parseVLogsRangeResponse
parseFn = parseVLogsResponse
default:
logger.Panicf("BUG: unsupported datasource type %q to parse query range response", c.dataSourceType)
}
res, err = parseFn(resp)
if err != nil {
return Result{}, fmt.Errorf("error parsing response from %q: %w", req.URL.Redacted(), err)
}
res, err = parseFn(req, resp)
_ = resp.Body.Close()
return res, err
}

View File

@@ -33,10 +33,10 @@ func (r graphiteResponse) metrics() []Metric {
return ms
}
func parseGraphiteResponse(resp *http.Response) (Result, error) {
func parseGraphiteResponse(req *http.Request, resp *http.Response) (Result, error) {
r := &graphiteResponse{}
if err := json.NewDecoder(resp.Body).Decode(r); err != nil {
return Result{}, fmt.Errorf("error parsing graphite metrics: %w", err)
return Result{}, fmt.Errorf("error parsing graphite metrics for %s: %w", req.URL.Redacted(), err)
}
return Result{Data: r.metrics()}, nil
}

View File

@@ -34,7 +34,7 @@ type promResponse struct {
// Stats supported by VictoriaMetrics since v1.90
Stats struct {
SeriesFetched *string `json:"seriesFetched,omitempty"`
} `json:"stats"`
} `json:"stats,omitempty"`
// IsPartial supported by VictoriaMetrics
IsPartial *bool `json:"isPartial,omitempty"`
}
@@ -172,26 +172,17 @@ const (
rtVector, rtMatrix, rScalar = "vector", "matrix", "scalar"
)
func parsePromResponse(resp *http.Response) (*promResponse, error) {
func parsePrometheusResponse(req *http.Request, resp *http.Response) (res Result, err error) {
r := &promResponse{}
if err := json.NewDecoder(resp.Body).Decode(r); err != nil {
return nil, fmt.Errorf("failed to decode response: %w", err)
if err = json.NewDecoder(resp.Body).Decode(r); err != nil {
return res, fmt.Errorf("error parsing response from %s: %w", req.URL.Redacted(), err)
}
if r.Status == statusError {
return nil, fmt.Errorf("response error %q: %s", r.ErrorType, r.Error)
return res, fmt.Errorf("response error, query: %s, errorType: %s, error: %s", req.URL.Redacted(), r.ErrorType, r.Error)
}
if r.Status != statusSuccess {
return nil, fmt.Errorf("unknown response status %q", r.Status)
return res, fmt.Errorf("unknown status: %s, Expected success or error", r.Status)
}
return r, nil
}
func parsePrometheusInstantResponse(resp *http.Response) (res Result, err error) {
r, err := parsePromResponse(resp)
if err != nil {
return res, fmt.Errorf("failed to parse response: %w", err)
}
var parseFn func() ([]Metric, error)
switch r.Data.ResultType {
case rtVector:
@@ -200,6 +191,12 @@ func parsePrometheusInstantResponse(resp *http.Response) (res Result, err error)
return res, fmt.Errorf("unmarshal err %w; \n %#v", err, string(r.Data.Result))
}
parseFn = pi.metrics
case rtMatrix:
var pr promRange
if err := json.Unmarshal(r.Data.Result, &pr.Result); err != nil {
return res, err
}
parseFn = pr.metrics
case rScalar:
var ps promScalar
if err := json.Unmarshal(r.Data.Result, &ps); err != nil {
@@ -209,6 +206,7 @@ func parsePrometheusInstantResponse(resp *http.Response) (res Result, err error)
default:
return res, fmt.Errorf("unknown result type %q", r.Data.ResultType)
}
ms, err := parseFn()
if err != nil {
return res, err
@@ -224,34 +222,6 @@ func parsePrometheusInstantResponse(resp *http.Response) (res Result, err error)
return res, nil
}
func parsePrometheusRangeResponse(resp *http.Response) (res Result, err error) {
r, err := parsePromResponse(resp)
if err != nil {
return res, fmt.Errorf("failed to parse response: %w", err)
}
if r.Data.ResultType != rtMatrix {
return res, fmt.Errorf("unexpected result type %q; expected result type %q", r.Data.ResultType, rtMatrix)
}
var pr promRange
if err := json.Unmarshal(r.Data.Result, &pr.Result); err != nil {
return res, err
}
ms, err := pr.metrics()
if err != nil {
return res, err
}
res = Result{Data: ms, IsPartial: r.IsPartial}
if r.Stats.SeriesFetched != nil {
intV, err := strconv.Atoi(*r.Stats.SeriesFetched)
if err != nil {
return res, fmt.Errorf("failed to convert stats.seriesFetched to int: %w", err)
}
res.SeriesFetched = &intV
}
return res, nil
}
func (c *Client) setPrometheusInstantReqParams(r *http.Request, query string, timestamp time.Time) {
if c.appendTypePrefix {
r.URL.Path += "/prometheus"

View File

@@ -65,23 +65,21 @@ func TestVMInstantQuery(t *testing.T) {
case 3:
w.Write([]byte(`{"status":"unknown"}`))
case 4:
w.Write([]byte(`{"status":"success","data":{"resultType":"vector"}}`))
w.Write([]byte(`{"status":"success","data":{"resultType":"matrix"}}`))
case 5:
w.Write([]byte(`{"status":"success","data":{"resultType":"matrix","result":[{"metric":{"__name__":"vm_rows"},"values":[[1583786142,"13763"]]}]}}`))
case 6:
w.Write([]byte(`{"status":"success","data":{"resultType":"vector","result":[{"metric":{"__name__":"vm_rows","foo":"bar"},"value":[1583786142,"13763"]},{"metric":{"__name__":"vm_requests","foo":"baz"},"value":[1583786140,"2000"]}]}}`))
case 7:
case 6:
w.Write([]byte(`{"status":"success","data":{"resultType":"scalar","result":[1583786142, "1"]}}`))
case 8:
case 7:
w.Write([]byte(`{"status":"success","data":{"resultType":"scalar","result":[1583786142, "1"]},"stats":{"seriesFetched": "42"}}`))
case 9:
case 8:
w.Write([]byte(`{"status":"success", "isPartial":true, "data":{"resultType":"scalar","result":[1583786142, "1"]}}`))
}
})
mux.HandleFunc("/render", func(w http.ResponseWriter, _ *http.Request) {
c++
switch c {
case 10:
case 9:
w.Write([]byte(`[{"target":"constantLine(10)","tags":{"name":"constantLine(10)"},"datapoints":[[10,1611758343],[10,1611758373],[10,1611758403]]}]`))
}
})
@@ -104,9 +102,9 @@ func TestVMInstantQuery(t *testing.T) {
t.Fatalf("failed to parse 'time' query param %q: %s", timeParam, err)
}
switch c {
case 11:
case 10:
w.Write([]byte("[]"))
case 12:
case 11:
w.Write([]byte(`{"status":"success","data":{"resultType":"vector","result":[{"metric":{"__name__":"total","foo":"bar"},"value":[1583786142,"13763"]},{"metric":{"__name__":"total","foo":"baz"},"value":[1583786140,"2000"]}]}}`))
}
})
@@ -125,7 +123,6 @@ func TestVMInstantQuery(t *testing.T) {
ts := time.Now()
expErr := func(query, err string) {
t.Helper()
_, _, gotErr := pq.Query(ctx, query, ts)
if gotErr == nil {
t.Fatalf("expected %q got nil", err)
@@ -138,11 +135,10 @@ func TestVMInstantQuery(t *testing.T) {
expErr(vmQuery, "500") // 0
expErr(vmQuery, "error parsing response") // 1
expErr(vmQuery, "response error") // 2
expErr(vmQuery, "unknown response status") // 3
expErr(vmQuery, "unknown status") // 3
expErr(vmQuery, "unexpected end of JSON input") // 4
expErr(vmQuery, "unknown result type") // 5
res, _, err := pq.Query(ctx, vmQuery, ts) // 6 - vector
res, _, err := pq.Query(ctx, vmQuery, ts) // 5 - vector
if err != nil {
t.Fatalf("unexpected %s", err)
}
@@ -163,7 +159,7 @@ func TestVMInstantQuery(t *testing.T) {
}
metricsEqual(t, res.Data, expected)
res, req, err := pq.Query(ctx, vmQuery, ts) // 7 - scalar
res, req, err := pq.Query(ctx, vmQuery, ts) // 6 - scalar
if err != nil {
t.Fatalf("unexpected %s", err)
}
@@ -188,7 +184,7 @@ func TestVMInstantQuery(t *testing.T) {
res.SeriesFetched)
}
res, _, err = pq.Query(ctx, vmQuery, ts) // 8 - scalar with stats
res, _, err = pq.Query(ctx, vmQuery, ts) // 7 - scalar with stats
if err != nil {
t.Fatalf("unexpected %s", err)
}
@@ -209,7 +205,7 @@ func TestVMInstantQuery(t *testing.T) {
*res.SeriesFetched)
}
res, _, err = pq.Query(ctx, vmQuery, ts) // 9
res, _, err = pq.Query(ctx, vmQuery, ts) // 8
if err != nil {
t.Fatalf("unexpected %s", err)
}
@@ -220,7 +216,7 @@ func TestVMInstantQuery(t *testing.T) {
// test graphite
gq := s.BuildWithParams(QuerierParams{DataSourceType: string(datasourceGraphite)})
res, _, err = gq.Query(ctx, queryRender, ts) // 10 - graphite
res, _, err = gq.Query(ctx, queryRender, ts) // 9 - graphite
if err != nil {
t.Fatalf("unexpected %s", err)
}
@@ -240,9 +236,9 @@ func TestVMInstantQuery(t *testing.T) {
vlogs := datasourceVLogs
pq = s.BuildWithParams(QuerierParams{DataSourceType: string(vlogs), EvaluationInterval: 15 * time.Second})
expErr(vlogsQuery, "error parsing response") // 11
expErr(vlogsQuery, "error parsing response") // 10
res, _, err = pq.Query(ctx, vlogsQuery, ts) // 12
res, _, err = pq.Query(ctx, vlogsQuery, ts) // 11
if err != nil {
t.Fatalf("unexpected %s", err)
}
@@ -394,8 +390,6 @@ func TestVMRangeQuery(t *testing.T) {
switch c {
case 0:
w.Write([]byte(`{"status":"success","data":{"resultType":"matrix","result":[{"metric":{"__name__":"vm_rows"},"values":[[1583786142,"13763"]]}]}}`))
case 1:
w.Write([]byte(`{"status":"success","data":{"resultType":"vector","result":[1583786142, "1"]}}`))
}
})
mux.HandleFunc("/select/logsql/stats_query_range", func(w http.ResponseWriter, r *http.Request) {
@@ -428,7 +422,7 @@ func TestVMRangeQuery(t *testing.T) {
t.Fatalf("expected 'step' query param to be 60s; got %q instead", step)
}
switch c {
case 2:
case 1:
w.Write([]byte(`{"status":"success","data":{"resultType":"matrix","result":[{"metric":{"__name__":"total"},"values":[[1583786142,"10"]]}]}}`))
}
})
@@ -452,13 +446,13 @@ func TestVMRangeQuery(t *testing.T) {
start, end := time.Now().Add(-time.Minute), time.Now()
res, err := pq.QueryRange(ctx, vmQuery, start, end) // case 0
res, err := pq.QueryRange(ctx, vmQuery, start, end)
if err != nil {
t.Fatalf("unexpected %s", err)
}
m := res.Data
if len(m) != 1 {
t.Fatalf("expected 1 metric got %d in %+v", len(m), m)
t.Fatalf("expected 1 metric got %d in %+v", len(m), m)
}
expected := Metric{
Labels: []prompb.Label{{Value: "vm_rows", Name: "__name__"}},
@@ -469,9 +463,6 @@ func TestVMRangeQuery(t *testing.T) {
t.Fatalf("unexpected metric %+v want %+v", m[0], expected)
}
_, err = pq.QueryRange(ctx, vmQuery, start, end) // case 1
expectError(t, err, "unexpected result type")
// test unsupported graphite
gq := s.BuildWithParams(QuerierParams{DataSourceType: string(datasourceGraphite)})

View File

@@ -40,28 +40,8 @@ func (c *Client) setVLogsRangeReqParams(r *http.Request, query string, start, en
c.setReqParams(r, query)
}
func parseVLogsInstantResponse(resp *http.Response) (res Result, err error) {
res, err = parsePrometheusInstantResponse(resp)
if err != nil {
return Result{}, err
}
for i := range res.Data {
m := &res.Data[i]
for j := range m.Labels {
// reserve the stats func result name with a new label `stats_result` instead of dropping it,
// since there could be multiple stats results in a single query, for instance:
// _time:5m | stats quantile(0.5, request_duration_seconds) p50, quantile(0.9, request_duration_seconds) p90
if m.Labels[j].Name == "__name__" {
m.Labels[j].Name = "stats_result"
break
}
}
}
return
}
func parseVLogsRangeResponse(resp *http.Response) (res Result, err error) {
res, err = parsePrometheusRangeResponse(resp)
func parseVLogsResponse(req *http.Request, resp *http.Response) (res Result, err error) {
res, err = parsePrometheusResponse(req, resp)
if err != nil {
return Result{}, err
}

View File

@@ -132,9 +132,12 @@ func (ls Labels) String() string {
// a=[]Label{{Name: "a", Value: "2"}},b=[]Label{{Name: "a", Value: "1"}}, return 1
// a=[]Label{{Name: "a", Value: "1"}},b=[]Label{{Name: "a", Value: "1"}}, return 0
func LabelCompare(a, b Labels) int {
l := min(len(b), len(a))
l := len(a)
if len(b) < l {
l = len(b)
}
for i := range l {
for i := 0; i < l; i++ {
if a[i].Name != b[i].Name {
if a[i].Name < b[i].Name {
return -1

View File

@@ -13,7 +13,7 @@ func BenchmarkPromInstantUnmarshal(b *testing.B) {
// BenchmarkParsePrometheusResponse/Instant_std+fastjson-10 1760 668959 ns/op 280147 B/op 5781 allocs/op
b.Run("Instant std+fastjson", func(b *testing.B) {
for range b.N {
for i := 0; i < b.N; i++ {
var pi promInstant
err = pi.Unmarshal(data)
if err != nil {

View File

@@ -7,6 +7,7 @@ import (
"net/url"
"os"
"sort"
"strconv"
"strings"
"sync"
"time"
@@ -56,7 +57,7 @@ absolute path to all .tpl files in root.
-rule.templates="dir/**/*.tpl". Includes all the .tpl files in "dir" subfolders recursively.
`)
configCheckInterval = flag.Duration("configCheckInterval", 0, "Interval for checking for changes in '-rule', '-rule.templates' and '-notifier.config' files. "+
configCheckInterval = flag.Duration("configCheckInterval", 0, "Interval for checking for changes in '-rule' or '-notifier.config' files. "+
"By default, the checking is disabled. Send SIGHUP signal in order to force config check for changes.")
httpListenAddrs = flagutil.NewArrayString("httpListenAddr", "Address to listen for incoming http requests. See also -tls and -httpListenAddr.useProxyProtocol")
@@ -76,12 +77,15 @@ absolute path to all .tpl files in root.
`Link to VMUI: -external.alert.source='vmui/#/?g0.expr={{.Expr|queryEscape}}'. `+
`If empty 'vmalert/alert?group_id={{.GroupID}}&alert_id={{.AlertID}}' is used.`)
externalLabels = flagutil.NewArrayString("external.label", "Optional label in the form 'Name=value' to add to all generated recording rules and alerts. "+
"In case of conflicts, original labels are kept with prefix 'exported_'.")
"In case of conflicts, original labels are kept with prefix `exported_`.")
dryRun = flag.Bool("dryRun", false, "Whether to check only config files without running vmalert. The rules file are validated. The -rule flag must be specified.")
)
var extURL *url.URL
var (
alertURLGeneratorFn notifier.AlertURLGenerator
extURL *url.URL
)
func main() {
// Write flags and help message to stdout, since it is easier to grep or pipe.
@@ -117,7 +121,7 @@ func main() {
return
}
err = notifier.InitAlertURLGeneratorFn(extURL, *externalAlertSource, *validateTemplates)
alertURLGeneratorFn, err = getAlertURLGenerator(extURL, *externalAlertSource, *validateTemplates)
if err != nil {
logger.Fatalf("failed to init `external.alert.source`: %s", err)
}
@@ -159,7 +163,7 @@ func main() {
ctx, cancel := context.WithCancel(context.Background())
manager, err := newManager(ctx)
if err != nil {
logger.Fatalf("failed to create manager: %s", err)
logger.Fatalf("failed to init: %s", err)
}
logger.Infof("reading rules configuration file from %q", strings.Join(*rulePath, ";"))
groupsCfg, err := config.Parse(*rulePath, validateTplFn, *validateExpressions)
@@ -224,13 +228,14 @@ func newManager(ctx context.Context) (*manager, error) {
labels[s[:n]] = s[n+1:]
}
err = notifier.Init(labels, *externalURL)
nts, err := notifier.Init(alertURLGeneratorFn, labels, *externalURL)
if err != nil {
return nil, fmt.Errorf("failed to init notifier: %w", err)
}
manager := &manager{
groups: make(map[uint64]*rule.Group),
querierBuilder: q,
notifiers: nts,
labels: labels,
}
rw, err := remotewrite.Init(ctx)
@@ -287,6 +292,35 @@ func getHostnameAsExternalURL(addr string, isSecure bool) (*url.URL, error) {
return url.Parse(fmt.Sprintf("%s%s%s", schema, hname, port))
}
func getAlertURLGenerator(externalURL *url.URL, externalAlertSource string, validateTemplate bool) (notifier.AlertURLGenerator, error) {
if externalAlertSource == "" {
return func(a notifier.Alert) string {
gID, aID := strconv.FormatUint(a.GroupID, 10), strconv.FormatUint(a.ID, 10)
return fmt.Sprintf("%s/vmalert/alert?%s=%s&%s=%s", externalURL, paramGroupID, gID, paramAlertID, aID)
}, nil
}
if validateTemplate {
if err := notifier.ValidateTemplates(map[string]string{
"tpl": externalAlertSource,
}); err != nil {
return nil, fmt.Errorf("error validating source template %s: %w", externalAlertSource, err)
}
}
m := map[string]string{
"tpl": externalAlertSource,
}
return func(alert notifier.Alert) string {
qFn := func(_ string) ([]datasource.Metric, error) {
return nil, fmt.Errorf("`query` template isn't supported for alert source template")
}
templated, err := alert.ExecTemplate(qFn, alert.Labels, m)
if err != nil {
logger.Errorf("cannot template alert source: %s", err)
}
return fmt.Sprintf("%s/%s", externalURL, templated["tpl"])
}, nil
}
func usage() {
const s = `
vmalert processes alerts and recording rules.

View File

@@ -49,6 +49,30 @@ func TestGetExternalURL(t *testing.T) {
}
}
func TestGetAlertURLGenerator(t *testing.T) {
testAlert := notifier.Alert{GroupID: 42, ID: 2, Value: 4, Labels: map[string]string{"tenant": "baz"}}
u, _ := url.Parse("https://victoriametrics.com/path")
fn, err := getAlertURLGenerator(u, "", false)
if err != nil {
t.Fatalf("unexpected error %s", err)
}
exp := fmt.Sprintf("https://victoriametrics.com/path/vmalert/alert?%s=42&%s=2", paramGroupID, paramAlertID)
if exp != fn(testAlert) {
t.Fatalf("unexpected url want %s, got %s", exp, fn(testAlert))
}
_, err = getAlertURLGenerator(nil, "foo?{{invalid}}", true)
if err == nil {
t.Fatalf("expected template validation error got nil")
}
fn, err = getAlertURLGenerator(u, "foo?query={{$value}}&ds={{ $labels.tenant }}", true)
if err != nil {
t.Fatalf("unexpected error %s", err)
}
if exp := "https://victoriametrics.com/path/foo?query=4&ds=baz"; exp != fn(testAlert) {
t.Fatalf("unexpected url want %s, got %s", exp, fn(testAlert))
}
}
func TestConfigReload(t *testing.T) {
originalRulePath := *rulePath
originalExternalURL := extURL
@@ -96,10 +120,9 @@ groups:
querierBuilder: &datasource.FakeQuerier{},
groups: make(map[uint64]*rule.Group),
labels: map[string]string{},
notifiers: func() []notifier.Notifier { return []notifier.Notifier{&notifier.FakeNotifier{}} },
rw: &remotewrite.Client{},
}
_, cleanup := notifier.InitFakeNotifier()
defer cleanup()
syncCh := make(chan struct{})
sighupCh := procutil.NewSighupChan()

View File

@@ -3,7 +3,6 @@ package main
import (
"context"
"fmt"
"strconv"
"sync"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config"
@@ -17,6 +16,7 @@ import (
// manager controls group states
type manager struct {
querierBuilder datasource.QuerierBuilder
notifiers func() []notifier.Notifier
rw remotewrite.RWClient
// remote read builder.
@@ -29,8 +29,25 @@ type manager struct {
groups map[uint64]*rule.Group
}
// groupAPI generates apiGroup object from group by its ID(hash)
func (m *manager) groupAPI(gID uint64) (*rule.ApiGroup, error) {
// ruleAPI generates apiRule object from alert by its ID(hash)
func (m *manager) ruleAPI(gID, rID uint64) (apiRule, error) {
m.groupsMu.RLock()
defer m.groupsMu.RUnlock()
g, ok := m.groups[gID]
if !ok {
return apiRule{}, fmt.Errorf("can't find group with id %d", gID)
}
for _, rule := range g.Rules {
if rule.ID() == rID {
return ruleToAPI(rule), nil
}
}
return apiRule{}, fmt.Errorf("can't find rule with id %d in group %q", rID, g.Name)
}
// alertAPI generates apiAlert object from alert by its ID(hash)
func (m *manager) alertAPI(gID, aID uint64) (*apiAlert, error) {
m.groupsMu.RLock()
defer m.groupsMu.RUnlock()
@@ -38,47 +55,13 @@ func (m *manager) groupAPI(gID uint64) (*rule.ApiGroup, error) {
if !ok {
return nil, fmt.Errorf("can't find group with id %d", gID)
}
return g.ToAPI(), nil
}
// ruleAPI generates apiRule object from alert by its ID(hash)
func (m *manager) ruleAPI(gID, rID uint64) (rule.ApiRule, error) {
m.groupsMu.RLock()
defer m.groupsMu.RUnlock()
group, ok := m.groups[gID]
if !ok {
return rule.ApiRule{}, fmt.Errorf("can't find group with id %d", gID)
}
g := group.ToAPI()
ruleID := strconv.FormatUint(rID, 10)
for _, r := range g.Rules {
if r.ID == ruleID {
return r, nil
}
}
return rule.ApiRule{}, fmt.Errorf("can't find rule with id %d in group %q", rID, g.Name)
}
// alertAPI generates apiAlert object from alert by its ID(hash)
func (m *manager) alertAPI(gID, aID uint64) (*rule.ApiAlert, error) {
m.groupsMu.RLock()
defer m.groupsMu.RUnlock()
group, ok := m.groups[gID]
if !ok {
return nil, fmt.Errorf("can't find group with id %d", gID)
}
g := group.ToAPI()
for _, r := range g.Rules {
if r.Type != rule.TypeAlerting {
ar, ok := r.(*rule.AlertingRule)
if !ok {
continue
}
alertID := strconv.FormatUint(aID, 10)
for _, a := range r.Alerts {
if a.ID == alertID {
return a, nil
}
if apiAlert := alertToAPI(ar, aID); apiAlert != nil {
return apiAlert, nil
}
}
return nil, fmt.Errorf("can't find alert with id %d in group %q", aID, g.Name)
@@ -98,18 +81,20 @@ func (m *manager) close() {
m.wg.Wait()
}
func (m *manager) startGroup(ctx context.Context, g *rule.Group, restore bool) {
func (m *manager) startGroup(ctx context.Context, g *rule.Group, restore bool) error {
m.wg.Add(1)
id := g.GetID()
g.Init()
m.wg.Go(func() {
go func() {
defer m.wg.Done()
if restore {
g.Start(ctx, m.rw, m.rr)
g.Start(ctx, m.notifiers, m.rw, m.rr)
} else {
g.Start(ctx, m.rw, nil)
g.Start(ctx, m.notifiers, m.rw, nil)
}
})
}()
m.groups[id] = g
return nil
}
func (m *manager) update(ctx context.Context, groupsCfg []config.Group, restore bool) error {
@@ -118,7 +103,7 @@ func (m *manager) update(ctx context.Context, groupsCfg []config.Group, restore
for _, cfg := range groupsCfg {
for _, r := range cfg.Rules {
if rrPresent && arPresent {
break
continue
}
if r.Record != "" {
rrPresent = true
@@ -134,7 +119,7 @@ func (m *manager) update(ctx context.Context, groupsCfg []config.Group, restore
if rrPresent && m.rw == nil {
return fmt.Errorf("config contains recording rules but `-remoteWrite.url` isn't set")
}
if arPresent && notifier.GetTargets() == nil {
if arPresent && m.notifiers == nil {
return fmt.Errorf("config contains alerting rules but neither `-notifier.url` nor `-notifier.config` nor `-notifier.blackhole` aren't set")
}
@@ -161,22 +146,25 @@ func (m *manager) update(ctx context.Context, groupsCfg []config.Group, restore
}
}
for _, ng := range groupsRegistry {
m.startGroup(ctx, ng, restore)
if err := m.startGroup(ctx, ng, restore); err != nil {
m.groupsMu.Unlock()
return err
}
}
m.groupsMu.Unlock()
if len(toUpdate) > 0 {
var wg sync.WaitGroup
for _, item := range toUpdate {
oldG := item.old
newG := item.new
wg.Go(func() {
// cancel evaluation so the Update will be applied as fast as possible.
// it is important to call InterruptEval before the update, because cancel fn
// can be re-assigned during the update.
oldG.InterruptEval()
oldG.UpdateWith(newG)
})
wg.Add(1)
// cancel evaluation so the Update will be applied as fast as possible.
// it is important to call InterruptEval before the update, because cancel fn
// can be re-assigned during the update.
item.old.InterruptEval()
go func(oldGroup *rule.Group, newGroup *rule.Group) {
oldGroup.UpdateWith(newGroup)
wg.Done()
}(item.old, item.new)
}
wg.Wait()
}

View File

@@ -40,11 +40,10 @@ func TestManagerEmptyRulesDir(t *testing.T) {
// execution of configuration update.
// Should be executed with -race flag
func TestManagerUpdateConcurrent(t *testing.T) {
_, cleanup := notifier.InitFakeNotifier()
defer cleanup()
m := &manager{
groups: make(map[uint64]*rule.Group),
querierBuilder: &datasource.FakeQuerier{},
notifiers: func() []notifier.Notifier { return []notifier.Notifier{&notifier.FakeNotifier{}} },
}
paths := []string{
"config/testdata/dir/rules0-good.rules",
@@ -65,11 +64,13 @@ func TestManagerUpdateConcurrent(t *testing.T) {
const workers = 500
const iterations = 10
var wg sync.WaitGroup
for n := range workers {
wg.Go(func() {
wg := sync.WaitGroup{}
wg.Add(workers)
for i := 0; i < workers; i++ {
go func(n int) {
defer wg.Done()
r := rand.New(rand.NewSource(int64(n)))
for range iterations {
for i := 0; i < iterations; i++ {
rnd := r.Intn(len(paths))
cfg, err := config.Parse([]string{paths[rnd]}, notifier.ValidateTemplates, true)
if err != nil { // update can fail and this is expected
@@ -77,7 +78,7 @@ func TestManagerUpdateConcurrent(t *testing.T) {
}
_ = m.update(context.Background(), cfg, false)
}
})
}(i)
}
wg.Wait()
}
@@ -126,9 +127,8 @@ func TestManagerUpdate_Success(t *testing.T) {
m := &manager{
groups: make(map[uint64]*rule.Group),
querierBuilder: &datasource.FakeQuerier{},
notifiers: func() []notifier.Notifier { return []notifier.Notifier{&notifier.FakeNotifier{}} },
}
_, cleanup := notifier.InitFakeNotifier()
defer cleanup()
cfgInit := loadCfg(t, []string{initPath}, true, true)
if err := m.update(ctx, cfgInit, false); err != nil {
@@ -259,7 +259,7 @@ func compareGroups(t *testing.T, a, b *rule.Group) {
for i, r := range a.Rules {
got, want := r, b.Rules[i]
if a.CreateID() != b.CreateID() {
t.Fatalf("expected to have rule %d; got %d", want.ID(), got.ID())
t.Fatalf("expected to have rule %q; got %q", want.ID(), got.ID())
}
if err := rule.CompareRules(t, want, got); err != nil {
t.Fatalf("comparison error: %s", err)
@@ -277,8 +277,7 @@ func TestManagerUpdate_Failure(t *testing.T) {
rw: rw,
}
if notifiers != nil {
_, cleanup := notifier.InitFakeNotifier()
defer cleanup()
m.notifiers = func() []notifier.Notifier { return notifiers }
}
err := m.update(context.Background(), []config.Group{cfg}, false)
if err == nil {

View File

@@ -80,15 +80,14 @@ func (as AlertState) String() string {
// AlertTplData is used to execute templating
type AlertTplData struct {
Type string
Labels map[string]string
Value float64
Expr string
AlertID uint64
GroupID uint64
ActiveAt time.Time
For time.Duration
IsPartial bool
Type string
Labels map[string]string
Value float64
Expr string
AlertID uint64
GroupID uint64
ActiveAt time.Time
For time.Duration
}
var tplHeaders = []string{
@@ -102,7 +101,6 @@ var tplHeaders = []string{
"{{ $groupID := .GroupID }}",
"{{ $activeAt := .ActiveAt }}",
"{{ $for := .For }}",
"{{ $isPartial := .IsPartial }}",
}
// ExecTemplate executes the Alert template for given
@@ -168,8 +166,8 @@ func templateAnnotations(annotations map[string]string, data AlertTplData, tmpl
ctmpl, _ := tmpl.Clone()
ctmpl = ctmpl.Option("missingkey=zero")
if err := templateAnnotation(&buf, builder.String(), tData, ctmpl, execute); err != nil {
r[key] = err.Error()
eg.Add(fmt.Errorf("(key: %q, value: %q): %w", key, text, err))
r[key] = text
eg.Add(fmt.Errorf("key %q, template %q: %w", key, text, err))
continue
}
r[key] = buf.String()
@@ -186,13 +184,13 @@ type tplData struct {
func templateAnnotation(dst io.Writer, text string, data tplData, tpl *textTpl.Template, execute bool) error {
tpl, err := tpl.Parse(text)
if err != nil {
return fmt.Errorf("error parsing template: %w", err)
return fmt.Errorf("error parsing annotation template: %w", err)
}
if !execute {
return nil
}
if err = tpl.Execute(dst, data); err != nil {
return fmt.Errorf("error evaluating template: %w", err)
return fmt.Errorf("error evaluating annotation template: %w", err)
}
return nil
}

View File

@@ -20,7 +20,7 @@ func TestAlertExecTemplate(t *testing.T) {
)
extLabels["cluster"] = extCluster
extLabels["dc"] = extDC
err := Init(extLabels, extURL)
_, err := Init(nil, extLabels, extURL)
checkErr(t, err)
f := func(alert *Alert, annotations map[string]string, tplExpected map[string]string) {

View File

@@ -3,7 +3,6 @@ package notifier
import (
"bytes"
"context"
"errors"
"fmt"
"io"
"net/http"
@@ -14,6 +13,7 @@ import (
"github.com/VictoriaMetrics/metrics"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/vmalertutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httputil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
@@ -22,11 +22,10 @@ import (
// AlertManager represents integration provider with Prometheus alert manager
// https://github.com/prometheus/alertmanager
type AlertManager struct {
addr *url.URL
argFunc AlertURLGenerator
client *http.Client
timeout time.Duration
lastError string
addr *url.URL
argFunc AlertURLGenerator
client *http.Client
timeout time.Duration
authCfg *promauth.Config
// stores already parsed RelabelConfigs object
@@ -72,42 +71,24 @@ func (am AlertManager) Addr() string {
return am.addr.Redacted()
}
func (am *AlertManager) LastError() string {
return am.lastError
}
// Send an alert or resolve message
func (am *AlertManager) Send(ctx context.Context, alerts []Alert, alertLabels [][]prompb.Label, headers map[string]string) error {
if len(alerts) != len(alertLabels) {
return fmt.Errorf("mismatched number of alerts and label sets after global alert relabeling")
}
func (am *AlertManager) Send(ctx context.Context, alerts []Alert, headers map[string]string) error {
am.metrics.alertsSent.Add(len(alerts))
startTime := time.Now()
err := am.send(ctx, alerts, alertLabels, headers)
err := am.send(ctx, alerts, headers)
am.metrics.alertsSendDuration.UpdateDuration(startTime)
if err != nil {
// the context can be cancelled on graceful shutdown
// or on group update. So no need to handle the error as usual.
if errors.Is(err, context.Canceled) {
return nil
}
am.metrics.alertsSendErrors.Add(len(alerts))
am.lastError = err.Error()
} else {
am.lastError = ""
}
return err
}
func (am *AlertManager) send(ctx context.Context, alerts []Alert, alertLabels [][]prompb.Label, headers map[string]string) error {
func (am *AlertManager) send(ctx context.Context, alerts []Alert, headers map[string]string) error {
b := &bytes.Buffer{}
alertsToSend := make([]Alert, 0, len(alerts))
lblss := make([][]prompb.Label, 0, len(alerts))
for i, a := range alerts {
lbls := alertLabels[i]
if am.relabelConfigs != nil {
lbls = am.relabelConfigs.Apply(lbls, 0)
}
for _, a := range alerts {
lbls := a.applyRelabelingIfNeeded(am.relabelConfigs)
if len(lbls) == 0 {
continue
}
@@ -171,6 +152,11 @@ const alertManagerPath = "/api/v2/alerts"
func NewAlertManager(alertManagerURL string, fn AlertURLGenerator, authCfg promauth.HTTPClientConfig,
relabelCfg *promrelabel.ParsedConfigs, timeout time.Duration,
) (*AlertManager, error) {
if err := httputil.CheckURL(alertManagerURL); err != nil {
return nil, fmt.Errorf("invalid alertmanager URL: %w", err)
}
tls := &promauth.TLSConfig{}
if authCfg.TLSConfig != nil {
tls = authCfg.TLSConfig

View File

@@ -11,7 +11,6 @@ import (
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
)
@@ -146,11 +145,11 @@ func TestAlertManager_Send(t *testing.T) {
t.Fatalf("unexpected error: %s", err)
}
if err := am.Send(context.Background(), []Alert{{Labels: map[string]string{"a": "b"}}}, [][]prompb.Label{{{Name: "a", Value: "b"}}}, nil); err == nil {
if err := am.Send(context.Background(), []Alert{{Labels: map[string]string{"a": "b"}}}, nil); err == nil {
t.Fatalf("expected connection error got nil")
}
if err := am.Send(context.Background(), []Alert{{Labels: map[string]string{"a": "b"}}}, [][]prompb.Label{{{Name: "a", Value: "b"}}}, nil); err == nil {
if err := am.Send(context.Background(), []Alert{{Labels: map[string]string{"a": "b"}}}, nil); err == nil {
t.Fatalf("expected wrong http code error got nil")
}
@@ -161,7 +160,7 @@ func TestAlertManager_Send(t *testing.T) {
End: time.Now().UTC(),
Labels: map[string]string{"alertname": "alert0"},
Annotations: map[string]string{"a": "b", "c": "d"},
}}, [][]prompb.Label{{{Name: "alertname", Value: "alert0"}}}, map[string]string{headerKey: "bar"}); err != nil {
}}, map[string]string{headerKey: "bar"}); err != nil {
t.Fatalf("unexpected error %s", err)
}
@@ -175,7 +174,7 @@ func TestAlertManager_Send(t *testing.T) {
Name: "alert2",
Labels: map[string]string{"rule": "test", "tenant": "1"},
},
}, [][]prompb.Label{{{Name: "rule", Value: "test"}, {Name: "tenant", Value: "0"}}, {{Name: "rule", Value: "test"}, {Name: "tenant", Value: "1"}}}, map[string]string{headerKey: "bar"}); err != nil {
}, map[string]string{headerKey: "bar"}); err != nil {
t.Fatalf("unexpected error %s", err)
}
@@ -188,7 +187,7 @@ func TestAlertManager_Send(t *testing.T) {
Name: "alert2",
Labels: map[string]string{},
},
}, [][]prompb.Label{{{Name: "rule", Value: "test"}}, {{}}}, map[string]string{}); err != nil {
}, map[string]string{}); err != nil {
t.Fatalf("unexpected error %s", err)
}

View File

@@ -27,9 +27,15 @@ type Config struct {
// PathPrefix is added to URL path before adding alertManagerPath value
PathPrefix string `yaml:"path_prefix,omitempty"`
ConsulSDConfigs []ConsulSDConfigs `yaml:"consul_sd_configs,omitempty"`
DNSSDConfigs []DNSSDConfigs `yaml:"dns_sd_configs,omitempty"`
StaticConfigs []StaticConfig `yaml:"static_configs,omitempty"`
// ConsulSDConfigs contains list of settings for service discovery via Consul
// see https://prometheus.io/docs/prometheus/latest/configuration/configuration/#consul_sd_config
ConsulSDConfigs []consul.SDConfig `yaml:"consul_sd_configs,omitempty"`
// DNSSDConfigs contains list of settings for service discovery via DNS.
// See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#dns_sd_config
DNSSDConfigs []dns.SDConfig `yaml:"dns_sd_configs,omitempty"`
// StaticConfigs contains list of static targets
StaticConfigs []StaticConfig `yaml:"static_configs,omitempty"`
// HTTPClientConfig contains HTTP configuration for Notifier clients
HTTPClientConfig promauth.HTTPClientConfig `yaml:",inline"`
@@ -56,29 +62,14 @@ type Config struct {
parsedAlertRelabelConfigs *promrelabel.ParsedConfigs
}
// staticConfig contains list of static targets in the following form:
// StaticConfig contains list of static targets in the following form:
//
// targets:
// [ - '<host>' ]
type StaticConfig struct {
Targets []string `yaml:"targets"`
// HTTPClientConfig contains HTTP configuration for the Targets
HTTPClientConfig promauth.HTTPClientConfig `yaml:",inline"`
AlertRelabelConfigs []promrelabel.RelabelConfig `yaml:"alert_relabel_configs,omitempty"`
}
// ConsulSDConfigs contains list of settings for service discovery via Consul,
// see https://prometheus.io/docs/prometheus/latest/configuration/configuration/#consul_sd_config
type ConsulSDConfigs struct {
consul.SDConfig `yaml:",inline"`
AlertRelabelConfigs []promrelabel.RelabelConfig `yaml:"alert_relabel_configs,omitempty"`
}
// DNSSDConfigs contains list of settings for service discovery via DNS,
// See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#dns_sd_config
type DNSSDConfigs struct {
dns.SDConfig `yaml:",inline"`
AlertRelabelConfigs []promrelabel.RelabelConfig `yaml:"alert_relabel_configs,omitempty"`
HTTPClientConfig promauth.HTTPClientConfig `yaml:",inline"`
}
// UnmarshalYAML implements the yaml.Unmarshaler interface.
@@ -104,31 +95,6 @@ func (cfg *Config) UnmarshalYAML(unmarshal func(any) error) error {
}
cfg.parsedAlertRelabelConfigs = arCfg
for _, s := range cfg.StaticConfigs {
if len(s.AlertRelabelConfigs) > 0 {
_, err := promrelabel.ParseRelabelConfigs(s.AlertRelabelConfigs)
if err != nil {
return fmt.Errorf("failed to parse alert_relabel_configs in static_config: %w", err)
}
}
}
for _, s := range cfg.ConsulSDConfigs {
if len(s.AlertRelabelConfigs) > 0 {
_, err := promrelabel.ParseRelabelConfigs(s.AlertRelabelConfigs)
if err != nil {
return fmt.Errorf("failed to parse alert_relabel_configs in consul_sd_config: %w", err)
}
}
}
for _, s := range cfg.DNSSDConfigs {
if len(s.AlertRelabelConfigs) > 0 {
_, err := promrelabel.ParseRelabelConfigs(s.AlertRelabelConfigs)
if err != nil {
return fmt.Errorf("failed to parse alert_relabel_configs in dns_sd_config: %w", err)
}
}
}
b, err := yaml.Marshal(cfg)
if err != nil {
return fmt.Errorf("failed to marshal configuration for checksum: %w", err)

View File

@@ -35,6 +35,4 @@ func TestParseConfig_Failure(t *testing.T) {
f("testdata/unknownFields.bad.yaml", "unknown field")
f("non-existing-file", "error reading")
f("testdata/consul.bad.yaml", "failed to parse alert_relabel_configs in consul_sd_config")
f("testdata/dns.bad.yaml", "failed to parse alert relabeling config")
}

View File

@@ -8,7 +8,6 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discovery/consul"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discovery/dns"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutil"
@@ -29,7 +28,11 @@ type configWatcher struct {
targets map[TargetType][]Target
}
func newWatcher(cfg *Config, gen AlertURLGenerator) (*configWatcher, error) {
func newWatcher(path string, gen AlertURLGenerator) (*configWatcher, error) {
cfg, err := parseConfig(path)
if err != nil {
return nil, err
}
cw := &configWatcher{
cfg: cfg,
wg: sync.WaitGroup{},
@@ -85,15 +88,18 @@ func (cw *configWatcher) reload(path string) error {
return cw.start()
}
func (cw *configWatcher) add(typeK TargetType, interval time.Duration, targetsFn getTargets) error {
targetMetadata, errors := getTargetMetadata(targetsFn, cw.cfg)
func (cw *configWatcher) add(typeK TargetType, interval time.Duration, labelsFn getLabels) error {
targetMetadata, errors := getTargetMetadata(labelsFn, cw.cfg)
for _, err := range errors {
return fmt.Errorf("failed to init notifier for %q: %w", typeK, err)
}
cw.updateTargets(typeK, targetMetadata, cw.cfg, cw.genFn)
cw.wg.Go(func() {
cw.wg.Add(1)
go func() {
defer cw.wg.Done()
ticker := time.NewTicker(interval)
defer ticker.Stop()
@@ -103,77 +109,62 @@ func (cw *configWatcher) add(typeK TargetType, interval time.Duration, targetsFn
return
case <-ticker.C:
}
targetMetadata, errors := getTargetMetadata(targetsFn, cw.cfg)
targetMetadata, errors := getTargetMetadata(labelsFn, cw.cfg)
for _, err := range errors {
logger.Errorf("failed to init notifier for %q: %w", typeK, err)
}
cw.updateTargets(typeK, targetMetadata, cw.cfg, cw.genFn)
}
})
}()
return nil
}
type targetMetadata struct {
*promutil.Labels
alertRelabelConfigs *promrelabel.ParsedConfigs
}
func getTargetMetadata(targetsFn getTargets, cfg *Config) (map[string]targetMetadata, []error) {
metaLabelsList, alertRelabelCfgs, err := targetsFn()
func getTargetMetadata(labelsFn getLabels, cfg *Config) (map[string]*promutil.Labels, []error) {
metaLabels, err := labelsFn()
if err != nil {
return nil, []error{fmt.Errorf("failed to get labels: %w", err)}
}
targetMts := make(map[string]targetMetadata, len(metaLabelsList))
targetMetadata := make(map[string]*promutil.Labels, len(metaLabels))
var errors []error
duplicates := make(map[string]struct{})
for i := range metaLabelsList {
metaLabels := metaLabelsList[i]
alertRelabelCfg := alertRelabelCfgs[i]
for _, labels := range metaLabels {
target := labels.Get("__address__")
u, processedLabels, err := parseLabels(target, labels, cfg)
if err != nil {
errors = append(errors, err)
continue
}
if len(u) == 0 {
continue
}
// check for duplicated targets
// targets with same address but different alert_relabel_configs are still considered duplicates since it's mostly due to misconfiguration and could cause duplicated notifications.
if _, ok := duplicates[u]; ok {
if !*suppressDuplicateTargetErrors {
logger.Errorf("skipping duplicate target with identical address %q; "+
"make sure service discovery and relabeling is set up properly; "+
"original labels: %s; resulting labels: %s",
u, labels, processedLabels)
}
continue
}
duplicates[u] = struct{}{}
targetMts[u] = targetMetadata{
Labels: processedLabels,
alertRelabelConfigs: alertRelabelCfg,
}
for _, labels := range metaLabels {
target := labels.Get("__address__")
u, processedLabels, err := parseLabels(target, labels, cfg)
if err != nil {
errors = append(errors, err)
continue
}
if len(u) == 0 {
continue
}
if _, ok := duplicates[u]; ok { // check for duplicates
if !*suppressDuplicateTargetErrors {
logger.Errorf("skipping duplicate target with identical address %q; "+
"make sure service discovery and relabeling is set up properly; "+
"original labels: %s; resulting labels: %s",
u, labels, processedLabels)
}
continue
}
duplicates[u] = struct{}{}
targetMetadata[u] = processedLabels
}
return targetMts, errors
return targetMetadata, errors
}
type getTargets func() ([][]*promutil.Labels, []*promrelabel.ParsedConfigs, error)
type getLabels func() ([]*promutil.Labels, error)
func (cw *configWatcher) start() error {
if len(cw.cfg.StaticConfigs) > 0 {
var targets []Target
for i, cfg := range cw.cfg.StaticConfigs {
alertRelabelConfig, _ := promrelabel.ParseRelabelConfigs(cw.cfg.StaticConfigs[i].AlertRelabelConfigs)
for _, cfg := range cw.cfg.StaticConfigs {
httpCfg := mergeHTTPClientConfigs(cw.cfg.HTTPClientConfig, cfg.HTTPClientConfig)
for _, target := range cfg.Targets {
address, labels, err := parseLabels(target, nil, cw.cfg)
if err != nil {
return fmt.Errorf("failed to parse labels for target %q: %w", target, err)
}
notifier, err := NewAlertManager(address, cw.genFn, httpCfg, alertRelabelConfig, cw.cfg.Timeout.Duration())
notifier, err := NewAlertManager(address, cw.genFn, httpCfg, cw.cfg.parsedAlertRelabelConfigs, cw.cfg.Timeout.Duration())
if err != nil {
return fmt.Errorf("failed to init alertmanager for addr %q: %w", address, err)
}
@@ -187,20 +178,17 @@ func (cw *configWatcher) start() error {
}
if len(cw.cfg.ConsulSDConfigs) > 0 {
err := cw.add(TargetConsul, *consul.SDCheckInterval, func() ([][]*promutil.Labels, []*promrelabel.ParsedConfigs, error) {
var labels [][]*promutil.Labels
var alertRelabelConfigs []*promrelabel.ParsedConfigs
err := cw.add(TargetConsul, *consul.SDCheckInterval, func() ([]*promutil.Labels, error) {
var labels []*promutil.Labels
for i := range cw.cfg.ConsulSDConfigs {
alertRelabelConfig, _ := promrelabel.ParseRelabelConfigs(cw.cfg.ConsulSDConfigs[i].AlertRelabelConfigs)
sdc := &cw.cfg.ConsulSDConfigs[i]
targetLabels, err := sdc.GetLabels(cw.cfg.baseDir)
if err != nil {
return nil, nil, fmt.Errorf("got labels err: %w", err)
return nil, fmt.Errorf("got labels err: %w", err)
}
labels = append(labels, targetLabels)
alertRelabelConfigs = append(alertRelabelConfigs, alertRelabelConfig)
labels = append(labels, targetLabels...)
}
return labels, alertRelabelConfigs, nil
return labels, nil
})
if err != nil {
return fmt.Errorf("failed to start consulSD discovery: %w", err)
@@ -208,21 +196,17 @@ func (cw *configWatcher) start() error {
}
if len(cw.cfg.DNSSDConfigs) > 0 {
err := cw.add(TargetDNS, *dns.SDCheckInterval, func() ([][]*promutil.Labels, []*promrelabel.ParsedConfigs, error) {
var labels [][]*promutil.Labels
var alertRelabelConfigs []*promrelabel.ParsedConfigs
err := cw.add(TargetDNS, *dns.SDCheckInterval, func() ([]*promutil.Labels, error) {
var labels []*promutil.Labels
for i := range cw.cfg.DNSSDConfigs {
alertRelabelConfig, _ := promrelabel.ParseRelabelConfigs(cw.cfg.DNSSDConfigs[i].AlertRelabelConfigs)
sdc := &cw.cfg.DNSSDConfigs[i]
targetLabels, err := sdc.GetLabels(cw.cfg.baseDir)
if err != nil {
return nil, nil, fmt.Errorf("got labels err: %w", err)
return nil, fmt.Errorf("got labels err: %w", err)
}
labels = append(labels, targetLabels)
alertRelabelConfigs = append(alertRelabelConfigs, alertRelabelConfig)
labels = append(labels, targetLabels...)
}
return labels, alertRelabelConfigs, nil
return labels, nil
})
if err != nil {
return fmt.Errorf("failed to start DNSSD discovery: %w", err)
@@ -256,30 +240,30 @@ func (cw *configWatcher) setTargets(key TargetType, targets []Target) {
cw.targetsMu.Unlock()
}
func (cw *configWatcher) updateTargets(key TargetType, targetMts map[string]targetMetadata, cfg *Config, genFn AlertURLGenerator) {
func (cw *configWatcher) updateTargets(key TargetType, targetMetadata map[string]*promutil.Labels, cfg *Config, genFn AlertURLGenerator) {
cw.targetsMu.Lock()
defer cw.targetsMu.Unlock()
oldTargets := cw.targets[key]
var updatedTargets []Target
for _, ot := range oldTargets {
if _, ok := targetMts[ot.Addr()]; !ok {
if _, ok := targetMetadata[ot.Addr()]; !ok {
// if target not exists in currentTargets, close it
ot.Close()
} else {
updatedTargets = append(updatedTargets, ot)
delete(targetMts, ot.Addr())
delete(targetMetadata, ot.Addr())
}
}
// create new resources for the new targets
for addr, metadata := range targetMts {
am, err := NewAlertManager(addr, genFn, cfg.HTTPClientConfig, metadata.alertRelabelConfigs, cfg.Timeout.Duration())
for addr, labels := range targetMetadata {
am, err := NewAlertManager(addr, genFn, cfg.HTTPClientConfig, cfg.parsedAlertRelabelConfigs, cfg.Timeout.Duration())
if err != nil {
logger.Errorf("failed to init %s notifier with addr %q: %w", key, addr, err)
continue
}
updatedTargets = append(updatedTargets, Target{
Notifier: am,
Labels: metadata.Labels,
Labels: labels,
})
}

View File

@@ -7,7 +7,6 @@ import (
"net/http/httptest"
"os"
"sync"
"sync/atomic"
"testing"
"time"
@@ -29,11 +28,7 @@ static_configs:
- localhost:9093
- localhost:9094
`)
cfg, err := parseConfig(f.Name())
if err != nil {
t.Fatalf("failed to parse config: %s", err)
}
cw, err := newWatcher(cfg, nil)
cw, err := newWatcher(f.Name(), nil)
if err != nil {
t.Fatalf("failed to start config watcher: %s", err)
}
@@ -88,64 +83,33 @@ consul_sd_configs:
- server: %s
services:
- alertmanager
- server: %s
services:
- alertmanager
alert_relabel_configs:
- target_label: "foo"
replacement: "tar"
`, consulSDServer.URL, consulSDServer.URL))
`, consulSDServer.URL))
cfg, err := parseConfig(consulSDFile.Name())
if err != nil {
t.Fatalf("failed to parse config: %s", err)
}
cw, err := newWatcher(cfg, nil)
cw, err := newWatcher(consulSDFile.Name(), nil)
if err != nil {
t.Fatalf("failed to start config watcher: %s", err)
}
defer cw.mustStop()
if len(cw.notifiers()) != 3 {
t.Fatalf("expected to get 3 notifiers; got %d", len(cw.notifiers()))
if len(cw.notifiers()) != 2 {
t.Fatalf("expected to get 2 notifiers; got %d", len(cw.notifiers()))
}
expAddr1 := fmt.Sprintf("https://%s/proxy/api/v2/alerts", fakeConsulService1)
expAddr2 := fmt.Sprintf("https://%s/proxy/api/v2/alerts", fakeConsulService2)
expAddr3 := fmt.Sprintf("https://%s/proxy/api/v2/alerts", fakeConsulService3)
n1, n2, n3 := cw.notifiers()[0], cw.notifiers()[1], cw.notifiers()[2]
n1, n2 := cw.notifiers()[0], cw.notifiers()[1]
if n1.Addr() != expAddr1 {
t.Fatalf("exp address %q; got %q", expAddr1, n1.Addr())
}
if n2.Addr() != expAddr2 {
t.Fatalf("exp address %q; got %q", expAddr2, n2.Addr())
}
if n3.Addr() != expAddr3 {
t.Fatalf("exp address %q; got %q", expAddr3, n3.Addr())
}
if n1.(*AlertManager).relabelConfigs.String() != "" {
t.Fatalf("unexpected relabel configs: %q", n1.(*AlertManager).relabelConfigs.String())
}
if n2.(*AlertManager).relabelConfigs.String() != "" {
t.Fatalf("unexpected relabel configs: %q", n2.(*AlertManager).relabelConfigs.String())
}
if n3.(*AlertManager).relabelConfigs.String() != "- target_label: foo\n replacement: tar\n" {
t.Fatalf("unexpected relabel configs: %q", n3.(*AlertManager).relabelConfigs.String())
}
f := func() bool { return len(cw.notifiers()) == 1 }
if !waitFor(f, time.Second) {
t.Fatalf("expected to get 1 notifiers; got %d", len(cw.notifiers()))
}
n3 = cw.notifiers()[0]
if n3.Addr() != expAddr3 {
t.Fatalf("exp address %q; got %q", expAddr3, n3.Addr())
}
if n3.(*AlertManager).relabelConfigs.String() != "- target_label: foo\n replacement: tar\n" {
t.Fatalf("unexpected relabel configs: %q", n3.(*AlertManager).relabelConfigs.String())
}
}
// TestConfigWatcherReloadConcurrent supposed to test concurrent
@@ -200,11 +164,7 @@ consul_sd_configs:
"unknownFields.bad.yaml",
}
cfg, err := parseConfig(paths[0])
if err != nil {
t.Fatalf("failed to parse config: %s", err)
}
cw, err := newWatcher(cfg, nil)
cw, err := newWatcher(paths[0], nil)
if err != nil {
t.Fatalf("failed to start config watcher: %s", err)
}
@@ -212,16 +172,18 @@ consul_sd_configs:
const workers = 500
const iterations = 10
var wg sync.WaitGroup
for n := range workers {
wg.Go(func() {
wg := sync.WaitGroup{}
wg.Add(workers)
for i := 0; i < workers; i++ {
go func(n int) {
defer wg.Done()
r := rand.New(rand.NewSource(int64(n)))
for range iterations {
for i := 0; i < iterations; i++ {
rnd := r.Intn(len(paths))
_ = cw.reload(paths[rnd]) // update can fail and this is expected
_ = cw.notifiers()
}
})
}(i)
}
wg.Wait()
}
@@ -240,11 +202,10 @@ func checkErr(t *testing.T, err error) {
const (
fakeConsulService1 = "127.0.0.1:9093"
fakeConsulService2 = "127.0.0.1:9095"
fakeConsulService3 = "127.0.0.1:9097"
)
func newFakeConsulServer() *httptest.Server {
var requestCount atomic.Int32
requestCount := 0
mux := http.NewServeMux()
mux.HandleFunc("/v1/agent/self", func(rw http.ResponseWriter, _ *http.Request) {
rw.Write([]byte(`{"Config": {"Datacenter": "dc1"}}`))
@@ -259,7 +220,7 @@ func newFakeConsulServer() *httptest.Server {
}`))
})
mux.HandleFunc("/v1/health/service/alertmanager", func(rw http.ResponseWriter, _ *http.Request) {
if requestCount.Load() == 0 {
if requestCount == 0 {
rw.Header().Set("X-Consul-Index", "1")
rw.Write([]byte(`
[
@@ -399,7 +360,7 @@ func newFakeConsulServer() *httptest.Server {
}
]`))
}
requestCount.Add(1)
requestCount++
})
return httptest.NewServer(mux)

View File

@@ -5,8 +5,6 @@ import (
"fmt"
"sync"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
)
// FakeNotifier is a mock notifier
@@ -17,32 +15,14 @@ type FakeNotifier struct {
counter int
}
// InitFakeNotifier initializes global notifier to FakeNotifier,
// and returns a cleanup function to restore the original getActiveNotifiers.
func InitFakeNotifier() (*FakeNotifier, func()) {
originalGetActiveNotifiers := getActiveNotifiers
fn := &FakeNotifier{}
getActiveNotifiers = func() []Notifier {
return []Notifier{fn}
}
return fn, func() {
getActiveNotifiers = originalGetActiveNotifiers
}
}
// Close does nothing
func (*FakeNotifier) Close() {}
// LastError returns last error message
func (*FakeNotifier) LastError() string {
return ""
}
// Addr returns ""
func (*FakeNotifier) Addr() string { return "" }
// Send sets alerts and increases counter
func (fn *FakeNotifier) Send(_ context.Context, alerts []Alert, _ [][]prompb.Label, _ map[string]string) error {
func (fn *FakeNotifier) Send(_ context.Context, alerts []Alert, _ map[string]string) error {
fn.Lock()
defer fn.Unlock()
fn.counter += len(alerts)

View File

@@ -1,22 +1,14 @@
package notifier
import (
"context"
"flag"
"fmt"
"net/url"
"strconv"
"strings"
"sync"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httputil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutil"
)
@@ -65,61 +57,11 @@ var (
sendTimeout = flagutil.NewArrayDuration("notifier.sendTimeout", 10*time.Second, "Timeout when sending alerts to the corresponding -notifier.url")
)
// AlertURLGeneratorFn returns a URL to the passed alert object.
// Call InitAlertURLGeneratorFn before using this function.
var AlertURLGeneratorFn AlertURLGenerator
// InitAlertURLGeneratorFn populates AlertURLGeneratorFn
func InitAlertURLGeneratorFn(externalURL *url.URL, externalAlertSource string, validateTemplate bool) error {
if externalAlertSource == "" {
AlertURLGeneratorFn = func(a Alert) string {
gID, aID := strconv.FormatUint(a.GroupID, 10), strconv.FormatUint(a.ID, 10)
return fmt.Sprintf("%s/vmalert/alert?%s=%s&%s=%s", externalURL, "group_id", gID, "alert_id", aID)
}
return nil
}
if validateTemplate {
if err := ValidateTemplates(map[string]string{
"tpl": externalAlertSource,
}); err != nil {
return fmt.Errorf("error validating source template %s: %w", externalAlertSource, err)
}
}
m := map[string]string{
"tpl": externalAlertSource,
}
AlertURLGeneratorFn = func(alert Alert) string {
qFn := func(_ string) ([]datasource.Metric, error) {
return nil, fmt.Errorf("`query` template isn't supported for alert source template")
}
templated, err := alert.ExecTemplate(qFn, alert.Labels, m)
if err != nil {
logger.Errorf("cannot template alert source: %s", err)
}
return fmt.Sprintf("%s/%s", externalURL, templated["tpl"])
}
return nil
}
var (
// getActiveNotifiers returns the current list of Notifier objects.
getActiveNotifiers func() []Notifier
// globalRelabelCfg stores the parsed alert relabeling config from the config file if there is
globalRelabelCfg *promrelabel.ParsedConfigs
// cw holds a configWatcher for configPath configuration file
// configWatcher provides a list of Notifier objects discovered
// from static config or via service discovery.
// cw is not nil only if configPath is provided.
cw *configWatcher
// externalLabels is a global variable for holding external labels configured via flags
// It is supposed to be inited via Init function only.
externalLabels map[string]string
// externalURL is a global variable for holding external URL value configured via flag
// It is supposed to be inited via Init function only.
externalURL string
)
// cw holds a configWatcher for configPath configuration file
// configWatcher provides a list of Notifier objects discovered
// from static config or via service discovery.
// cw is not nil only if configPath is provided.
var cw *configWatcher
// Reload checks the changes in configPath configuration file
// and applies changes if any.
@@ -130,62 +72,66 @@ func Reload() error {
return cw.reload(*configPath)
}
var staticNotifiersFn func() []Notifier
var (
// externalLabels is a global variable for holding external labels configured via flags
// It is supposed to be inited via Init function only.
externalLabels map[string]string
// externalURL is a global variable for holding external URL value configured via flag
// It is supposed to be inited via Init function only.
externalURL string
)
// Init returns a function for retrieving actual list of Notifier objects.
// Init works in two mods:
// - configuration via flags (for backward compatibility). Is always static
// and don't support live reloads.
// - configuration via file. Supports live reloads and service discovery.
//
// Init returns an error if both mods are used.
func Init(extLabels map[string]string, extURL string) error {
func Init(gen AlertURLGenerator, extLabels map[string]string, extURL string) (func() []Notifier, error) {
externalURL = extURL
externalLabels = extLabels
_, err := url.Parse(externalURL)
if err != nil {
return fmt.Errorf("failed to parse external URL: %w", err)
return nil, fmt.Errorf("failed to parse external URL: %w", err)
}
if *blackHole {
if len(*addrs) > 0 || *configPath != "" {
return fmt.Errorf("only one of -notifier.blackhole, -notifier.url and -notifier.config flags must be specified")
return nil, fmt.Errorf("only one of -notifier.blackhole, -notifier.url and -notifier.config flags must be specified")
}
notifier := newBlackHoleNotifier()
getActiveNotifiers = func() []Notifier {
staticNotifiersFn = func() []Notifier {
return []Notifier{notifier}
}
return nil
return staticNotifiersFn, nil
}
if *configPath == "" && len(*addrs) == 0 {
return nil
return nil, nil
}
if *configPath != "" && len(*addrs) > 0 {
return fmt.Errorf("only one of -notifier.config or -notifier.url flags must be specified")
return nil, fmt.Errorf("only one of -notifier.config or -notifier.url flags must be specified")
}
if len(*addrs) > 0 {
notifiers, err := notifiersFromFlags(AlertURLGeneratorFn)
notifiers, err := notifiersFromFlags(gen)
if err != nil {
return fmt.Errorf("failed to create notifier from flag values: %w", err)
return nil, fmt.Errorf("failed to create notifier from flag values: %w", err)
}
getActiveNotifiers = func() []Notifier {
staticNotifiersFn = func() []Notifier {
return notifiers
}
return nil
return staticNotifiersFn, nil
}
cfg, err := parseConfig(*configPath)
cw, err = newWatcher(*configPath, gen)
if err != nil {
return err
return nil, fmt.Errorf("failed to init config watcher: %w", err)
}
if cfg.AlertRelabelConfigs != nil {
globalRelabelCfg = cfg.parsedAlertRelabelConfigs
}
cw, err = newWatcher(cfg, AlertURLGeneratorFn)
if err != nil {
return fmt.Errorf("failed to init config watcher: %w", err)
}
getActiveNotifiers = cw.notifiers
return nil
return cw.notifiers, nil
}
// InitSecretFlags must be called after flag.Parse and before any logging
@@ -229,9 +175,6 @@ func notifiersFromFlags(gen AlertURLGenerator) ([]Notifier, error) {
Headers: []string{headers.GetOptionalArg(i)},
}
if err := httputil.CheckURL(addr); err != nil {
return nil, fmt.Errorf("invalid notifier.url %q: %w", addr, err)
}
addr = strings.TrimSuffix(addr, "/")
am, err := NewAlertManager(addr+alertManagerPath, gen, authCfg, nil, sendTimeout.GetOptionalArg(i))
if err != nil {
@@ -263,58 +206,23 @@ const (
// GetTargets returns list of static or discovered targets
// via notifier configuration.
//
// Must be called after Init.
func GetTargets() map[TargetType][]Target {
if getActiveNotifiers == nil {
return nil
var targets = make(map[TargetType][]Target)
if staticNotifiersFn != nil {
for _, ns := range staticNotifiersFn() {
targets[TargetStatic] = append(targets[TargetStatic], Target{
Notifier: ns,
})
}
}
targets := make(map[TargetType][]Target)
// use cached targets from configWatcher instead of getActiveNotifiers for the extra target labels
if cw != nil {
cw.targetsMu.RLock()
for key, ns := range cw.targets {
targets[key] = append(targets[key], ns...)
}
cw.targetsMu.RUnlock()
return targets
}
// static notifiers don't have labels
for _, ns := range getActiveNotifiers() {
targets[TargetStatic] = append(targets[TargetStatic], Target{
Notifier: ns,
})
}
return targets
}
// Send sends alerts to all active notifiers
func Send(ctx context.Context, alerts []Alert, notifierHeaders map[string]string) chan error {
alertsToSend := make([]Alert, 0, len(alerts))
lblss := make([][]prompb.Label, 0, len(alerts))
// apply global relabel config first without modifying original alerts in alerts
for _, a := range alerts {
lbls := a.applyRelabelingIfNeeded(globalRelabelCfg)
if len(lbls) == 0 {
continue
}
alertsToSend = append(alertsToSend, a)
lblss = append(lblss, lbls)
}
wg := sync.WaitGroup{}
activeNotifiers := getActiveNotifiers()
errCh := make(chan error, len(activeNotifiers))
defer close(errCh)
for i := range activeNotifiers {
nt := activeNotifiers[i]
wg.Go(func() {
if err := nt.Send(ctx, alertsToSend, lblss, notifierHeaders); err != nil {
errCh <- fmt.Errorf("failed to send alerts to addr %q: %w", nt.Addr(), err)
}
})
}
wg.Wait()
return errCh
}

View File

@@ -1,17 +1,9 @@
package notifier
import (
"context"
"encoding/json"
"fmt"
"net/http"
"net/http/httptest"
"net/url"
"os"
"testing"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
)
func TestInit(t *testing.T) {
@@ -20,13 +12,14 @@ func TestInit(t *testing.T) {
*addrs = flagutil.ArrayString{"127.0.0.1", "127.0.0.2"}
err := Init(nil, "")
fn, err := Init(nil, nil, "")
if err != nil {
t.Fatalf("%s", err)
}
if len(getActiveNotifiers()) != 2 {
t.Fatalf("expected to get 2 notifiers; got %d", len(getActiveNotifiers()))
nfs := fn()
if len(nfs) != 2 {
t.Fatalf("expected to get 2 notifiers; got %d", len(nfs))
}
targets := GetTargets()
@@ -55,22 +48,19 @@ func TestInitNegative(t *testing.T) {
*blackHole = oldBlackHole
}()
f := func(path string, addr []string, bh bool) {
f := func(path, addr string, bh bool) {
*configPath = path
*addrs = flagutil.ArrayString(addr)
*addrs = flagutil.ArrayString{addr}
*blackHole = bh
if err := Init(nil, ""); err == nil {
if _, err := Init(nil, nil, ""); err == nil {
t.Fatalf("expected to get error; got nil instead")
}
}
// *configPath, *addrs and *blackhole are mutually exclusive
f("/dummy/path", []string{"127.0.0.1"}, false)
f("/dummy/path", []string{}, true)
f("", []string{"127.0.0.1"}, true)
// addr cannot be ""
f("", []string{""}, false)
f("", []string{"127.0.0.1", ""}, false)
f("/dummy/path", "127.0.0.1", false)
f("/dummy/path", "", true)
f("", "127.0.0.1", true)
}
func TestBlackHole(t *testing.T) {
@@ -79,13 +69,14 @@ func TestBlackHole(t *testing.T) {
*blackHole = true
err := Init(nil, "")
fn, err := Init(nil, nil, "")
if err != nil {
t.Fatalf("%s", err)
}
if len(getActiveNotifiers()) != 1 {
t.Fatalf("expected to get 1 notifier; got %d", len(getActiveNotifiers()))
nfs := fn()
if len(nfs) != 1 {
t.Fatalf("expected to get 1 notifier; got %d", len(nfs))
}
targets := GetTargets()
@@ -100,114 +91,3 @@ func TestBlackHole(t *testing.T) {
t.Fatalf("expected to get \"blackhole\"; got %q instead", nf1.Addr())
}
}
func TestGetAlertURLGenerator(t *testing.T) {
oldAlertURLGeneratorFn := AlertURLGeneratorFn
defer func() { AlertURLGeneratorFn = oldAlertURLGeneratorFn }()
testAlert := Alert{GroupID: 42, ID: 2, Value: 4, Labels: map[string]string{"tenant": "baz"}}
u, _ := url.Parse("https://victoriametrics.com/path")
err := InitAlertURLGeneratorFn(u, "", false)
if err != nil {
t.Fatalf("unexpected error %s", err)
}
exp := fmt.Sprintf("https://victoriametrics.com/path/vmalert/alert?%s=42&%s=2", "group_id", "alert_id")
if exp != AlertURLGeneratorFn(testAlert) {
t.Fatalf("unexpected url want %s, got %s", exp, AlertURLGeneratorFn(testAlert))
}
err = InitAlertURLGeneratorFn(nil, "foo?{{invalid}}", true)
if err == nil {
t.Fatalf("expected template validation error got nil")
}
err = InitAlertURLGeneratorFn(u, "foo?query={{$value}}&ds={{ $labels.tenant }}", true)
if err != nil {
t.Fatalf("unexpected error %s", err)
}
if exp := "https://victoriametrics.com/path/foo?query=4&ds=baz"; exp != AlertURLGeneratorFn(testAlert) {
t.Fatalf("unexpected url want %s, got %s", exp, AlertURLGeneratorFn(testAlert))
}
}
func TestSendAlerts(t *testing.T) {
oldAlertURLGeneratorFn := AlertURLGeneratorFn
defer func() { AlertURLGeneratorFn = oldAlertURLGeneratorFn }()
AlertURLGeneratorFn = func(alert Alert) string {
return ""
}
mux := http.NewServeMux()
mux.HandleFunc("/", func(_ http.ResponseWriter, _ *http.Request) {
t.Fatalf("should not be called")
})
mux.HandleFunc(alertManagerPath, func(w http.ResponseWriter, r *http.Request) {
var a []struct {
Labels map[string]string `json:"labels"`
}
if err := json.NewDecoder(r.Body).Decode(&a); err != nil {
t.Fatalf("can not unmarshal data into alert %s", err)
}
if len(a) != 2 {
t.Fatalf("expected 2 alert in array got %d", len(a))
}
if len(a[0].Labels) != 4 {
t.Fatalf("expected 4 labels got %d", len(a[0].Labels))
}
if a[0].Labels["env"] != "prod" {
t.Fatalf("expected env label to be prod during relabeling, got %s", a[0].Labels["env"])
}
if a[0].Labels["c"] != "baz" {
t.Fatalf("expected c label to be baz during relabeling, got %s", a[0].Labels["c"])
}
if len(a[1].Labels) != 1 {
t.Fatalf("expected 1 labels got %d", len(a[1].Labels))
}
})
srv := httptest.NewServer(mux)
defer srv.Close()
f, err := os.CreateTemp("", "")
if err != nil {
t.Fatal(err)
}
defer fs.MustRemovePath(f.Name())
rawConfig := `
static_configs:
- targets:
- %s
alert_relabel_configs:
- source_labels: [b]
target_label: "c"
alert_relabel_configs:
- source_labels: [a]
target_label: "b"
- target_label: "env"
replacement: "prod"
`
config := fmt.Sprintf(rawConfig, srv.URL+alertManagerPath)
writeToFile(f.Name(), config)
oldConfigPath := configPath
defer func() { configPath = oldConfigPath }()
*configPath = f.Name()
err = Init(nil, "")
if err != nil {
t.Fatalf("unexpected error when parse notifier config: %s", err)
}
firingAlerts := []Alert{
{
Name: "alert1",
Labels: map[string]string{"a": "baz"},
},
{
Name: "alert2",
Labels: map[string]string{},
},
}
errG := Send(context.Background(), firingAlerts, nil)
for err := range errG {
if err != nil {
t.Errorf("unexpected error when sending alerts: %s", err)
}
}
}

View File

@@ -1,21 +1,15 @@
package notifier
import (
"context"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
)
import "context"
// Notifier is a common interface for alert manager provider
type Notifier interface {
// Send sends the given list of alerts.
// Returns an error if fails to send the alerts.
// Must unblock if the given ctx is cancelled.
Send(ctx context.Context, alerts []Alert, alertLabels [][]prompb.Label, notifierHeaders map[string]string) error
Send(ctx context.Context, alerts []Alert, notifierHeaders map[string]string) error
// Addr returns address where alerts are sent.
Addr() string
// LastError returns error, that occured during last attempt to send data
LastError() string
// Close is a destructor for the Notifier
Close()
}

View File

@@ -1,10 +1,6 @@
package notifier
import (
"context"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
)
import "context"
// blackHoleNotifier is a Notifier stub, used when no notifications need
// to be sent.
@@ -14,7 +10,7 @@ type blackHoleNotifier struct {
}
// Send will send no notifications, but increase the metric.
func (bh *blackHoleNotifier) Send(_ context.Context, alerts []Alert, _ [][]prompb.Label, _ map[string]string) error { //nolint:revive
func (bh *blackHoleNotifier) Send(_ context.Context, alerts []Alert, _ map[string]string) error { //nolint:revive
bh.metrics.alertsSent.Add(len(alerts))
return nil
}
@@ -29,11 +25,6 @@ func (bh *blackHoleNotifier) Close() {
bh.metrics.close()
}
// LastError return last notifier's error
func (bh *blackHoleNotifier) LastError() string {
return ""
}
// newBlackHoleNotifier creates a new blackHoleNotifier
func newBlackHoleNotifier() *blackHoleNotifier {
address := "blackhole"

View File

@@ -5,7 +5,6 @@ import (
"testing"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
metricset "github.com/VictoriaMetrics/metrics"
)
@@ -17,7 +16,7 @@ func TestBlackHoleNotifier_Send(t *testing.T) {
Start: time.Now().UTC(),
End: time.Now().UTC(),
Annotations: map[string]string{"a": "b", "c": "d", "e": "f"},
}}, [][]prompb.Label{{}}, nil); err != nil {
}}, nil); err != nil {
t.Fatalf("unexpected error %s", err)
}
@@ -35,7 +34,7 @@ func TestBlackHoleNotifier_Close(t *testing.T) {
Start: time.Now().UTC(),
End: time.Now().UTC(),
Annotations: map[string]string{"a": "b", "c": "d", "e": "f"},
}}, [][]prompb.Label{{}}, nil); err != nil {
}}, nil); err != nil {
t.Fatalf("unexpected error %s", err)
}

View File

@@ -1,19 +0,0 @@
consul_sd_configs:
- server: localhost:8500
scheme: http
services:
- alertmanager
alert_relabel_configs:
- action: keep
source_labels: [env]
regex: "prod"
- server: localhost:8500
services:
- consul
alert_relabel_configs:
- action: keep
source_labels: [env]
regex: "(abc"
alert_relabel_configs:
- target_label: "foo"
replacement: "aaa"

View File

@@ -1,13 +0,0 @@
dns_sd_configs:
- names:
- cloudflare.com
type: 'A'
port: 9093
relabel_configs:
- source_labels: [__meta_dns_name]
replacement: '${1}'
target_label: dns_name
alert_relabel_configs:
- action: keep
source_labels: [env]
regex: "(abc"

View File

@@ -2,19 +2,12 @@ static_configs:
- targets:
- localhost:9093
- localhost:9095
alert_relabel_configs:
- action: keep
source_labels: [env]
regex: "static"
consul_sd_configs:
- server: localhost:8500
scheme: http
services:
- alertmanager
alert_relabel_configs:
- action: keep
source_labels: [env]
regex: "consul"
- server: localhost:8500
services:
- consul
@@ -24,10 +17,6 @@ dns_sd_configs:
- cloudflare.com
type: 'A'
port: 9093
alert_relabel_configs:
- action: keep
source_labels: [env]
regex: "dns"
relabel_configs:
- source_labels: [__meta_consul_tags]
@@ -36,4 +25,4 @@ relabel_configs:
target_label: __scheme__
- source_labels: [__meta_dns_name]
replacement: '${1}'
target_label: dns_name
target_label: dns_name

View File

@@ -1,14 +1,22 @@
headers:
- 'CustomHeader: foo'
static_configs:
- targets:
- http://192.168.0.101:9093
alert_relabel_configs:
- target_label: "foo"
replacement: "aaa"
- localhost:9093
- localhost:9095
- https://localhost:9093/test/api/v2/alerts
basic_auth:
username: foo
password: bar
- targets:
- http://192.168.0.101:9093
alert_relabel_configs:
- target_label: "foo"
replacement: "ccc"
- localhost:9096
- localhost:9097
basic_auth:
username: foo
password: baz
alert_relabel_configs:
- target_label: "foo"
replacement: "aaa"

View File

@@ -1,19 +0,0 @@
package notifier
// ApiNotifier represents a Notifier configuration for WEB view
type ApiNotifier struct {
// Kind is a Notifier type
Kind TargetType `json:"kind"`
// Targets is a list of Notifier targets
Targets []*ApiTarget `json:"targets"`
}
// ApiTarget represents a specific Notifier target for WEB view
type ApiTarget struct {
// Address is a URL for sending notifications
Address string `json:"address"`
// Labels is a list of labels to add to each sent notification
Labels map[string]string `json:"labels"`
// LastError contains the error faced while sending to notifier.
LastError string `json:"lastError"`
}

View File

@@ -14,9 +14,9 @@ import (
)
var (
addr = flag.String("remoteRead.url", "", "Optional URL to datasource compatible with MetricsQL. It can be single node VictoriaMetrics or vmselect. "+
"Remote read is used to restore alerts state. "+
"This configuration makes sense only if vmalert was configured with '-remoteWrite.url' before and has been successfully persisted its state. "+
addr = flag.String("remoteRead.url", "", "Optional URL to datasource compatible with MetricsQL. It can be single node VictoriaMetrics or vmselect."+
"Remote read is used to restore alerts state."+
"This configuration makes sense only if `vmalert` was configured with `remoteWrite.url` before and has been successfully persisted its state. "+
"Supports address in the form of IP address with a port (e.g., http://127.0.0.1:8428) or DNS SRV record. "+
"See also '-remoteRead.disablePathAppend', '-remoteRead.showURL'.")

View File

@@ -13,18 +13,14 @@ import (
"sync"
"time"
"github.com/cespare/xxhash/v2"
"github.com/golang/snappy"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httputil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timeutil"
"github.com/VictoriaMetrics/metrics"
)
@@ -118,9 +114,7 @@ func NewClient(ctx context.Context, cfg Config) (*Client, error) {
}
for i := 0; i < cc; i++ {
c.wg.Go(func() {
c.run(ctx, i)
})
c.run(ctx)
}
return c, nil
}
@@ -162,7 +156,8 @@ func (c *Client) Close() error {
return nil
}
func (c *Client) run(ctx context.Context, id int) {
func (c *Client) run(ctx context.Context) {
ticker := time.NewTicker(c.flushInterval)
wr := &prompb.WriteRequest{}
shutdown := func() {
lastCtx, cancel := context.WithTimeout(context.Background(), defaultWriteTimeout)
@@ -178,73 +173,42 @@ func (c *Client) run(ctx context.Context, id int) {
cancel()
}
// add jitter to spread remote write flushes over the flush interval to avoid congestion at the remote write destination
h := xxhash.Sum64(bytesutil.ToUnsafeBytes(fmt.Sprintf("%d", id)))
randJitter := uint64(float64(c.flushInterval) * (float64(h) / (1 << 64)))
timer := time.NewTimer(time.Duration(randJitter))
addJitter:
for {
select {
case <-c.doneCh:
timer.Stop()
shutdown()
return
case <-ctx.Done():
timer.Stop()
shutdown()
return
case <-timer.C:
break addJitter
}
}
ticker := time.NewTicker(c.flushInterval)
defer ticker.Stop()
for {
select {
case <-c.doneCh:
shutdown()
return
case <-ctx.Done():
shutdown()
return
case <-ticker.C:
c.flush(ctx, wr)
// drain the potential stale tick to avoid small or empty flushes after a slow flush.
c.wg.Add(1)
go func() {
defer c.wg.Done()
defer ticker.Stop()
for {
select {
case <-c.doneCh:
shutdown()
return
case <-ctx.Done():
shutdown()
return
case <-ticker.C:
default:
}
case ts, ok := <-c.input:
if !ok {
continue
}
wr.Timeseries = append(wr.Timeseries, ts)
if len(wr.Timeseries) >= c.maxBatchSize {
c.flush(ctx, wr)
case ts, ok := <-c.input:
if !ok {
continue
}
wr.Timeseries = append(wr.Timeseries, ts)
if len(wr.Timeseries) >= c.maxBatchSize {
c.flush(ctx, wr)
}
}
}
}
}()
}
var (
rwErrors = metrics.NewCounter(`vmalert_remotewrite_errors_total`)
rwTotal = metrics.NewCounter(`vmalert_remotewrite_total`)
// sentRows and sentBytes are historical counters that can now be replaced by flushedRows and flushedBytes histograms. They may be deprecated in the future after the new histograms have been adopted for some time.
sentRows = metrics.NewCounter(`vmalert_remotewrite_sent_rows_total`)
sentBytes = metrics.NewCounter(`vmalert_remotewrite_sent_bytes_total`)
flushedRows = metrics.NewHistogram(`vmalert_remotewrite_sent_rows`)
flushedBytes = metrics.NewHistogram(`vmalert_remotewrite_sent_bytes`)
droppedRows = metrics.NewCounter(`vmalert_remotewrite_dropped_rows_total`)
sendDuration = metrics.NewFloatCounter(`vmalert_remotewrite_send_duration_seconds_total`)
bufferFlushDuration = metrics.NewHistogram(`vmalert_remotewrite_flush_duration_seconds`)
remoteWriteQueueSize = metrics.NewHistogram(`vmalert_remotewrite_queue_size`)
_ = metrics.NewGauge(`vmalert_remotewrite_queue_capacity`, func() float64 {
return float64(*maxQueueSize)
})
sentRows = metrics.NewCounter(`vmalert_remotewrite_sent_rows_total`)
sentBytes = metrics.NewCounter(`vmalert_remotewrite_sent_bytes_total`)
droppedRows = metrics.NewCounter(`vmalert_remotewrite_dropped_rows_total`)
sendDuration = metrics.NewFloatCounter(`vmalert_remotewrite_send_duration_seconds_total`)
bufferFlushDuration = metrics.NewHistogram(`vmalert_remotewrite_flush_duration_seconds`)
_ = metrics.NewGauge(`vmalert_remotewrite_concurrency`, func() float64 {
return float64(*concurrency)
@@ -258,7 +222,6 @@ func GetDroppedRows() int { return int(droppedRows.Get()) }
// it to remote-write endpoint. Flush performs limited amount of retries
// if request fails.
func (c *Client) flush(ctx context.Context, wr *prompb.WriteRequest) {
remoteWriteQueueSize.Update(float64(len(c.input)))
if len(wr.Timeseries) < 1 {
return
}
@@ -268,16 +231,16 @@ func (c *Client) flush(ctx context.Context, wr *prompb.WriteRequest) {
data := wr.MarshalProtobuf(nil)
b := snappy.Encode(nil, data)
maxRetryInterval := *retryMaxTime
bt := timeutil.NewBackoffTimer(*retryMinInterval, maxRetryInterval)
retryInterval, maxRetryInterval := *retryMinInterval, *retryMaxTime
if retryInterval > maxRetryInterval {
retryInterval = maxRetryInterval
}
timeStart := time.Now()
defer func() {
sendDuration.Add(time.Since(timeStart).Seconds())
}()
attempts := 0
L:
for {
for attempts := 0; ; attempts++ {
err := c.send(ctx, b)
if err != nil && (errors.Is(err, io.EOF) || netutil.IsTrivialNetworkError(err)) {
// Something in the middle between client and destination might be closing
@@ -287,8 +250,6 @@ L:
if err == nil {
sentRows.Add(len(wr.Timeseries))
sentBytes.Add(len(b))
flushedRows.Update(float64(len(wr.Timeseries)))
flushedBytes.Update(float64(len(b)))
return
}
@@ -314,13 +275,13 @@ L:
break
}
if bt.CurrentDelay() > timeLeftForRetries {
bt.SetDelay(timeLeftForRetries)
if retryInterval > timeLeftForRetries {
retryInterval = timeLeftForRetries
}
// sleeping to prevent remote db hammering
bt.Wait(ctx.Done())
time.Sleep(retryInterval)
retryInterval *= 2
attempts++
}
rwErrors.Inc()

View File

@@ -44,7 +44,7 @@ func TestClient_Push(t *testing.T) {
r := rand.New(rand.NewSource(1))
const rowsN = int(1e4)
for range rowsN {
for i := 0; i < rowsN; i++ {
s := prompb.TimeSeries{
Samples: []prompb.Sample{{
Value: r.Float64(),
@@ -102,7 +102,7 @@ func TestClient_run_maxBatchSizeDuringShutdown(t *testing.T) {
}
// push time series to the client.
for range pushCnt {
for i := 0; i < pushCnt; i++ {
if err = rwClient.Push(prompb.TimeSeries{}); err != nil {
t.Fatalf("cannot time series to the client: %s", err)
}

View File

@@ -22,7 +22,7 @@ func TestDebugClient_Push(t *testing.T) {
const rowsN = 100
var sent int
for i := range rowsN {
for i := 0; i < rowsN; i++ {
s := prompb.TimeSeries{
Samples: []prompb.Sample{{
Value: float64(i),

View File

@@ -2,7 +2,6 @@ package rule
import (
"context"
"errors"
"fmt"
"hash/fnv"
"math"
@@ -188,54 +187,6 @@ func (ar *AlertingRule) ID() uint64 {
return ar.RuleID
}
// ToAPI returns ApiRule representation of ar
func (ar *AlertingRule) ToAPI() ApiRule {
state := ar.state
lastState := state.getLast()
r := ApiRule{
Type: TypeAlerting,
DatasourceType: ar.Type.String(),
Name: ar.Name,
Query: ar.Expr,
Duration: ar.For.Seconds(),
KeepFiringFor: ar.KeepFiringFor.Seconds(),
Labels: ar.Labels,
Annotations: ar.Annotations,
LastEvaluation: lastState.Time,
EvaluationTime: lastState.Duration.Seconds(),
Health: "ok",
State: "inactive",
Alerts: ar.AlertsToAPI(),
LastSamples: lastState.Samples,
LastSeriesFetched: lastState.SeriesFetched,
MaxUpdates: state.size(),
Updates: state.getAll(),
Debug: ar.Debug,
// encode as strings to avoid rounding in JSON
ID: fmt.Sprintf("%d", ar.ID()),
GroupID: fmt.Sprintf("%d", ar.GroupID),
GroupName: ar.GroupName,
File: ar.File,
}
if lastState.Err != nil {
r.LastError = lastState.Err.Error()
r.Health = "err"
}
// satisfy apiRule.State logic
if len(r.Alerts) > 0 {
r.State = notifier.StatePending.String()
stateFiring := notifier.StateFiring.String()
for _, a := range r.Alerts {
if a.State == stateFiring {
r.State = stateFiring
break
}
}
}
return r
}
// GetAlerts returns active alerts of rule
func (ar *AlertingRule) GetAlerts() []*notifier.Alert {
ar.alertsMu.RLock()
@@ -247,6 +198,16 @@ func (ar *AlertingRule) GetAlerts() []*notifier.Alert {
return alerts
}
// GetAlert returns alert if id exists
func (ar *AlertingRule) GetAlert(id uint64) *notifier.Alert {
ar.alertsMu.RLock()
defer ar.alertsMu.RUnlock()
if ar.alerts == nil {
return nil
}
return ar.alerts[id]
}
func (ar *AlertingRule) logDebugf(at time.Time, a *notifier.Alert, format string, args ...any) {
if !ar.Debug {
return
@@ -312,11 +273,6 @@ type labelSet struct {
// On k conflicts in origin set, the original value is preferred and copied
// to processed with `exported_%k` key. The copy happens only if passed v isn't equal to origin[k] value.
func (ls *labelSet) add(k, v string) {
// do not add label with empty value, since it has no meaning.
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9984
if v == "" {
return
}
ls.processed[k] = v
ov, ok := ls.origin[k]
if !ok {
@@ -346,13 +302,14 @@ func (ar *AlertingRule) toLabels(m datasource.Metric, qFn templates.QueryFn) (*l
ls.processed[l.Name] = l.Value
}
// labels only support limited templating variables,
// including `labels`, `value` and `expr`, to avoid breaking alert states or causing cardinality issue with results
extraLabels, err := notifier.ExecTemplate(qFn, ar.Labels, notifier.AlertTplData{
Labels: ls.origin,
Value: m.Values[0],
Expr: ar.Expr,
})
if err != nil {
return nil, fmt.Errorf("failed to expand labels: %w", err)
}
for k, v := range extraLabels {
ls.add(k, v)
}
@@ -363,7 +320,7 @@ func (ar *AlertingRule) toLabels(m datasource.Metric, qFn templates.QueryFn) (*l
if !*disableAlertGroupLabel && ar.GroupName != "" {
ls.add(alertGroupNameLabel, ar.GroupName)
}
return ls, err
return ls, nil
}
// execRange executes alerting rule on the given time range similarly to exec.
@@ -384,12 +341,16 @@ func (ar *AlertingRule) execRange(ctx context.Context, start, end time.Time) ([]
return []datasource.Metric{{Timestamps: []int64{0}, Values: []float64{math.NaN()}}}, nil
}
for _, s := range res.Data {
ls, err := ar.expandLabelTemplates(s, qFn)
ls, err := ar.expandLabelTemplates(s)
if err != nil {
return nil, err
}
alertID := hash(ls.processed)
a := ar.newAlert(s, time.Time{}, ls.processed, nil) // initial alert
as, err := ar.expandAnnotationTemplates(s, qFn, time.Time{}, ls)
if err != nil {
return nil, err
}
a := ar.newAlert(s, time.Time{}, ls.processed, as) // initial alert
prevT := time.Time{}
for i := range s.Values {
@@ -405,6 +366,8 @@ func (ar *AlertingRule) execRange(ctx context.Context, start, end time.Time) ([]
// reset to Pending if there are gaps > EvalInterval between DPs
a.State = notifier.StatePending
a.ActiveAt = at
// re-template the annotations as active timestamp is changed
a.Annotations, _ = ar.expandAnnotationTemplates(s, qFn, at, ls)
a.Start = time.Time{}
} else if at.Sub(a.ActiveAt) >= ar.For && a.State != notifier.StateFiring {
a.State = notifier.StateFiring
@@ -450,7 +413,7 @@ func (ar *AlertingRule) exec(ctx context.Context, ts time.Time, limit int) ([]pr
defer func() {
ar.state.add(curState)
if curState.Err != nil && !errors.Is(curState.Err, context.Canceled) {
if curState.Err != nil {
ar.metrics.errors.Inc()
}
}()
@@ -459,8 +422,7 @@ func (ar *AlertingRule) exec(ctx context.Context, ts time.Time, limit int) ([]pr
return nil, fmt.Errorf("failed to execute query %q: %w", ar.Expr, err)
}
isPartial := isPartialResponse(res)
ar.logDebugf(ts, nil, "query returned %d series (elapsed: %s, isPartial: %t)", curState.Samples, curState.Duration, isPartial)
ar.logDebugf(ts, nil, "query returned %d series (elapsed: %s, isPartial: %t)", curState.Samples, curState.Duration, isPartialResponse(res))
qFn := func(query string) ([]datasource.Metric, error) {
res, _, err := ar.q.Query(ctx, query, ts)
return res.Data, err
@@ -472,11 +434,10 @@ func (ar *AlertingRule) exec(ctx context.Context, ts time.Time, limit int) ([]pr
expandedLabels := make([]*labelSet, len(res.Data))
expandedAnnotations := make([]map[string]string, len(res.Data))
for i, m := range res.Data {
ls, err := ar.expandLabelTemplates(m, qFn)
ls, err := ar.expandLabelTemplates(m)
if err != nil {
// only set error in current state, but do not break alert processing
curState.Err = err
logger.Errorf("got templating error in rule %s: %q", ar.Name, err)
return nil, curState.Err
}
at := ts
alertID := hash(ls.processed)
@@ -486,11 +447,10 @@ func (ar *AlertingRule) exec(ctx context.Context, ts time.Time, limit int) ([]pr
at = a.ActiveAt
}
}
as, err := ar.expandAnnotationTemplates(m, qFn, at, ls, isPartial)
as, err := ar.expandAnnotationTemplates(m, qFn, at, ls)
if err != nil {
// only set error in current state, but do not break alert processing
curState.Err = err
logger.Errorf("got templating error in rule %s: %q", ar.Name, err)
return nil, curState.Err
}
expandedLabels[i] = ls
expandedAnnotations[i] = as
@@ -596,29 +556,31 @@ func (ar *AlertingRule) exec(ctx context.Context, ts time.Time, limit int) ([]pr
return append(tss, ar.toTimeSeries(ts.Unix())...), nil
}
func (ar *AlertingRule) expandLabelTemplates(m datasource.Metric, qFn templates.QueryFn) (*labelSet, error) {
func (ar *AlertingRule) expandLabelTemplates(m datasource.Metric) (*labelSet, error) {
qFn := func(_ string) ([]datasource.Metric, error) {
return nil, fmt.Errorf("`query` template isn't supported in rule label")
}
ls, err := ar.toLabels(m, qFn)
if err != nil {
return ls, fmt.Errorf("failed to expand label templates: %s", err)
return nil, fmt.Errorf("failed to expand label templates: %s", err)
}
return ls, nil
}
func (ar *AlertingRule) expandAnnotationTemplates(m datasource.Metric, qFn templates.QueryFn, activeAt time.Time, ls *labelSet, isPartial bool) (map[string]string, error) {
func (ar *AlertingRule) expandAnnotationTemplates(m datasource.Metric, qFn templates.QueryFn, activeAt time.Time, ls *labelSet) (map[string]string, error) {
tplData := notifier.AlertTplData{
Value: m.Values[0],
Type: ar.Type.String(),
Labels: ls.origin,
Expr: ar.Expr,
AlertID: hash(ls.processed),
GroupID: ar.GroupID,
ActiveAt: activeAt,
For: ar.For,
IsPartial: isPartial,
Value: m.Values[0],
Type: ar.Type.String(),
Labels: ls.origin,
Expr: ar.Expr,
AlertID: hash(ls.processed),
GroupID: ar.GroupID,
ActiveAt: activeAt,
For: ar.For,
}
as, err := notifier.ExecTemplate(qFn, ar.Annotations, tplData)
if err != nil {
return as, fmt.Errorf("failed to expand annotation templates: %s", err)
return nil, fmt.Errorf("failed to expand annotation templates: %s", err)
}
return as, nil
}
@@ -818,9 +780,7 @@ func (ar *AlertingRule) restore(ctx context.Context, q datasource.Querier, ts ti
expr := fmt.Sprintf("default_rollup(%s{%s%s}[%ds])",
alertForStateMetricName, nameStr, labelsFilter, int(lookback.Seconds()))
// query ALERTS_FOR_STATE at `ts-1s` instead `ts` to avoid retrieving data written in the current run,
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10335
res, _, err := q.Query(ctx, expr, ts.Add(-1*time.Second))
res, _, err := q.Query(ctx, expr, ts)
if err != nil {
return fmt.Errorf("failed to execute restore query %q: %w ", expr, err)
}

View File

@@ -1,106 +0,0 @@
//go:build synctest
package rule
import (
"context"
"strings"
"testing"
"testing/synctest"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
)
// TestAlertingRule_ActiveAtPreservedInAnnotations ensures that the fix for
// https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9543 is preserved
// while allowing query templates in labels (https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9783)
func TestAlertingRule_ActiveAtPreservedInAnnotations(t *testing.T) {
// wrap into synctest because of time manipulations
synctest.Test(t, func(t *testing.T) {
fq := &datasource.FakeQuerier{}
ar := &AlertingRule{
Name: "TestActiveAtPreservation",
Labels: map[string]string{
"test_query_in_label": `{{ "static_value" }}`,
},
Annotations: map[string]string{
"description": "Alert active since {{ $activeAt }}",
},
alerts: make(map[uint64]*notifier.Alert),
q: fq,
state: &ruleState{
entries: make([]StateEntry, 10),
},
}
// Mock query result - return empty result to make suppress_for_mass_alert = false
// (no need to add anything to fq for empty result)
// Add a metric that should trigger the alert
fq.Add(metricWithValueAndLabels(t, 1, "instance", "server1"))
// First execution - creates new alert
ts1 := time.Now()
_, err := ar.exec(context.TODO(), ts1, 0)
if err != nil {
t.Fatalf("unexpected error on first exec: %s", err)
}
if len(ar.alerts) != 1 {
t.Fatalf("expected 1 alert, got %d", len(ar.alerts))
}
firstAlert := ar.GetAlerts()[0]
// Verify first execution: activeAt should be ts1 and annotation should reflect it
if !firstAlert.ActiveAt.Equal(ts1) {
t.Fatalf("expected activeAt to be %v, got %v", ts1, firstAlert.ActiveAt)
}
// Extract time from annotation (format will be like "Alert active since 2025-09-30 08:55:13.638551611 -0400 EDT m=+0.002928464")
expectedTimeStr := ts1.Format("2006-01-02 15:04:05")
if !strings.Contains(firstAlert.Annotations["description"], expectedTimeStr) {
t.Fatalf("first exec annotation should contain time %s, got: %s", expectedTimeStr, firstAlert.Annotations["description"])
}
// Second execution - should preserve activeAt in annotation
// Ensure different timestamp with different seconds
// sleep is non-blocking thanks to synctest
time.Sleep(2 * time.Second)
ts2 := time.Now()
_, err = ar.exec(context.TODO(), ts2, 0)
if err != nil {
t.Fatalf("unexpected error on second exec: %s", err)
}
// Get the alert again (should be the same alert)
if len(ar.alerts) != 1 {
t.Fatalf("expected 1 alert, got %d", len(ar.alerts))
}
secondAlert := ar.GetAlerts()[0]
// Critical test: activeAt should still be ts1, not ts2
if !secondAlert.ActiveAt.Equal(ts1) {
t.Fatalf("activeAt should be preserved as %v, but got %v", ts1, secondAlert.ActiveAt)
}
// Critical test: annotation should still contain ts1 time, not ts2
if !strings.Contains(secondAlert.Annotations["description"], expectedTimeStr) {
t.Fatalf("second exec annotation should still contain original time %s, got: %s", expectedTimeStr, secondAlert.Annotations["description"])
}
// Additional verification: annotation should NOT contain ts2 time
ts2TimeStr := ts2.Format("2006-01-02 15:04:05")
if strings.Contains(secondAlert.Annotations["description"], ts2TimeStr) {
t.Fatalf("annotation should NOT contain new eval time %s, got: %s", ts2TimeStr, secondAlert.Annotations["description"])
}
// Verify query template in labels still works (this would fail if query templates were broken)
if firstAlert.Labels["test_query_in_label"] != "static_value" {
t.Fatalf("expected test_query_in_label=static_value, got %s", firstAlert.Labels["test_query_in_label"])
}
})
}

View File

@@ -663,7 +663,7 @@ func TestAlertingRuleExecRange(t *testing.T) {
Name: "for-pending",
Type: config.NewPrometheusType().String(),
Labels: map[string]string{"alertname": "for-pending"},
Annotations: map[string]string{},
Annotations: map[string]string{"activeAt": "5000"},
State: notifier.StatePending,
ActiveAt: time.Unix(5, 0),
Value: 1,
@@ -683,7 +683,7 @@ func TestAlertingRuleExecRange(t *testing.T) {
Name: "for-firing",
Type: config.NewPrometheusType().String(),
Labels: map[string]string{"alertname": "for-firing"},
Annotations: map[string]string{},
Annotations: map[string]string{"activeAt": "1000"},
State: notifier.StateFiring,
ActiveAt: time.Unix(1, 0),
Start: time.Unix(5, 0),
@@ -704,7 +704,7 @@ func TestAlertingRuleExecRange(t *testing.T) {
Name: "for-hold-pending",
Type: config.NewPrometheusType().String(),
Labels: map[string]string{"alertname": "for-hold-pending"},
Annotations: map[string]string{},
Annotations: map[string]string{"activeAt": "5000"},
State: notifier.StatePending,
ActiveAt: time.Unix(5, 0),
Value: 1,
@@ -826,9 +826,12 @@ func TestGroup_Restore(t *testing.T) {
fg := NewGroup(config.Group{Name: "TestRestore", Rules: rules}, fqr, time.Second, nil)
fg.Init()
wg := sync.WaitGroup{}
wg.Go(func() {
fg.Start(context.Background(), nil, fqr)
})
wg.Add(1)
go func() {
nts := func() []notifier.Notifier { return []notifier.Notifier{&notifier.FakeNotifier{}} }
fg.Start(context.Background(), nts, nil, fqr)
wg.Done()
}()
fg.Close()
wg.Wait()
@@ -1119,7 +1122,7 @@ func TestAlertingRuleLimit_Success(t *testing.T) {
}
func TestAlertingRule_Template(t *testing.T) {
f := func(rule *AlertingRule, metrics []datasource.Metric, isResponsePartial bool, alertsExpected map[uint64]*notifier.Alert) {
f := func(rule *AlertingRule, metrics []datasource.Metric, alertsExpected map[uint64]*notifier.Alert) {
t.Helper()
fakeGroup := Group{
@@ -1132,7 +1135,6 @@ func TestAlertingRule_Template(t *testing.T) {
entries: make([]StateEntry, 10),
}
fq.Add(metrics...)
fq.SetPartialResponse(isResponsePartial)
if _, err := rule.exec(context.TODO(), time.Now(), 0); err != nil {
t.Fatalf("unexpected error: %s", err)
@@ -1163,7 +1165,7 @@ func TestAlertingRule_Template(t *testing.T) {
}, []datasource.Metric{
metricWithValueAndLabels(t, 1, "instance", "foo"),
metricWithValueAndLabels(t, 1, "instance", "bar"),
}, false, map[uint64]*notifier.Alert{
}, map[uint64]*notifier.Alert{
hash(map[string]string{alertNameLabel: "common", "region": "east", "instance": "foo"}): {
Annotations: map[string]string{
"summary": `common: Too high connection number for "foo"`,
@@ -1192,14 +1194,14 @@ func TestAlertingRule_Template(t *testing.T) {
"instance": "{{ $labels.instance }}",
},
Annotations: map[string]string{
"summary": `{{ $labels.__name__ }}: Too high connection number for "{{ $labels.instance }}".{{ if $isPartial }} WARNING: Partial response detected - this alert may be incomplete. Please verify the results manually.{{ end }}`,
"summary": `{{ $labels.__name__ }}: Too high connection number for "{{ $labels.instance }}"`,
"description": `{{ $labels.alertname}}: It is {{ $value }} connections for "{{ $labels.instance }}"`,
},
alerts: make(map[uint64]*notifier.Alert),
}, []datasource.Metric{
metricWithValueAndLabels(t, 2, "__name__", "first", "instance", "foo", alertNameLabel, "override"),
metricWithValueAndLabels(t, 10, "__name__", "second", "instance", "bar", alertNameLabel, "override"),
}, false, map[uint64]*notifier.Alert{
}, map[uint64]*notifier.Alert{
hash(map[string]string{alertNameLabel: "override label", "exported_alertname": "override", "instance": "foo"}): {
Labels: map[string]string{
alertNameLabel: "override label",
@@ -1207,7 +1209,7 @@ func TestAlertingRule_Template(t *testing.T) {
"instance": "foo",
},
Annotations: map[string]string{
"summary": `first: Too high connection number for "foo".`,
"summary": `first: Too high connection number for "foo"`,
"description": `override: It is 2 connections for "foo"`,
},
},
@@ -1218,7 +1220,7 @@ func TestAlertingRule_Template(t *testing.T) {
"instance": "bar",
},
Annotations: map[string]string{
"summary": `second: Too high connection number for "bar".`,
"summary": `second: Too high connection number for "bar"`,
"description": `override: It is 10 connections for "bar"`,
},
},
@@ -1231,7 +1233,7 @@ func TestAlertingRule_Template(t *testing.T) {
"instance": "{{ $labels.instance }}",
},
Annotations: map[string]string{
"summary": `Alert "{{ $labels.alertname }}({{ $labels.alertgroup }})" for instance {{ $labels.instance }}.{{ if $isPartial }} WARNING: Partial response detected - this alert may be incomplete. Please verify the results manually.{{ end }}`,
"summary": `Alert "{{ $labels.alertname }}({{ $labels.alertgroup }})" for instance {{ $labels.instance }}`,
},
alerts: make(map[uint64]*notifier.Alert),
}, []datasource.Metric{
@@ -1239,7 +1241,7 @@ func TestAlertingRule_Template(t *testing.T) {
alertNameLabel, "originAlertname",
alertGroupNameLabel, "originGroupname",
"instance", "foo"),
}, true, map[uint64]*notifier.Alert{
}, map[uint64]*notifier.Alert{
hash(map[string]string{
alertNameLabel: "OriginLabels",
"exported_alertname": "originAlertname",
@@ -1255,7 +1257,7 @@ func TestAlertingRule_Template(t *testing.T) {
"instance": "foo",
},
Annotations: map[string]string{
"summary": `Alert "originAlertname(originGroupname)" for instance foo. WARNING: Partial response detected - this alert may be incomplete. Please verify the results manually.`,
"summary": `Alert "originAlertname(originGroupname)" for instance foo`,
},
},
})
@@ -1370,10 +1372,8 @@ func TestAlertingRule_ToLabels(t *testing.T) {
ar := &AlertingRule{
Labels: map[string]string{
"instance": "override", // this should override instance with new value
"group": "vmalert", // this shouldn't have effect since value in metric is equal
"invalid_label": "{{ .Values.mustRuntimeFail }}",
"empty_label": "", // this should be dropped
"instance": "override", // this should override instance with new value
"group": "vmalert", // this shouldn't have effect since value in metric is equal
},
Expr: "sum(vmalert_alerting_rules_error) by(instance, group, alertname) > 0",
Name: "AlertingRulesError",
@@ -1381,11 +1381,10 @@ func TestAlertingRule_ToLabels(t *testing.T) {
}
expectedOriginLabels := map[string]string{
"instance": "0.0.0.0:8800",
"group": "vmalert",
"alertname": "ConfigurationReloadFailure",
"alertgroup": "vmalert",
"invalid_label": `error evaluating template: template: :1:298: executing "" at <.Values.mustRuntimeFail>: can't evaluate field Values in type notifier.tplData`,
"instance": "0.0.0.0:8800",
"group": "vmalert",
"alertname": "ConfigurationReloadFailure",
"alertgroup": "vmalert",
}
expectedProcessedLabels := map[string]string{
@@ -1395,12 +1394,11 @@ func TestAlertingRule_ToLabels(t *testing.T) {
"exported_alertname": "ConfigurationReloadFailure",
"group": "vmalert",
"alertgroup": "vmalert",
"invalid_label": `error evaluating template: template: :1:298: executing "" at <.Values.mustRuntimeFail>: can't evaluate field Values in type notifier.tplData`,
}
ls, err := ar.toLabels(metric, nil)
if err == nil || !strings.Contains(err.Error(), "error evaluating template") {
t.Fatalf("unexpected error %q", err.Error())
if err != nil {
t.Fatalf("unexpected error: %s", err)
}
if !reflect.DeepEqual(ls.origin, expectedOriginLabels) {
@@ -1431,50 +1429,3 @@ func TestAlertingRuleExec_Partial(t *testing.T) {
t.Fatalf("unexpected error: %s", err)
}
}
func TestAlertingRule_QueryTemplateInLabels(t *testing.T) {
fq := &datasource.FakeQuerier{}
fakeGroup := Group{
Name: "TestQueryTemplateInLabels",
}
ar := &AlertingRule{
Name: "test_alert",
Labels: map[string]string{
"suppress_for_mass_alert": `{{ if (printf "ALERTS{alertname='SomeAlert', alertstate='firing', device='%s'} == 1" $labels.device | query) }}true{{ else }}false{{ end }}`,
},
Annotations: map[string]string{
"summary": "Test alert with query template in labels",
},
alerts: make(map[uint64]*notifier.Alert),
}
ar.GroupID = fakeGroup.GetID()
ar.q = fq
ar.state = &ruleState{
entries: make([]StateEntry, 10),
}
// Add a metric that should trigger the alert
fq.Add(metricWithValueAndLabels(t, 1, "device", "sda1"))
ts := time.Now()
_, err := ar.exec(context.TODO(), ts, 0)
if err != nil {
t.Fatalf("unexpected error with query template in labels: %s", err)
}
// Verify that the alert was created and the query template was executed
if len(ar.alerts) != 1 {
t.Fatalf("expected 1 alert, got %d", len(ar.alerts))
}
alert := ar.GetAlerts()[0]
suppressLabel, exists := alert.Labels["suppress_for_mass_alert"]
if !exists {
t.Fatalf("expected 'suppress_for_mass_alert' label to exist")
}
// The query template should have been executed (even if it returns false due to mock data)
if suppressLabel != "true" && suppressLabel != "false" {
t.Fatalf("expected 'suppress_for_mass_alert' label to be 'true' or 'false', got '%s'", suppressLabel)
}
}

View File

@@ -2,11 +2,11 @@ package rule
import (
"context"
"encoding/json"
"errors"
"flag"
"fmt"
"hash/fnv"
"maps"
"net/url"
"sync"
"time"
@@ -25,14 +25,10 @@ import (
)
var (
ruleResultsLimit = flag.Int("rule.resultsLimit", 0, "Limits the number of alerts or recording results a single rule can produce. "+
"Can be overridden by the limit option under group if specified. "+
"If exceeded, the rule will be marked with an error and all its results will be discarded. "+
"0 means no limit.")
ruleUpdateEntriesLimit = flag.Int("rule.updateEntriesLimit", 20, "Defines the max number of rule's state updates stored in-memory. "+
"Rule's updates are available on rule's Details page and are used for debugging purposes. The number of stored updates can be overridden per rule via update_entries_limit param.")
resendDelay = flag.Duration("rule.resendDelay", 0, "Minimum amount of time to wait before resending an alert to notifier.")
maxResolveDuration = flag.Duration("rule.maxResolveDuration", 0, "Limits the maximum duration for automatic alert expiration, "+
resendDelay = flag.Duration("rule.resendDelay", 0, "MiniMum amount of time to wait before resending an alert to notifier.")
maxResolveDuration = flag.Duration("rule.maxResolveDuration", 0, "Limits the maxiMum duration for automatic alert expiration, "+
"which by default is 4 times evaluationInterval of the parent group")
evalDelay = flag.Duration("rule.evalDelay", 30*time.Second, "Adjustment of the 'time' parameter for rule evaluation requests to compensate intentional data delay from the datasource. "+
"Normally, should be equal to '-search.latencyOffset' (cmd-line flag configured for VictoriaMetrics single-node or vmselect). "+
@@ -40,8 +36,6 @@ var (
disableAlertGroupLabel = flag.Bool("disableAlertgroupLabel", false, "Whether to disable adding group's Name as label to generated alerts and time series.")
remoteReadLookBack = flag.Duration("remoteRead.lookback", time.Hour, "Lookback defines how far to look into past for alerts timeseries. "+
"For example, if lookback=1h then range from now() to now()-1h will be scanned.")
maxStartDelay = flag.Duration("group.maxStartDelay", 5*time.Minute, "Defines the max delay before starting the group evaluation. Group's start is artificially delayed for random duration on interval"+
" [0..min(--group.maxStartDelay, group.interval)]. This helps smoothing out the load on the configured datasource, so evaluations aren't executed too close to each other.")
)
// Group is an entity for grouping rules
@@ -98,7 +92,9 @@ type groupMetrics struct {
// set2 has priority over set1.
func mergeLabels(groupName, ruleName string, set1, set2 map[string]string) map[string]string {
r := map[string]string{}
maps.Copy(r, set1)
for k, v := range set1 {
r[k] = v
}
for k, v := range set2 {
if prevV, ok := r[k]; ok {
logger.Infof("label %q=%q for rule %q.%q overwritten with external label %q=%q",
@@ -116,6 +112,7 @@ func NewGroup(cfg config.Group, qb datasource.QuerierBuilder, defaultInterval ti
Name: cfg.Name,
File: cfg.File,
Interval: cfg.Interval.Duration(),
Limit: cfg.Limit,
Concurrency: cfg.Concurrency,
checksum: cfg.Checksum,
Params: cfg.Params,
@@ -132,11 +129,6 @@ func NewGroup(cfg config.Group, qb datasource.QuerierBuilder, defaultInterval ti
if g.Interval == 0 {
g.Interval = defaultInterval
}
if cfg.Limit != nil {
g.Limit = *cfg.Limit
} else {
g.Limit = *ruleResultsLimit
}
if g.Concurrency < 1 {
g.Concurrency = 1
}
@@ -297,7 +289,7 @@ func (g *Group) InterruptEval() {
}
}
// Close stops the group and its rules, unregisters group metrics
// Close stops the group and it's rules, unregisters group metrics
func (g *Group) Close() {
if g.doneCh == nil {
return
@@ -306,6 +298,10 @@ func (g *Group) Close() {
g.InterruptEval()
<-g.finishedCh
g.closeGroupMetrics()
}
func (g *Group) closeGroupMetrics() {
metrics.UnregisterSet(g.metrics.set, true)
}
@@ -331,13 +327,13 @@ func (g *Group) Init() {
}
// Start starts group's evaluation
func (g *Group) Start(ctx context.Context, rw remotewrite.RWClient, rr datasource.QuerierBuilder) {
func (g *Group) Start(ctx context.Context, nts func() []notifier.Notifier, rw remotewrite.RWClient, rr datasource.QuerierBuilder) {
defer func() { close(g.finishedCh) }()
evalTS := time.Now()
// sleep random duration to spread group rules evaluation
// over maxStartDelay to reduce the load on datasource.
// over time in order to reduce load on datasource.
if !SkipRandSleepOnGroupStart {
sleepBeforeStart := g.delayBeforeStart(evalTS, *maxStartDelay)
sleepBeforeStart := delayBeforeStart(evalTS, g.GetID(), g.Interval, g.EvalOffset)
g.infof("will start in %v", sleepBeforeStart)
sleepTimer := time.NewTimer(sleepBeforeStart)
@@ -369,22 +365,21 @@ func (g *Group) Start(ctx context.Context, rw remotewrite.RWClient, rr datasourc
e := &executor{
Rw: rw,
Notifiers: nts,
notifierHeaders: g.NotifierHeaders,
}
g.infof("started")
eval := func(ctx context.Context, ts time.Time) time.Time {
eval := func(ctx context.Context, ts time.Time) {
g.metrics.iterationTotal.Inc()
start := time.Now()
if len(g.Rules) < 1 {
g.metrics.iterationDuration.UpdateDuration(start)
g.mu.Lock()
g.LastEvaluation = start
g.mu.Unlock()
return ts
return
}
resolveDuration := getResolveDuration(g.Interval, *resendDelay, *maxResolveDuration)
@@ -397,10 +392,7 @@ func (g *Group) Start(ctx context.Context, rw remotewrite.RWClient, rr datasourc
}
}
g.metrics.iterationDuration.UpdateDuration(start)
g.mu.Lock()
g.LastEvaluation = start
g.mu.Unlock()
return ts
}
evalCtx, cancel := context.WithCancel(ctx)
@@ -409,15 +401,15 @@ func (g *Group) Start(ctx context.Context, rw remotewrite.RWClient, rr datasourc
g.mu.Unlock()
defer g.evalCancel()
eval(evalCtx, evalTS)
t := time.NewTicker(g.Interval)
defer t.Stop()
realEvalTS := eval(evalCtx, evalTS)
// restore the rules state after the first evaluation
// so only active alerts can be restored.
if rr != nil {
err := g.restore(ctx, rr, realEvalTS, *remoteReadLookBack)
err := g.restore(ctx, rr, evalTS, *remoteReadLookBack)
if err != nil {
logger.Errorf("error while restoring ruleState for group %q: %s", g.Name, err)
}
@@ -480,35 +472,32 @@ func (g *Group) UpdateWith(newGroup *Group) {
g.updateCh <- newGroup
}
// delayBeforeStart returns duration for delaying the evaluation start
// based on given ts and Group settings. The delay can't exceed maxDelay.
// maxDelay is ignored if g.EvalOffset != nil.
//
// Delaying is important to smooth out the load on the datasource when all groups start at the same time.
// delayBeforeStart calculates delay based on Group ID, so all groups will start at different moments of time.
func (g *Group) delayBeforeStart(ts time.Time, maxDelay time.Duration) time.Duration {
if g.EvalOffset != nil {
offset := *g.EvalOffset
// adjust the offset for negative evalOffset, the rule is:
// `eval_offset: -x` is equivalent to `eval_offset: y` for `interval: x+y`.
// For example, `eval_offset: -6m` is equivalent to `eval_offset: 4m` for `interval: 10m`.
if offset < 0 {
offset += g.Interval
}
// if offset is specified, ignore the maxDelay and return a duration aligned with offset
currentOffsetPoint := ts.Truncate(g.Interval).Add(offset)
// DeepCopy returns a deep copy of group
func (g *Group) DeepCopy() *Group {
g.mu.RLock()
data, _ := json.Marshal(g)
g.mu.RUnlock()
newG := Group{}
_ = json.Unmarshal(data, &newG)
newG.Rules = g.Rules
newG.id = g.id
return &newG
}
// if offset is specified, delayBeforeStart returns a duration to help aligning timestamp with offset;
// otherwise, it returns a random duration between [0..interval] based on group key.
func delayBeforeStart(ts time.Time, key uint64, interval time.Duration, offset *time.Duration) time.Duration {
if offset != nil {
currentOffsetPoint := ts.Truncate(interval).Add(*offset)
if currentOffsetPoint.Before(ts) {
// wait until the next offset point
return currentOffsetPoint.Add(g.Interval).Sub(ts)
return currentOffsetPoint.Add(interval).Sub(ts)
}
return currentOffsetPoint.Sub(ts)
}
// otherwise, return a random duration between [0..min(interval, maxDelay)] based on group ID
// artificially limit interval, so groups with big intervals could start sooner.
interval := min(g.Interval, maxDelay)
var randSleep time.Duration
randSleep = time.Duration(float64(interval) * (float64(g.GetID()) / (1 << 64)))
randSleep = time.Duration(float64(interval) * (float64(key) / (1 << 64)))
sleepOffset := time.Duration(ts.UnixNano() % interval.Nanoseconds())
if randSleep < sleepOffset {
randSleep += interval
@@ -570,13 +559,15 @@ func (g *Group) Replay(start, end time.Time, rw remotewrite.RWClient, maxDataPoi
if !disableProgressBar {
bar = pb.StartNew(iterations * len(g.Rules))
}
for i := range g.Rules {
rule := g.Rules[i]
for _, r := range g.Rules {
sem <- struct{}{}
wg.Go(func() {
res <- replayRuleRange(rule, ri, bar, rw, replayRuleRetryAttempts, ruleEvaluationConcurrency)
wg.Add(1)
go func(r Rule, ri rangeIterator) {
// pass ri as a copy, so it can be modified within the replayRuleRange
res <- replayRuleRange(r, ri, bar, rw, replayRuleRetryAttempts, ruleEvaluationConcurrency)
<-sem
})
wg.Done()
}(r, ri)
}
wg.Wait()
@@ -606,10 +597,10 @@ func replayRuleRange(r Rule, ri rangeIterator, bar *pb.ProgressBar, rw remotewri
res := make(chan int, int(ri.end.Sub(ri.start)/ri.step)+1)
for ri.next() {
sem <- struct{}{}
start := ri.s
end := ri.e
wg.Go(func() {
n, err := replayRule(r, start, end, rw, replayRuleRetryAttempts)
wg.Add(1)
go func(s, e time.Time) {
n, err := replayRule(r, s, e, rw, replayRuleRetryAttempts)
if err != nil {
logger.Fatalf("rule %q: %s", r, err)
}
@@ -618,7 +609,8 @@ func replayRuleRange(r Rule, ri rangeIterator, bar *pb.ProgressBar, rw remotewri
}
res <- n
<-sem
})
wg.Done()
}(ri.s, ri.e)
}
wg.Wait()
close(res)
@@ -632,9 +624,10 @@ func replayRuleRange(r Rule, ri rangeIterator, bar *pb.ProgressBar, rw remotewri
}
// ExecOnce evaluates all the rules under group for once with given timestamp.
func (g *Group) ExecOnce(ctx context.Context, rw remotewrite.RWClient, evalTS time.Time) chan error {
func (g *Group) ExecOnce(ctx context.Context, nts func() []notifier.Notifier, rw remotewrite.RWClient, evalTS time.Time) chan error {
e := &executor{
Rw: rw,
Notifiers: nts,
notifierHeaders: g.NotifierHeaders,
}
if len(g.Rules) < 1 {
@@ -709,6 +702,7 @@ func (g *Group) getEvalDelay() time.Duration {
// executor contains group's notify and rw configs
type executor struct {
Notifiers func() []notifier.Notifier
notifierHeaders map[string]string
Rw remotewrite.RWClient
@@ -729,13 +723,14 @@ func (e *executor) execConcurrently(ctx context.Context, rules []Rule, ts time.T
sem := make(chan struct{}, concurrency)
go func() {
wg := sync.WaitGroup{}
for i := range rules {
rule := rules[i]
for _, r := range rules {
sem <- struct{}{}
wg.Go(func() {
res <- e.exec(ctx, rule, ts, resolveDuration, limit)
wg.Add(1)
go func(r Rule) {
res <- e.exec(ctx, r, ts, resolveDuration, limit)
<-sem
})
wg.Done()
}(r)
}
wg.Wait()
close(res)
@@ -764,7 +759,6 @@ func (e *executor) exec(ctx context.Context, r Rule, ts time.Time, resolveDurati
return fmt.Errorf("rule %q: failed to execute: %w", r, err)
}
var errG vmalertutil.ErrGroup
if e.Rw != nil {
pushToRW := func(tss []prompb.TimeSeries) error {
var lastErr error
@@ -776,26 +770,31 @@ func (e *executor) exec(ctx context.Context, r Rule, ts time.Time, resolveDurati
return lastErr
}
if err := pushToRW(tss); err != nil {
errG.Add(err)
return err
}
}
ar, ok := r.(*AlertingRule)
if !ok {
return errG.Err()
return nil
}
alerts := ar.alertsToSend(resolveDuration, *resendDelay)
if len(alerts) < 1 {
return errG.Err()
return nil
}
notifierErr := notifier.Send(ctx, alerts, e.notifierHeaders)
for err := range notifierErr {
if err != nil {
errG.Add(fmt.Errorf("rule %q: notifier failure: %w", r, err))
}
wg := sync.WaitGroup{}
errGr := new(vmalertutil.ErrGroup)
for _, nt := range e.Notifiers() {
wg.Add(1)
go func(nt notifier.Notifier) {
if err := nt.Send(ctx, alerts, e.notifierHeaders); err != nil {
errGr.Add(fmt.Errorf("rule %q: failed to send alerts to addr %q: %w", r, nt.Addr(), err))
}
wg.Done()
}(nt)
}
return errG.Err()
wg.Wait()
return errGr.Err()
}

View File

@@ -262,7 +262,7 @@ func TestUpdateDuringRandSleep(t *testing.T) {
updateCh: make(chan *Group),
}
g.Init()
go g.Start(context.Background(), nil, nil)
go g.Start(context.Background(), nil, nil, nil)
rule1 := AlertingRule{
Name: "jobDown",
@@ -346,8 +346,7 @@ func TestGroupStart(t *testing.T) {
}
fs := &datasource.FakeQuerier{}
fn, cleanup := notifier.InitFakeNotifier()
defer cleanup()
fn := &notifier.FakeNotifier{}
const evalInterval = time.Millisecond
g := NewGroup(groups[0], fs, evalInterval, map[string]string{"cluster": "east-1"})
@@ -396,7 +395,7 @@ func TestGroupStart(t *testing.T) {
fs.Add(m2)
g.Init()
go func() {
g.Start(context.Background(), nil, fs)
g.Start(context.Background(), func() []notifier.Notifier { return []notifier.Notifier{fn} }, nil, fs)
close(finished)
}()
@@ -405,8 +404,7 @@ func TestGroupStart(t *testing.T) {
var cur uint64
prev := g.metrics.iterationTotal.Get()
i := 0
for {
for i := 0; ; i++ {
if i > 40 {
t.Fatalf("group wasn't able to perform %d evaluations during %d eval intervals", n, i)
}
@@ -415,7 +413,6 @@ func TestGroupStart(t *testing.T) {
return
}
time.Sleep(interval)
i++
}
}
@@ -475,10 +472,15 @@ func TestFaultyNotifier(t *testing.T) {
r := newTestAlertingRule("instant", 0)
r.q = fq
fn, cleanup := notifier.InitFakeNotifier()
defer cleanup()
e := &executor{}
fn := &notifier.FakeNotifier{}
e := &executor{
Notifiers: func() []notifier.Notifier {
return []notifier.Notifier{
&notifier.FaultyNotifier{},
fn,
}
},
}
delay := 5 * time.Second
ctx, cancel := context.WithTimeout(context.Background(), delay)
defer cancel()
@@ -551,7 +553,7 @@ func TestCloseWithEvalInterruption(t *testing.T) {
g := NewGroup(groups[0], fq, evalInterval, nil)
g.Init()
go g.Start(context.Background(), nil, nil)
go g.Start(context.Background(), nil, nil, nil)
time.Sleep(evalInterval * 20)
@@ -569,10 +571,9 @@ func TestCloseWithEvalInterruption(t *testing.T) {
func TestGroupStartDelay(t *testing.T) {
g := &Group{}
g.id = uint64(math.MaxUint64 / 10)
// interval of 5min and key generate a static delay of 30s
g.Interval = time.Minute * 5
maxDelay := time.Minute * 5
key := uint64(math.MaxUint64 / 10)
f := func(atS, expS string) {
t.Helper()
@@ -584,7 +585,7 @@ func TestGroupStartDelay(t *testing.T) {
if err != nil {
t.Fatal(err)
}
delay := g.delayBeforeStart(at, maxDelay)
delay := delayBeforeStart(at, key, g.Interval, g.EvalOffset)
gotStart := at.Add(delay)
if expTS != gotStart {
t.Fatalf("expected to get %v; got %v instead", expTS, gotStart)
@@ -605,24 +606,6 @@ func TestGroupStartDelay(t *testing.T) {
f("2023-01-01T00:01:00.000+00:00", "2023-01-01T00:03:00.000+00:00")
f("2023-01-01T00:03:30.000+00:00", "2023-01-01T00:08:00.000+00:00")
f("2023-01-01T00:08:00.000+00:00", "2023-01-01T00:08:00.000+00:00")
// test group with negative offset -2min, which is equivalent to 3min offset for 5min interval
offset = -2 * time.Minute
g.EvalOffset = &offset
f("2023-01-01T00:00:15.000+00:00", "2023-01-01T00:03:00.000+00:00")
f("2023-01-01T00:01:00.000+00:00", "2023-01-01T00:03:00.000+00:00")
f("2023-01-01T00:03:30.000+00:00", "2023-01-01T00:08:00.000+00:00")
f("2023-01-01T00:08:00.000+00:00", "2023-01-01T00:08:00.000+00:00")
maxDelay = time.Minute * 1
g.EvalOffset = nil
// test group with maxDelay, and offset disabled
f("2023-01-01T00:00:00.000+00:00", "2023-01-01T00:00:06.000+00:00")
f("2023-01-01T00:00:01.000+00:00", "2023-01-01T00:00:06.000+00:00")
f("2023-01-01T00:00:06.100+00:00", "2023-01-01T00:01:06.000+00:00")
f("2023-01-01T00:00:11.000+00:00", "2023-01-01T00:01:06.000+00:00")
}
func TestGetPrometheusReqTimestamp(t *testing.T) {

View File

@@ -2,7 +2,6 @@ package rule
import (
"context"
"errors"
"fmt"
"strings"
"time"
@@ -82,37 +81,6 @@ func (rr *RecordingRule) ID() uint64 {
return rr.RuleID
}
// ToAPI returns ApiRule representation of rr
func (rr *RecordingRule) ToAPI() ApiRule {
state := rr.state
lastState := state.getLast()
r := ApiRule{
Type: TypeRecording,
DatasourceType: rr.Type.String(),
Name: rr.Name,
Query: rr.Expr,
Labels: rr.Labels,
LastEvaluation: lastState.Time,
EvaluationTime: lastState.Duration.Seconds(),
Health: "ok",
LastSamples: lastState.Samples,
LastSeriesFetched: lastState.SeriesFetched,
MaxUpdates: state.size(),
Updates: state.getAll(),
// encode as strings to avoid rounding
ID: fmt.Sprintf("%d", rr.ID()),
GroupID: fmt.Sprintf("%d", rr.GroupID),
GroupName: rr.GroupName,
File: rr.File,
}
if lastState.Err != nil {
r.LastError = lastState.Err.Error()
r.Health = "err"
}
return r
}
// NewRecordingRule creates a new RecordingRule
func NewRecordingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rule) *RecordingRule {
debug := group.Debug
@@ -198,7 +166,7 @@ func (rr *RecordingRule) exec(ctx context.Context, ts time.Time, limit int) ([]p
defer func() {
rr.state.add(curState)
if curState.Err != nil && !errors.Is(curState.Err, context.Canceled) {
if curState.Err != nil {
rr.metrics.errors.Inc()
}
}()
@@ -237,8 +205,7 @@ func (rr *RecordingRule) exec(ctx context.Context, ts time.Time, limit int) ([]p
Labels: stringToLabels(k),
Samples: []prompb.Sample{
{Value: decimal.StaleNaN, Timestamp: ts.UnixNano() / 1e6},
},
})
}})
}
rr.lastEvaluation = curEvaluation
return tss, nil
@@ -293,11 +260,6 @@ func (rr *RecordingRule) toTimeSeries(m datasource.Metric) prompb.TimeSeries {
}
// add extra labels configured by user
for k := range rr.Labels {
// do not add label with empty value, since it has no meaning.
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9984
if rr.Labels[k] == "" {
continue
}
existingLabel := promrelabel.GetLabelByName(m.Labels, k)
if existingLabel != nil { // there is a conflict between extra and existing label
if existingLabel.Value == rr.Labels[k] {

View File

@@ -21,8 +21,6 @@ type Rule interface {
// ID returns unique ID that may be used for
// identifying this Rule among others.
ID() uint64
// ToAPI returns ApiRule representation of Rule
ToAPI() ApiRule
// exec executes the rule with given context at the given timestamp and limit.
// returns an err if number of resulting time series exceeds the limit.
exec(ctx context.Context, ts time.Time, limit int) ([]prompb.TimeSeries, error)
@@ -70,6 +68,39 @@ type StateEntry struct {
Curl string `json:"curl"`
}
// GetLastEntry returns latest stateEntry of rule
func GetLastEntry(r Rule) StateEntry {
if rule, ok := r.(*AlertingRule); ok {
return rule.state.getLast()
}
if rule, ok := r.(*RecordingRule); ok {
return rule.state.getLast()
}
return StateEntry{}
}
// GetRuleStateSize returns size of rule stateEntry
func GetRuleStateSize(r Rule) int {
if rule, ok := r.(*AlertingRule); ok {
return rule.state.size()
}
if rule, ok := r.(*RecordingRule); ok {
return rule.state.size()
}
return 0
}
// GetAllRuleState returns rule entire stateEntries
func GetAllRuleState(r Rule) []StateEntry {
if rule, ok := r.(*AlertingRule); ok {
return rule.state.getAll()
}
if rule, ok := r.(*RecordingRule); ok {
return rule.state.getAll()
}
return []StateEntry{}
}
func (s *ruleState) size() int {
s.RLock()
defer s.RUnlock()
@@ -121,7 +152,7 @@ func (s *ruleState) add(e StateEntry) {
func replayRule(r Rule, start, end time.Time, rw remotewrite.RWClient, replayRuleRetryAttempts int) (int, error) {
var err error
var tss []prompb.TimeSeries
for i := range replayRuleRetryAttempts {
for i := 0; i < replayRuleRetryAttempts; i++ {
tss, err = r.execRange(context.Background(), start, end)
if err == nil {
break

View File

@@ -40,7 +40,7 @@ func TestRule_state(t *testing.T) {
}
var last time.Time
for range stateEntriesN * 2 {
for i := 0; i < stateEntriesN*2; i++ {
last = time.Now()
r.state.add(StateEntry{At: last})
}
@@ -65,15 +65,17 @@ func TestRule_stateConcurrent(_ *testing.T) {
r := &AlertingRule{state: &ruleState{entries: make([]StateEntry, 20)}}
const workers = 50
const iterations = 100
var wg sync.WaitGroup
for range workers {
wg.Go(func() {
for range iterations {
wg := sync.WaitGroup{}
wg.Add(workers)
for i := 0; i < workers; i++ {
go func() {
defer wg.Done()
for i := 0; i < iterations; i++ {
r.state.add(StateEntry{At: time.Now()})
r.state.getAll()
r.state.getLast()
}
})
}()
}
wg.Wait()
}

View File

@@ -19,13 +19,13 @@ func CompareRules(t *testing.T, a, b Rule) error {
case *AlertingRule:
br, ok := b.(*AlertingRule)
if !ok {
return fmt.Errorf("rule %d supposed to be of type AlertingRule", b.ID())
return fmt.Errorf("rule %q supposed to be of type AlertingRule", b.ID())
}
return compareAlertingRules(t, v, br)
case *RecordingRule:
br, ok := b.(*RecordingRule)
if !ok {
return fmt.Errorf("rule %d supposed to be of type RecordingRule", b.ID())
return fmt.Errorf("rule %q supposed to be of type RecordingRule", b.ID())
}
return compareRecordingRules(t, v, br)
default:

View File

@@ -34,12 +34,11 @@ body {
padding-top: 4.5rem;
}
.vm-group {
.group-items {
cursor: pointer;
padding: 5px;
margin-top: 5px;
position: relative;
display: none;
}
.btn svg, .dropdown-item svg {
@@ -56,22 +55,14 @@ body {
height: 38px;
}
.vm-item:not(.vm-found) {
display: none;
.group-items:not(:has(.sub-item:not(.d-none))) {
display: none !important;
}
.vm-group:has(.vm-item:is(.vm-found)), .vm-group:is(.vm-found) {
display: flex;
}
.vm-group:hover {
.group-items:hover {
background-color: #f8f9fa!important;
}
.vm-group:is(.vm-found) .vm-item {
display: table-row;
}
.table {
table-layout: fixed;
}
@@ -120,9 +111,3 @@ textarea.curl-area {
.w-60 {
width: 60%;
}
.annotations {
white-space: pre-wrap;
color: gray;
word-wrap: break-word;
}

View File

@@ -11,7 +11,7 @@
<path d="M224.163 175.27a1.9 1.9 0 0 0 2.8 0l6-5.9a2.1 2.1 0 0 0 .2-2.7 1.9 1.9 0 0 0-3-.2l-2.6 2.6v-5.2c0-1.54-1.667-2.502-3-1.732-.619.357-1 1.017-1 1.732v5.2l-2.6-2.6a1.9 1.9 0 0 0-3 .2 2.1 2.1 0 0 0 .2 2.7zm-16.459-23.297h36c1.54 0 2.502-1.667 1.732-3a2 2 0 0 0-1.732-1h-36c-1.54 0-2.502 1.667-1.732 3 .357.619 1.017 1 1.732 1m36 4h-36c-1.54 0-2.502 1.667-1.732 3 .357.619 1.017 1 1.732 1h36c1.54 0 2.502-1.667 1.732-3a2 2 0 0 0-1.732-1m-16.59-23.517a1.9 1.9 0 0 0-2.8 0l-6 5.9a2.1 2.1 0 0 0-.2 2.7 1.9 1.9 0 0 0 3 .2l2.6-2.6v5.2c0 1.54 1.667 2.502 3 1.732.619-.357 1-1.017 1-1.732v-5.2l2.6 2.6a1.9 1.9 0 0 0 3-.2 2.1 2.1 0 0 0-.2-2.7z"/>
</symbol>
<symbol id="state" viewBox="-10 -10 320 310">
<symbol id="filter" viewBox="-10 -10 320 310">
<path d="M288.953 0h-277c-5.522 0-10 4.478-10 10v49.531c0 5.522 4.478 10 10 10h12.372l91.378 107.397v113.978a10 10 0 0 0 15.547 8.32l49.5-33a10 10 0 0 0 4.453-8.32v-80.978l91.378-107.397h12.372c5.522 0 10-4.478 10-10V10c0-5.522-4.477-10-10-10M167.587 166.77a10 10 0 0 0-2.384 6.48v79.305l-29.5 19.666V173.25a10 10 0 0 0-2.384-6.48L50.585 69.531h199.736zM278.953 49.531h-257V20h257z"/>
</symbol>

Before

Width:  |  Height:  |  Size: 4.7 KiB

After

Width:  |  Height:  |  Size: 4.7 KiB

View File

@@ -8,9 +8,9 @@ function actionAll(isCollapse) {
});
}
function groupForState(key) {
function groupFilter(key) {
if (key) {
location.href = `?state=${key}`;
location.href = `?filter=${key}`;
} else {
window.location = window.location.pathname;
}
@@ -65,34 +65,32 @@ function getParamURL(key) {
return url.searchParams.get(key)
}
function matchText(search, item) {
const text = item.innerText.toLowerCase();
return text.indexOf(search) >= 0;
}
function filterRules(searchPhrase) {
document.querySelectorAll('.vm-group').forEach((group) => {
if (!searchPhrase) {
group.classList.add('vm-found');
return;
}
for (const item of group.querySelectorAll('.vm-group-search')) {
if (matchText(searchPhrase, item)) {
group.classList.add('vm-found');
return;
document.querySelectorAll('.sub-items').forEach((rules) => {
let found = false;
rules.querySelectorAll('.sub-item').forEach((rule) => {
if (searchPhrase) {
const ruleName = rule.innerText.toLowerCase();
const matches = []
const hasValue = ruleName.indexOf(searchPhrase) >= 0;
rule.querySelectorAll('.label').forEach((label) => {
const text = label.innerText.toLowerCase();
if (text.indexOf(searchPhrase) >= 0) {
matches.push(text);
}
});
if (!matches.length && !hasValue) {
rule.classList.add('d-none');
return;
}
}
}
group.classList.remove('vm-found');
for (const item of group.querySelectorAll('.vm-item')) {
if (matchText(searchPhrase, item)) {
item.classList.add('vm-found');
continue;
}
if (Array.from(item.querySelectorAll('.label')).find(l => matchText(searchPhrase, l))) {
item.classList.add('vm-found');
continue;
}
item.classList.remove('vm-found');
rule.classList.remove('d-none');
found = true;
});
if (found && searchPhrase || !searchPhrase) {
rules.classList.remove('d-none');
} else {
rules.classList.add('d-none');
}
});
}

View File

@@ -485,12 +485,6 @@ func templateFuncs() textTpl.FuncMap {
/* Helpers */
// now returns the Unix timestamp in seconds at the time of the template evaluation.
// For example: {{ (now | toTime).Sub $activeAt }} will return the duration the alert has been active.
"now": func() float64 {
return float64(time.Now().Unix())
},
// Converts a list of objects to a map with keys arg0, arg1 etc.
// This is intended to allow multiple arguments to be passed to templates.
"args": func(args ...any) map[string]any {

View File

@@ -45,7 +45,7 @@ func (eg *ErrGroup) Error() string {
return ""
}
var b strings.Builder
fmt.Fprintf(&b, "errors(%d): \n", len(eg.errs))
fmt.Fprintf(&b, "errors(%d): ", len(eg.errs))
for i, err := range eg.errs {
b.WriteString(err.Error())
if i != len(eg.errs)-1 {

View File

@@ -30,8 +30,8 @@ func TestErrGroup(t *testing.T) {
}
f(nil, "")
f([]error{errors.New("timeout")}, "errors(1): \ntimeout")
f([]error{errors.New("timeout"), errors.New("deadline")}, "errors(2): \ntimeout\ndeadline")
f([]error{errors.New("timeout")}, "errors(1): timeout")
f([]error{errors.New("timeout"), errors.New("deadline")}, "errors(2): timeout\ndeadline")
}
// TestErrGroupConcurrent supposed to test concurrent
@@ -42,7 +42,7 @@ func TestErrGroupConcurrent(_ *testing.T) {
const writersN = 4
payload := make(chan error, writersN)
for range writersN {
for i := 0; i < writersN; i++ {
go func() {
for err := range payload {
eg.Add(err)
@@ -51,7 +51,7 @@ func TestErrGroupConcurrent(_ *testing.T) {
}
const iterations = 500
for i := range iterations {
for i := 0; i < iterations; i++ {
payload <- fmt.Errorf("error %d", i)
if i%10 == 0 {
_ = eg.Err()

View File

@@ -1,11 +1,9 @@
package main
import (
"cmp"
"embed"
"encoding/json"
"fmt"
"math"
"net/http"
"slices"
"strconv"
@@ -31,9 +29,7 @@ var (
{"api/v1/rules", "list all loaded groups and rules"},
{"api/v1/alerts", "list all active alerts"},
{"api/v1/notifiers", "list all notifiers"},
{fmt.Sprintf("api/v1/alert?%s=<int>&%s=<int>", rule.ParamGroupID, rule.ParamAlertID), "get alert status by group and alert ID"},
{fmt.Sprintf("api/v1/rule?%s=<int>&%s=<int>", rule.ParamGroupID, rule.ParamRuleID), "get rule status by group and rule ID"},
{fmt.Sprintf("api/v1/group?%s=<int>", rule.ParamGroupID), "get group status by group ID"},
{fmt.Sprintf("api/v1/alert?%s=<int>&%s=<int>", paramGroupID, paramAlertID), "get alert status by group and alert ID"},
}
systemLinks = [][2]string{
{"vmalert/groups", "UI"},
@@ -49,16 +45,9 @@ var (
{Name: "Docs", URL: "https://docs.victoriametrics.com/victoriametrics/vmalert/"},
}
ruleTypeMap = map[string]string{
"alert": rule.TypeAlerting,
"record": rule.TypeRecording,
"alert": ruleTypeAlerting,
"record": ruleTypeRecording,
}
// The "recovering", "noData", "normal", "error" states are used by Grafana.
// Ignore "recovering" since it is not currently acknowledged by vmalert,
// treat "noData" as an alias for "nomatch",
// treat "normal" as an alias for "inactive",
// treat "error" as an alias for "unhealthy"
ruleStates = []string{"ok", "nomatch", "inactive", "firing", "pending", "unhealthy", "recovering", "noData", "normal", "error"}
)
type requestHandler struct {
@@ -72,14 +61,6 @@ var (
staticServer = http.StripPrefix("/vmalert", staticHandler)
)
func marshalJson(v any, kind string) ([]byte, *httpserver.ErrorWithStatusCode) {
data, err := json.Marshal(v)
if err != nil {
return nil, errResponse(fmt.Errorf("failed to marshal %s: %s", kind, err), http.StatusInternalServerError)
}
return data, nil
}
func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
if strings.HasPrefix(r.URL.Path, "/vmalert/static") {
staticServer.ServeHTTP(w, r)
@@ -111,32 +92,40 @@ func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
httpserver.Errorf(w, r, "%s", err)
return true
}
WriteRule(w, r, rule)
WriteRuleDetails(w, r, rule)
return true
// current used by old vmalert UI and Grafana Alerts
case "/vmalert/groups", "/rules":
case "/vmalert/groups":
rf, err := newRulesFilter(r)
if err != nil {
httpserver.Errorf(w, r, "%s", err)
return true
}
// only support filtering by a single state
state := ""
if len(rf.states) > 0 {
state = rf.states[0]
rf.states = rf.states[:1]
}
lr := rh.groups(rf)
WriteListGroups(w, r, lr.Data.Groups, state)
data := rh.groups(rf)
WriteListGroups(w, r, data, rf.filter)
return true
case "/vmalert/notifiers":
WriteListTargets(w, r, notifier.GetTargets())
return true
// special cases for Grafana requests,
// served without `vmalert` prefix:
case "/rules":
// Grafana makes an extra request to `/rules`
// handler in addition to `/api/v1/rules` calls in alerts UI
var data []*apiGroup
rf, err := newRulesFilter(r)
if err != nil {
httpserver.Errorf(w, r, "%s", err)
return true
}
data = rh.groups(rf)
WriteListGroups(w, r, data, rf.filter)
return true
case "/vmalert/api/v1/notifiers", "/api/v1/notifiers":
data, err := rh.listNotifiers()
if err != nil {
errJson(w, r, err)
httpserver.Errorf(w, r, "%s", err)
return true
}
w.Header().Set("Content-Type", "application/json")
@@ -144,14 +133,15 @@ func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
return true
case "/vmalert/api/v1/rules", "/api/v1/rules":
// path used by Grafana for ng alerting
var data []byte
rf, err := newRulesFilter(r)
if err != nil {
errJson(w, r, err)
httpserver.Errorf(w, r, "%s", err)
return true
}
data, err := rh.listGroups(rf)
data, err = rh.listGroups(rf)
if err != nil {
errJson(w, r, err)
httpserver.Errorf(w, r, "%s", err)
return true
}
w.Header().Set("Content-Type", "application/json")
@@ -160,14 +150,14 @@ func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
case "/vmalert/api/v1/alerts", "/api/v1/alerts":
// path used by Grafana for ng alerting
gf, err := newGroupsFilter(r)
rf, err := newRulesFilter(r)
if err != nil {
errJson(w, r, err)
httpserver.Errorf(w, r, "%s", err)
return true
}
data, err := rh.listAlerts(gf)
data, err := rh.listAlerts(rf)
if err != nil {
errJson(w, r, err)
httpserver.Errorf(w, r, "%s", err)
return true
}
w.Header().Set("Content-Type", "application/json")
@@ -176,44 +166,30 @@ func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
case "/vmalert/api/v1/alert", "/api/v1/alert":
alert, err := rh.getAlert(r)
if err != nil {
errJson(w, r, err)
httpserver.Errorf(w, r, "%s", err)
return true
}
data, err := marshalJson(alert, "alert")
data, err := json.Marshal(alert)
if err != nil {
errJson(w, r, err)
httpserver.Errorf(w, r, "failed to marshal alert: %s", err)
return true
}
w.Header().Set("Content-Type", "application/json")
w.Write(data)
return true
case "/vmalert/api/v1/rule", "/api/v1/rule":
apiRule, err := rh.getRule(r)
rule, err := rh.getRule(r)
if err != nil {
errJson(w, r, err)
httpserver.Errorf(w, r, "%s", err)
return true
}
rwu := rule.ApiRuleWithUpdates{
ApiRule: apiRule,
StateUpdates: apiRule.Updates,
rwu := apiRuleWithUpdates{
apiRule: rule,
StateUpdates: rule.Updates,
}
data, err := marshalJson(rwu, "rule")
data, err := json.Marshal(rwu)
if err != nil {
errJson(w, r, err)
return true
}
w.Header().Set("Content-Type", "application/json")
w.Write(data)
return true
case "/vmalert/api/v1/group", "/api/v1/group":
group, err := rh.getGroup(r)
if err != nil {
errJson(w, r, err)
return true
}
data, err := marshalJson(group, "group")
if err != nil {
errJson(w, r, err)
httpserver.Errorf(w, r, "failed to marshal rule: %s", err)
return true
}
w.Header().Set("Content-Type", "application/json")
@@ -233,42 +209,30 @@ func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
}
}
func (rh *requestHandler) getGroup(r *http.Request) (*rule.ApiGroup, *httpserver.ErrorWithStatusCode) {
groupID, err := strconv.ParseUint(r.FormValue(rule.ParamGroupID), 10, 64)
func (rh *requestHandler) getRule(r *http.Request) (apiRule, error) {
groupID, err := strconv.ParseUint(r.FormValue(paramGroupID), 10, 64)
if err != nil {
return nil, errResponse(fmt.Errorf("failed to read %q param: %w", rule.ParamGroupID, err), http.StatusBadRequest)
return apiRule{}, fmt.Errorf("failed to read %q param: %w", paramGroupID, err)
}
obj, err := rh.m.groupAPI(groupID)
ruleID, err := strconv.ParseUint(r.FormValue(paramRuleID), 10, 64)
if err != nil {
return nil, errResponse(err, http.StatusNotFound)
}
return obj, nil
}
func (rh *requestHandler) getRule(r *http.Request) (rule.ApiRule, *httpserver.ErrorWithStatusCode) {
groupID, err := strconv.ParseUint(r.FormValue(rule.ParamGroupID), 10, 64)
if err != nil {
return rule.ApiRule{}, errResponse(fmt.Errorf("failed to read %q param: %w", rule.ParamGroupID, err), http.StatusBadRequest)
}
ruleID, err := strconv.ParseUint(r.FormValue(rule.ParamRuleID), 10, 64)
if err != nil {
return rule.ApiRule{}, errResponse(fmt.Errorf("failed to read %q param: %w", rule.ParamRuleID, err), http.StatusBadRequest)
return apiRule{}, fmt.Errorf("failed to read %q param: %w", paramRuleID, err)
}
obj, err := rh.m.ruleAPI(groupID, ruleID)
if err != nil {
return rule.ApiRule{}, errResponse(err, http.StatusNotFound)
return apiRule{}, errResponse(err, http.StatusNotFound)
}
return obj, nil
}
func (rh *requestHandler) getAlert(r *http.Request) (*rule.ApiAlert, *httpserver.ErrorWithStatusCode) {
groupID, err := strconv.ParseUint(r.FormValue(rule.ParamGroupID), 10, 64)
func (rh *requestHandler) getAlert(r *http.Request) (*apiAlert, error) {
groupID, err := strconv.ParseUint(r.FormValue(paramGroupID), 10, 64)
if err != nil {
return nil, errResponse(fmt.Errorf("failed to read %q param: %w", rule.ParamGroupID, err), http.StatusBadRequest)
return nil, fmt.Errorf("failed to read %q param: %w", paramGroupID, err)
}
alertID, err := strconv.ParseUint(r.FormValue(rule.ParamAlertID), 10, 64)
alertID, err := strconv.ParseUint(r.FormValue(paramAlertID), 10, 64)
if err != nil {
return nil, errResponse(fmt.Errorf("failed to read %q param: %w", rule.ParamAlertID, err), http.StatusBadRequest)
return nil, fmt.Errorf("failed to read %q param: %w", paramAlertID, err)
}
a, err := rh.m.alertAPI(groupID, alertID)
if err != nil {
@@ -278,76 +242,28 @@ func (rh *requestHandler) getAlert(r *http.Request) (*rule.ApiAlert, *httpserver
}
type listGroupsResponse struct {
Status string `json:"status"`
Page int `json:"page,omitempty"`
TotalPages int `json:"total_pages,omitempty"`
TotalGroups int `json:"total_groups,omitempty"`
TotalRules int `json:"total_rules,omitempty"`
Data struct {
Groups []*rule.ApiGroup `json:"groups"`
Status string `json:"status"`
Data struct {
Groups []*apiGroup `json:"groups"`
} `json:"data"`
}
type groupsFilter struct {
groupNames []string
files []string
dsType config.Type
}
func newGroupsFilter(r *http.Request) (*groupsFilter, *httpserver.ErrorWithStatusCode) {
_ = r.ParseForm()
vs := r.Form
gf := &groupsFilter{
groupNames: vs["rule_group[]"],
files: vs["file[]"],
}
dsType := vs.Get("datasource_type")
if len(dsType) > 0 {
if config.SupportedType(dsType) {
gf.dsType = config.NewRawType(dsType)
} else {
return nil, errResponse(fmt.Errorf(`invalid parameter "datasource_type": not supported value %q`, dsType), http.StatusBadRequest)
}
}
return gf, nil
}
func (gf *groupsFilter) matches(group *rule.Group) bool {
if len(gf.groupNames) > 0 && !slices.Contains(gf.groupNames, group.Name) {
return false
}
if len(gf.files) > 0 && !slices.Contains(gf.files, group.File) {
return false
}
if len(gf.dsType.Name) > 0 && gf.dsType.String() != group.Type.String() {
return false
}
return true
}
// see https://prometheus.io/docs/prometheus/latest/querying/api/#rules
type rulesFilter struct {
gf *groupsFilter
ruleNames []string
ruleType string
excludeAlerts bool
states []string
maxGroups int
pageNum int
search string
extendedStates bool
files []string
groupNames []string
ruleNames []string
ruleType string
excludeAlerts bool
filter string
dsType config.Type
}
func newRulesFilter(r *http.Request) (*rulesFilter, *httpserver.ErrorWithStatusCode) {
gf, err := newGroupsFilter(r)
if err != nil {
return nil, err
}
func newRulesFilter(r *http.Request) (*rulesFilter, error) {
rf := &rulesFilter{}
query := r.URL.Query()
var rf rulesFilter
rf.gf = gf
vs := r.Form
ruleTypeParam := vs.Get("type")
ruleTypeParam := query.Get("type")
if len(ruleTypeParam) > 0 {
if ruleType, ok := ruleTypeMap[ruleTypeParam]; ok {
rf.ruleType = ruleType
@@ -356,155 +272,102 @@ func newRulesFilter(r *http.Request) (*rulesFilter, *httpserver.ErrorWithStatusC
}
}
states := vs["state"]
if len(states) == 0 {
states = vs["filter"]
dsType := query.Get("datasource_type")
if len(dsType) > 0 {
if config.SupportedType(dsType) {
rf.dsType = config.NewRawType(dsType)
} else {
return nil, errResponse(fmt.Errorf(`invalid parameter "datasource_type": not supported value %q`, dsType), http.StatusBadRequest)
}
}
for _, s := range states {
values := strings.Split(s, ",")
for _, v := range values {
if len(v) == 0 {
continue
}
if !slices.Contains(ruleStates, v) {
return nil, errResponse(fmt.Errorf(`invalid parameter "state": contains not supported value %q`, v), http.StatusBadRequest)
}
// Replace grafana states with supported internal states
switch v {
case "noData":
v = "nomatch"
case "normal":
v = "inactive"
case "error":
v = "unhealthy"
}
rf.states = append(rf.states, v)
filter := strings.ToLower(query.Get("filter"))
if len(filter) > 0 {
if filter == "nomatch" || filter == "unhealthy" {
rf.filter = filter
} else {
return nil, errResponse(fmt.Errorf(`invalid parameter "filter": not supported value %q`, filter), http.StatusBadRequest)
}
}
rf.excludeAlerts = httputil.GetBool(r, "exclude_alerts")
rf.extendedStates = httputil.GetBool(r, "extended_states")
rf.ruleNames = append([]string{}, vs["rule_name[]"]...)
rf.search = strings.ToLower(vs.Get("search"))
pageNum := vs.Get("page_num")
maxGroups := vs.Get("group_limit")
if pageNum != "" {
if maxGroups == "" {
return nil, errResponse(fmt.Errorf(`"group_limit" needs to be present in order to paginate over the groups`), http.StatusBadRequest)
}
v, err := strconv.Atoi(pageNum)
if err != nil || v <= 0 {
return nil, errResponse(fmt.Errorf(`"page_num" is expected to be a positive number, found %q`, pageNum), http.StatusBadRequest)
}
rf.pageNum = v
}
if maxGroups != "" {
v, err := strconv.Atoi(maxGroups)
if err != nil || v <= 0 {
return nil, errResponse(fmt.Errorf(`"group_limit" is expected to be a positive number, found %q`, maxGroups), http.StatusBadRequest)
}
rf.maxGroups = v
}
return &rf, nil
rf.ruleNames = append([]string{}, r.Form["rule_name[]"]...)
rf.groupNames = append([]string{}, r.Form["rule_group[]"]...)
rf.files = append([]string{}, r.Form["file[]"]...)
return rf, nil
}
func (rf *rulesFilter) matchesRule(r *rule.ApiRule) bool {
if rf.ruleType != "" && rf.ruleType != r.Type {
func (rf *rulesFilter) matchesGroup(group *rule.Group) bool {
if len(rf.groupNames) > 0 && !slices.Contains(rf.groupNames, group.Name) {
return false
}
if len(rf.ruleNames) > 0 && !slices.Contains(rf.ruleNames, r.Name) {
if len(rf.files) > 0 && !slices.Contains(rf.files, group.File) {
return false
}
if len(rf.states) == 0 {
return true
if len(rf.dsType.Name) > 0 && rf.dsType.String() != group.Type.String() {
return false
}
return slices.Contains(rf.states, r.State)
return true
}
func (rh *requestHandler) groups(rf *rulesFilter) *listGroupsResponse {
func (rh *requestHandler) groups(rf *rulesFilter) []*apiGroup {
rh.m.groupsMu.RLock()
defer rh.m.groupsMu.RUnlock()
skipGroups := (rf.pageNum - 1) * rf.maxGroups
lr := &listGroupsResponse{
Status: "success",
}
lr.Data.Groups = make([]*rule.ApiGroup, 0)
if skipGroups >= len(rh.m.groups) {
return lr
}
// sort list of groups for deterministic output
groups := make([]*rule.Group, 0, len(rh.m.groups))
groups := make([]*apiGroup, 0)
for _, group := range rh.m.groups {
groups = append(groups, group)
}
slices.SortFunc(groups, func(a, b *rule.Group) int {
nameCmp := cmp.Compare(a.Name, b.Name)
if nameCmp != 0 {
return nameCmp
}
return cmp.Compare(a.File, b.File)
})
for _, group := range groups {
if !rf.gf.matches(group) {
if !rf.matchesGroup(group) {
continue
}
groupFound := len(rf.search) == 0 || strings.Contains(strings.ToLower(group.Name), rf.search) || strings.Contains(strings.ToLower(group.File), rf.search)
g := group.ToAPI()
g := groupToAPI(group)
// the returned list should always be non-nil
// https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4221
filteredRules := make([]rule.ApiRule, 0)
filteredRules := make([]apiRule, 0)
for _, rule := range g.Rules {
if !groupFound && !strings.Contains(strings.ToLower(rule.Name), rf.search) {
if rf.ruleType != "" && rf.ruleType != rule.Type {
continue
}
if rf.extendedStates {
rule.ExtendState()
if len(rf.ruleNames) > 0 && !slices.Contains(rf.ruleNames, rule.Name) {
continue
}
if !rf.matchesRule(&rule) {
if (rule.LastError == "" && rf.filter == "unhealthy") || (!isNoMatch(rule) && rf.filter == "nomatch") {
continue
}
if rf.excludeAlerts {
rule.Alerts = nil
}
g.States[rule.State]++
if rule.LastError != "" {
g.Unhealthy++
} else {
g.Healthy++
}
if isNoMatch(rule) {
g.NoMatch++
}
filteredRules = append(filteredRules, rule)
}
if len(g.Rules) == 0 || len(filteredRules) > 0 {
if rf.maxGroups > 0 {
lr.TotalGroups++
lr.TotalRules += len(filteredRules)
}
if skipGroups > 0 {
skipGroups--
continue
}
if rf.maxGroups == 0 || len(lr.Data.Groups) < rf.maxGroups {
g.Rules = filteredRules
lr.Data.Groups = append(lr.Data.Groups, g)
}
g.Rules = filteredRules
groups = append(groups, g)
}
// sort list of groups for deterministic output
slices.SortFunc(groups, func(a, b *apiGroup) int {
if a.Name != b.Name {
return strings.Compare(a.Name, b.Name)
}
}
if rf.maxGroups > 0 {
lr.Page = rf.pageNum
lr.TotalPages = max(int(math.Ceil(float64(lr.TotalGroups)/float64(rf.maxGroups))), 1)
}
return lr
return strings.Compare(a.File, b.File)
})
return groups
}
func (rh *requestHandler) listGroups(rf *rulesFilter) ([]byte, *httpserver.ErrorWithStatusCode) {
lr := rh.groups(rf)
if rf.pageNum > 1 && len(lr.Data.Groups) == 0 {
return nil, errResponse(fmt.Errorf(`page_num exceeds total amount of pages`), http.StatusBadRequest)
}
if lr.Page > lr.TotalPages {
return nil, errResponse(fmt.Errorf(`page_num=%d exceeds total amount of pages in result=%d`, lr.Page, lr.TotalPages), http.StatusBadRequest)
}
func (rh *requestHandler) listGroups(rf *rulesFilter) ([]byte, error) {
lr := listGroupsResponse{Status: "success"}
lr.Data.Groups = rh.groups(rf)
b, err := json.Marshal(lr)
if err != nil {
return nil, errResponse(fmt.Errorf(`error encoding list of groups: %w`, err), http.StatusInternalServerError)
return nil, &httpserver.ErrorWithStatusCode{
Err: fmt.Errorf(`error encoding list of active alerts: %w`, err),
StatusCode: http.StatusInternalServerError,
}
}
return b, nil
}
@@ -512,64 +375,67 @@ func (rh *requestHandler) listGroups(rf *rulesFilter) ([]byte, *httpserver.Error
type listAlertsResponse struct {
Status string `json:"status"`
Data struct {
Alerts []*rule.ApiAlert `json:"alerts"`
Alerts []*apiAlert `json:"alerts"`
} `json:"data"`
}
func (rh *requestHandler) groupAlerts() []rule.GroupAlerts {
func (rh *requestHandler) groupAlerts() []groupAlerts {
rh.m.groupsMu.RLock()
defer rh.m.groupsMu.RUnlock()
var gAlerts []rule.GroupAlerts
for _, group := range rh.m.groups {
var alerts []*rule.ApiAlert
g := group.ToAPI()
var gAlerts []groupAlerts
for _, g := range rh.m.groups {
var alerts []*apiAlert
for _, r := range g.Rules {
if r.Type != rule.TypeAlerting {
a, ok := r.(*rule.AlertingRule)
if !ok {
continue
}
alerts = append(alerts, r.Alerts...)
alerts = append(alerts, ruleToAPIAlert(a)...)
}
if len(alerts) > 0 {
gAlerts = append(gAlerts, rule.GroupAlerts{
Group: g,
gAlerts = append(gAlerts, groupAlerts{
Group: groupToAPI(g),
Alerts: alerts,
})
}
}
slices.SortFunc(gAlerts, func(a, b rule.GroupAlerts) int {
slices.SortFunc(gAlerts, func(a, b groupAlerts) int {
return strings.Compare(a.Group.Name, b.Group.Name)
})
return gAlerts
}
func (rh *requestHandler) listAlerts(gf *groupsFilter) ([]byte, *httpserver.ErrorWithStatusCode) {
func (rh *requestHandler) listAlerts(rf *rulesFilter) ([]byte, error) {
rh.m.groupsMu.RLock()
defer rh.m.groupsMu.RUnlock()
lr := listAlertsResponse{Status: "success"}
lr.Data.Alerts = make([]*rule.ApiAlert, 0)
lr.Data.Alerts = make([]*apiAlert, 0)
for _, group := range rh.m.groups {
if !gf.matches(group) {
if !rf.matchesGroup(group) {
continue
}
g := group.ToAPI()
for _, r := range g.Rules {
if r.Type != rule.TypeAlerting {
for _, r := range group.Rules {
a, ok := r.(*rule.AlertingRule)
if !ok {
continue
}
lr.Data.Alerts = append(lr.Data.Alerts, r.Alerts...)
lr.Data.Alerts = append(lr.Data.Alerts, ruleToAPIAlert(a)...)
}
}
// sort list of alerts for deterministic output
slices.SortFunc(lr.Data.Alerts, func(a, b *rule.ApiAlert) int {
slices.SortFunc(lr.Data.Alerts, func(a, b *apiAlert) int {
return strings.Compare(a.ID, b.ID)
})
b, err := json.Marshal(lr)
if err != nil {
return nil, errResponse(fmt.Errorf(`error encoding list of active alerts: %w`, err), http.StatusInternalServerError)
return nil, &httpserver.ErrorWithStatusCode{
Err: fmt.Errorf(`error encoding list of active alerts: %w`, err),
StatusCode: http.StatusInternalServerError,
}
}
return b, nil
}
@@ -577,33 +443,35 @@ func (rh *requestHandler) listAlerts(gf *groupsFilter) ([]byte, *httpserver.Erro
type listNotifiersResponse struct {
Status string `json:"status"`
Data struct {
Notifiers []*notifier.ApiNotifier `json:"notifiers"`
Notifiers []*apiNotifier `json:"notifiers"`
} `json:"data"`
}
func (rh *requestHandler) listNotifiers() ([]byte, *httpserver.ErrorWithStatusCode) {
func (rh *requestHandler) listNotifiers() ([]byte, error) {
targets := notifier.GetTargets()
lr := listNotifiersResponse{Status: "success"}
lr.Data.Notifiers = make([]*notifier.ApiNotifier, 0)
lr.Data.Notifiers = make([]*apiNotifier, 0)
for protoName, protoTargets := range targets {
nr := &notifier.ApiNotifier{
Kind: protoName,
Targets: make([]*notifier.ApiTarget, 0, len(protoTargets)),
notifier := &apiNotifier{
Kind: string(protoName),
Targets: make([]*apiTarget, 0, len(protoTargets)),
}
for _, target := range protoTargets {
nr.Targets = append(nr.Targets, &notifier.ApiTarget{
Address: target.Addr(),
Labels: target.Labels.ToMap(),
LastError: target.LastError(),
notifier.Targets = append(notifier.Targets, &apiTarget{
Address: target.Addr(),
Labels: target.Labels.ToMap(),
})
}
lr.Data.Notifiers = append(lr.Data.Notifiers, nr)
lr.Data.Notifiers = append(lr.Data.Notifiers, notifier)
}
b, err := json.Marshal(lr)
if err != nil {
return nil, errResponse(fmt.Errorf(`error encoding list of notifiers: %w`, err), http.StatusInternalServerError)
return nil, &httpserver.ErrorWithStatusCode{
Err: fmt.Errorf(`error encoding list of notifiers: %w`, err),
StatusCode: http.StatusInternalServerError,
}
}
return b, nil
}
@@ -614,8 +482,3 @@ func errResponse(err error, sc int) *httpserver.ErrorWithStatusCode {
StatusCode: sc,
}
}
func errJson(w http.ResponseWriter, r *http.Request, err *httpserver.ErrorWithStatusCode) {
w.Header().Set("Content-Type", "application/json")
httpserver.Errorf(w, r, `{"error":%q,"errorType":%d}`, err, err.StatusCode)
}

View File

@@ -8,11 +8,9 @@
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/tpl"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/vmalertutil"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/rule"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
) %}
{% func Controls(prefix, currentIcon, currentText string, icons, states map[string]string, search bool) %}
{% func Controls(prefix, currentIcon, currentText string, icons, filters map[string]string, search bool) %}
<div class="btn-toolbar mb-3" role="toolbar">
<div class="d-flex gap-2 justify-content-between w-100">
<div class="d-flex gap-2 align-items-center">
@@ -28,10 +26,10 @@
<use href="{%s prefix %}static/icons/icons.svg#expand"/>
</svg>
</a>
{% if len(states) > 0 %}
{% if len(filters) > 0 %}
<span class="d-none d-md-inline-block">Filter by status:</span>
<svg class="d-md-none" width="20" height="20">
<use href="{%s prefix %}static/icons/icons.svg#state">
<use href="{%s prefix %}static/icons/icons.svg#filter">
</svg>
<div class="dropdown">
<button
@@ -46,10 +44,10 @@
</svg>
</button>
<ul class="dropdown-menu">
{% for key, title := range states %}
{% for key, title := range filters %}
{% if title != currentText %}
<li>
<a class="dropdown-item" onclick="groupForState('{%s key %}')">
<a class="dropdown-item" onclick="groupFilter('{%s key %}')">
<span class="d-none d-md-inline-block">{%s title %}</span>
<svg class="d-md-none" width="22" height="22">
<use href="{%s prefix %}static/icons/icons.svg#{%s icons[key] %}"/>
@@ -79,8 +77,6 @@
{% func Welcome(r *http.Request) %}
{%= tpl.Header(r, navItems, "vmalert", getLastConfigError()) %}
<p>
Version {%s buildinfo.Version %} <br>
API:<br>
{% for _, p := range apiLinks %}
{%code p, doc := p[0], p[1] %}
@@ -97,10 +93,10 @@
{%= tpl.Footer(r) %}
{% endfunc %}
{% func ListGroups(r *http.Request, groups []*rule.ApiGroup, state string) %}
{% func ListGroups(r *http.Request, groups []*apiGroup, filter string) %}
{%code
prefix := vmalertutil.Prefix(r.URL.Path)
states := map[string]string{
filters := map[string]string{
"": "All",
"unhealthy": "Unhealthy",
"nomatch": "No Match",
@@ -110,29 +106,26 @@
"unhealthy": "unhealthy",
"nomatch": "nomatch",
}
currentText := states[state]
currentIcon := icons[state]
currentText := filters[filter]
currentIcon := icons[filter]
%}
{%= tpl.Header(r, navItems, "Groups", getLastConfigError()) %}
{%= Controls(prefix, currentIcon, currentText, icons, states, true) %}
{%= Controls(prefix, currentIcon, currentText, icons, filters, true) %}
{% if len(groups) > 0 %}
{% for _, g := range groups %}
<div id="group-{%s g.ID %}" class="w-100 border-0 flex-column vm-group{% if g.States["unhealthy"] > 0 %} alert-danger{% endif %}">
<div id="group-{%s g.ID %}" class="d-flex w-100 border-0 flex-column group-items{% if g.Unhealthy > 0 %} alert-danger{% endif %}">
<span class="d-flex justify-content-between">
<a
class="vm-group-search"
href="#group-{%s g.ID %}"
>{%s g.Name %}{% if g.Type != "prometheus" %} ({%s g.Type %}){% endif %} (every {%f.0 g.Interval %}s) #</a>
<a href="#group-{%s g.ID %}">{%s g.Name %}{% if g.Type != "prometheus" %} ({%s g.Type %}){% endif %} (every {%f.0 g.Interval %}s) #</a>
<span
class="flex-grow-1 d-flex justify-content-end"
role="button"
data-bs-toggle="collapse"
data-bs-target="#item-{%s g.ID %}"
data-bs-target="#sub-{%s g.ID %}"
>
<span class="d-flex gap-2">
{% if g.States["unhealthy"] > 0 %}<span class="badge bg-danger" title="Number of rules with status Error">{%d g.States["unhealthy"] %}</span> {% endif %}
{% if g.States["nomatch"] > 0 %}<span class="badge bg-warning" title="Number of rules with status NoMatch">{%d g.States["nomatch"] %}</span> {% endif %}
<span class="badge bg-success" title="Number of rules with status Ok">{%d g.States["ok"] %}</span>
{% if g.Unhealthy > 0 %}<span class="badge bg-danger" title="Number of rules with status Error">{%d g.Unhealthy %}</span> {% endif %}
{% if g.NoMatch > 0 %}<span class="badge bg-warning" title="Number of rules with status NoMatch">{%d g.NoMatch %}</span> {% endif %}
<span class="badge bg-success" title="Number of rules with status Ok">{%d g.Healthy %}</span>
</span>
</span>
</span>
@@ -140,9 +133,9 @@
class="d-flex flex-column row-gap-2 mb-2"
role="button"
data-bs-toggle="collapse"
data-bs-target="#item-{%s g.ID %}"
data-bs-target="#sub-{%s g.ID %}"
>
<span class="fs-6 text-start vm-group-search w-100 fw-lighter">{%s g.File %}</span>
<span class="fs-6 text-start w-100 fw-lighter">{%s g.File %}</span>
{% if len(g.Params) > 0 %}
<span class="fs-6 text-start w-100 d-flex justify-content-between fw-lighter">
<span>Extra params</span>
@@ -164,7 +157,7 @@
</span>
{% endif %}
</span>
<div class="collapse" id="item-{%s g.ID %}">
<div class="collapse sub-items" id="sub-{%s g.ID %}">
<table class="table table-striped table-hover table-sm">
<thead>
<tr>
@@ -175,7 +168,7 @@
</thead>
<tbody>
{% for _, r := range g.Rules %}
<tr class="vm-item{% if r.LastError != "" %} alert-danger{% endif %}">
<tr class="sub-item{% if r.LastError != "" %} alert-danger{% endif %}">
<td>
<div class="row">
<div class="col-12 mb-2">
@@ -189,7 +182,7 @@
<b>record:</b> {%s r.Name %}
{% endif %}
|
{%= seriesFetchedWarn(prefix, &r) %}
{%= seriesFetchedWarn(prefix, r) %}
<span><a target="_blank" href="{%s prefix+r.WebLink() %}">Details</a></span>
</div>
<div class="col-12">
@@ -212,12 +205,7 @@
</div>
</td>
<td class="text-center">{%d r.LastSamples %}</td>
<td class="text-center">{% if r.LastEvaluation.IsZero() %}
Never
{% else %}
{%f.3 time.Since(r.LastEvaluation).Seconds() %}s ago
{% endif %}
</td>
<td class="text-center">{%f.3 time.Since(r.LastEvaluation).Seconds() %}s ago</td>
</tr>
{% endfor %}
</tbody>
@@ -234,7 +222,7 @@
{% endfunc %}
{% func ListAlerts(r *http.Request, groupAlerts []rule.GroupAlerts) %}
{% func ListAlerts(r *http.Request, groupAlerts []groupAlerts) %}
{%code prefix := vmalertutil.Prefix(r.URL.Path) %}
{%= tpl.Header(r, navItems, "Alerts", getLastConfigError()) %}
{%= Controls(prefix, "", "", nil, nil, true) %}
@@ -243,7 +231,7 @@
{%code
g := ga.Group
var keys []string
alertsByRule := make(map[string][]*rule.ApiAlert)
alertsByRule := make(map[string][]*apiAlert)
for _, alert := range ga.Alerts {
if len(alertsByRule[alert.RuleID]) < 1 {
keys = append(keys, alert.RuleID)
@@ -252,14 +240,14 @@
}
sort.Strings(keys)
%}
<div class="w-100 flex-column vm-group alert-danger">
<div class="d-flex w-100 flex-column group-items alert-danger">
<span id="group-{%s g.ID %}" class="d-flex justify-content-between">
<a href="#group-{%s g.ID %}">{%s g.Name %}{% if g.Type != "prometheus" %} ({%s g.Type %}){% endif %}</a>
<span
class="flex-grow-1 d-flex justify-content-end"
role="button"
data-bs-toggle="collapse"
data-bs-target="#item-{%s g.ID %}"
data-bs-target="#sub-{%s g.ID %}"
>
<span class="badge bg-danger" title="Number of active alerts">{%d len(ga.Alerts) %}</span>
</span>
@@ -269,10 +257,10 @@
class="fs-6 text-start w-100 fw-lighter"
role="button"
data-bs-toggle="collapse"
data-bs-target="#item-{%s g.ID %}"
data-bs-target="#sub-{%s g.ID %}"
>{%s g.File %}</span>
</span>
<div class="collapse" id="item-{%s g.ID %}">
<div class="collapse sub-items" id="sub-{%s g.ID %}">
{% for _, ruleID := range keys %}
{%code
defaultAR := alertsByRule[ruleID][0]
@@ -283,7 +271,7 @@
sort.Strings(labelKeys)
%}
<br>
<div class="vm-item">
<div class="sub-item">
<b>alert:</b> {%s defaultAR.Name %} ({%d len(alertsByRule[ruleID]) %})
| <span><a target="_blank" href="{%s defaultAR.SourceLink %}">Source</a></span>
<br>
@@ -348,20 +336,20 @@
typeK, ns := keys[i], targets[notifier.TargetType(keys[i])]
count := len(ns)
%}
<div class="w-100 flex-column vm-group">
<div class="d-flex w-100 flex-column group-items">
<span class="d-flex justify-content-between" id="group-{%s typeK %}">
<a href="#group-{%s typeK %}">{%s typeK %} ({%d count %})</a>
<span
class="flex-grow-1"
role="button"
data-bs-toggle="collapse"
data-bs-target="#item-{%s typeK %}"
data-bs-target="#sub-{%s typeK %}"
></span>
</span>
<div id="item-{%s typeK %}" class="collapse show">
<div id="sub-{%s typeK %}" class="collapse show sub-items">
<table class="table table-striped table-hover table-sm">
<thead>
<tr class="vm-item">
<tr class="sub-item">
<th scope="col">Labels</th>
<th scope="col">Address</th>
</tr>
@@ -390,7 +378,7 @@
{%= tpl.Footer(r) %}
{% endfunc %}
{% func Alert(r *http.Request, alert *rule.ApiAlert) %}
{% func Alert(r *http.Request, alert *apiAlert) %}
{%code prefix := vmalertutil.Prefix(r.URL.Path) %}
{%= tpl.Header(r, navItems, "", getLastConfigError()) %}
{%code
@@ -446,7 +434,7 @@
<div class="col">
{% for _, k := range annotationKeys %}
<b>{%s k %}:</b><br>
<p class="annotations">{%s alert.Annotations[k] %}</p>
<p>{%s alert.Annotations[k] %}</p>
{% endfor %}
</div>
</div>
@@ -476,7 +464,7 @@
{% endfunc %}
{% func Rule(r *http.Request, rule rule.ApiRule) %}
{% func RuleDetails(r *http.Request, rule apiRule) %}
{%code prefix := vmalertutil.Prefix(r.URL.Path) %}
{%= tpl.Header(r, navItems, "", getLastConfigError()) %}
{%code
@@ -560,7 +548,7 @@
<div class="col">
{% for _, k := range annotationKeys %}
<b>{%s k %}:</b><br>
<p class="annotations">{%s rule.Annotations[k] %}</p>
<p>{%s rule.Annotations[k] %}</p>
{% endfor %}
</div>
</div>
@@ -605,11 +593,11 @@
<table class="table table-striped table-hover table-sm">
<thead>
<tr>
<th scope="col" title="The time when the rule was executed">Updated at</th>
<th scope="col" title="The time when event was created">Updated at</th>
<th scope="col" class="w-10 text-center" title="How many series expression returns. Each series will represent an alert.">Series returned</th>
{% if seriesFetchedEnabled %}<th scope="col" class="w-10 text-center" title="How many series were scanned by datasource during the evaluation">Series fetched</th>{% endif %}
<th scope="col" class="w-10 text-center" title="How many seconds request took">Duration</th>
<th scope="col" class="text-center" title="The time used in execution query request">Execution timestamp</th>
<th scope="col" class="text-center" title="Time used for rule execution">Executed at</th>
<th scope="col" class="text-center" title="cURL command with request example">cURL</th>
</tr>
</thead>
@@ -661,8 +649,8 @@
<span class="badge bg-warning text-dark" title="This firing state is kept because of `keep_firing_for`">stabilizing</span>
{% endfunc %}
{% func seriesFetchedWarn(prefix string, r *rule.ApiRule) %}
{% if r.IsNoMatch() %}
{% func seriesFetchedWarn(prefix string, r apiRule) %}
{% if isNoMatch(r) %}
<svg
data-bs-toggle="tooltip"
title="No match! This rule's last evaluation hasn't selected any time series from the datasource.
@@ -673,3 +661,9 @@
</svg>
{% endif %}
{% endfunc %}
{%code
func isNoMatch (r apiRule) bool {
return r.LastSamples == 0 && r.LastSeriesFetched != nil && *r.LastSeriesFetched == 0
}
%}

File diff suppressed because it is too large Load Diff

View File

@@ -23,12 +23,8 @@ func TestHandler(t *testing.T) {
Timestamps: []int64{0},
})
m := &manager{groups: map[uint64]*rule.Group{}}
_, cleanup := notifier.InitFakeNotifier()
defer cleanup()
var ar *rule.AlertingRule
var rr *rule.RecordingRule
var groupIDs []uint64
for _, dsType := range []string{"prometheus", "", "graphite"} {
g := rule.NewGroup(config.Group{
Name: "group",
@@ -48,10 +44,8 @@ func TestHandler(t *testing.T) {
}, fq, 1*time.Minute, nil)
ar = g.Rules[0].(*rule.AlertingRule)
rr = g.Rules[1].(*rule.RecordingRule)
g.ExecOnce(context.Background(), nil, time.Time{})
id := g.CreateID()
m.groups[id] = g
groupIDs = append(groupIDs, id)
g.ExecOnce(context.Background(), func() []notifier.Notifier { return nil }, nil, time.Time{})
m.groups[g.CreateID()] = g
}
rh := &requestHandler{m: m}
@@ -88,22 +82,22 @@ func TestHandler(t *testing.T) {
})
t.Run("/vmalert/rule", func(t *testing.T) {
a := ar.ToAPI()
a := ruleToAPI(ar)
getResp(t, ts.URL+"/vmalert/"+a.WebLink(), nil, 200)
r := rr.ToAPI()
r := ruleToAPI(rr)
getResp(t, ts.URL+"/vmalert/"+r.WebLink(), nil, 200)
})
t.Run("/vmalert/alert", func(t *testing.T) {
alerts := ar.AlertsToAPI()
alerts := ruleToAPIAlert(ar)
for _, a := range alerts {
getResp(t, ts.URL+"/vmalert/"+a.WebLink(), nil, 200)
}
})
t.Run("/vmalert/rule?badParam", func(t *testing.T) {
params := fmt.Sprintf("?%s=0&%s=1", rule.ParamGroupID, rule.ParamRuleID)
params := fmt.Sprintf("?%s=0&%s=1", paramGroupID, paramRuleID)
getResp(t, ts.URL+"/vmalert/rule"+params, nil, 404)
params = fmt.Sprintf("?%s=1&%s=0", rule.ParamGroupID, rule.ParamRuleID)
params = fmt.Sprintf("?%s=1&%s=0", paramGroupID, paramRuleID)
getResp(t, ts.URL+"/vmalert/rule"+params, nil, 404)
})
@@ -130,14 +124,14 @@ func TestHandler(t *testing.T) {
}
})
t.Run("/api/v1/alert?alertID&groupID", func(t *testing.T) {
expAlert := rule.NewAlertAPI(ar, ar.GetAlerts()[0])
alert := &rule.ApiAlert{}
expAlert := newAlertAPI(ar, ar.GetAlerts()[0])
alert := &apiAlert{}
getResp(t, ts.URL+"/"+expAlert.APILink(), alert, 200)
if !reflect.DeepEqual(alert, expAlert) {
t.Fatalf("expected %v is equal to %v", alert, expAlert)
}
alert = &rule.ApiAlert{}
alert = &apiAlert{}
getResp(t, ts.URL+"/vmalert/"+expAlert.APILink(), alert, 200)
if !reflect.DeepEqual(alert, expAlert) {
t.Fatalf("expected %v is equal to %v", alert, expAlert)
@@ -145,16 +139,16 @@ func TestHandler(t *testing.T) {
})
t.Run("/api/v1/alert?badParams", func(t *testing.T) {
params := fmt.Sprintf("?%s=0&%s=1", rule.ParamGroupID, rule.ParamAlertID)
params := fmt.Sprintf("?%s=0&%s=1", paramGroupID, paramAlertID)
getResp(t, ts.URL+"/api/v1/alert"+params, nil, 404)
getResp(t, ts.URL+"/vmalert/api/v1/alert"+params, nil, 404)
params = fmt.Sprintf("?%s=1&%s=0", rule.ParamGroupID, rule.ParamAlertID)
params = fmt.Sprintf("?%s=1&%s=0", paramGroupID, paramAlertID)
getResp(t, ts.URL+"/api/v1/alert"+params, nil, 404)
getResp(t, ts.URL+"/vmalert/api/v1/alert"+params, nil, 404)
// bad request, alertID is missing
params = fmt.Sprintf("?%s=1", rule.ParamGroupID)
params = fmt.Sprintf("?%s=1", paramGroupID)
getResp(t, ts.URL+"/api/v1/alert"+params, nil, 400)
getResp(t, ts.URL+"/vmalert/api/v1/alert"+params, nil, 400)
})
@@ -173,44 +167,29 @@ func TestHandler(t *testing.T) {
}
})
t.Run("/api/v1/rule?ruleID&groupID", func(t *testing.T) {
expRule := ar.ToAPI()
gotRule := rule.ApiRule{}
expRule := ruleToAPI(ar)
gotRule := apiRule{}
getResp(t, ts.URL+"/"+expRule.APILink(), &gotRule, 200)
if expRule.ID != gotRule.ID {
t.Fatalf("expected to get Rule %q; got %q instead", expRule.ID, gotRule.ID)
}
gotRule = rule.ApiRule{}
gotRule = apiRule{}
getResp(t, ts.URL+"/vmalert/"+expRule.APILink(), &gotRule, 200)
if expRule.ID != gotRule.ID {
t.Fatalf("expected to get Rule %q; got %q instead", expRule.ID, gotRule.ID)
}
gotRuleWithUpdates := rule.ApiRuleWithUpdates{}
gotRuleWithUpdates := apiRuleWithUpdates{}
getResp(t, ts.URL+"/"+expRule.APILink(), &gotRuleWithUpdates, 200)
if len(gotRuleWithUpdates.StateUpdates) < 1 {
t.Fatalf("expected %+v to have state updates field not empty", gotRuleWithUpdates.StateUpdates)
}
})
t.Run("/api/v1/group?groupID", func(t *testing.T) {
id := groupIDs[0]
g := m.groups[id]
expGroup := g.ToAPI()
gotGroup := rule.ApiGroup{}
getResp(t, ts.URL+"/"+expGroup.APILink(), &gotGroup, 200)
if expGroup.ID != gotGroup.ID {
t.Fatalf("expected to get Group %q; got %q instead", expGroup.ID, gotGroup.ID)
}
gotGroup = rule.ApiGroup{}
getResp(t, ts.URL+"/vmalert/"+expGroup.APILink(), &gotGroup, 200)
if expGroup.ID != gotGroup.ID {
t.Fatalf("expected to get Group %q; got %q instead", expGroup.ID, gotGroup.ID)
}
})
t.Run("/api/v1/rules&states", func(t *testing.T) {
t.Run("/api/v1/rules&filters", func(t *testing.T) {
check := func(url string, statusCode, expGroups, expRules int) {
t.Helper()
lr := listGroupsResponse{}
@@ -252,15 +231,9 @@ func TestHandler(t *testing.T) {
check("/api/v1/rules?rule_group[]=group&file[]=foo", 200, 0, 0)
check("/api/v1/rules?rule_group[]=group&file[]=rules.yaml", 200, 3, 6)
check("/api/v1/rules?rule_group[]=group&file[]=rules.yaml&rule_name[]=foo", 200, 0, 0)
check("/api/v1/rules?rule_group[]=group&file[]=rules.yaml&rule_name[]=foo", 200, 3, 0)
check("/api/v1/rules?rule_group[]=group&file[]=rules.yaml&rule_name[]=alert", 200, 3, 3)
check("/api/v1/rules?rule_group[]=group&file[]=rules.yaml&rule_name[]=alert&rule_name[]=record", 200, 3, 6)
check("/api/v1/rules?group_limit=1", 200, 1, 2)
check("/api/v1/rules?group_limit=1&type=alert", 200, 1, 1)
check("/api/v1/rules?group_limit=1&type=record", 200, 1, 1)
check("/api/v1/rules?group_limit=2", 200, 2, 4)
check(fmt.Sprintf("/api/v1/rules?group_limit=1&page_num=%d", 1), 200, 1, 2)
})
t.Run("/api/v1/rules&exclude_alerts=true", func(t *testing.T) {
// check if response returns active alerts by default

View File

@@ -1,4 +1,4 @@
package rule
package main
import (
"fmt"
@@ -8,28 +8,79 @@ import (
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/rule"
)
const (
// ParamGroupID is group id key in url parameter
ParamGroupID = "group_id"
paramGroupID = "group_id"
// ParamAlertID is alert id key in url parameter
ParamAlertID = "alert_id"
paramAlertID = "alert_id"
// ParamRuleID is rule id key in url parameter
ParamRuleID = "rule_id"
// TypeRecording is a RecordingRule type
TypeRecording = "recording"
// TypeAlerting is an AlertingRule type
TypeAlerting = "alerting"
paramRuleID = "rule_id"
)
// ApiGroup represents a Group for web view
type ApiGroup struct {
type apiNotifier struct {
Kind string `json:"kind"`
Targets []*apiTarget `json:"targets"`
}
type apiTarget struct {
Address string `json:"address"`
Labels map[string]string `json:"labels"`
}
// apiAlert represents a notifier.AlertingRule state
// for WEB view
// https://github.com/prometheus/compliance/blob/main/alert_generator/specification.md#get-apiv1rules
type apiAlert struct {
State string `json:"state"`
Name string `json:"name"`
Value string `json:"value"`
Labels map[string]string `json:"labels,omitempty"`
Annotations map[string]string `json:"annotations"`
ActiveAt time.Time `json:"activeAt"`
// Additional fields
// ID is an unique Alert's ID within a group
ID string `json:"id"`
// RuleID is an unique Rule's ID within a group
RuleID string `json:"rule_id"`
// GroupID is an unique Group's ID
GroupID string `json:"group_id"`
// Expression contains the PromQL/MetricsQL expression
// for Rule's evaluation
Expression string `json:"expression"`
// SourceLink contains a link to a system which should show
// why Alert was generated
SourceLink string `json:"source"`
// Restored shows whether Alert's state was restored on restart
Restored bool `json:"restored"`
// Stabilizing shows when firing state is kept because of
// `keep_firing_for` instead of real alert
Stabilizing bool `json:"stabilizing"`
}
// WebLink returns a link to the alert which can be used in UI.
func (aa *apiAlert) WebLink() string {
return fmt.Sprintf("alert?%s=%s&%s=%s",
paramGroupID, aa.GroupID, paramAlertID, aa.ID)
}
// APILink returns a link to the alert's JSON representation.
func (aa *apiAlert) APILink() string {
return fmt.Sprintf("api/v1/alert?%s=%s&%s=%s",
paramGroupID, aa.GroupID, paramAlertID, aa.ID)
}
// apiGroup represents Group for web view
// https://github.com/prometheus/compliance/blob/main/alert_generator/specification.md#get-apiv1rules
type apiGroup struct {
// Name is the group name as present in the config
Name string `json:"name"`
// Rules contains both recording and alerting rules
Rules []ApiRule `json:"rules"`
Rules []apiRule `json:"rules"`
// Interval is the Group's evaluation interval in float seconds as present in the file.
Interval float64 `json:"interval"`
// LastEvaluation is the timestamp of the last time the Group was executed
@@ -57,24 +108,23 @@ type ApiGroup struct {
EvalOffset float64 `json:"eval_offset,omitempty"`
// EvalDelay will adjust the `time` parameter of rule evaluation requests to compensate intentional query delay from datasource.
EvalDelay float64 `json:"eval_delay,omitempty"`
// States represents counts per each rule state
States map[string]int `json:"states"`
// Unhealthy unhealthy rules count
Unhealthy int
// Healthy passing rules count
Healthy int
// NoMatch not matching rules count
NoMatch int
}
// APILink returns a link to the group's JSON representation.
func (ag *ApiGroup) APILink() string {
return fmt.Sprintf("api/v1/group?%s=%s", ParamGroupID, ag.ID)
// groupAlerts represents a group of alerts for WEB view
type groupAlerts struct {
Group *apiGroup
Alerts []*apiAlert
}
// GroupAlerts represents a Group with its Alerts for web view
type GroupAlerts struct {
Group *ApiGroup
Alerts []*ApiAlert
}
// ApiRule represents a Rule for web view
// apiRule represents a Rule for web view
// see https://github.com/prometheus/compliance/blob/main/alert_generator/specification.md#get-apiv1rules
type ApiRule struct {
type apiRule struct {
// State must be one of these under following scenarios
// "pending": at least 1 alert in the rule in pending state and no other alert in firing ruleState.
// "firing": at least 1 alert in the rule in firing state.
@@ -96,7 +146,7 @@ type ApiRule struct {
// LastEvaluation is the timestamp of the last time the rule was executed
LastEvaluation time.Time `json:"lastEvaluation"`
// Alerts is the list of all the alerts in this rule that are currently pending or firing
Alerts []*ApiAlert `json:"alerts,omitempty"`
Alerts []*apiAlert `json:"alerts,omitempty"`
// Health is the health of rule evaluation.
// It MUST be one of "ok", "err", "unknown"
Health string `json:"health"`
@@ -127,92 +177,143 @@ type ApiRule struct {
// MaxUpdates is the max number of recorded ruleStateEntry objects
MaxUpdates int `json:"max_updates_entries"`
// Updates contains the ordered list of recorded ruleStateEntry objects
Updates []StateEntry `json:"-"`
Updates []rule.StateEntry `json:"-"`
}
// IsNoMatch returns true if rule is in nomatch state
func (r *ApiRule) IsNoMatch() bool {
return r.LastSamples == 0 && r.LastSeriesFetched != nil && *r.LastSeriesFetched == 0
}
// ApiAlert represents a notifier.AlertingRule state
// for WEB view
// https://github.com/prometheus/compliance/blob/main/alert_generator/specification.md#get-apiv1rules
type ApiAlert struct {
State string `json:"state"`
Name string `json:"name"`
Value string `json:"value"`
Labels map[string]string `json:"labels,omitempty"`
Annotations map[string]string `json:"annotations"`
ActiveAt time.Time `json:"activeAt"`
// Additional fields
// ID is an unique Alert's ID within a group
ID string `json:"id"`
// RuleID is an unique Rule's ID within a group
RuleID string `json:"rule_id"`
// GroupID is an unique Group's ID
GroupID string `json:"group_id"`
// Expression contains the PromQL/MetricsQL expression
// for Rule's evaluation
Expression string `json:"expression"`
// SourceLink contains a link to a system which should show
// why Alert was generated
SourceLink string `json:"source"`
// Restored shows whether Alert's state was restored on restart
Restored bool `json:"restored"`
// Stabilizing shows when firing state is kept because of
// `keep_firing_for` instead of real alert
Stabilizing bool `json:"stabilizing"`
}
// WebLink returns a link to the alert which can be used in UI.
func (aa *ApiAlert) WebLink() string {
return fmt.Sprintf("alert?%s=%s&%s=%s",
ParamGroupID, aa.GroupID, ParamAlertID, aa.ID)
}
// APILink returns a link to the alert's JSON representation.
func (aa *ApiAlert) APILink() string {
return fmt.Sprintf("api/v1/alert?%s=%s&%s=%s",
ParamGroupID, aa.GroupID, ParamAlertID, aa.ID)
}
// ApiRuleWithUpdates represents ApiRule but with extra fields for marshalling
type ApiRuleWithUpdates struct {
ApiRule
// apiRuleWithUpdates represents apiRule but with extra fields for marshalling
type apiRuleWithUpdates struct {
apiRule
// Updates contains the ordered list of recorded ruleStateEntry objects
StateUpdates []StateEntry `json:"updates,omitempty"`
StateUpdates []rule.StateEntry `json:"updates,omitempty"`
}
// APILink returns a link to the rule's JSON representation.
func (ar ApiRule) APILink() string {
func (ar apiRule) APILink() string {
return fmt.Sprintf("api/v1/rule?%s=%s&%s=%s",
ParamGroupID, ar.GroupID, ParamRuleID, ar.ID)
paramGroupID, ar.GroupID, paramRuleID, ar.ID)
}
// WebLink returns a link to the alert which can be used in UI.
func (ar ApiRule) WebLink() string {
func (ar apiRule) WebLink() string {
return fmt.Sprintf("rule?%s=%s&%s=%s",
ParamGroupID, ar.GroupID, ParamRuleID, ar.ID)
paramGroupID, ar.GroupID, paramRuleID, ar.ID)
}
// AlertsToAPI returns list of ApiAlert objects from existing alerts
func (ar *AlertingRule) AlertsToAPI() []*ApiAlert {
var alerts []*ApiAlert
func ruleToAPI(r any) apiRule {
if ar, ok := r.(*rule.AlertingRule); ok {
return alertingToAPI(ar)
}
if rr, ok := r.(*rule.RecordingRule); ok {
return recordingToAPI(rr)
}
return apiRule{}
}
const (
ruleTypeRecording = "recording"
ruleTypeAlerting = "alerting"
)
func recordingToAPI(rr *rule.RecordingRule) apiRule {
lastState := rule.GetLastEntry(rr)
r := apiRule{
Type: ruleTypeRecording,
DatasourceType: rr.Type.String(),
Name: rr.Name,
Query: rr.Expr,
Labels: rr.Labels,
LastEvaluation: lastState.Time,
EvaluationTime: lastState.Duration.Seconds(),
Health: "ok",
LastSamples: lastState.Samples,
LastSeriesFetched: lastState.SeriesFetched,
MaxUpdates: rule.GetRuleStateSize(rr),
Updates: rule.GetAllRuleState(rr),
// encode as strings to avoid rounding
ID: fmt.Sprintf("%d", rr.ID()),
GroupID: fmt.Sprintf("%d", rr.GroupID),
GroupName: rr.GroupName,
File: rr.File,
}
if lastState.Err != nil {
r.LastError = lastState.Err.Error()
r.Health = "err"
}
return r
}
// alertingToAPI returns Rule representation in form of apiRule
func alertingToAPI(ar *rule.AlertingRule) apiRule {
lastState := rule.GetLastEntry(ar)
r := apiRule{
Type: ruleTypeAlerting,
DatasourceType: ar.Type.String(),
Name: ar.Name,
Query: ar.Expr,
Duration: ar.For.Seconds(),
KeepFiringFor: ar.KeepFiringFor.Seconds(),
Labels: ar.Labels,
Annotations: ar.Annotations,
LastEvaluation: lastState.Time,
EvaluationTime: lastState.Duration.Seconds(),
Health: "ok",
State: "inactive",
Alerts: ruleToAPIAlert(ar),
LastSamples: lastState.Samples,
LastSeriesFetched: lastState.SeriesFetched,
MaxUpdates: rule.GetRuleStateSize(ar),
Updates: rule.GetAllRuleState(ar),
Debug: ar.Debug,
// encode as strings to avoid rounding in JSON
ID: fmt.Sprintf("%d", ar.ID()),
GroupID: fmt.Sprintf("%d", ar.GroupID),
GroupName: ar.GroupName,
File: ar.File,
}
if lastState.Err != nil {
r.LastError = lastState.Err.Error()
r.Health = "err"
}
// satisfy apiRule.State logic
if len(r.Alerts) > 0 {
r.State = notifier.StatePending.String()
stateFiring := notifier.StateFiring.String()
for _, a := range r.Alerts {
if a.State == stateFiring {
r.State = stateFiring
break
}
}
}
return r
}
// ruleToAPIAlert generates list of apiAlert objects from existing alerts
func ruleToAPIAlert(ar *rule.AlertingRule) []*apiAlert {
var alerts []*apiAlert
for _, a := range ar.GetAlerts() {
if a.State == notifier.StateInactive {
continue
}
alerts = append(alerts, NewAlertAPI(ar, a))
alerts = append(alerts, newAlertAPI(ar, a))
}
return alerts
}
// alertToAPI generates apiAlert object from alert by its id(hash)
func alertToAPI(ar *rule.AlertingRule, id uint64) *apiAlert {
a := ar.GetAlert(id)
if a == nil {
return nil
}
return newAlertAPI(ar, a)
}
// NewAlertAPI creates apiAlert for notifier.Alert
func NewAlertAPI(ar *AlertingRule, a *notifier.Alert) *ApiAlert {
aa := &ApiAlert{
func newAlertAPI(ar *rule.AlertingRule, a *notifier.Alert) *apiAlert {
aa := &apiAlert{
// encode as strings to avoid rounding
ID: fmt.Sprintf("%d", a.ID),
GroupID: fmt.Sprintf("%d", a.GroupID),
@@ -227,8 +328,8 @@ func NewAlertAPI(ar *AlertingRule, a *notifier.Alert) *ApiAlert {
Restored: a.Restored,
Value: strconv.FormatFloat(a.Value, 'f', -1, 32),
}
if notifier.AlertURLGeneratorFn != nil {
aa.SourceLink = notifier.AlertURLGeneratorFn(*a)
if alertURLGeneratorFn != nil {
aa.SourceLink = alertURLGeneratorFn(*a)
}
if a.State == notifier.StateFiring && !a.KeepFiringSince.IsZero() {
aa.Stabilizing = true
@@ -236,25 +337,9 @@ func NewAlertAPI(ar *AlertingRule, a *notifier.Alert) *ApiAlert {
return aa
}
func (r *ApiRule) ExtendState() {
if len(r.Alerts) > 0 {
return
}
if r.State == "" {
r.State = "ok"
}
if r.Health != "ok" {
r.State = "unhealthy"
} else if r.IsNoMatch() {
r.State = "nomatch"
}
}
// ToAPI returns ApiGroup representation of g
func (g *Group) ToAPI() *ApiGroup {
g.mu.RLock()
defer g.mu.RUnlock()
ag := ApiGroup{
func groupToAPI(g *rule.Group) *apiGroup {
g = g.DeepCopy()
ag := apiGroup{
// encode as string to avoid rounding
ID: strconv.FormatUint(g.GetID(), 10),
Name: g.Name,
@@ -267,7 +352,6 @@ func (g *Group) ToAPI() *ApiGroup {
Headers: headersToStrings(g.Headers),
NotifierHeaders: headersToStrings(g.NotifierHeaders),
Labels: g.Labels,
States: make(map[string]int),
}
if g.EvalOffset != nil {
ag.EvalOffset = g.EvalOffset.Seconds()
@@ -275,10 +359,9 @@ func (g *Group) ToAPI() *ApiGroup {
if g.EvalDelay != nil {
ag.EvalDelay = g.EvalDelay.Seconds()
}
ag.Rules = make([]ApiRule, 0, len(g.Rules))
ag.Rules = make([]apiRule, 0)
for _, r := range g.Rules {
ar := r.ToAPI()
ag.Rules = append(ag.Rules, ar)
ag.Rules = append(ag.Rules, ruleToAPI(r))
}
return &ag
}

View File

@@ -1,4 +1,4 @@
package rule
package main
import (
"fmt"
@@ -8,6 +8,7 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/rule"
)
func TestRecordingToApi(t *testing.T) {
@@ -16,7 +17,7 @@ func TestRecordingToApi(t *testing.T) {
Values: []float64{1}, Timestamps: []int64{0},
})
entriesLimit := 44
g := NewGroup(config.Group{
g := rule.NewGroup(config.Group{
Name: "group",
File: "rules.yaml",
Concurrency: 1,
@@ -30,24 +31,24 @@ func TestRecordingToApi(t *testing.T) {
},
},
}, fq, 1*time.Minute, nil)
rr := g.Rules[0].(*RecordingRule)
rr := g.Rules[0].(*rule.RecordingRule)
expectedRes := ApiRule{
expectedRes := apiRule{
Name: "record_name",
Query: "up",
Labels: map[string]string{"label": "value"},
Health: "ok",
Type: TypeRecording,
Type: ruleTypeRecording,
DatasourceType: "prometheus",
ID: "1248",
GroupID: fmt.Sprintf("%d", g.CreateID()),
GroupName: "group",
File: "rules.yaml",
MaxUpdates: 44,
Updates: make([]StateEntry, 0),
Updates: make([]rule.StateEntry, 0),
}
res := rr.ToAPI()
res := recordingToAPI(rr)
if !reflect.DeepEqual(res, expectedRes) {
t.Fatalf("expected to have: \n%v;\ngot: \n%v", expectedRes, res)

View File

@@ -27,9 +27,6 @@ vmauth-linux-ppc64le-prod:
vmauth-linux-386-prod:
APP_NAME=vmauth $(MAKE) app-via-docker-linux-386
vmauth-linux-s390x-prod:
APP_NAME=vmauth $(MAKE) app-via-docker-linux-s390x
vmauth-darwin-amd64-prod:
APP_NAME=vmauth $(MAKE) app-via-docker-darwin-amd64

View File

@@ -4,7 +4,6 @@ import (
"bytes"
"context"
"encoding/base64"
"errors"
"flag"
"fmt"
"math"
@@ -13,7 +12,6 @@ import (
"net/url"
"os"
"regexp"
"slices"
"sort"
"strconv"
"strings"
@@ -29,7 +27,6 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs/fscore"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
@@ -44,9 +41,6 @@ var (
"See https://docs.victoriametrics.com/victoriametrics/vmauth/#load-balancing for details")
defaultLoadBalancingPolicy = flag.String("loadBalancingPolicy", "least_loaded", "The default load balancing policy to use for backend urls specified inside url_prefix section. "+
"Supported policies: least_loaded, first_available. See https://docs.victoriametrics.com/victoriametrics/vmauth/#load-balancing")
defaultMergeQueryArgs = flagutil.NewArrayString("mergeQueryArgs", "An optional list of client query arg names, which must be merged with args at backend urls. "+
"The rest of client query args are replaced by the corresponding query args from backend urls for security reasons; "+
"see https://docs.victoriametrics.com/victoriametrics/vmauth/#query-args-handling")
discoverBackendIPsGlobal = flag.Bool("discoverBackendIPs", false, "Whether to discover backend IPs via periodic DNS queries to hostnames specified in url_prefix. "+
"This may be useful when url_prefix points to a hostname with dynamically scaled instances behind it. See https://docs.victoriametrics.com/victoriametrics/vmauth/#discovering-backend-ips")
discoverBackendIPsInterval = flag.Duration("discoverBackendIPsInterval", 10*time.Second, "The interval for re-discovering backend IPs if -discoverBackendIPs command-line flag is set. "+
@@ -67,11 +61,10 @@ type AuthConfig struct {
type UserInfo struct {
Name string `yaml:"name,omitempty"`
BearerToken string `yaml:"bearer_token,omitempty"`
JWT *JWTConfig `yaml:"jwt,omitempty"`
AuthToken string `yaml:"auth_token,omitempty"`
Username string `yaml:"username,omitempty"`
Password string `yaml:"password,omitempty"`
BearerToken string `yaml:"bearer_token,omitempty"`
AuthToken string `yaml:"auth_token,omitempty"`
Username string `yaml:"username,omitempty"`
Password string `yaml:"password,omitempty"`
URLPrefix *URLPrefix `yaml:"url_prefix,omitempty"`
DiscoverBackendIPs *bool `yaml:"discover_backend_ips,omitempty"`
@@ -82,7 +75,6 @@ type UserInfo struct {
DefaultURL *URLPrefix `yaml:"default_url,omitempty"`
RetryStatusCodes []int `yaml:"retry_status_codes,omitempty"`
LoadBalancingPolicy string `yaml:"load_balancing_policy,omitempty"`
MergeQueryArgs []string `yaml:"merge_query_args,omitempty"`
DropSrcPathPrefixParts *int `yaml:"drop_src_path_prefix_parts,omitempty"`
TLSCAFile string `yaml:"tls_ca_file,omitempty"`
TLSCertFile string `yaml:"tls_cert_file,omitempty"`
@@ -92,79 +84,30 @@ type UserInfo struct {
MetricLabels map[string]string `yaml:"metric_labels,omitempty"`
AccessLog *AccessLog `yaml:"access_log,omitempty"`
concurrencyLimitCh chan struct{}
concurrencyLimitReached *metrics.Counter
rt http.RoundTripper
requests *metrics.Counter
requestErrors *metrics.Counter
backendRequests *metrics.Counter
backendErrors *metrics.Counter
requestsDuration *metrics.Summary
}
// AccessLog represents configuration for access log settings.
type AccessLog struct {
Filters *AccessLogFilters `yaml:"filters"`
}
// AccessLogFilters represents list of filters for access logs printing
type AccessLogFilters struct {
// SkipStatusCodes is a list of HTTP status codes for which access logs will be skipped
SkipStatusCodes []int `yaml:"skip_status_codes"`
}
func (ui *UserInfo) logRequest(r *http.Request, userName string, statusCode int, duration time.Duration) {
if ui.AccessLog == nil {
return
}
filters := ui.AccessLog.Filters
if filters != nil && len(filters.SkipStatusCodes) > 0 {
if slices.Contains(filters.SkipStatusCodes, statusCode) {
return
}
}
remoteAddr := httpserver.GetQuotedRemoteAddr(r)
requestURI := httpserver.GetRequestURI(r)
logger.Infof("access_log request_host=%q request_uri=%q status_code=%d remote_addr=%s user_agent=%q referer=%q duration_ms=%d username=%q",
r.Host, requestURI, statusCode, remoteAddr, r.UserAgent(), r.Referer(), duration.Milliseconds(), userName)
}
// HeadersConf represents config for request and response headers.
type HeadersConf struct {
RequestHeaders []*Header `yaml:"headers,omitempty"`
ResponseHeaders []*Header `yaml:"response_headers,omitempty"`
KeepOriginalHost *bool `yaml:"keep_original_host,omitempty"`
hasAnyPlaceHolders bool
RequestHeaders []*Header `yaml:"headers,omitempty"`
ResponseHeaders []*Header `yaml:"response_headers,omitempty"`
KeepOriginalHost *bool `yaml:"keep_original_host,omitempty"`
}
func (ui *UserInfo) beginConcurrencyLimit(ctx context.Context) error {
func (ui *UserInfo) beginConcurrencyLimit() error {
select {
case ui.concurrencyLimitCh <- struct{}{}:
return nil
default:
// The number of concurrently executed requests for the given user equals the limit.
// Wait until some of the currently executed requests are finished, so the current request could be executed.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10078
select {
case ui.concurrencyLimitCh <- struct{}{}:
return nil
case <-ctx.Done():
err := ctx.Err()
if errors.Is(err, context.DeadlineExceeded) {
// The current request couldn't be executed until the request timeout.
ui.concurrencyLimitReached.Inc()
return fmt.Errorf("cannot start executing the request during -maxQueueDuration=%s because %d concurrent requests from the user %s are executed",
*maxQueueDuration, ui.getMaxConcurrentRequests(), ui.name())
}
return fmt.Errorf("cannot start executing the request because %d concurrent requests from the user %s are executed: %w",
ui.getMaxConcurrentRequests(), ui.name(), err)
}
ui.concurrencyLimitReached.Inc()
return fmt.Errorf("cannot handle more than %d concurrent requests from user %s", ui.getMaxConcurrentRequests(), ui.name())
}
}
@@ -180,28 +123,6 @@ func (ui *UserInfo) getMaxConcurrentRequests() int {
return mcr
}
func (ui *UserInfo) stopHealthChecks() {
if ui == nil {
return
}
if ui.URLPrefix != nil {
bus := ui.URLPrefix.bus.Load()
bus.stopHealthChecks()
}
if ui.DefaultURL != nil {
bus := ui.DefaultURL.bus.Load()
bus.stopHealthChecks()
}
for i := range ui.URLMaps {
um := &ui.URLMaps[i]
if um.URLPrefix != nil {
bus := um.URLPrefix.bus.Load()
bus.stopHealthChecks()
}
}
}
// Header is `Name: Value` http header, which must be added to the proxied request.
type Header struct {
Name string
@@ -261,11 +182,6 @@ type URLMap struct {
// LoadBalancingPolicy is load balancing policy among UrlPrefix backends.
LoadBalancingPolicy string `yaml:"load_balancing_policy,omitempty"`
// MergeQueryArgs is a list of client query args, which must be merged with the existing backend query args.
//
// The rest of client query args are replaced with the corresponding backend query args for security reasons.
MergeQueryArgs []string `yaml:"merge_query_args,omitempty"`
// DropSrcPathPrefixParts is the number of `/`-delimited request path prefix parts to drop before proxying the request to backend.
DropSrcPathPrefixParts *int `yaml:"drop_src_path_prefix_parts,omitempty"`
}
@@ -312,7 +228,7 @@ func (qa *QueryArg) MarshalYAML() (any, error) {
return qa.sOriginal, nil
}
// URLPrefix represents the `url_prefix` from auth config.
// URLPrefix represents passed `url_prefix`
type URLPrefix struct {
// requests are re-tried on other backend urls for these http response status codes
retryStatusCodes []int
@@ -320,11 +236,6 @@ type URLPrefix struct {
// load balancing policy used
loadBalancingPolicy string
// the list of client query args, which must be merged with backend query args.
//
// By default backend query args replace all the client query args for security reasons.
mergeQueryArgs []string
// how many request path prefix parts to drop before routing the request to backendURL
dropSrcPathPrefixParts int
@@ -337,7 +248,7 @@ type URLPrefix struct {
// the list of backend urls
//
// the list can be dynamically updated if `discover_backend_ips` option is set.
bus atomic.Pointer[backendURLs]
bus atomic.Pointer[[]*backendURL]
// if this option is set, then backend ips for busOriginal are periodically re-discovered and put to bus.
discoverBackendIPs bool
@@ -361,94 +272,21 @@ func (up *URLPrefix) setLoadBalancingPolicy(loadBalancingPolicy string) error {
}
}
type backendURLs struct {
healthChecksContext context.Context
healthChecksCancel func()
healthChecksWG sync.WaitGroup
bus []*backendURL
}
func newBackendURLs() *backendURLs {
ctx, cancel := context.WithCancel(context.Background())
return &backendURLs{
healthChecksContext: ctx,
healthChecksCancel: cancel,
}
}
func (bus *backendURLs) add(u *url.URL) {
bus.bus = append(bus.bus, &backendURL{
url: u,
healthCheckContext: bus.healthChecksContext,
healthCheckWG: &bus.healthChecksWG,
hasPlaceHolders: hasAnyPlaceholders(u),
})
}
func (bus *backendURLs) stopHealthChecks() {
bus.healthChecksCancel()
bus.healthChecksWG.Wait()
}
type backendURL struct {
broken atomic.Bool
healthCheckContext context.Context
healthCheckWG *sync.WaitGroup
brokenDeadline atomic.Uint64
concurrentRequests atomic.Int32
url *url.URL
hasPlaceHolders bool
}
func (bu *backendURL) isBroken() bool {
return bu.broken.Load()
ct := fasttime.UnixTimestamp()
return ct < bu.brokenDeadline.Load()
}
func (bu *backendURL) setBroken() {
if bu.broken.CompareAndSwap(false, true) {
bu.healthCheckWG.Go(func() {
bu.runHealthCheck()
bu.broken.Store(false)
})
}
}
func (bu *backendURL) runHealthCheck() {
port := bu.url.Port()
if port == "" {
port = "80"
}
addr := net.JoinHostPort(bu.url.Hostname(), port)
t := time.NewTicker(*failTimeout)
defer t.Stop()
for {
select {
case <-t.C:
// Verify network connectivity via TCP dial before marking backend healthy.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9997
ctx, cancel := context.WithTimeout(bu.healthCheckContext, time.Second)
c, err := netutil.Dialer.DialContext(ctx, "tcp", addr)
cancel()
if err != nil {
if errors.Is(bu.healthCheckContext.Err(), context.Canceled) {
return
}
logger.Warnf("ignoring the backend at %s for %s because of dial error: %s", addr, *failTimeout, err)
continue
}
_ = c.Close()
return
case <-bu.healthCheckContext.Done():
return
}
}
deadline := fasttime.UnixTimestamp() + uint64((*failTimeout).Seconds())
bu.brokenDeadline.Store(deadline)
}
func (bu *backendURL) get() {
@@ -460,8 +298,8 @@ func (bu *backendURL) put() {
}
func (up *URLPrefix) getBackendsCount() int {
bus := up.bus.Load()
return len(bus.bus)
pbus := up.bus.Load()
return len(*pbus)
}
// getBackendURL returns the backendURL depending on the load balance policy.
@@ -472,15 +310,16 @@ func (up *URLPrefix) getBackendsCount() int {
func (up *URLPrefix) getBackendURL() *backendURL {
up.discoverBackendAddrsIfNeeded()
bus := up.bus.Load()
if len(bus.bus) == 0 {
pbus := up.bus.Load()
bus := *pbus
if len(bus) == 0 {
return nil
}
if up.loadBalancingPolicy == "first_available" {
return getFirstAvailableBackendURL(bus.bus)
return getFirstAvailableBackendURL(bus)
}
return getLeastLoadedBackendURL(bus.bus, &up.n)
return getLeastLoadedBackendURL(bus, &up.n)
}
func (up *URLPrefix) discoverBackendAddrsIfNeeded() {
@@ -554,24 +393,25 @@ func (up *URLPrefix) discoverBackendAddrsIfNeeded() {
cancel()
// generate new backendURLs for the resolved IPs
busNew := newBackendURLs()
var busNew []*backendURL
for _, bu := range up.busOriginal {
host := bu.Hostname()
for _, addr := range hostToAddrs[host] {
buCopy := *bu
buCopy.Host = addr
busNew.add(&buCopy)
busNew = append(busNew, &backendURL{
url: &buCopy,
})
}
}
bus := up.bus.Load()
if areEqualBackendURLs(bus.bus, busNew.bus) {
pbus := up.bus.Load()
if areEqualBackendURLs(*pbus, busNew) {
return
}
// Store new backend urls
up.bus.Store(busNew)
bus.stopHealthChecks()
up.bus.Store(&busNew)
}
func areEqualBackendURLs(a, b []*backendURL) bool {
@@ -602,66 +442,53 @@ func getFirstAvailableBackendURL(bus []*backendURL) *backendURL {
for i := 1; i < len(bus); i++ {
if !bus[i].isBroken() {
bu = bus[i]
bu.get()
return bu
break
}
}
return nil
bu.get()
return bu
}
// getLeastLoadedBackendURL returns a non-broken backendURL with the lowest number of concurrent requests.
// getLeastLoadedBackendURL returns the backendURL with the minimum number of concurrent requests.
//
// backendURL.put() must be called on the returned backendURL after the request is complete.
func getLeastLoadedBackendURL(bus []*backendURL, atomicCounter *atomic.Uint32) *backendURL {
if len(bus) == 1 {
// Fast path - return the only backend url.
bu := bus[0]
if bu.isBroken() {
return nil
}
bu.get()
return bu
}
// Slow path - select other backend urls.
n := atomicCounter.Add(1) - 1
for i := range uint32(len(bus)) {
for i := uint32(0); i < uint32(len(bus)); i++ {
idx := (n + i) % uint32(len(bus))
bu := bus[idx]
if bu.isBroken() {
continue
}
// The Load() in front of CompareAndSwap() avoids CAS overhead for items with values bigger than 0.
if bu.concurrentRequests.Load() == 0 && bu.concurrentRequests.CompareAndSwap(0, 1) {
atomicCounter.CompareAndSwap(n+1, idx+1)
// There is no need in the call bu.get(), because we already incremented bu.concurrentRequests above.
if bu.concurrentRequests.Load() == 0 {
// Fast path - return the backend with zero concurrently executed requests.
// Do not use CompareAndSwap() instead of Load(), since it is much slower on systems with many CPU cores.
bu.concurrentRequests.Add(1)
return bu
}
}
// Slow path - return the backend with the minimum number of concurrently executed requests.
buMinIdx := n % uint32(len(bus))
minRequests := bus[buMinIdx].concurrentRequests.Load()
for i := uint32(1); i < uint32(len(bus)); i++ {
idx := (n + i) % uint32(len(bus))
bu := bus[idx]
buMin := bus[n%uint32(len(bus))]
minRequests := buMin.concurrentRequests.Load()
for _, bu := range bus {
if bu.isBroken() {
continue
}
reqs := bu.concurrentRequests.Load()
if reqs < minRequests || bus[buMinIdx].isBroken() {
buMinIdx = idx
minRequests = reqs
if n := bu.concurrentRequests.Load(); n < minRequests || buMin.isBroken() {
buMin = bu
minRequests = n
}
}
buMin := bus[buMinIdx]
if buMin.isBroken() {
return nil
}
buMin.get()
atomicCounter.CompareAndSwap(n+1, buMinIdx+1)
return buMin
}
@@ -778,9 +605,11 @@ func initAuthConfig() {
configTimestamp.Set(fasttime.UnixTimestamp())
stopCh = make(chan struct{})
authConfigWG.Go(func() {
authConfigWG.Add(1)
go func() {
defer authConfigWG.Done()
authConfigReloader(sighupCh)
})
}()
}
func stopAuthConfig() {
@@ -836,9 +665,6 @@ var (
// authUsers contains the currently loaded auth users
authUsers atomic.Pointer[map[string]*UserInfo]
// jwt authentication cache
jwtAuthCache atomic.Pointer[jwtCache]
authConfigWG sync.WaitGroup
stopCh chan struct{}
)
@@ -855,7 +681,7 @@ func reloadAuthConfig() (bool, error) {
ok, err := reloadAuthConfigData(data)
if err != nil {
return false, fmt.Errorf("failed to parse -auth.config=%q: %w", *authConfigPath, err)
return false, fmt.Errorf("failed to pars -auth.config=%q: %w", *authConfigPath, err)
}
if !ok {
return false, nil
@@ -878,16 +704,6 @@ func reloadAuthConfigData(data []byte) (bool, error) {
return false, fmt.Errorf("failed to parse auth config: %w", err)
}
jui, oidcDP, err := parseJWTUsers(ac)
if err != nil {
return false, fmt.Errorf("failed to parse JWT users from auth config: %w", err)
}
oidcDP.startDiscovery()
jwtc := &jwtCache{
users: jui,
oidcDP: oidcDP,
}
m, err := parseAuthConfigUsers(ac)
if err != nil {
return false, fmt.Errorf("failed to parse users from auth config: %w", err)
@@ -895,24 +711,13 @@ func reloadAuthConfigData(data []byte) (bool, error) {
acPrev := authConfig.Load()
if acPrev != nil {
acPrev.UnauthorizedUser.stopHealthChecks()
for i := range acPrev.Users {
acPrev.Users[i].stopHealthChecks()
}
metrics.UnregisterSet(acPrev.ms, true)
}
metrics.RegisterSet(ac.ms)
jwtcPrev := jwtAuthCache.Load()
if jwtcPrev != nil {
jwtcPrev.oidcDP.stopDiscovery()
}
authConfig.Store(ac)
authConfigData.Store(&data)
authUsers.Store(&m)
jwtAuthCache.Store(jwtc)
return true, nil
}
@@ -937,18 +742,12 @@ func parseAuthConfig(data []byte) (*AuthConfig, error) {
if ui.BearerToken != "" {
return nil, fmt.Errorf("field bearer_token can't be specified for unauthorized_user section")
}
if ui.JWT != nil {
return nil, fmt.Errorf("field jwt can't be specified for unauthorized_user section")
}
if ui.AuthToken != "" {
return nil, fmt.Errorf("field auth_token can't be specified for unauthorized_user section")
}
if ui.Name != "" {
return nil, fmt.Errorf("field name can't be specified for unauthorized_user section")
}
if err := parseJWTPlaceholdersForUserInfo(ui, false); err != nil {
return nil, err
}
if err := ui.initURLs(); err != nil {
return nil, err
}
@@ -958,8 +757,6 @@ func parseAuthConfig(data []byte) (*AuthConfig, error) {
return nil, fmt.Errorf("cannot parse metric_labels for unauthorized_user: %w", err)
}
ui.requests = ac.ms.NewCounter(`vmauth_unauthorized_user_requests_total` + metricLabels)
ui.requestErrors = ac.ms.NewCounter(`vmauth_unauthorized_user_request_errors_total` + metricLabels)
ui.backendRequests = ac.ms.NewCounter(`vmauth_unauthorized_user_request_backend_requests_total` + metricLabels)
ui.backendErrors = ac.ms.NewCounter(`vmauth_unauthorized_user_request_backend_errors_total` + metricLabels)
ui.requestsDuration = ac.ms.NewSummary(`vmauth_unauthorized_user_request_duration_seconds` + metricLabels)
ui.concurrencyLimitCh = make(chan struct{}, ui.getMaxConcurrentRequests())
@@ -989,27 +786,16 @@ func parseAuthConfigUsers(ac *AuthConfig) (map[string]*UserInfo, error) {
}
for i := range uis {
ui := &uis[i]
// users with jwt tokens are parsed by parseJWTUsers function.
// the function also checks that users with jwt tokens do not have auth tokens, bearer tokens, usernames and passwords.
if ui.JWT != nil {
continue
}
ats, err := getAuthTokens(ui.AuthToken, ui.BearerToken, ui.Username, ui.Password)
if err != nil {
return nil, err
}
for _, at := range ats {
if uiOld := byAuthToken[at]; uiOld != nil {
return nil, fmt.Errorf("duplicate auth token=%q found for username=%q, name=%q; the previous one is set for username=%q, name=%q",
at, ui.Username, ui.Name, uiOld.Username, uiOld.Name)
}
}
if err := parseJWTPlaceholdersForUserInfo(ui, false); err != nil {
return nil, err
}
if err := ui.initURLs(); err != nil {
return nil, err
}
@@ -1019,8 +805,6 @@ func parseAuthConfigUsers(ac *AuthConfig) (map[string]*UserInfo, error) {
return nil, fmt.Errorf("cannot parse metric_labels: %w", err)
}
ui.requests = ac.ms.GetOrCreateCounter(`vmauth_user_requests_total` + metricLabels)
ui.requestErrors = ac.ms.GetOrCreateCounter(`vmauth_user_request_errors_total` + metricLabels)
ui.backendRequests = ac.ms.GetOrCreateCounter(`vmauth_user_request_backend_requests_total` + metricLabels)
ui.backendErrors = ac.ms.GetOrCreateCounter(`vmauth_user_request_backend_errors_total` + metricLabels)
ui.requestsDuration = ac.ms.GetOrCreateSummary(`vmauth_user_request_duration_seconds` + metricLabels)
mcr := ui.getMaxConcurrentRequests()
@@ -1072,7 +856,6 @@ func (ui *UserInfo) getMetricLabels() (string, error) {
func (ui *UserInfo) initURLs() error {
retryStatusCodes := defaultRetryStatusCodes.Values()
loadBalancingPolicy := *defaultLoadBalancingPolicy
mergeQueryArgs := *defaultMergeQueryArgs
dropSrcPathPrefixParts := 0
discoverBackendIPs := *discoverBackendIPsGlobal
if ui.RetryStatusCodes != nil {
@@ -1081,9 +864,6 @@ func (ui *UserInfo) initURLs() error {
if ui.LoadBalancingPolicy != "" {
loadBalancingPolicy = ui.LoadBalancingPolicy
}
if len(ui.MergeQueryArgs) != 0 {
mergeQueryArgs = ui.MergeQueryArgs
}
if ui.DropSrcPathPrefixParts != nil {
dropSrcPathPrefixParts = *ui.DropSrcPathPrefixParts
}
@@ -1091,25 +871,22 @@ func (ui *UserInfo) initURLs() error {
discoverBackendIPs = *ui.DiscoverBackendIPs
}
up := ui.URLPrefix
if up != nil {
if err := up.sanitizeAndInitialize(); err != nil {
if ui.URLPrefix != nil {
if err := ui.URLPrefix.sanitizeAndInitialize(); err != nil {
return err
}
up.retryStatusCodes = retryStatusCodes
up.dropSrcPathPrefixParts = dropSrcPathPrefixParts
up.discoverBackendIPs = discoverBackendIPs
if err := up.setLoadBalancingPolicy(loadBalancingPolicy); err != nil {
ui.URLPrefix.retryStatusCodes = retryStatusCodes
ui.URLPrefix.dropSrcPathPrefixParts = dropSrcPathPrefixParts
ui.URLPrefix.discoverBackendIPs = discoverBackendIPs
if err := ui.URLPrefix.setLoadBalancingPolicy(loadBalancingPolicy); err != nil {
return err
}
up.mergeQueryArgs = mergeQueryArgs
}
if ui.DefaultURL != nil {
if err := ui.DefaultURL.sanitizeAndInitialize(); err != nil {
return err
}
}
for _, e := range ui.URLMaps {
if len(e.SrcPaths) == 0 && len(e.SrcHosts) == 0 && len(e.SrcQueryArgs) == 0 && len(e.SrcHeaders) == 0 {
return fmt.Errorf("missing `src_paths`, `src_hosts`, `src_query_args` and `src_headers` in `url_map`")
@@ -1122,7 +899,6 @@ func (ui *UserInfo) initURLs() error {
}
rscs := retryStatusCodes
lbp := loadBalancingPolicy
mqa := mergeQueryArgs
dsp := dropSrcPathPrefixParts
dbd := discoverBackendIPs
if e.RetryStatusCodes != nil {
@@ -1131,9 +907,6 @@ func (ui *UserInfo) initURLs() error {
if e.LoadBalancingPolicy != "" {
lbp = e.LoadBalancingPolicy
}
if len(e.MergeQueryArgs) != 0 {
mqa = e.MergeQueryArgs
}
if e.DropSrcPathPrefixParts != nil {
dsp = *e.DropSrcPathPrefixParts
}
@@ -1144,7 +917,6 @@ func (ui *UserInfo) initURLs() error {
if err := e.URLPrefix.setLoadBalancingPolicy(lbp); err != nil {
return err
}
e.URLPrefix.mergeQueryArgs = mqa
e.URLPrefix.dropSrcPathPrefixParts = dsp
e.URLPrefix.discoverBackendIPs = dbd
}
@@ -1169,9 +941,6 @@ func (ui *UserInfo) name() string {
h := xxhash.Sum64([]byte(ui.AuthToken))
return fmt.Sprintf("auth_token:hash:%016X", h)
}
if ui.JWT != nil {
return `jwt`
}
return ""
}
@@ -1259,11 +1028,13 @@ func (up *URLPrefix) sanitizeAndInitialize() error {
}
// Initialize up.bus
bus := newBackendURLs()
for _, bu := range up.busOriginal {
bus.add(bu)
bus := make([]*backendURL, len(up.busOriginal))
for i, bu := range up.busOriginal {
bus[i] = &backendURL{
url: bu,
}
}
up.bus.Store(bus)
up.bus.Store(&bus)
return nil
}

Some files were not shown because too many files have changed in this diff Show More