Compare commits

..

1 Commits

Author SHA1 Message Date
Max Kotliar
e9261be945 lib/promutil: Weak pointer based labels compressor 2025-08-20 20:02:11 +03:00
3450 changed files with 231135 additions and 293796 deletions

View File

@@ -5,7 +5,7 @@ body:
- type: textarea - type: textarea
id: describe-the-component id: describe-the-component
attributes: attributes:
label: Is your question related to a specific component? label: Is your question request related to a specific component?
placeholder: | placeholder: |
VictoriaMetrics, vmagent, vmalert, vmui, etc... VictoriaMetrics, vmagent, vmalert, vmui, etc...
validations: validations:

View File

@@ -1,48 +0,0 @@
#!/usr/bin/env sh
# Verifies that every line added to the changelog by the current branch is
# located inside the "## tip" section of the changelog file.
#
# Environment:
#   GITHUB_BASE_REF  base branch to diff against (default: master)
#   GIT_REMOTE       remote that hosts the base branch (default: origin)
#
# Exit status: 0 when there are no additions or all additions are inside the
# "## tip" section; 1 when the section is missing or an added line is found
# outside of it.
set -e

CHANGELOG_FILE="docs/victoriametrics/changelog/CHANGELOG.md"
GITHUB_BASE_REF=${GITHUB_BASE_REF:-"master"}
GIT_REMOTE=${GIT_REMOTE:-"origin"}

# Keep the diff in a temporary file so the work tree is not polluted.
DIFF_FILE=$(mktemp)
trap 'rm -f "$DIFF_FILE"' EXIT

git diff "${GIT_REMOTE}/${GITHUB_BASE_REF}"...HEAD -- "$CHANGELOG_FILE" > "$DIFF_FILE"

# Drop everything up to the first hunk header, so the "+++ b/<file>" diff
# header is never mistaken for an added line. Then keep only added lines whose
# first character is not whitespace and strip the leading "+".
ADDED_LINES=$(sed '1,/^@@/d' "$DIFF_FILE" | grep '^+[^[:space:]]' | sed 's/^+//')

# An empty list must stop here: feeding an empty pattern to grep below would
# match every line of the changelog.
if [ -z "$ADDED_LINES" ]; then
    echo "No additions in CHANGELOG.md"
    exit 0
fi

START_TIP=$(grep -n "^## tip" "$CHANGELOG_FILE" | head -1 | cut -d: -f1)
if [ -z "$START_TIP" ]; then
    echo "ERROR: ${CHANGELOG_FILE} does not contain a ## tip section"
    exit 1
fi

# The tip section ends at the next second-level heading, or at the end of the
# file when there is no such heading.
END_TIP=$(awk -v start="$START_TIP" 'NR > start && /^## / {print NR; exit}' "${CHANGELOG_FILE}")
if [ -z "$END_TIP" ]; then
    END_TIP=$(wc -l < "$CHANGELOG_FILE")
fi

BAD=0
while IFS= read -r line; do
    # Find the exact line inside the file and get its line numbers.
    # -x requires a whole-line match; -e keeps lines that start with "-"
    # from being parsed as grep options.
    MATCHES=$(grep -n -F -x -e "$line" "$CHANGELOG_FILE" | cut -d: -f1)
    for m in $MATCHES; do
        if [ "$m" -lt "$START_TIP" ] || [ "$m" -gt "$END_TIP" ]; then
            echo "'$line' on line ${m} is outside ## tip section (lines ${START_TIP}-${END_TIP})"
            BAD=1
        fi
    done
done << EOF
$ADDED_LINES
EOF

if [ "$BAD" -ne 0 ]; then
    echo "CHANGELOG modifications must be placed inside the ## tip section."
    exit 1
fi

echo "CHANGELOG modifications are valid."

View File

@@ -47,8 +47,6 @@ jobs:
arch: arm arch: arm
- os: linux - os: linux
arch: ppc64le arch: ppc64le
- os: linux
arch: s390x
- os: darwin - os: darwin
arch: amd64 arch: amd64
- os: darwin - os: darwin
@@ -61,18 +59,17 @@ jobs:
arch: amd64 arch: amd64
steps: steps:
- name: Code checkout - name: Code checkout
uses: actions/checkout@v6 uses: actions/checkout@v5
- name: Setup Go - name: Setup Go
id: go id: go
uses: actions/setup-go@v6 uses: actions/setup-go@v5
with: with:
cache-dependency-path: | cache-dependency-path: |
go.sum go.sum
Makefile Makefile
app/**/Makefile app/**/Makefile
go-version-file: 'go.mod' go-version: stable
- run: go version
- name: Build victoria-metrics for ${{ matrix.os }}-${{ matrix.arch }} - name: Build victoria-metrics for ${{ matrix.os }}-${{ matrix.arch }}
run: make victoria-metrics-${{ matrix.os }}-${{ matrix.arch }} run: make victoria-metrics-${{ matrix.os }}-${{ matrix.arch }}

View File

@@ -1,19 +0,0 @@
# Workflow that lints changelog edits made by a pull request.
name: 'changelog-linter'
# Runs only for pull requests that touch the changelog file listed below.
on:
pull_request:
paths:
- "docs/victoriametrics/changelog/CHANGELOG.md"
jobs:
tip-lint:
runs-on: 'ubuntu-latest'
steps:
- uses: 'actions/checkout@v6'
with:
# needed for proper diff
fetch-depth: 0
# Passes the pull request base branch to the lint script via GITHUB_BASE_REF.
- name: 'Validate that changelog changes are under ## tip'
run: |
GITHUB_BASE_REF=${{ github.base_ref }} ./.github/scripts/lint-changelog-tip.sh

View File

@@ -1,37 +0,0 @@
# Workflow that fails a pull request when any of its commits has a signature
# status other than G or E, as reported by git's "%G?" format placeholder.
name: check-commit-signed
on:
pull_request:
jobs:
check-commit-signed:
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v6
with:
fetch-depth: 0 # we need full history for commit verification
# Inspects the commits in the base.sha..head.sha range of the pull request.
# The step exits 0 early for non-PR events and for an empty commit range,
# and exits 1 after printing every commit whose status is not G or E.
- name: Check commit signatures
run: |
if [ "${{ github.event_name }}" != "pull_request" ]; then
echo "Not a PR event, skipping signature check"
exit 0
fi
RANGE="${{ github.event.pull_request.base.sha }}..${{ github.event.pull_request.head.sha }}"
echo "Checking commits in PR range: $RANGE"
if [ -z "$(git rev-list $RANGE)" ]; then
echo "No new commits in this PR, skipping signature check"
exit 0
fi
unsigned=$(git log --pretty="%H %G?" $RANGE | grep -vE " (G|E)$" || true)
if [ -n "$unsigned" ]; then
echo "Found unsigned commits:"
echo "$unsigned"
exit 1
fi
echo "All commits in PR are signed (G or E)"

View File

@@ -19,13 +19,11 @@ jobs:
- name: Setup Go - name: Setup Go
id: go id: go
uses: actions/setup-go@v6 uses: actions/setup-go@v5
with: with:
go-version-file: 'go.mod' go-version: stable
cache: false cache: false
- run: go version
- name: Cache Go artifacts - name: Cache Go artifacts
uses: actions/cache@v4 uses: actions/cache@v4
with: with:
@@ -34,7 +32,7 @@ jobs:
~/go/pkg/mod ~/go/pkg/mod
~/go/bin ~/go/bin
key: go-artifacts-${{ runner.os }}-check-licenses-${{ steps.go.outputs.go-version }}-${{ hashFiles('go.sum', 'Makefile', 'app/**/Makefile') }} key: go-artifacts-${{ runner.os }}-check-licenses-${{ steps.go.outputs.go-version }}-${{ hashFiles('go.sum', 'Makefile', 'app/**/Makefile') }}
restore-keys: go-artifacts-${{ runner.os }}-check-licenses-${{ steps.go.outputs.go-version }}- restore-keys: go-artifacts-${{ runner.os }}-check-licenses-
- name: Check License - name: Check License
run: make check-licenses run: make check-licenses

View File

@@ -29,15 +29,14 @@ jobs:
steps: steps:
- name: Checkout repository - name: Checkout repository
uses: actions/checkout@v6 uses: actions/checkout@v5
- name: Set up Go - name: Set up Go
id: go id: go
uses: actions/setup-go@v6 uses: actions/setup-go@v5
with: with:
cache: false cache: false
go-version-file: 'go.mod' go-version: stable
- run: go version
- name: Cache Go artifacts - name: Cache Go artifacts
uses: actions/cache@v4 uses: actions/cache@v4
@@ -47,17 +46,17 @@ jobs:
~/go/bin ~/go/bin
~/go/pkg/mod ~/go/pkg/mod
key: go-artifacts-${{ runner.os }}-codeql-analyze-${{ steps.go.outputs.go-version }}-${{ hashFiles('go.sum', 'Makefile', 'app/**/Makefile') }} key: go-artifacts-${{ runner.os }}-codeql-analyze-${{ steps.go.outputs.go-version }}-${{ hashFiles('go.sum', 'Makefile', 'app/**/Makefile') }}
restore-keys: go-artifacts-${{ runner.os }}-codeql-analyze-${{ steps.go.outputs.go-version }}- restore-keys: go-artifacts-${{ runner.os }}-codeql-analyze-
- name: Initialize CodeQL - name: Initialize CodeQL
uses: github/codeql-action/init@v4 uses: github/codeql-action/init@v3
with: with:
languages: go languages: go
- name: Autobuild - name: Autobuild
uses: github/codeql-action/autobuild@v4 uses: github/codeql-action/autobuild@v3
- name: Perform CodeQL Analysis - name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@v4 uses: github/codeql-action/analyze@v3
with: with:
category: 'language:go' category: 'language:go'

View File

@@ -16,12 +16,12 @@ jobs:
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
- name: Code checkout - name: Code checkout
uses: actions/checkout@v6 uses: actions/checkout@v5
with: with:
path: __vm path: __vm
- name: Checkout private code - name: Checkout private code
uses: actions/checkout@v6 uses: actions/checkout@v5
with: with:
repository: VictoriaMetrics/vmdocs repository: VictoriaMetrics/vmdocs
token: ${{ secrets.VM_BOT_GH_TOKEN }} token: ${{ secrets.VM_BOT_GH_TOKEN }}

View File

@@ -32,19 +32,18 @@ jobs:
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
- name: Code checkout - name: Code checkout
uses: actions/checkout@v6 uses: actions/checkout@v5
- name: Setup Go - name: Setup Go
id: go id: go
uses: actions/setup-go@v6 uses: actions/setup-go@v5
with: with:
cache-dependency-path: | cache-dependency-path: |
go.sum go.sum
Makefile Makefile
app/**/Makefile app/**/Makefile
go-version-file: 'go.mod' go-version: stable
- run: go version
- name: Cache golangci-lint - name: Cache golangci-lint
uses: actions/cache@v4 uses: actions/cache@v4
@@ -52,7 +51,7 @@ jobs:
path: | path: |
~/.cache/golangci-lint ~/.cache/golangci-lint
~/go/bin ~/go/bin
key: golangci-lint-${{ runner.os }}-${{ steps.go.outputs.go-version }}-${{ hashFiles('.golangci.yml') }} key: golangci-lint-${{ runner.os }}-${{ hashFiles('.golangci.yml') }}
- name: Run check-all - name: Run check-all
run: | run: |
@@ -72,45 +71,43 @@ jobs:
steps: steps:
- name: Code checkout - name: Code checkout
uses: actions/checkout@v6 uses: actions/checkout@v5
- name: Setup Go - name: Setup Go
id: go id: go
uses: actions/setup-go@v6 uses: actions/setup-go@v5
with: with:
cache-dependency-path: | cache-dependency-path: |
go.sum go.sum
Makefile Makefile
app/**/Makefile app/**/Makefile
go-version-file: 'go.mod' go-version: stable
- run: go version
- name: Run tests - name: Run tests
run: make ${{ matrix.scenario}} run: GOGC=10 make ${{ matrix.scenario}}
- name: Publish coverage - name: Publish coverage
uses: codecov/codecov-action@v5 uses: codecov/codecov-action@v5
with: with:
files: ./coverage.txt files: ./coverage.txt
apptest: integration:
name: apptest name: integration
runs-on: apptest runs-on: ubuntu-latest
steps: steps:
- name: Code checkout - name: Code checkout
uses: actions/checkout@v6 uses: actions/checkout@v5
- name: Setup Go - name: Setup Go
id: go id: go
uses: actions/setup-go@v6 uses: actions/setup-go@v5
with: with:
cache-dependency-path: | cache-dependency-path: |
go.sum go.sum
Makefile Makefile
app/**/Makefile app/**/Makefile
go-version-file: 'go.mod' go-version: stable
- run: go version
- name: Run app tests - name: Run integration tests
run: make apptest run: make integration-test

View File

@@ -32,41 +32,35 @@ jobs:
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
- name: Code checkout - name: Code checkout
uses: actions/checkout@v6 uses: actions/checkout@v5
- name: Cache node_modules - name: Setup Node
id: cache uses: actions/setup-node@v4
uses: actions/cache@v5
with: with:
path: app/vmui/packages/vmui/node_modules node-version: '24.x'
key: vmui-deps-${{ runner.os }}-${{ hashFiles('app/vmui/packages/vmui/package-lock.json', 'app/vmui/Dockerfile-build') }}
restore-keys: |
vmui-deps-${{ runner.os }}-
- name: Install dependencies - name: Cache node-modules
if: steps.cache.outputs.cache-hit != 'true' uses: actions/cache@v4
run: make vmui-install with:
path: |
app/vmui/packages/vmui/node_modules
key: vmui-artifacts-${{ runner.os }}-${{ hashFiles('package-lock.json') }}
restore-keys: vmui-artifacts-${{ runner.os }}-
- name: Run lint - name: Run lint
id: lint id: lint
run: make vmui-lint run: make vmui-lint
continue-on-error: true continue-on-error: true
env:
VMUI_SKIP_INSTALL: true
- name: Run tests - name: Run tests
id: test id: test
run: make vmui-test run: make vmui-test
continue-on-error: true continue-on-error: true
env:
VMUI_SKIP_INSTALL: true
- name: Run typecheck - name: Run typecheck
id: typecheck id: typecheck
run: make vmui-typecheck run: make vmui-typecheck
continue-on-error: true continue-on-error: true
env:
VMUI_SKIP_INSTALL: true
- name: Annotate Code Linting Results - name: Annotate Code Linting Results
uses: ataylorme/eslint-annotate-action@v3 uses: ataylorme/eslint-annotate-action@v3

View File

@@ -175,7 +175,7 @@
END OF TERMS AND CONDITIONS END OF TERMS AND CONDITIONS
Copyright 2019-2026 VictoriaMetrics, Inc. Copyright 2019-2025 VictoriaMetrics, Inc.
Licensed under the Apache License, Version 2.0 (the "License"); Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License. you may not use this file except in compliance with the License.

View File

@@ -17,7 +17,7 @@ EXTRA_GO_BUILD_TAGS ?=
GO_BUILDINFO = -X '$(PKG_PREFIX)/lib/buildinfo.Version=$(APP_NAME)-$(DATEINFO_TAG)-$(BUILDINFO_TAG)' GO_BUILDINFO = -X '$(PKG_PREFIX)/lib/buildinfo.Version=$(APP_NAME)-$(DATEINFO_TAG)-$(BUILDINFO_TAG)'
TAR_OWNERSHIP ?= --owner=1000 --group=1000 TAR_OWNERSHIP ?= --owner=1000 --group=1000
GOLANGCI_LINT_VERSION := 2.9.0 GOLANGCI_LINT_VERSION := 2.4.0
.PHONY: $(MAKECMDGOALS) .PHONY: $(MAKECMDGOALS)
@@ -125,15 +125,6 @@ vmutils-linux-ppc64le: \
vmrestore-linux-ppc64le \ vmrestore-linux-ppc64le \
vmctl-linux-ppc64le vmctl-linux-ppc64le
vmutils-linux-s390x: \
vmagent-linux-s390x \
vmalert-linux-s390x \
vmalert-tool-linux-s390x \
vmauth-linux-s390x \
vmbackup-linux-s390x \
vmrestore-linux-s390x \
vmctl-linux-s390x
vmutils-darwin-amd64: \ vmutils-darwin-amd64: \
vmagent-darwin-amd64 \ vmagent-darwin-amd64 \
vmalert-darwin-amd64 \ vmalert-darwin-amd64 \
@@ -266,7 +257,6 @@ release-victoria-metrics: \
release-victoria-metrics-linux-amd64 \ release-victoria-metrics-linux-amd64 \
release-victoria-metrics-linux-arm \ release-victoria-metrics-linux-arm \
release-victoria-metrics-linux-arm64 \ release-victoria-metrics-linux-arm64 \
release-victoria-metrics-linux-s390x \
release-victoria-metrics-darwin-amd64 \ release-victoria-metrics-darwin-amd64 \
release-victoria-metrics-darwin-arm64 \ release-victoria-metrics-darwin-arm64 \
release-victoria-metrics-freebsd-amd64 \ release-victoria-metrics-freebsd-amd64 \
@@ -285,9 +275,6 @@ release-victoria-metrics-linux-arm:
release-victoria-metrics-linux-arm64: release-victoria-metrics-linux-arm64:
GOOS=linux GOARCH=arm64 $(MAKE) release-victoria-metrics-goos-goarch GOOS=linux GOARCH=arm64 $(MAKE) release-victoria-metrics-goos-goarch
release-victoria-metrics-linux-s390x:
GOOS=linux GOARCH=s390x $(MAKE) release-victoria-metrics-goos-goarch
release-victoria-metrics-darwin-amd64: release-victoria-metrics-darwin-amd64:
GOOS=darwin GOARCH=amd64 $(MAKE) release-victoria-metrics-goos-goarch GOOS=darwin GOARCH=amd64 $(MAKE) release-victoria-metrics-goos-goarch
@@ -327,7 +314,6 @@ release-vmutils: \
release-vmutils-linux-amd64 \ release-vmutils-linux-amd64 \
release-vmutils-linux-arm64 \ release-vmutils-linux-arm64 \
release-vmutils-linux-arm \ release-vmutils-linux-arm \
release-vmutils-linux-s390x \
release-vmutils-darwin-amd64 \ release-vmutils-darwin-amd64 \
release-vmutils-darwin-arm64 \ release-vmutils-darwin-arm64 \
release-vmutils-freebsd-amd64 \ release-vmutils-freebsd-amd64 \
@@ -346,9 +332,6 @@ release-vmutils-linux-arm64:
release-vmutils-linux-arm: release-vmutils-linux-arm:
GOOS=linux GOARCH=arm $(MAKE) release-vmutils-goos-goarch GOOS=linux GOARCH=arm $(MAKE) release-vmutils-goos-goarch
release-vmutils-linux-s390x:
GOOS=linux GOARCH=s390x $(MAKE) release-vmutils-goos-goarch
release-vmutils-darwin-amd64: release-vmutils-darwin-amd64:
GOOS=darwin GOARCH=amd64 $(MAKE) release-vmutils-goos-goarch GOOS=darwin GOARCH=amd64 $(MAKE) release-vmutils-goos-goarch
@@ -435,7 +418,7 @@ release-vmutils-windows-goarch: \
vmctl-windows-$(GOARCH)-prod.exe vmctl-windows-$(GOARCH)-prod.exe
pprof-cpu: pprof-cpu:
go tool pprof -trim_path=github.com/VictoriaMetrics/VictoriaMetrics $(PPROF_FILE) go tool pprof -trim_path=github.com/VictoriaMetrics/VictoriaMetrics@ $(PPROF_FILE)
fmt: fmt:
gofmt -l -w -s ./lib gofmt -l -w -s ./lib
@@ -443,7 +426,7 @@ fmt:
gofmt -l -w -s ./apptest gofmt -l -w -s ./apptest
vet: vet:
go vet -tags 'synctest' ./lib/... GOEXPERIMENT=synctest go vet ./lib/...
go vet ./app/... go vet ./app/...
go vet ./apptest/... go vet ./apptest/...
@@ -452,52 +435,39 @@ check-all: fmt vet golangci-lint govulncheck
clean-checkers: remove-golangci-lint remove-govulncheck clean-checkers: remove-golangci-lint remove-govulncheck
test: test:
go test -tags 'synctest' ./lib/... ./app/... GOEXPERIMENT=synctest go test ./lib/... ./app/...
test-race: test-race:
go test -tags 'synctest' -race ./lib/... ./app/... GOEXPERIMENT=synctest go test -race ./lib/... ./app/...
test-pure: test-pure:
CGO_ENABLED=0 go test -tags 'synctest' ./lib/... ./app/... GOEXPERIMENT=synctest CGO_ENABLED=0 go test ./lib/... ./app/...
test-full: test-full:
go test -tags 'synctest' -coverprofile=coverage.txt -covermode=atomic ./lib/... ./app/... GOEXPERIMENT=synctest go test -coverprofile=coverage.txt -covermode=atomic ./lib/... ./app/...
test-full-386: test-full-386:
GOARCH=386 go test -tags 'synctest' -coverprofile=coverage.txt -covermode=atomic ./lib/... ./app/... GOEXPERIMENT=synctest GOARCH=386 go test -coverprofile=coverage.txt -covermode=atomic ./lib/... ./app/...
integration-test:
$(MAKE) apptest
apptest: apptest:
$(MAKE) victoria-metrics vmagent vmalert vmauth vmctl vmbackup vmrestore $(MAKE) victoria-metrics vmagent vmalert vmauth vmctl vmbackup vmrestore
go test ./apptest/... -skip="^Test(Cluster|Legacy).*" go test ./apptest/... -skip="^TestCluster.*"
apptest-legacy: victoria-metrics vmbackup vmrestore
OS=$$(uname | tr '[:upper:]' '[:lower:]'); \
ARCH=$$(uname -m | tr '[:upper:]' '[:lower:]' | sed 's/x86_64/amd64/'); \
VERSION=v1.132.0; \
VMSINGLE=victoria-metrics-$${OS}-$${ARCH}-$${VERSION}.tar.gz; \
VMCLUSTER=victoria-metrics-$${OS}-$${ARCH}-$${VERSION}-cluster.tar.gz; \
URL=https://github.com/VictoriaMetrics/VictoriaMetrics/releases/download/$${VERSION}; \
DIR=/tmp/$${VERSION}; \
test -d $${DIR} || (mkdir $${DIR} && \
curl --output-dir /tmp -LO $${URL}/$${VMSINGLE} && tar xzf /tmp/$${VMSINGLE} -C $${DIR} && \
curl --output-dir /tmp -LO $${URL}/$${VMCLUSTER} && tar xzf /tmp/$${VMCLUSTER} -C $${DIR} \
); \
VM_LEGACY_VMSINGLE_PATH=$${DIR}/victoria-metrics-prod \
VM_LEGACY_VMSTORAGE_PATH=$${DIR}/vmstorage-prod \
go test ./apptest/tests -run="^TestLegacySingle.*"
benchmark: benchmark:
go test -run=NO_TESTS -bench=. ./lib/... GOEXPERIMENT=synctest go test -bench=. ./lib/...
go test -run=NO_TESTS -bench=. ./app/... go test -bench=. ./app/...
benchmark-pure: benchmark-pure:
CGO_ENABLED=0 go test -run=NO_TESTS -bench=. ./lib/... GOEXPERIMENT=synctest CGO_ENABLED=0 go test -bench=. ./lib/...
CGO_ENABLED=0 go test -run=NO_TESTS -bench=. ./app/... CGO_ENABLED=0 go test -bench=. ./app/...
vendor-update: vendor-update:
go get -u ./lib/... go get -u ./lib/...
go get -u ./app/... go get -u ./app/...
go mod tidy -compat=1.26 go mod tidy -compat=1.24
go mod vendor go mod vendor
app-local: app-local:
@@ -513,15 +483,14 @@ app-local-windows-goarch:
CGO_ENABLED=0 GOOS=windows GOARCH=$(GOARCH) go build $(RACE) -ldflags "$(GO_BUILDINFO)" -tags "$(EXTRA_GO_BUILD_TAGS)" -o bin/$(APP_NAME)-windows-$(GOARCH)$(RACE).exe $(PKG_PREFIX)/app/$(APP_NAME) CGO_ENABLED=0 GOOS=windows GOARCH=$(GOARCH) go build $(RACE) -ldflags "$(GO_BUILDINFO)" -tags "$(EXTRA_GO_BUILD_TAGS)" -o bin/$(APP_NAME)-windows-$(GOARCH)$(RACE).exe $(PKG_PREFIX)/app/$(APP_NAME)
quicktemplate-gen: install-qtc quicktemplate-gen: install-qtc
qtc -dir=lib qtc
qtc -dir=app
install-qtc: install-qtc:
which qtc || go install github.com/valyala/quicktemplate/qtc@latest which qtc || go install github.com/valyala/quicktemplate/qtc@latest
golangci-lint: install-golangci-lint golangci-lint: install-golangci-lint
golangci-lint run --build-tags 'synctest' GOEXPERIMENT=synctest golangci-lint run
install-golangci-lint: install-golangci-lint:
which golangci-lint && (golangci-lint --version | grep -q $(GOLANGCI_LINT_VERSION)) || curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(shell go env GOPATH)/bin v$(GOLANGCI_LINT_VERSION) which golangci-lint && (golangci-lint --version | grep -q $(GOLANGCI_LINT_VERSION)) || curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(shell go env GOPATH)/bin v$(GOLANGCI_LINT_VERSION)

View File

@@ -3,7 +3,7 @@
[![Latest Release](https://img.shields.io/github/v/release/VictoriaMetrics/VictoriaMetrics?sort=semver&label=&filter=!*-victorialogs&logo=github&labelColor=gray&color=gray&link=https%3A%2F%2Fgithub.com%2FVictoriaMetrics%2FVictoriaMetrics%2Freleases%2Flatest)](https://github.com/VictoriaMetrics/VictoriaMetrics/releases) [![Latest Release](https://img.shields.io/github/v/release/VictoriaMetrics/VictoriaMetrics?sort=semver&label=&filter=!*-victorialogs&logo=github&labelColor=gray&color=gray&link=https%3A%2F%2Fgithub.com%2FVictoriaMetrics%2FVictoriaMetrics%2Freleases%2Flatest)](https://github.com/VictoriaMetrics/VictoriaMetrics/releases)
![Docker Pulls](https://img.shields.io/docker/pulls/victoriametrics/victoria-metrics?label=&logo=docker&logoColor=white&labelColor=2496ED&color=2496ED&link=https%3A%2F%2Fhub.docker.com%2Fr%2Fvictoriametrics%2Fvictoria-metrics) ![Docker Pulls](https://img.shields.io/docker/pulls/victoriametrics/victoria-metrics?label=&logo=docker&logoColor=white&labelColor=2496ED&color=2496ED&link=https%3A%2F%2Fhub.docker.com%2Fr%2Fvictoriametrics%2Fvictoria-metrics)
[![Go Report](https://goreportcard.com/badge/github.com/VictoriaMetrics/VictoriaMetrics?link=https%3A%2F%2Fgoreportcard.com%2Freport%2Fgithub.com%2FVictoriaMetrics%2FVictoriaMetrics)](https://goreportcard.com/report/github.com/VictoriaMetrics/VictoriaMetrics) [![Go Report](https://goreportcard.com/badge/github.com/VictoriaMetrics/VictoriaMetrics?link=https%3A%2F%2Fgoreportcard.com%2Freport%2Fgithub.com%2FVictoriaMetrics%2FVictoriaMetrics)](https://goreportcard.com/report/github.com/VictoriaMetrics/VictoriaMetrics)
[![Build Status](https://github.com/VictoriaMetrics/VictoriaMetrics/actions/workflows/build.yml/badge.svg?branch=master&link=https%3A%2F%2Fgithub.com%2FVictoriaMetrics%2FVictoriaMetrics%2Factions)](https://github.com/VictoriaMetrics/VictoriaMetrics/actions/workflows/build.yml) [![Build Status](https://github.com/VictoriaMetrics/VictoriaMetrics/actions/workflows/main.yml/badge.svg?branch=master&link=https%3A%2F%2Fgithub.com%2FVictoriaMetrics%2FVictoriaMetrics%2Factions)](https://github.com/VictoriaMetrics/VictoriaMetrics/actions/workflows/main.yml)
[![codecov](https://codecov.io/gh/VictoriaMetrics/VictoriaMetrics/branch/master/graph/badge.svg?link=https%3A%2F%2Fcodecov.io%2Fgh%2FVictoriaMetrics%2FVictoriaMetrics)](https://app.codecov.io/gh/VictoriaMetrics/VictoriaMetrics) [![codecov](https://codecov.io/gh/VictoriaMetrics/VictoriaMetrics/branch/master/graph/badge.svg?link=https%3A%2F%2Fcodecov.io%2Fgh%2FVictoriaMetrics%2FVictoriaMetrics)](https://app.codecov.io/gh/VictoriaMetrics/VictoriaMetrics)
[![License](https://img.shields.io/github/license/VictoriaMetrics/VictoriaMetrics?labelColor=green&label=&link=https%3A%2F%2Fgithub.com%2FVictoriaMetrics%2FVictoriaMetrics%2Fblob%2Fmaster%2FLICENSE)](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/LICENSE) [![License](https://img.shields.io/github/license/VictoriaMetrics/VictoriaMetrics?labelColor=green&label=&link=https%3A%2F%2Fgithub.com%2FVictoriaMetrics%2FVictoriaMetrics%2Fblob%2Fmaster%2FLICENSE)](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/LICENSE)
![Slack](https://img.shields.io/badge/Join-4A154B?logo=slack&link=https%3A%2F%2Fslack.victoriametrics.com) ![Slack](https://img.shields.io/badge/Join-4A154B?logo=slack&link=https%3A%2F%2Fslack.victoriametrics.com)
@@ -16,21 +16,16 @@
<img src="docs/victoriametrics/logo.webp" width="300" alt="VictoriaMetrics logo"> <img src="docs/victoriametrics/logo.webp" width="300" alt="VictoriaMetrics logo">
</picture> </picture>
VictoriaMetrics is a fast, cost-effective, and scalable solution for monitoring and managing time series data. It delivers high performance and reliability, making it an ideal choice for businesses of all sizes. VictoriaMetrics is a fast, cost-saving, and scalable solution for monitoring and managing time series data. It delivers high performance and reliability, making it an ideal choice for businesses of all sizes.
Here are some resources and information about VictoriaMetrics: Here are some resources and information about VictoriaMetrics:
- **Case studies**: [Grammarly, Roblox, Wix, Spotify,...](https://docs.victoriametrics.com/victoriametrics/casestudies/). - Documentation: [docs.victoriametrics.com](https://docs.victoriametrics.com)
- **Available**: [Binary releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest), Docker images on [Docker Hub](https://hub.docker.com/r/victoriametrics/victoria-metrics/) and [Quay](https://quay.io/repository/victoriametrics/victoria-metrics), [Source code](https://github.com/VictoriaMetrics/VictoriaMetrics). - Case studies: [Grammarly, Roblox, Wix,...](https://docs.victoriametrics.com/victoriametrics/casestudies/).
- **Deployment types**: [Single-node version](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) and [Cluster version](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/) under [Apache License 2.0](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/LICENSE). - Available: [Binary releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest), docker images [Docker Hub](https://hub.docker.com/r/victoriametrics/victoria-metrics/) and [Quay](https://quay.io/repository/victoriametrics/victoria-metrics), [Source code](https://github.com/VictoriaMetrics/VictoriaMetrics)
- **Getting started:** Read [key concepts](https://docs.victoriametrics.com/victoriametrics/keyconcepts/) and follow the - Deployment types: [Single-node version](https://docs.victoriametrics.com/), [Cluster version](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/), and [Enterprise version](https://docs.victoriametrics.com/victoriametrics/enterprise/)
[quick start guide](https://docs.victoriametrics.com/victoriametrics/quick-start/). - Changelog: [CHANGELOG](https://docs.victoriametrics.com/victoriametrics/changelog/), and [How to upgrade](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-upgrade-victoriametrics)
- **Community**: [Slack](https://slack.victoriametrics.com/) (join via [Slack Inviter](https://slack.victoriametrics.com/)), [X (Twitter)](https://x.com/VictoriaMetrics), [YouTube](https://www.youtube.com/@VictoriaMetrics). See full list [here](https://docs.victoriametrics.com/victoriametrics/#community-and-contributions). - Community: [Slack](https://slack.victoriametrics.com/), [X (Twitter)](https://x.com/VictoriaMetrics), [LinkedIn](https://www.linkedin.com/company/victoriametrics/), [YouTube](https://www.youtube.com/@VictoriaMetrics)
- **Changelog**: Project evolves fast - check the [CHANGELOG](https://docs.victoriametrics.com/victoriametrics/changelog/), and [How to upgrade](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-upgrade-victoriametrics).
- **Enterprise support:** [Contact us](mailto:info@victoriametrics.com) for commercial support with additional [enterprise features](https://docs.victoriametrics.com/victoriametrics/enterprise/).
- **Enterprise releases:** Enterprise and [long-term support releases (LTS)](https://docs.victoriametrics.com/victoriametrics/lts-releases/) are publicly available and can be evaluated for free
using a [free trial license](https://victoriametrics.com/products/enterprise/trial/).
- **Security:** we achieved [security certifications](https://victoriametrics.com/security/) for Database Software Development and Software-Based Monitoring Services.
Yes, we open-source both the single-node VictoriaMetrics and the cluster version. Yes, we open-source both the single-node VictoriaMetrics and the cluster version.

View File

@@ -4,11 +4,12 @@
The following versions of VictoriaMetrics receive regular security fixes: The following versions of VictoriaMetrics receive regular security fixes:
| Version | Supported | | Version | Supported |
|--------------------------------------------------------------------------------|--------------------| |---------|--------------------|
| [Latest release](https://docs.victoriametrics.com/victoriametrics/changelog/) | :white_check_mark: | | [latest release](https://docs.victoriametrics.com/victoriametrics/changelog/) | :white_check_mark: |
| [LTS releases](https://docs.victoriametrics.com/victoriametrics/lts-releases/) | :white_check_mark: | | v1.102.x [LTS line](https://docs.victoriametrics.com/victoriametrics/lts-releases/) | :white_check_mark: |
| other releases | :x: | | v1.110.x [LTS line](https://docs.victoriametrics.com/victoriametrics/lts-releases/) | :white_check_mark: |
| other releases | :x: |
See [this page](https://victoriametrics.com/security/) for more details. See [this page](https://victoriametrics.com/security/) for more details.

View File

@@ -27,9 +27,6 @@ victoria-metrics-linux-ppc64le-prod:
victoria-metrics-linux-386-prod: victoria-metrics-linux-386-prod:
APP_NAME=victoria-metrics $(MAKE) app-via-docker-linux-386 APP_NAME=victoria-metrics $(MAKE) app-via-docker-linux-386
victoria-metrics-linux-s390x-prod:
APP_NAME=victoria-metrics $(MAKE) app-via-docker-linux-s390x
victoria-metrics-darwin-amd64-prod: victoria-metrics-darwin-amd64-prod:
APP_NAME=victoria-metrics $(MAKE) app-via-docker-darwin-amd64 APP_NAME=victoria-metrics $(MAKE) app-via-docker-darwin-amd64

View File

@@ -134,7 +134,6 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
} }
w.Header().Add("Content-Type", "text/html; charset=utf-8") w.Header().Add("Content-Type", "text/html; charset=utf-8")
fmt.Fprintf(w, "<h2>Single-node VictoriaMetrics</h2></br>") fmt.Fprintf(w, "<h2>Single-node VictoriaMetrics</h2></br>")
fmt.Fprintf(w, "Version %s<br>", buildinfo.Version)
fmt.Fprintf(w, "See docs at <a href='https://docs.victoriametrics.com/'>https://docs.victoriametrics.com/</a></br>") fmt.Fprintf(w, "See docs at <a href='https://docs.victoriametrics.com/'>https://docs.victoriametrics.com/</a></br>")
fmt.Fprintf(w, "Useful endpoints:</br>") fmt.Fprintf(w, "Useful endpoints:</br>")
httpserver.WriteAPIHelp(w, [][2]string{ httpserver.WriteAPIHelp(w, [][2]string{
@@ -170,7 +169,7 @@ func usage() {
const s = ` const s = `
victoria-metrics is a time series database and monitoring solution. victoria-metrics is a time series database and monitoring solution.
See the docs at https://docs.victoriametrics.com/victoriametrics/ See the docs at https://docs.victoriametrics.com/
` `
flagutil.Usage(s) flagutil.Usage(s)
} }

View File

@@ -10,11 +10,9 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil" "github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/decimal" "github.com/VictoriaMetrics/VictoriaMetrics/lib/decimal"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger" "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prommetadata"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb" "github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/prometheus" "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/prometheus"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage" "github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage/metricsmetadata"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timeserieslimits" "github.com/VictoriaMetrics/VictoriaMetrics/lib/timeserieslimits"
) )
@@ -29,9 +27,11 @@ var selfScraperWG sync.WaitGroup
func startSelfScraper() { func startSelfScraper() {
selfScraperStopCh = make(chan struct{}) selfScraperStopCh = make(chan struct{})
selfScraperWG.Go(func() { selfScraperWG.Add(1)
go func() {
defer selfScraperWG.Done()
selfScraper(*selfScrapeInterval) selfScraper(*selfScrapeInterval)
}) }()
} }
func stopSelfScraper() { func stopSelfScraper() {
@@ -48,7 +48,6 @@ func selfScraper(scrapeInterval time.Duration) {
var bb bytesutil.ByteBuffer var bb bytesutil.ByteBuffer
var rows prometheus.Rows var rows prometheus.Rows
var metadataRows prometheus.MetadataRows
var mrs []storage.MetricRow var mrs []storage.MetricRow
var labels []prompb.Label var labels []prompb.Label
t := time.NewTicker(scrapeInterval) t := time.NewTicker(scrapeInterval)
@@ -58,12 +57,8 @@ func selfScraper(scrapeInterval time.Duration) {
appmetrics.WritePrometheusMetrics(&bb) appmetrics.WritePrometheusMetrics(&bb)
s := bytesutil.ToUnsafeString(bb.B) s := bytesutil.ToUnsafeString(bb.B)
rows.Reset() rows.Reset()
// Parse metrics and optionally metadata when enabled // VictoriaMetrics components don't expose metadata yet, only need to parse samples
if prommetadata.IsEnabled() { rows.UnmarshalWithErrLogger(s, nil)
rows, metadataRows = prometheus.UnmarshalWithMetadata(rows, metadataRows, s, nil)
} else {
rows.UnmarshalWithErrLogger(s, nil)
}
mrs = mrs[:0] mrs = mrs[:0]
for i := range rows.Rows { for i := range rows.Rows {
r := &rows.Rows[i] r := &rows.Rows[i]
@@ -96,19 +91,6 @@ func selfScraper(scrapeInterval time.Duration) {
if err := vmstorage.AddRows(mrs); err != nil { if err := vmstorage.AddRows(mrs); err != nil {
logger.Errorf("cannot store self-scraped metrics: %s", err) logger.Errorf("cannot store self-scraped metrics: %s", err)
} }
if len(metadataRows.Rows) > 0 {
mms := make([]metricsmetadata.Row, 0, len(metadataRows.Rows))
for _, mm := range metadataRows.Rows {
mms = append(mms, metricsmetadata.Row{
MetricFamilyName: bytesutil.ToUnsafeBytes(mm.Metric),
Help: bytesutil.ToUnsafeBytes(mm.Help),
Type: mm.Type,
})
}
if err := vmstorage.AddMetadataRows(mms); err != nil {
logger.Errorf("cannot store self-scraped metrics metadata: %s", err)
}
}
} }
for { for {
select { select {

View File

@@ -33,13 +33,13 @@ func PopulateTimeTpl(b []byte, tGlobal time.Time) []byte {
} }
switch strings.TrimSpace(parts[0]) { switch strings.TrimSpace(parts[0]) {
case `TIME_S`: case `TIME_S`:
return fmt.Appendf(nil, "%d", t.Unix()) return []byte(fmt.Sprintf("%d", t.Unix()))
case `TIME_MSZ`: case `TIME_MSZ`:
return fmt.Appendf(nil, "%d", t.Unix()*1e3) return []byte(fmt.Sprintf("%d", t.Unix()*1e3))
case `TIME_MS`: case `TIME_MS`:
return fmt.Appendf(nil, "%d", timeToMillis(t)) return []byte(fmt.Sprintf("%d", timeToMillis(t)))
case `TIME_NS`: case `TIME_NS`:
return fmt.Appendf(nil, "%d", t.UnixNano()) return []byte(fmt.Sprintf("%d", t.UnixNano()))
default: default:
log.Fatalf("unknown time pattern %s in %s", parts[0], repl) log.Fatalf("unknown time pattern %s in %s", parts[0], repl)
} }

View File

@@ -27,9 +27,6 @@ vmagent-linux-ppc64le-prod:
vmagent-linux-386-prod: vmagent-linux-386-prod:
APP_NAME=vmagent $(MAKE) app-via-docker-linux-386 APP_NAME=vmagent $(MAKE) app-via-docker-linux-386
vmagent-linux-s390x-prod:
APP_NAME=vmagent $(MAKE) app-via-docker-linux-s390x
vmagent-darwin-amd64-prod: vmagent-darwin-amd64-prod:
APP_NAME=vmagent $(MAKE) app-via-docker-darwin-amd64 APP_NAME=vmagent $(MAKE) app-via-docker-darwin-amd64

View File

@@ -27,7 +27,6 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/promremotewrite" "github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/promremotewrite"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite" "github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/vmimport" "github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/vmimport"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/zabbixconnector"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth" "github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo" "github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil" "github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
@@ -75,7 +74,7 @@ var (
"See also -opentsdbHTTPListenAddr.useProxyProtocol") "See also -opentsdbHTTPListenAddr.useProxyProtocol")
opentsdbHTTPUseProxyProtocol = flag.Bool("opentsdbHTTPListenAddr.useProxyProtocol", false, "Whether to use proxy protocol for connections accepted "+ opentsdbHTTPUseProxyProtocol = flag.Bool("opentsdbHTTPListenAddr.useProxyProtocol", false, "Whether to use proxy protocol for connections accepted "+
"at -opentsdbHTTPListenAddr . See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt") "at -opentsdbHTTPListenAddr . See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt")
configAuthKey = flagutil.NewPassword("configAuthKey", "Authorization key for accessing /config and /remotewrite-.*-config pages. It must be passed via authKey query arg. It overrides -httpAuth.*") configAuthKey = flagutil.NewPassword("configAuthKey", "Authorization key for accessing /config page. It must be passed via authKey query arg. It overrides -httpAuth.*")
reloadAuthKey = flagutil.NewPassword("reloadAuthKey", "Auth key for /-/reload http endpoint. It must be passed via authKey query arg. It overrides -httpAuth.*") reloadAuthKey = flagutil.NewPassword("reloadAuthKey", "Auth key for /-/reload http endpoint. It must be passed via authKey query arg. It overrides -httpAuth.*")
dryRun = flag.Bool("dryRun", false, "Whether to check config files without running vmagent. The following files are checked: "+ dryRun = flag.Bool("dryRun", false, "Whether to check config files without running vmagent. The following files are checked: "+
"-promscrape.config, -remoteWrite.relabelConfig, -remoteWrite.urlRelabelConfig, -remoteWrite.streamAggr.config . "+ "-promscrape.config, -remoteWrite.relabelConfig, -remoteWrite.urlRelabelConfig, -remoteWrite.streamAggr.config . "+
@@ -245,7 +244,6 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
} }
w.Header().Add("Content-Type", "text/html; charset=utf-8") w.Header().Add("Content-Type", "text/html; charset=utf-8")
fmt.Fprintf(w, "<h2>vmagent</h2>") fmt.Fprintf(w, "<h2>vmagent</h2>")
fmt.Fprintf(w, "Version %s<br>", buildinfo.Version)
fmt.Fprintf(w, "See docs at <a href='https://docs.victoriametrics.com/victoriametrics/vmagent/'>https://docs.victoriametrics.com/victoriametrics/vmagent/</a></br>") fmt.Fprintf(w, "See docs at <a href='https://docs.victoriametrics.com/victoriametrics/vmagent/'>https://docs.victoriametrics.com/victoriametrics/vmagent/</a></br>")
fmt.Fprintf(w, "Useful endpoints:</br>") fmt.Fprintf(w, "Useful endpoints:</br>")
httpserver.WriteAPIHelp(w, [][2]string{ httpserver.WriteAPIHelp(w, [][2]string{
@@ -254,8 +252,6 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
{"metric-relabel-debug", "debug metric relabeling"}, {"metric-relabel-debug", "debug metric relabeling"},
{"api/v1/targets", "advanced information about discovered targets in JSON format"}, {"api/v1/targets", "advanced information about discovered targets in JSON format"},
{"config", "-promscrape.config contents"}, {"config", "-promscrape.config contents"},
{"remotewrite-relabel-config", "-remoteWrite.relabelConfig contents"},
{"remotewrite-url-relabel-config", "-remoteWrite.urlRelabelConfig contents"},
{"metrics", "available service metrics"}, {"metrics", "available service metrics"},
{"flags", "command-line flags"}, {"flags", "command-line flags"},
{"-/reload", "reload configuration"}, {"-/reload", "reload configuration"},
@@ -352,17 +348,6 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
} }
firehose.WriteSuccessResponse(w, r) firehose.WriteSuccessResponse(w, r)
return true return true
case "/zabbixconnector/api/v1/history":
zabbixconnectorHistoryRequests.Inc()
if err := zabbixconnector.InsertHandlerForHTTP(nil, r); err != nil {
zabbixconnectorHistoryErrors.Inc()
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(http.StatusBadRequest)
fmt.Fprintf(w, `{"error":%q}`, err.Error())
return true
}
w.WriteHeader(http.StatusOK)
return true
case "/newrelic": case "/newrelic":
newrelicCheckRequest.Inc() newrelicCheckRequest.Inc()
w.Header().Set("Content-Type", "application/json") w.Header().Set("Content-Type", "application/json")
@@ -492,42 +477,6 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
promscrape.WriteConfigData(&bb) promscrape.WriteConfigData(&bb)
fmt.Fprintf(w, `{"status":"success","data":{"yaml":%s}}`, stringsutil.JSONString(string(bb.B))) fmt.Fprintf(w, `{"status":"success","data":{"yaml":%s}}`, stringsutil.JSONString(string(bb.B)))
return true return true
case "/remotewrite-relabel-config":
if !httpserver.CheckAuthFlag(w, r, configAuthKey) {
return true
}
remoteWriteRelabelConfigRequests.Inc()
w.Header().Set("Content-Type", "text/plain; charset=utf-8")
remotewrite.WriteRelabelConfigData(w)
return true
case "/api/v1/status/remotewrite-relabel-config":
if !httpserver.CheckAuthFlag(w, r, configAuthKey) {
return true
}
remoteWriteStatusRelabelConfigRequests.Inc()
w.Header().Set("Content-Type", "application/json")
var bb bytesutil.ByteBuffer
remotewrite.WriteRelabelConfigData(&bb)
fmt.Fprintf(w, `{"status":"success","data":{"yaml":%s}}`, stringsutil.JSONString(string(bb.B)))
return true
case "/remotewrite-url-relabel-config":
if !httpserver.CheckAuthFlag(w, r, configAuthKey) {
return true
}
remoteWriteURLRelabelConfigRequests.Inc()
w.Header().Set("Content-Type", "text/plain; charset=utf-8")
remotewrite.WriteURLRelabelConfigData(w)
return true
case "/api/v1/status/remotewrite-url-relabel-config":
if !httpserver.CheckAuthFlag(w, r, configAuthKey) {
return true
}
remoteWriteStatusURLRelabelConfigRequests.Inc()
w.Header().Set("Content-Type", "application/json")
var bb bytesutil.ByteBuffer
remotewrite.WriteURLRelabelConfigData(&bb)
fmt.Fprintf(w, `{"status":"success","data":{"yaml":%s}}`, stringsutil.JSONString(string(bb.B)))
return true
case "/prometheus/-/reload", "/-/reload": case "/prometheus/-/reload", "/-/reload":
if !httpserver.CheckAuthFlag(w, r, reloadAuthKey) { if !httpserver.CheckAuthFlag(w, r, reloadAuthKey) {
return true return true
@@ -657,17 +606,6 @@ func processMultitenantRequest(w http.ResponseWriter, r *http.Request, path stri
} }
firehose.WriteSuccessResponse(w, r) firehose.WriteSuccessResponse(w, r)
return true return true
case "zabbixconnector/api/v1/history":
zabbixconnectorHistoryRequests.Inc()
if err := zabbixconnector.InsertHandlerForHTTP(at, r); err != nil {
zabbixconnectorHistoryErrors.Inc()
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(http.StatusBadRequest)
fmt.Fprintf(w, `{"error":%q}`, err.Error())
return true
}
w.WriteHeader(http.StatusOK)
return true
case "newrelic": case "newrelic":
newrelicCheckRequest.Inc() newrelicCheckRequest.Inc()
w.Header().Set("Content-Type", "application/json") w.Header().Set("Content-Type", "application/json")
@@ -789,9 +727,6 @@ var (
opentelemetryPushRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/opentelemetry/v1/metrics", protocol="opentelemetry"}`) opentelemetryPushRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/opentelemetry/v1/metrics", protocol="opentelemetry"}`)
opentelemetryPushErrors = metrics.NewCounter(`vmagent_http_request_errors_total{path="/opentelemetry/v1/metrics", protocol="opentelemetry"}`) opentelemetryPushErrors = metrics.NewCounter(`vmagent_http_request_errors_total{path="/opentelemetry/v1/metrics", protocol="opentelemetry"}`)
zabbixconnectorHistoryRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/zabbixconnector/api/v1/history", protocol="zabbixconnector"}`)
zabbixconnectorHistoryErrors = metrics.NewCounter(`vmagent_http_request_errors_total{path="/zabbixconnector/api/v1/history", protocol="zabbixconnector"}`)
newrelicWriteRequests = metrics.NewCounter(`vm_http_requests_total{path="/newrelic/infra/v2/metrics/events/bulk", protocol="newrelic"}`) newrelicWriteRequests = metrics.NewCounter(`vm_http_requests_total{path="/newrelic/infra/v2/metrics/events/bulk", protocol="newrelic"}`)
newrelicWriteErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/newrelic/infra/v2/metrics/events/bulk", protocol="newrelic"}`) newrelicWriteErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/newrelic/infra/v2/metrics/events/bulk", protocol="newrelic"}`)
@@ -812,12 +747,6 @@ var (
promscrapeConfigRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/config"}`) promscrapeConfigRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/config"}`)
promscrapeStatusConfigRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/api/v1/status/config"}`) promscrapeStatusConfigRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/api/v1/status/config"}`)
remoteWriteRelabelConfigRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/remotewrite-relabel-config"}`)
remoteWriteStatusRelabelConfigRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/api/v1/status/remotewrite-relabel-config"}`)
remoteWriteURLRelabelConfigRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/remotewrite-url-relabel-config"}`)
remoteWriteStatusURLRelabelConfigRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/api/v1/status/remotewrite-url-relabel-config"}`)
promscrapeConfigReloadRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/-/reload"}`) promscrapeConfigReloadRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/-/reload"}`)
) )

View File

@@ -78,7 +78,7 @@ func insertRows(at *auth.Token, rows []newrelic.Row, extraLabels []prompb.Label)
if !remotewrite.TryPush(at, &ctx.WriteRequest) { if !remotewrite.TryPush(at, &ctx.WriteRequest) {
return remotewrite.ErrQueueFullHTTPRetry return remotewrite.ErrQueueFullHTTPRetry
} }
rowsInserted.Add(samplesCount) rowsInserted.Add(len(rows))
if at != nil { if at != nil {
rowsTenantInserted.Get(at).Add(samplesCount) rowsTenantInserted.Get(at).Add(samplesCount)
} }

View File

@@ -2,14 +2,13 @@ package opentelemetry
import ( import (
"fmt" "fmt"
"io"
"net/http" "net/http"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common" "github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite" "github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth" "github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prommetadata"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb" "github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/opentelemetry/firehose" "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/opentelemetry/firehose"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/opentelemetry/stream" "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/opentelemetry/stream"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/protoparserutil" "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/protoparserutil"
@@ -25,13 +24,6 @@ var (
rowsPerInsert = metrics.NewHistogram(`vmagent_rows_per_insert{type="opentelemetry"}`) rowsPerInsert = metrics.NewHistogram(`vmagent_rows_per_insert{type="opentelemetry"}`)
) )
// InsertHandlerForReader processes metrics from given reader.
func InsertHandlerForReader(at *auth.Token, r io.Reader, encoding string) error {
return stream.ParseStream(r, encoding, nil, func(tss []prompb.TimeSeries, mms []prompb.MetricMetadata) error {
return insertRows(at, tss, mms, nil)
})
}
// InsertHandler processes opentelemetry metrics. // InsertHandler processes opentelemetry metrics.
func InsertHandler(at *auth.Token, req *http.Request) error { func InsertHandler(at *auth.Token, req *http.Request) error {
extraLabels, err := protoparserutil.GetExtraLabels(req) extraLabels, err := protoparserutil.GetExtraLabels(req)
@@ -76,7 +68,7 @@ func insertRows(at *auth.Token, tss []prompb.TimeSeries, mms []prompb.MetricMeta
ctx.WriteRequest.Timeseries = tssDst ctx.WriteRequest.Timeseries = tssDst
var metadataTotal int var metadataTotal int
if prommetadata.IsEnabled() { if promscrape.IsMetadataEnabled() {
var accountID, projectID uint32 var accountID, projectID uint32
if at != nil { if at != nil {
accountID = at.AccountID accountID = at.AccountID

View File

@@ -7,8 +7,8 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite" "github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth" "github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver" "github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prommetadata"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb" "github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/prometheus" "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/prometheus"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/prometheus/stream" "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/prometheus/stream"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/protoparserutil" "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/protoparserutil"
@@ -36,7 +36,7 @@ func InsertHandler(at *auth.Token, req *http.Request) error {
return err return err
} }
encoding := req.Header.Get("Content-Encoding") encoding := req.Header.Get("Content-Encoding")
return stream.Parse(req.Body, defaultTimestamp, encoding, true, prommetadata.IsEnabled(), func(rows []prometheus.Row, mms []prometheus.Metadata) error { return stream.Parse(req.Body, defaultTimestamp, encoding, true, promscrape.IsMetadataEnabled(), func(rows []prometheus.Row, mms []prometheus.Metadata) error {
return insertRows(at, rows, mms, extraLabels) return insertRows(at, rows, mms, extraLabels)
}, func(s string) { }, func(s string) {
httpserver.LogError(req, s) httpserver.LogError(req, s)

View File

@@ -6,8 +6,8 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common" "github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite" "github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth" "github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prommetadata"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb" "github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/promremotewrite/stream" "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/promremotewrite/stream"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/protoparserutil" "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/protoparserutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics" "github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
@@ -71,7 +71,7 @@ func insertRows(at *auth.Token, timeseries []prompb.TimeSeries, mms []prompb.Met
ctx.WriteRequest.Timeseries = tssDst ctx.WriteRequest.Timeseries = tssDst
var metadataTotal int var metadataTotal int
if prommetadata.IsEnabled() { if promscrape.IsMetadataEnabled() {
var accountID, projectID uint32 var accountID, projectID uint32
if at != nil { if at != nil {
accountID = at.AccountID accountID = at.AccountID

View File

@@ -15,6 +15,7 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/awsapi" "github.com/VictoriaMetrics/VictoriaMetrics/lib/awsapi"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding" "github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding/zstd"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil" "github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httputil" "github.com/VictoriaMetrics/VictoriaMetrics/lib/httputil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger" "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
@@ -202,10 +203,14 @@ func (c *client) init(argIdx, concurrency int, sanitizedURL string) {
c.retriesCount = metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_retries_count_total{url=%q}`, c.sanitizedURL)) c.retriesCount = metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_retries_count_total{url=%q}`, c.sanitizedURL))
c.sendDuration = metrics.GetOrCreateFloatCounter(fmt.Sprintf(`vmagent_remotewrite_send_duration_seconds_total{url=%q}`, c.sanitizedURL)) c.sendDuration = metrics.GetOrCreateFloatCounter(fmt.Sprintf(`vmagent_remotewrite_send_duration_seconds_total{url=%q}`, c.sanitizedURL))
metrics.GetOrCreateGauge(fmt.Sprintf(`vmagent_remotewrite_queues{url=%q}`, c.sanitizedURL), func() float64 { metrics.GetOrCreateGauge(fmt.Sprintf(`vmagent_remotewrite_queues{url=%q}`, c.sanitizedURL), func() float64 {
return float64(concurrency) return float64(*queues)
}) })
for range concurrency { for i := 0; i < concurrency; i++ {
c.wg.Go(c.runWorker) c.wg.Add(1)
go func() {
defer c.wg.Done()
c.runWorker()
}()
} }
logger.Infof("initialized client for -remoteWrite.url=%q", c.sanitizedURL) logger.Infof("initialized client for -remoteWrite.url=%q", c.sanitizedURL)
} }
@@ -458,6 +463,12 @@ again:
// - Real-world implementations of v1 use both 400 and 415 status codes. // - Real-world implementations of v1 use both 400 and 415 status codes.
// See more in research: https://github.com/VictoriaMetrics/VictoriaMetrics/pull/8462#issuecomment-2786918054 // See more in research: https://github.com/VictoriaMetrics/VictoriaMetrics/pull/8462#issuecomment-2786918054
case 415, 400: case 415, 400:
if c.canDowngradeVMProto.Swap(false) {
logger.Infof("received unsupported media type or bad request from remote storage at %q. Downgrading protocol from VictoriaMetrics to Prometheus remote write for all future requests. "+
"See https://docs.victoriametrics.com/victoriametrics/vmagent/#victoriametrics-remote-write-protocol", c.sanitizedURL)
c.useVMProto.Store(false)
}
if encoding.IsZstd(block) { if encoding.IsZstd(block) {
logger.Infof("received unsupported media type or bad request from remote storage at %q. Re-packing the block to Prometheus remote write and retrying."+ logger.Infof("received unsupported media type or bad request from remote storage at %q. Re-packing the block to Prometheus remote write and retrying."+
"See https://docs.victoriametrics.com/victoriametrics/vmagent/#victoriametrics-remote-write-protocol", c.sanitizedURL) "See https://docs.victoriametrics.com/victoriametrics/vmagent/#victoriametrics-remote-write-protocol", c.sanitizedURL)
@@ -549,9 +560,9 @@ func getRetryDuration(retryAfterDuration, retryDuration, maxRetryDuration time.D
// For more details, see: https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9417 // For more details, see: https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9417
func repackBlockFromZstdToSnappy(zstdBlock []byte) ([]byte, error) { func repackBlockFromZstdToSnappy(zstdBlock []byte) ([]byte, error) {
plainBlock := make([]byte, 0, len(zstdBlock)*2) plainBlock := make([]byte, 0, len(zstdBlock)*2)
plainBlock, err := encoding.DecompressZSTD(plainBlock, zstdBlock) plainBlock, err := zstd.Decompress(plainBlock, zstdBlock)
if err != nil { if err != nil {
return nil, err return nil, fmt.Errorf("zstd: decompress: %s", err)
} }
return snappy.Encode(nil, plainBlock), nil return snappy.Encode(nil, plainBlock), nil

View File

@@ -18,7 +18,7 @@ func TestCalculateRetryDuration(t *testing.T) {
f := func(retryAfterDuration, retryDuration time.Duration, n int, expectMinDuration time.Duration) { f := func(retryAfterDuration, retryDuration time.Duration, n int, expectMinDuration time.Duration) {
t.Helper() t.Helper()
for range n { for i := 0; i < n; i++ {
retryDuration = getRetryDuration(retryAfterDuration, retryDuration, time.Minute) retryDuration = getRetryDuration(retryAfterDuration, retryDuration, time.Minute)
} }
@@ -93,7 +93,10 @@ func TestParseRetryAfterHeader(t *testing.T) {
// helper calculate the max possible time duration calculated by timeutil.AddJitterToDuration. // helper calculate the max possible time duration calculated by timeutil.AddJitterToDuration.
func helper(d time.Duration) time.Duration { func helper(d time.Duration) time.Duration {
dv := min(d/10, 10*time.Second) dv := d / 10
if dv > 10*time.Second {
dv = 10 * time.Second
}
return d + dv return d + dv
} }

View File

@@ -48,7 +48,11 @@ func newPendingSeries(fq *persistentqueue.FastQueue, isVMRemoteWrite *atomic.Boo
ps.wr.significantFigures = significantFigures ps.wr.significantFigures = significantFigures
ps.wr.roundDigits = roundDigits ps.wr.roundDigits = roundDigits
ps.stopCh = make(chan struct{}) ps.stopCh = make(chan struct{})
ps.periodicFlusherWG.Go(ps.periodicFlusher) ps.periodicFlusherWG.Add(1)
go func() {
defer ps.periodicFlusherWG.Done()
ps.periodicFlusher()
}()
return &ps return &ps
} }

View File

@@ -51,9 +51,9 @@ func testPushWriteRequest(t *testing.T, rowsCount, expectedBlockLenProm, expecte
func newTestWriteRequest(seriesCount, labelsCount int) *prompb.WriteRequest { func newTestWriteRequest(seriesCount, labelsCount int) *prompb.WriteRequest {
var wr prompb.WriteRequest var wr prompb.WriteRequest
for i := range seriesCount { for i := 0; i < seriesCount; i++ {
var labels []prompb.Label var labels []prompb.Label
for j := range labelsCount { for j := 0; j < labelsCount; j++ {
labels = append(labels, prompb.Label{ labels = append(labels, prompb.Label{
Name: fmt.Sprintf("label_%d_%d", i, j), Name: fmt.Sprintf("label_%d_%d", i, j),
Value: fmt.Sprintf("value_%d_%d", i, j), Value: fmt.Sprintf("value_%d_%d", i, j),

View File

@@ -3,24 +3,22 @@ package remotewrite
import ( import (
"flag" "flag"
"fmt" "fmt"
"io"
"strconv" "strconv"
"strings" "strings"
"sync" "sync"
"sync/atomic"
"github.com/VictoriaMetrics/metrics"
"gopkg.in/yaml.v2"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime" "github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil" "github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger" "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb" "github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel" "github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
"github.com/VictoriaMetrics/metrics"
) )
var ( var (
unparsedLabelsGlobal = flagutil.NewArrayString("remoteWrite.label", "Optional label in the form 'name=value' to add to all the metrics before sending them to all -remoteWrite.url.") unparsedLabelsGlobal = flagutil.NewArrayString("remoteWrite.label", "Optional label in the form 'name=value' to add to all the metrics before sending them to -remoteWrite.url. "+
"Pass multiple -remoteWrite.label flags in order to add multiple labels to metrics before sending them to remote storage")
relabelConfigPathGlobal = flag.String("remoteWrite.relabelConfig", "", "Optional path to file with relabeling configs, which are applied "+ relabelConfigPathGlobal = flag.String("remoteWrite.relabelConfig", "", "Optional path to file with relabeling configs, which are applied "+
"to all the metrics before sending them to -remoteWrite.url. See also -remoteWrite.urlRelabelConfig. "+ "to all the metrics before sending them to -remoteWrite.url. See also -remoteWrite.urlRelabelConfig. "+
"The path can point either to local file or to http url. "+ "The path can point either to local file or to http url. "+
@@ -34,12 +32,9 @@ var (
"See https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels") "See https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels")
) )
var labelsGlobal []prompb.Label
var ( var (
labelsGlobal []prompb.Label
remoteWriteRelabelConfigData atomic.Pointer[[]byte]
remoteWriteURLRelabelConfigData atomic.Pointer[[]any]
relabelConfigReloads *metrics.Counter relabelConfigReloads *metrics.Counter
relabelConfigReloadErrors *metrics.Counter relabelConfigReloadErrors *metrics.Counter
relabelConfigSuccess *metrics.Gauge relabelConfigSuccess *metrics.Gauge
@@ -72,42 +67,6 @@ func initRelabelConfigs() {
} }
} }
// WriteRelabelConfigData writes -remoteWrite.relabelConfig contents to w
func WriteRelabelConfigData(w io.Writer) {
p := remoteWriteRelabelConfigData.Load()
if p == nil {
// Nothing to write to w
return
}
_, _ = w.Write(*p)
}
// WriteURLRelabelConfigData writes -remoteWrite.urlRelabelConfig contents to w
func WriteURLRelabelConfigData(w io.Writer) {
p := remoteWriteURLRelabelConfigData.Load()
if p == nil {
// Nothing to write to w
return
}
type urlRelabelCfg struct {
Url string `yaml:"url"`
RelabelConfig any `yaml:"relabel_config"`
}
var cs []urlRelabelCfg
for i, url := range *remoteWriteURLs {
cfgData := (*p)[i]
if !*showRemoteWriteURL {
url = fmt.Sprintf("%d:secret-url", i+1)
}
cs = append(cs, urlRelabelCfg{
Url: url,
RelabelConfig: cfgData,
})
}
d, _ := yaml.Marshal(cs)
_, _ = w.Write(d)
}
func reloadRelabelConfigs() { func reloadRelabelConfigs() {
rcs := allRelabelConfigs.Load() rcs := allRelabelConfigs.Load()
if !rcs.isSet() { if !rcs.isSet() {
@@ -131,43 +90,28 @@ func reloadRelabelConfigs() {
func loadRelabelConfigs() (*relabelConfigs, error) { func loadRelabelConfigs() (*relabelConfigs, error) {
var rcs relabelConfigs var rcs relabelConfigs
if *relabelConfigPathGlobal != "" { if *relabelConfigPathGlobal != "" {
global, rawCfg, err := promrelabel.LoadRelabelConfigs(*relabelConfigPathGlobal) global, err := promrelabel.LoadRelabelConfigs(*relabelConfigPathGlobal)
if err != nil { if err != nil {
return nil, fmt.Errorf("cannot load -remoteWrite.relabelConfig=%q: %w", *relabelConfigPathGlobal, err) return nil, fmt.Errorf("cannot load -remoteWrite.relabelConfig=%q: %w", *relabelConfigPathGlobal, err)
} }
remoteWriteRelabelConfigData.Store(&rawCfg)
rcs.global = global rcs.global = global
} }
if len(*relabelConfigPaths) > len(*remoteWriteURLs) { if len(*relabelConfigPaths) > len(*remoteWriteURLs) {
return nil, fmt.Errorf("too many -remoteWrite.urlRelabelConfig args: %d; it mustn't exceed the number of -remoteWrite.url args: %d", return nil, fmt.Errorf("too many -remoteWrite.urlRelabelConfig args: %d; it mustn't exceed the number of -remoteWrite.url args: %d",
len(*relabelConfigPaths), (len(*remoteWriteURLs))) len(*relabelConfigPaths), (len(*remoteWriteURLs)))
} }
var urlRelabelCfgs []any
rcs.perURL = make([]*promrelabel.ParsedConfigs, len(*remoteWriteURLs)) rcs.perURL = make([]*promrelabel.ParsedConfigs, len(*remoteWriteURLs))
for i, path := range *relabelConfigPaths { for i, path := range *relabelConfigPaths {
if len(path) == 0 { if len(path) == 0 {
urlRelabelCfgs = append(urlRelabelCfgs, nil) // Skip empty relabel config.
continue continue
} }
prc, rawCfg, err := promrelabel.LoadRelabelConfigs(path) prc, err := promrelabel.LoadRelabelConfigs(path)
if err != nil { if err != nil {
return nil, fmt.Errorf("cannot load relabel configs from -remoteWrite.urlRelabelConfig=%q: %w", path, err) return nil, fmt.Errorf("cannot load relabel configs from -remoteWrite.urlRelabelConfig=%q: %w", path, err)
} }
rcs.perURL[i] = prc rcs.perURL[i] = prc
var parsedCfg any
_ = yaml.Unmarshal(rawCfg, &parsedCfg)
urlRelabelCfgs = append(urlRelabelCfgs, parsedCfg)
} }
if len(*remoteWriteURLs) > len(*relabelConfigPaths) {
// fill the urlRelabelCfgs with empty relabel configs if not set
for i := len(*relabelConfigPaths); i < len(*remoteWriteURLs); i++ {
urlRelabelCfgs = append(urlRelabelCfgs, nil)
}
}
remoteWriteURLRelabelConfigData.Store(&urlRelabelCfgs)
return &rcs, nil return &rcs, nil
} }
@@ -176,9 +120,19 @@ type relabelConfigs struct {
perURL []*promrelabel.ParsedConfigs perURL []*promrelabel.ParsedConfigs
} }
// isSet indicates whether (global or per-URL) command-line flags is set
func (rcs *relabelConfigs) isSet() bool { func (rcs *relabelConfigs) isSet() bool {
return *relabelConfigPathGlobal != "" || len(*relabelConfigPaths) > 0 if rcs == nil {
return false
}
if rcs.global.Len() > 0 {
return true
}
for _, pc := range rcs.perURL {
if pc.Len() > 0 {
return true
}
}
return false
} }
// initLabelsGlobal must be called after parsing command-line flags. // initLabelsGlobal must be called after parsing command-line flags.

View File

@@ -27,7 +27,6 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel" "github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutil" "github.com/VictoriaMetrics/VictoriaMetrics/lib/promutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/ratelimiter" "github.com/VictoriaMetrics/VictoriaMetrics/lib/ratelimiter"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/slicesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/streamaggr" "github.com/VictoriaMetrics/VictoriaMetrics/lib/streamaggr"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timeserieslimits" "github.com/VictoriaMetrics/VictoriaMetrics/lib/timeserieslimits"
"github.com/VictoriaMetrics/metrics" "github.com/VictoriaMetrics/metrics"
@@ -59,7 +58,7 @@ var (
"See also -remoteWrite.maxDiskUsagePerURL and -remoteWrite.disableOnDiskQueue") "See also -remoteWrite.maxDiskUsagePerURL and -remoteWrite.disableOnDiskQueue")
keepDanglingQueues = flag.Bool("remoteWrite.keepDanglingQueues", false, "Keep persistent queues contents at -remoteWrite.tmpDataPath in case there are no matching -remoteWrite.url. "+ keepDanglingQueues = flag.Bool("remoteWrite.keepDanglingQueues", false, "Keep persistent queues contents at -remoteWrite.tmpDataPath in case there are no matching -remoteWrite.url. "+
"Useful when -remoteWrite.url is changed temporarily and persistent queue files will be needed later on.") "Useful when -remoteWrite.url is changed temporarily and persistent queue files will be needed later on.")
queues = flagutil.NewArrayInt("remoteWrite.queues", cgroup.AvailableCPUs()*2, "The number of concurrent queues to each -remoteWrite.url. Set more queues if default number of queues "+ queues = flag.Int("remoteWrite.queues", cgroup.AvailableCPUs()*2, "The number of concurrent queues to each -remoteWrite.url. Set more queues if default number of queues "+
"isn't enough for sending high volume of collected data to remote storage. "+ "isn't enough for sending high volume of collected data to remote storage. "+
"Default value depends on the number of available CPU cores. It should work fine in most cases since it minimizes resource usage") "Default value depends on the number of available CPU cores. It should work fine in most cases since it minimizes resource usage")
showRemoteWriteURL = flag.Bool("remoteWrite.showURL", false, "Whether to show -remoteWrite.url in the exported metrics. "+ showRemoteWriteURL = flag.Bool("remoteWrite.showURL", false, "Whether to show -remoteWrite.url in the exported metrics. "+
@@ -176,6 +175,13 @@ func Init() {
}) })
} }
if *queues > maxQueues {
*queues = maxQueues
}
if *queues <= 0 {
*queues = 1
}
if len(*shardByURLLabels) > 0 && len(*shardByURLIgnoreLabels) > 0 { if len(*shardByURLLabels) > 0 && len(*shardByURLIgnoreLabels) > 0 {
logger.Fatalf("-remoteWrite.shardByURL.labels and -remoteWrite.shardByURL.ignoreLabels cannot be set simultaneously; " + logger.Fatalf("-remoteWrite.shardByURL.labels and -remoteWrite.shardByURL.ignoreLabels cannot be set simultaneously; " +
"see https://docs.victoriametrics.com/victoriametrics/vmagent/#sharding-among-remote-storages") "see https://docs.victoriametrics.com/victoriametrics/vmagent/#sharding-among-remote-storages")
@@ -208,7 +214,9 @@ func Init() {
dropDanglingQueues() dropDanglingQueues()
// Start config reloader. // Start config reloader.
configReloaderWG.Go(func() { configReloaderWG.Add(1)
go func() {
defer configReloaderWG.Done()
for { for {
select { select {
case <-configReloaderStopCh: case <-configReloaderStopCh:
@@ -218,7 +226,7 @@ func Init() {
reloadRelabelConfigs() reloadRelabelConfigs()
reloadStreamAggrConfigs() reloadStreamAggrConfigs()
} }
}) }()
} }
func dropDanglingQueues() { func dropDanglingQueues() {
@@ -258,6 +266,17 @@ func initRemoteWriteCtxs(urls []string) {
if len(urls) == 0 { if len(urls) == 0 {
logger.Panicf("BUG: urls must be non-empty") logger.Panicf("BUG: urls must be non-empty")
} }
maxInmemoryBlocks := memory.Allowed() / len(urls) / *maxRowsPerBlock / 100
if maxInmemoryBlocks / *queues > 100 {
// There is no much sense in keeping higher number of blocks in memory,
// since this means that the producer outperforms consumer and the queue
// will continue growing. It is better storing the queue to file.
maxInmemoryBlocks = 100 * *queues
}
if maxInmemoryBlocks < 2 {
maxInmemoryBlocks = 2
}
rwctxs := make([]*remoteWriteCtx, len(urls)) rwctxs := make([]*remoteWriteCtx, len(urls))
rwctxIdx := make([]int, len(urls)) rwctxIdx := make([]int, len(urls))
if retryMaxTime.String() != "" { if retryMaxTime.String() != "" {
@@ -272,7 +291,7 @@ func initRemoteWriteCtxs(urls []string) {
if *showRemoteWriteURL { if *showRemoteWriteURL {
sanitizedURL = fmt.Sprintf("%d:%s", i+1, remoteWriteURL) sanitizedURL = fmt.Sprintf("%d:%s", i+1, remoteWriteURL)
} }
rwctxs[i] = newRemoteWriteCtx(i, remoteWriteURL, sanitizedURL) rwctxs[i] = newRemoteWriteCtx(i, remoteWriteURL, maxInmemoryBlocks, sanitizedURL)
rwctxIdx[i] = i rwctxIdx[i] = i
} }
@@ -466,9 +485,6 @@ func tryPush(at *auth.Token, wr *prompb.WriteRequest, forceDropSamplesOnFailure
matchIdxs.B = sas.Push(tssBlock, matchIdxs.B) matchIdxs.B = sas.Push(tssBlock, matchIdxs.B)
if !*streamAggrGlobalKeepInput { if !*streamAggrGlobalKeepInput {
tssBlock = dropAggregatedSeries(tssBlock, matchIdxs.B, *streamAggrGlobalDropInput) tssBlock = dropAggregatedSeries(tssBlock, matchIdxs.B, *streamAggrGlobalDropInput)
} else if *streamAggrGlobalDropInput {
// if both keep_input and drop_input are true, we keep only the aggregated series
tssBlock = dropUnaggregatedSeries(tssBlock, matchIdxs.B)
} }
matchIdxsPool.Put(matchIdxs) matchIdxsPool.Put(matchIdxs)
} }
@@ -538,9 +554,11 @@ func tryPushMetadataToRemoteStorages(rwctxs []*remoteWriteCtx, mms []prompb.Metr
// Push metadata to remote storage systems in parallel to reduce // Push metadata to remote storage systems in parallel to reduce
// the time needed for sending the data to multiple remote storage systems. // the time needed for sending the data to multiple remote storage systems.
var wg sync.WaitGroup var wg sync.WaitGroup
wg.Add(len(rwctxs))
var anyPushFailed atomic.Bool var anyPushFailed atomic.Bool
for _, rwctx := range rwctxs { for _, rwctx := range rwctxs {
wg.Go(func() { go func(rwctx *remoteWriteCtx) {
defer wg.Done()
if !rwctx.tryPushMetadataInternal(mms) { if !rwctx.tryPushMetadataInternal(mms) {
rwctx.pushFailures.Inc() rwctx.pushFailures.Inc()
if forceDropSamplesOnFailure { if forceDropSamplesOnFailure {
@@ -549,7 +567,7 @@ func tryPushMetadataToRemoteStorages(rwctxs []*remoteWriteCtx, mms []prompb.Metr
} }
anyPushFailed.Store(true) anyPushFailed.Store(true)
} }
}) }(rwctx)
} }
wg.Wait() wg.Wait()
return !anyPushFailed.Load() return !anyPushFailed.Load()
@@ -581,13 +599,15 @@ func tryPushTimeSeriesToRemoteStorages(rwctxs []*remoteWriteCtx, tssBlock []prom
// Push tssBlock to remote storage systems in parallel to reduce // Push tssBlock to remote storage systems in parallel to reduce
// the time needed for sending the data to multiple remote storage systems. // the time needed for sending the data to multiple remote storage systems.
var wg sync.WaitGroup var wg sync.WaitGroup
wg.Add(len(rwctxs))
var anyPushFailed atomic.Bool var anyPushFailed atomic.Bool
for _, rwctx := range rwctxs { for _, rwctx := range rwctxs {
wg.Go(func() { go func(rwctx *remoteWriteCtx) {
defer wg.Done()
if !rwctx.TryPushTimeSeries(tssBlock, forceDropSamplesOnFailure) { if !rwctx.TryPushTimeSeries(tssBlock, forceDropSamplesOnFailure) {
anyPushFailed.Store(true) anyPushFailed.Store(true)
} }
}) }(rwctx)
} }
wg.Wait() wg.Wait()
return !anyPushFailed.Load() return !anyPushFailed.Load()
@@ -609,11 +629,13 @@ func tryShardingTimeSeriesAmongRemoteStorages(rwctxs []*remoteWriteCtx, tssBlock
if len(shard) == 0 { if len(shard) == 0 {
continue continue
} }
wg.Go(func() { wg.Add(1)
if !rwctx.TryPushTimeSeries(shard, forceDropSamplesOnFailure) { go func(rwctx *remoteWriteCtx, tss []prompb.TimeSeries) {
defer wg.Done()
if !rwctx.TryPushTimeSeries(tss, forceDropSamplesOnFailure) {
anyPushFailed.Store(true) anyPushFailed.Store(true)
} }
}) }(rwctx, shard)
} }
wg.Wait() wg.Wait()
return !anyPushFailed.Load() return !anyPushFailed.Load()
@@ -822,7 +844,7 @@ type remoteWriteCtx struct {
rowsDroppedOnPushFailure *metrics.Counter rowsDroppedOnPushFailure *metrics.Counter
} }
func newRemoteWriteCtx(argIdx int, remoteWriteURL *url.URL, sanitizedURL string) *remoteWriteCtx { func newRemoteWriteCtx(argIdx int, remoteWriteURL *url.URL, maxInmemoryBlocks int, sanitizedURL string) *remoteWriteCtx {
// strip query params, otherwise changing params resets pq // strip query params, otherwise changing params resets pq
pqURL := *remoteWriteURL pqURL := *remoteWriteURL
pqURL.RawQuery = "" pqURL.RawQuery = ""
@@ -837,23 +859,6 @@ func newRemoteWriteCtx(argIdx int, remoteWriteURL *url.URL, sanitizedURL string)
} }
isPQDisabled := disableOnDiskQueue.GetOptionalArg(argIdx) isPQDisabled := disableOnDiskQueue.GetOptionalArg(argIdx)
queuesSize := queues.GetOptionalArg(argIdx)
if queuesSize > maxQueues {
queuesSize = maxQueues
} else if queuesSize <= 0 {
queuesSize = 1
}
maxInmemoryBlocks := memory.Allowed() / len(*remoteWriteURLs) / *maxRowsPerBlock / 100
if maxInmemoryBlocks/queuesSize > 100 {
// There is no much sense in keeping higher number of blocks in memory,
// since this means that the producer outperforms consumer and the queue
// will continue growing. It is better storing the queue to file.
maxInmemoryBlocks = 100 * queuesSize
}
if maxInmemoryBlocks < 2 {
maxInmemoryBlocks = 2
}
fq := persistentqueue.MustOpenFastQueue(queuePath, sanitizedURL, maxInmemoryBlocks, maxPendingBytes, isPQDisabled) fq := persistentqueue.MustOpenFastQueue(queuePath, sanitizedURL, maxInmemoryBlocks, maxPendingBytes, isPQDisabled)
_ = metrics.GetOrCreateGauge(fmt.Sprintf(`vmagent_remotewrite_pending_data_bytes{path=%q, url=%q}`, queuePath, sanitizedURL), func() float64 { _ = metrics.GetOrCreateGauge(fmt.Sprintf(`vmagent_remotewrite_pending_data_bytes{path=%q, url=%q}`, queuePath, sanitizedURL), func() float64 {
return float64(fq.GetPendingBytes()) return float64(fq.GetPendingBytes())
@@ -871,16 +876,16 @@ func newRemoteWriteCtx(argIdx int, remoteWriteURL *url.URL, sanitizedURL string)
var c *client var c *client
switch remoteWriteURL.Scheme { switch remoteWriteURL.Scheme {
case "http", "https": case "http", "https":
c = newHTTPClient(argIdx, remoteWriteURL.String(), sanitizedURL, fq, queuesSize) c = newHTTPClient(argIdx, remoteWriteURL.String(), sanitizedURL, fq, *queues)
default: default:
logger.Fatalf("unsupported scheme: %s for remoteWriteURL: %s, want `http`, `https`", remoteWriteURL.Scheme, sanitizedURL) logger.Fatalf("unsupported scheme: %s for remoteWriteURL: %s, want `http`, `https`", remoteWriteURL.Scheme, sanitizedURL)
} }
c.init(argIdx, queuesSize, sanitizedURL) c.init(argIdx, *queues, sanitizedURL)
// Initialize pss // Initialize pss
sf := significantFigures.GetOptionalArg(argIdx) sf := significantFigures.GetOptionalArg(argIdx)
rd := roundDigits.GetOptionalArg(argIdx) rd := roundDigits.GetOptionalArg(argIdx)
pssLen := queuesSize pssLen := *queues
if n := cgroup.AvailableCPUs(); pssLen > n { if n := cgroup.AvailableCPUs(); pssLen > n {
// There is no sense in running more than availableCPUs concurrent pendingSeries, // There is no sense in running more than availableCPUs concurrent pendingSeries,
// since every pendingSeries can saturate up to a single CPU. // since every pendingSeries can saturate up to a single CPU.
@@ -983,17 +988,7 @@ func (rwctx *remoteWriteCtx) TryPushTimeSeries(tss []prompb.TimeSeries, forceDro
tss = append(*v, tss...) tss = append(*v, tss...)
} }
tss = dropAggregatedSeries(tss, matchIdxs.B, rwctx.streamAggrDropInput) tss = dropAggregatedSeries(tss, matchIdxs.B, rwctx.streamAggrDropInput)
} else if rwctx.streamAggrDropInput {
// if both keep_input and drop_input are true, we keep only the aggregated series
if rctx == nil {
rctx = getRelabelCtx()
// Make a copy of tss before dropping aggregated series
v = tssPool.Get().(*[]prompb.TimeSeries)
tss = append(*v, tss...)
}
tss = dropUnaggregatedSeries(tss, matchIdxs.B)
} }
matchIdxsPool.Put(matchIdxs) matchIdxsPool.Put(matchIdxs)
} }
if rwctx.deduplicator != nil { if rwctx.deduplicator != nil {
@@ -1016,10 +1011,9 @@ func (rwctx *remoteWriteCtx) TryPushTimeSeries(tss []prompb.TimeSeries, forceDro
return false return false
} }
var matchIdxsPool slicesutil.BufferPool[uint32] var matchIdxsPool bytesutil.ByteBufferPool
// dropAggregatedSeries drops matched series, also the unmatched if dropInput is true. func dropAggregatedSeries(src []prompb.TimeSeries, matchIdxs []byte, dropInput bool) []prompb.TimeSeries {
func dropAggregatedSeries(src []prompb.TimeSeries, matchIdxs []uint32, dropInput bool) []prompb.TimeSeries {
dst := src[:0] dst := src[:0]
if !dropInput { if !dropInput {
for i, match := range matchIdxs { for i, match := range matchIdxs {
@@ -1034,20 +1028,6 @@ func dropAggregatedSeries(src []prompb.TimeSeries, matchIdxs []uint32, dropInput
return dst return dst
} }
// dropUnaggregatedSeries compacts src in place, keeping only the series whose
// corresponding matchIdxs entry is non-zero (i.e. the matched series).
//
// The returned slice shares the backing array with src. The entries of src
// located past the returned length are zeroed, so they do not keep references
// to the dropped series.
func dropUnaggregatedSeries(src []prompb.TimeSeries, matchIdxs []uint32) []prompb.TimeSeries {
	kept := 0
	for idx := range matchIdxs {
		if matchIdxs[idx] != 0 {
			// kept never exceeds idx, so the write never clobbers an unread entry.
			src[kept] = src[idx]
			kept++
		}
	}
	clear(src[kept:])
	return src[:kept]
}
func (rwctx *remoteWriteCtx) pushInternalTrackDropped(tss []prompb.TimeSeries) { func (rwctx *remoteWriteCtx) pushInternalTrackDropped(tss []prompb.TimeSeries) {
if rwctx.tryPushTimeSeriesInternal(tss) { if rwctx.tryPushTimeSeriesInternal(tss) {
return return
@@ -1080,7 +1060,7 @@ func (rwctx *remoteWriteCtx) tryPushTimeSeriesInternal(tss []prompb.TimeSeries)
}() }()
if len(labelsGlobal) > 0 { if len(labelsGlobal) > 0 {
// Make a copy of tss before adding extra labels to prevent // Make a copy of tss before adding extra labels in order to prevent
// from affecting time series for other remoteWrite.url configs. // from affecting time series for other remoteWrite.url configs.
rctx = getRelabelCtx() rctx = getRelabelCtx()
v = tssPool.Get().(*[]prompb.TimeSeries) v = tssPool.Get().(*[]prompb.TimeSeries)

View File

@@ -10,8 +10,6 @@ import (
"time" "time"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/consistenthash" "github.com/VictoriaMetrics/VictoriaMetrics/lib/consistenthash"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/persistentqueue"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb" "github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel" "github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/prometheus" "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/prometheus"
@@ -28,12 +26,12 @@ func TestGetLabelsHash_Distribution(t *testing.T) {
itemsCount := 1_000 * bucketsCount itemsCount := 1_000 * bucketsCount
m := make([]int, bucketsCount) m := make([]int, bucketsCount)
var labels []prompb.Label var labels []prompb.Label
for i := range itemsCount { for i := 0; i < itemsCount; i++ {
labels = append(labels[:0], prompb.Label{ labels = append(labels[:0], prompb.Label{
Name: "__name__", Name: "__name__",
Value: fmt.Sprintf("some_name_%d", i), Value: fmt.Sprintf("some_name_%d", i),
}) })
for j := range 10 { for j := 0; j < 10; j++ {
labels = append(labels, prompb.Label{ labels = append(labels, prompb.Label{
Name: fmt.Sprintf("label_%d", j), Name: fmt.Sprintf("label_%d", j),
Value: fmt.Sprintf("value_%d_%d", i, j), Value: fmt.Sprintf("value_%d_%d", i, j),
@@ -59,8 +57,8 @@ func TestGetLabelsHash_Distribution(t *testing.T) {
f(10) f(10)
} }
func TestRemoteWriteContext_TryPushTimeSeries(t *testing.T) { func TestRemoteWriteContext_TryPush_ImmutableTimeseries(t *testing.T) {
f := func(streamAggrConfig, relabelConfig string, enableWindows bool, dedupInterval time.Duration, keepInput, dropInput bool, input string, expectedRowsPushedAfterRelabel, expectedPushedSample int) { f := func(streamAggrConfig, relabelConfig string, enableWindows bool, dedupInterval time.Duration, keepInput, dropInput bool, input string) {
t.Helper() t.Helper()
perURLRelabel, err := promrelabel.ParseRelabelConfigsData([]byte(relabelConfig)) perURLRelabel, err := promrelabel.ParseRelabelConfigsData([]byte(relabelConfig))
if err != nil { if err != nil {
@@ -73,16 +71,10 @@ func TestRemoteWriteContext_TryPushTimeSeries(t *testing.T) {
} }
allRelabelConfigs.Store(rcs) allRelabelConfigs.Store(rcs)
path := "fast-queue-write-test"
fs.MustRemoveDir(path)
fq := persistentqueue.MustOpenFastQueue(path, "test", 100, 0, false)
defer fs.MustRemoveDir(path)
defer fq.MustClose()
pss := make([]*pendingSeries, 1) pss := make([]*pendingSeries, 1)
isVMProto := &atomic.Bool{} isVMProto := &atomic.Bool{}
isVMProto.Store(true) isVMProto.Store(true)
pss[0] = newPendingSeries(fq, isVMProto, 0, 100) pss[0] = newPendingSeries(nil, isVMProto, 0, 100)
rwctx := &remoteWriteCtx{ rwctx := &remoteWriteCtx{
idx: 0, idx: 0,
streamAggrKeepInput: keepInput, streamAggrKeepInput: keepInput,
@@ -91,8 +83,6 @@ func TestRemoteWriteContext_TryPushTimeSeries(t *testing.T) {
rowsPushedAfterRelabel: metrics.GetOrCreateCounter(`foo`), rowsPushedAfterRelabel: metrics.GetOrCreateCounter(`foo`),
rowsDroppedByRelabel: metrics.GetOrCreateCounter(`bar`), rowsDroppedByRelabel: metrics.GetOrCreateCounter(`bar`),
} }
defer metrics.UnregisterAllMetrics()
if dedupInterval > 0 { if dedupInterval > 0 {
rwctx.deduplicator = streamaggr.NewDeduplicator(nil, enableWindows, dedupInterval, nil, "dedup-global") rwctx.deduplicator = streamaggr.NewDeduplicator(nil, enableWindows, dedupInterval, nil, "dedup-global")
} }
@@ -114,27 +104,23 @@ func TestRemoteWriteContext_TryPushTimeSeries(t *testing.T) {
inputTss := prometheus.MustParsePromMetrics(input, offsetMsecs) inputTss := prometheus.MustParsePromMetrics(input, offsetMsecs)
expectedTss := make([]prompb.TimeSeries, len(inputTss)) expectedTss := make([]prompb.TimeSeries, len(inputTss))
// check inputTss is not modified after TryPushTimeSeries // copy inputTss to make sure it is not mutated during TryPush call
copy(expectedTss, inputTss) copy(expectedTss, inputTss)
if !rwctx.TryPushTimeSeries(inputTss, false) { if !rwctx.TryPushTimeSeries(inputTss, false) {
t.Fatalf("cannot push samples to rwctx") t.Fatalf("cannot push samples to rwctx")
} }
if int(rwctx.rowsPushedAfterRelabel.Get()) != expectedRowsPushedAfterRelabel {
t.Fatalf("unexpected number of rows after relabel; got %d; want %d", rwctx.rowsPushedAfterRelabel.Get(), expectedRowsPushedAfterRelabel)
}
if len(pss[0].wr.tss) != expectedPushedSample {
t.Fatalf("unexpected number of pushed samples; got %d; want %d", len(pss[0].wr.tss), expectedPushedSample)
}
if !reflect.DeepEqual(expectedTss, inputTss) { if !reflect.DeepEqual(expectedTss, inputTss) {
t.Fatalf("unexpected samples;\ngot\n%v\nwant\n%v", inputTss, expectedTss) t.Fatalf("unexpected samples;\ngot\n%v\nwant\n%v", inputTss, expectedTss)
} }
} }
// relabeling f(`
f(``, ` - interval: 1m
outputs: [sum_samples]
- interval: 2m
outputs: [count_series]
`, `
- action: keep - action: keep
source_labels: [env] source_labels: [env]
regex: "dev" regex: "dev"
@@ -143,66 +129,53 @@ metric{env="dev"} 10
metric{env="bar"} 20 metric{env="bar"} 20
metric{env="dev"} 15 metric{env="dev"} 15
metric{env="bar"} 25 metric{env="bar"} 25
`, 2, 2) `)
// relabeling + aggregation
f(`
- match: '{env="dev"}'
interval: 1m
outputs: [sum_samples]
`, `
- action: keep
source_labels: [env]
regex: ".*"
`, false, 0, false, false, `
metric{env="dev"} 10
metric{env="bar"} 20
metric{env="dev"} 15
metric{env="bar"} 25
`, 4, 2)
// aggregation + keepInput
f(`
- match: '{env="dev"}'
interval: 1m
outputs: [sum_samples]
`, ``, false, 0, true, false, `
metric{env="dev"} 10
metric{env="bar"} 20
metric{env="dev"} 15
metric{env="bar"} 25
`, 4, 4)
// aggregation + dropInput
f(`
- match: '{env="dev"}'
interval: 1m
outputs: [sum_samples]
`, ``, false, 0, false, true, `
metric{env="dev"} 10
metric{env="bar"} 20
metric{env="dev"} 15
metric{env="bar"} 25
`, 4, 0)
// aggregation + keepInput + dropInput
f(`
- match: '{env="dev"}'
interval: 1m
outputs: [sum_samples]
`, ``, false, 0, true, true, `
metric{env="dev"} 10
metric{env="bar"} 20
metric{env="bar"} 25
`, 3, 1)
// aggregation + deduplication
f(``, ``, true, time.Hour, false, false, ` f(``, ``, true, time.Hour, false, false, `
metric{env="dev"} 10 metric{env="dev"} 10
metric{env="foo"} 20 metric{env="foo"} 20
metric{env="dev"} 15 metric{env="dev"} 15
metric{env="foo"} 25 metric{env="foo"} 25
`, 4, 0) `)
f(``, `
- action: keep
source_labels: [env]
regex: "dev"
`, true, time.Hour, false, false, `
metric{env="dev"} 10
metric{env="bar"} 20
metric{env="dev"} 15
metric{env="bar"} 25
`)
f(``, `
- action: keep
source_labels: [env]
regex: "dev"
`, true, time.Hour, true, false, `
metric{env="test"} 10
metric{env="dev"} 20
metric{env="foo"} 15
metric{env="dev"} 25
`)
f(``, `
- action: keep
source_labels: [env]
regex: "dev"
`, true, time.Hour, false, true, `
metric{env="foo"} 10
metric{env="dev"} 20
metric{env="foo"} 15
metric{env="dev"} 25
`)
f(``, `
- action: keep
source_labels: [env]
regex: "dev"
`, true, time.Hour, true, true, `
metric{env="dev"} 10
metric{env="test"} 20
metric{env="dev"} 15
metric{env="bar"} 25
`)
} }
func TestShardAmountRemoteWriteCtx(t *testing.T) { func TestShardAmountRemoteWriteCtx(t *testing.T) {
@@ -248,7 +221,7 @@ func TestShardAmountRemoteWriteCtx(t *testing.T) {
seriesCount := 100000 seriesCount := 100000
// build 1000000 series // build 1000000 series
tssBlock := make([]prompb.TimeSeries, 0, seriesCount) tssBlock := make([]prompb.TimeSeries, 0, seriesCount)
for i := range seriesCount { for i := 0; i < seriesCount; i++ {
tssBlock = append(tssBlock, prompb.TimeSeries{ tssBlock = append(tssBlock, prompb.TimeSeries{
Labels: []prompb.Label{ Labels: []prompb.Label{
{ {
@@ -269,7 +242,7 @@ func TestShardAmountRemoteWriteCtx(t *testing.T) {
// build active time series set // build active time series set
nodes := make([]string, 0, remoteWriteCount) nodes := make([]string, 0, remoteWriteCount)
activeTimeSeriesByNodes := make([]map[string]struct{}, remoteWriteCount) activeTimeSeriesByNodes := make([]map[string]struct{}, remoteWriteCount)
for i := range remoteWriteCount { for i := 0; i < remoteWriteCount; i++ {
nodes = append(nodes, fmt.Sprintf("node%d", i)) nodes = append(nodes, fmt.Sprintf("node%d", i))
activeTimeSeriesByNodes[i] = make(map[string]struct{}) activeTimeSeriesByNodes[i] = make(map[string]struct{})
} }

View File

@@ -18,12 +18,12 @@ var (
streamAggrGlobalConfig = flag.String("streamAggr.config", "", "Optional path to file with stream aggregation config. "+ streamAggrGlobalConfig = flag.String("streamAggr.config", "", "Optional path to file with stream aggregation config. "+
"See https://docs.victoriametrics.com/victoriametrics/stream-aggregation/ . "+ "See https://docs.victoriametrics.com/victoriametrics/stream-aggregation/ . "+
"See also -streamAggr.keepInput, -streamAggr.dropInput and -streamAggr.dedupInterval") "See also -streamAggr.keepInput, -streamAggr.dropInput and -streamAggr.dedupInterval")
streamAggrGlobalKeepInput = flag.Bool("streamAggr.keepInput", false, "Whether to keep input samples that match any rule in "+ streamAggrGlobalKeepInput = flag.Bool("streamAggr.keepInput", false, "Whether to keep all the input samples after the aggregation "+
"-streamAggr.config. By default, matched raw samples are aggregated and dropped, while unmatched samples "+ "with -streamAggr.config. By default, only aggregates samples are dropped, while the remaining samples "+
"are written to the remote storage. See also -streamAggr.dropInput and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/") "are written to remote storages write. See also -streamAggr.dropInput and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/")
streamAggrGlobalDropInput = flag.Bool("streamAggr.dropInput", false, "Whether to drop input samples that not matching any rule in "+ streamAggrGlobalDropInput = flag.Bool("streamAggr.dropInput", false, "Whether to drop all the input samples after the aggregation "+
"-streamAggr.config. By default, only matched raw samples are dropped, while unmatched samples "+ "with -remoteWrite.streamAggr.config. By default, only aggregates samples are dropped, while the remaining samples "+
"are written to the remote storage. See also -streamAggr.keepInput and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/") "are written to remote storages write. See also -streamAggr.keepInput and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/")
streamAggrGlobalDedupInterval = flag.Duration("streamAggr.dedupInterval", 0, "Input samples are de-duplicated with this interval on "+ streamAggrGlobalDedupInterval = flag.Duration("streamAggr.dedupInterval", 0, "Input samples are de-duplicated with this interval on "+
"aggregator before optional aggregation with -streamAggr.config . "+ "aggregator before optional aggregation with -streamAggr.config . "+
"See also -dedup.minScrapeInterval and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/#deduplication") "See also -dedup.minScrapeInterval and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/#deduplication")
@@ -43,11 +43,11 @@ var (
streamAggrConfig = flagutil.NewArrayString("remoteWrite.streamAggr.config", "Optional path to file with stream aggregation config for the corresponding -remoteWrite.url. "+ streamAggrConfig = flagutil.NewArrayString("remoteWrite.streamAggr.config", "Optional path to file with stream aggregation config for the corresponding -remoteWrite.url. "+
"See https://docs.victoriametrics.com/victoriametrics/stream-aggregation/ . "+ "See https://docs.victoriametrics.com/victoriametrics/stream-aggregation/ . "+
"See also -remoteWrite.streamAggr.keepInput, -remoteWrite.streamAggr.dropInput and -remoteWrite.streamAggr.dedupInterval") "See also -remoteWrite.streamAggr.keepInput, -remoteWrite.streamAggr.dropInput and -remoteWrite.streamAggr.dedupInterval")
streamAggrDropInput = flagutil.NewArrayBool("remoteWrite.streamAggr.dropInput", "Whether to drop input samples that not matching any rule in "+ streamAggrDropInput = flagutil.NewArrayBool("remoteWrite.streamAggr.dropInput", "Whether to drop all the input samples after the aggregation "+
"the corresponding -remoteWrite.streamAggr.config. By default, only matched raw samples are dropped, while unmatched samples "+ "with -remoteWrite.streamAggr.config at the corresponding -remoteWrite.url. By default, only aggregates samples are dropped, while the remaining samples "+
"are written to the corresponding -remoteWrite.url . See also -remoteWrite.streamAggr.keepInput and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/") "are written to the corresponding -remoteWrite.url . See also -remoteWrite.streamAggr.keepInput and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/")
streamAggrKeepInput = flagutil.NewArrayBool("remoteWrite.streamAggr.keepInput", "Whether to keep input samples that match any rule in "+ streamAggrKeepInput = flagutil.NewArrayBool("remoteWrite.streamAggr.keepInput", "Whether to keep all the input samples after the aggregation "+
"the corresponding -remoteWrite.streamAggr.config. By default, matched raw samples are aggregated and dropped, while unmatched samples "+ "with -remoteWrite.streamAggr.config at the corresponding -remoteWrite.url. By default, only aggregates samples are dropped, while the remaining samples "+
"are written to the corresponding -remoteWrite.url . See also -remoteWrite.streamAggr.dropInput and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/") "are written to the corresponding -remoteWrite.url . See also -remoteWrite.streamAggr.dropInput and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/")
streamAggrDedupInterval = flagutil.NewArrayDuration("remoteWrite.streamAggr.dedupInterval", 0, "Input samples are de-duplicated with this interval before optional aggregation "+ streamAggrDedupInterval = flagutil.NewArrayDuration("remoteWrite.streamAggr.dedupInterval", 0, "Input samples are de-duplicated with this interval before optional aggregation "+
"with -remoteWrite.streamAggr.config at the corresponding -remoteWrite.url. See also -dedup.minScrapeInterval and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/#deduplication") "with -remoteWrite.streamAggr.config at the corresponding -remoteWrite.url. See also -dedup.minScrapeInterval and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/#deduplication")

View File

@@ -1,80 +0,0 @@
package zabbixconnector
import (
"net/http"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/protoparserutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/zabbixconnector"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/zabbixconnector/stream"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
"github.com/VictoriaMetrics/metrics"
)
var (
// rowsInserted is increased by the number of rows in every batch that insertRows successfully pushes via remotewrite.TryPush.
rowsInserted = metrics.NewCounter(`vmagent_rows_inserted_total{type="zabbixconnector"}`)
// rowsTenantInserted is increased by the same number of rows for the request auth token; it is updated only when the token is non-nil.
rowsTenantInserted = tenantmetrics.NewCounterMap(`vmagent_tenant_inserted_rows_total{type="zabbixconnector"}`)
// rowsPerInsert is updated with the number of rows in every successfully pushed batch.
rowsPerInsert = metrics.NewHistogram(`vmagent_rows_per_insert{type="zabbixconnector"}`)
)
// InsertHandlerForHTTP handles ZabbixConnector POST /zabbixconnector/v1/history requests.
//
// Extra labels are extracted from req and are added to every row parsed from
// the request body before the rows are pushed to remote write.
// The body is decoded according to the Content-Encoding request header.
// An error is returned if the extra labels cannot be obtained or if parsing
// or pushing the rows fails.
func InsertHandlerForHTTP(at *auth.Token, req *http.Request) error {
	extraLabels, err := protoparserutil.GetExtraLabels(req)
	if err != nil {
		return err
	}
	// pushRows is invoked by the stream parser for every parsed batch of rows.
	pushRows := func(rows []zabbixconnector.Row) error {
		return insertRows(at, rows, extraLabels)
	}
	return stream.Parse(req.Body, req.Header.Get("Content-Encoding"), pushRows)
}
// insertRows converts rows into time series and pushes them to remote write.
//
// Every row becomes a single time series with one sample. Its labels consist
// of the row tags followed by extraLabels. The series, labels and samples are
// built in the buffers of a push context obtained from common.GetPushCtx,
// which is returned via common.PutPushCtx when the function exits.
//
// remotewrite.ErrQueueFullHTTPRetry is returned if remotewrite.TryPush rejects
// the data; the insert metrics are updated only after a successful push.
func insertRows(at *auth.Token, rows []zabbixconnector.Row, extraLabels []prompb.Label) error {
	ctx := common.GetPushCtx()
	defer common.PutPushCtx(ctx)

	// Reuse the buffers from ctx in order to reduce memory allocations.
	series := ctx.WriteRequest.Timeseries[:0]
	labelsBuf := ctx.Labels[:0]
	samplesBuf := ctx.Samples[:0]
	for i := range rows {
		row := &rows[i]

		// Labels for the current row occupy labelsBuf[labelsStart:].
		labelsStart := len(labelsBuf)
		for j := range row.Tags {
			labelsBuf = append(labelsBuf, prompb.Label{
				Name:  bytesutil.ToUnsafeString(row.Tags[j].Key),
				Value: bytesutil.ToUnsafeString(row.Tags[j].Value),
			})
		}
		labelsBuf = append(labelsBuf, extraLabels...)

		// The single sample for the current row occupies samplesBuf[samplesStart:].
		samplesStart := len(samplesBuf)
		samplesBuf = append(samplesBuf, prompb.Sample{
			Value:     row.Value,
			Timestamp: row.Timestamp,
		})

		series = append(series, prompb.TimeSeries{
			Labels:  labelsBuf[labelsStart:],
			Samples: samplesBuf[samplesStart:],
		})
	}
	// Store the possibly grown buffers back to ctx, so they can be reused later.
	ctx.WriteRequest.Timeseries = series
	ctx.Labels = labelsBuf
	ctx.Samples = samplesBuf

	if !remotewrite.TryPush(at, &ctx.WriteRequest) {
		return remotewrite.ErrQueueFullHTTPRetry
	}

	insertedRows := len(rows)
	rowsInserted.Add(insertedRows)
	if at != nil {
		rowsTenantInserted.Get(at).Add(insertedRows)
	}
	rowsPerInsert.Update(float64(insertedRows))
	return nil
}

View File

@@ -27,9 +27,6 @@ vmalert-tool-linux-ppc64le-prod:
vmalert-tool-linux-386-prod: vmalert-tool-linux-386-prod:
APP_NAME=vmalert-tool $(MAKE) app-via-docker-linux-386 APP_NAME=vmalert-tool $(MAKE) app-via-docker-linux-386
vmalert-tool-linux-s390x-prod:
APP_NAME=vmalert-tool $(MAKE) app-via-docker-linux-s390x
vmalert-tool-darwin-amd64-prod: vmalert-tool-darwin-amd64-prod:
APP_NAME=vmalert-tool $(MAKE) app-via-docker-darwin-amd64 APP_NAME=vmalert-tool $(MAKE) app-via-docker-darwin-amd64

View File

@@ -41,7 +41,7 @@ func TestParseInputValue_Success(t *testing.T) {
if len(outputExpected) != len(output) { if len(outputExpected) != len(output) {
t.Fatalf("unexpected output length; got %d; want %d", len(outputExpected), len(output)) t.Fatalf("unexpected output length; got %d; want %d", len(outputExpected), len(output))
} }
for i := range outputExpected { for i := 0; i < len(outputExpected); i++ {
if outputExpected[i].Omitted != output[i].Omitted { if outputExpected[i].Omitted != output[i].Omitted {
t.Fatalf("unexpected Omitted field in the output\ngot\n%v\nwant\n%v", output, outputExpected) t.Fatalf("unexpected Omitted field in the output\ngot\n%v\nwant\n%v", output, outputExpected)
} }

View File

@@ -4,7 +4,6 @@ import (
"context" "context"
"flag" "flag"
"fmt" "fmt"
"maps"
"net" "net"
"net/http" "net/http"
"net/http/httptest" "net/http/httptest"
@@ -13,7 +12,6 @@ import (
"os/signal" "os/signal"
"path/filepath" "path/filepath"
"reflect" "reflect"
"slices"
"sort" "sort"
"strings" "strings"
"syscall" "syscall"
@@ -36,7 +34,6 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs" "github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver" "github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger" "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutil" "github.com/VictoriaMetrics/VictoriaMetrics/lib/promutil"
"github.com/VictoriaMetrics/metrics" "github.com/VictoriaMetrics/metrics"
) )
@@ -87,8 +84,7 @@ func UnitTest(files []string, disableGroupLabel bool, externalLabels []string, e
defer server.Close() defer server.Close()
} else { } else {
httpListenAddr = httpListenPort httpListenAddr = httpListenPort
ln, err := net.Listen("tcp", fmt.Sprintf(":%s", httpListenPort))
ln, err := net.Listen(netutil.GetTCPNetwork(), fmt.Sprintf(":%s", httpListenPort))
if err != nil { if err != nil {
logger.Fatalf("cannot listen on port %s: %v", httpListenPort, err) logger.Fatalf("cannot listen on port %s: %v", httpListenPort, err)
} }
@@ -134,7 +130,7 @@ func UnitTest(files []string, disableGroupLabel bool, externalLabels []string, e
} }
labels[s[:n]] = s[n+1:] labels[s[:n]] = s[n+1:]
} }
err = notifier.Init(labels, externalURL) _, err = notifier.Init(nil, labels, externalURL)
if err != nil { if err != nil {
logger.Fatalf("failed to init notifier: %v", err) logger.Fatalf("failed to init notifier: %v", err)
} }
@@ -350,7 +346,9 @@ func (tg *testGroup) test(evalInterval time.Duration, groupOrderMap map[string]i
for k := range alertEvalTimesMap { for k := range alertEvalTimesMap {
alertEvalTimes = append(alertEvalTimes, k) alertEvalTimes = append(alertEvalTimes, k)
} }
slices.Sort(alertEvalTimes) sort.Slice(alertEvalTimes, func(i, j int) bool {
return alertEvalTimes[i] < alertEvalTimes[j]
})
// sort group eval order according to the given "group_eval_order". // sort group eval order according to the given "group_eval_order".
sort.Slice(testGroups, func(i, j int) bool { sort.Slice(testGroups, func(i, j int) bool {
@@ -361,8 +359,12 @@ func (tg *testGroup) test(evalInterval time.Duration, groupOrderMap map[string]i
var groups []*rule.Group var groups []*rule.Group
for _, group := range testGroups { for _, group := range testGroups {
mergedExternalLabels := make(map[string]string) mergedExternalLabels := make(map[string]string)
maps.Copy(mergedExternalLabels, tg.ExternalLabels) for k, v := range tg.ExternalLabels {
maps.Copy(mergedExternalLabels, externalLabels) mergedExternalLabels[k] = v
}
for k, v := range externalLabels {
mergedExternalLabels[k] = v
}
ng := rule.NewGroup(group, q, time.Minute, mergedExternalLabels) ng := rule.NewGroup(group, q, time.Minute, mergedExternalLabels)
ng.Init() ng.Init()
groups = append(groups, ng) groups = append(groups, ng)
@@ -375,7 +377,7 @@ func (tg *testGroup) test(evalInterval time.Duration, groupOrderMap map[string]i
if len(g.Rules) == 0 { if len(g.Rules) == 0 {
continue continue
} }
errs := g.ExecOnce(context.Background(), rw, ts) errs := g.ExecOnce(context.Background(), func() []notifier.Notifier { return nil }, rw, ts)
for err := range errs { for err := range errs {
if err != nil { if err != nil {
checkErrs = append(checkErrs, fmt.Errorf("\nfailed to exec group: %q, time: %s, err: %w", g.Name, checkErrs = append(checkErrs, fmt.Errorf("\nfailed to exec group: %q, time: %s, err: %w", g.Name,

View File

@@ -27,9 +27,6 @@ vmalert-linux-ppc64le-prod:
vmalert-linux-386-prod: vmalert-linux-386-prod:
APP_NAME=vmalert $(MAKE) app-via-docker-linux-386 APP_NAME=vmalert $(MAKE) app-via-docker-linux-386
vmalert-linux-s390x-prod:
APP_NAME=vmalert $(MAKE) app-via-docker-linux-s390x
vmalert-darwin-amd64-prod: vmalert-darwin-amd64-prod:
APP_NAME=vmalert $(MAKE) app-via-docker-darwin-amd64 APP_NAME=vmalert $(MAKE) app-via-docker-darwin-amd64

View File

@@ -31,7 +31,7 @@ type Group struct {
// EvalDelay will adjust the `time` parameter of rule evaluation requests to compensate intentional query delay from datasource. // EvalDelay will adjust the `time` parameter of rule evaluation requests to compensate intentional query delay from datasource.
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5155 // see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5155
EvalDelay *promutil.Duration `yaml:"eval_delay,omitempty"` EvalDelay *promutil.Duration `yaml:"eval_delay,omitempty"`
Limit *int `yaml:"limit,omitempty"` Limit int `yaml:"limit,omitempty"`
Rules []Rule `yaml:"rules"` Rules []Rule `yaml:"rules"`
Concurrency int `yaml:"concurrency"` Concurrency int `yaml:"concurrency"`
// Labels is a set of label value pairs, that will be added to every rule. // Labels is a set of label value pairs, that will be added to every rule.
@@ -91,8 +91,8 @@ func (g *Group) Validate(validateTplFn ValidateTplFn, validateExpressions bool)
if g.EvalOffset != nil && g.EvalDelay != nil { if g.EvalOffset != nil && g.EvalDelay != nil {
return fmt.Errorf("eval_offset cannot be used with eval_delay") return fmt.Errorf("eval_offset cannot be used with eval_delay")
} }
if g.Limit != nil && *g.Limit < 0 { if g.Limit < 0 {
return fmt.Errorf("invalid limit %d, shouldn't be less than 0", *g.Limit) return fmt.Errorf("invalid limit %d, shouldn't be less than 0", g.Limit)
} }
if g.Concurrency < 0 { if g.Concurrency < 0 {
return fmt.Errorf("invalid concurrency %d, shouldn't be less than 0", g.Concurrency) return fmt.Errorf("invalid concurrency %d, shouldn't be less than 0", g.Concurrency)

View File

@@ -116,7 +116,7 @@ func TestParse_Failure(t *testing.T) {
f([]string{"testdata/rules/rules_interval_bad.rules"}, "eval_offset should be smaller than interval") f([]string{"testdata/rules/rules_interval_bad.rules"}, "eval_offset should be smaller than interval")
f([]string{"testdata/rules/rules0-bad.rules"}, "unexpected token") f([]string{"testdata/rules/rules0-bad.rules"}, "unexpected token")
f([]string{"testdata/dir/rules0-bad.rules"}, "invalid annotations") f([]string{"testdata/dir/rules0-bad.rules"}, "error parsing annotation")
f([]string{"testdata/dir/rules1-bad.rules"}, "duplicate in file") f([]string{"testdata/dir/rules1-bad.rules"}, "duplicate in file")
f([]string{"testdata/dir/rules2-bad.rules"}, "function \"unknown\" not defined") f([]string{"testdata/dir/rules2-bad.rules"}, "function \"unknown\" not defined")
f([]string{"testdata/dir/rules3-bad.rules"}, "either `record` or `alert` must be set") f([]string{"testdata/dir/rules3-bad.rules"}, "either `record` or `alert` must be set")
@@ -181,10 +181,9 @@ func TestGroupValidate_Failure(t *testing.T) {
EvalOffset: promutil.NewDuration(2 * time.Minute), EvalOffset: promutil.NewDuration(2 * time.Minute),
}, false, "eval_offset should be smaller than interval") }, false, "eval_offset should be smaller than interval")
limit := -1
f(&Group{ f(&Group{
Name: "wrong limit", Name: "wrong limit",
Limit: &limit, Limit: -1,
}, false, "invalid limit") }, false, "invalid limit")
f(&Group{ f(&Group{
@@ -343,6 +342,7 @@ func TestGroupValidate_Failure(t *testing.T) {
}, },
}, },
}, true, "bad prometheus expr") }, true, "bad prometheus expr")
} }
func TestGroupValidate_Success(t *testing.T) { func TestGroupValidate_Success(t *testing.T) {

View File

@@ -2,7 +2,6 @@ package config
import ( import (
"fmt" "fmt"
"slices"
"strings" "strings"
"github.com/VictoriaMetrics/VictoriaLogs/lib/logstorage" "github.com/VictoriaMetrics/VictoriaLogs/lib/logstorage"
@@ -77,12 +76,13 @@ func (t *Type) ValidateExpr(expr string) error {
if err != nil { if err != nil {
return fmt.Errorf("bad LogsQL expr: %q, err: %w", expr, err) return fmt.Errorf("bad LogsQL expr: %q, err: %w", expr, err)
} }
labels, err := q.GetStatsLabels() fields, _ := q.GetStatsByFields()
if err != nil { for i := range fields {
return fmt.Errorf("cannot obtain labels from LogsQL expr: %q, err: %w", expr, err) // VictoriaLogs inserts `_time` field as a label in result when query with `stats by (_time:step)`,
} // making the result meaningless and may lead to cardinality issues.
if slices.Contains(labels, "_time") { if fields[i] == "_time" {
return fmt.Errorf("bad LogsQL expr: %q, err: cannot contain time buckets stats pipe `stats by (_time:step)`", expr) return fmt.Errorf("bad LogsQL expr: %q, err: cannot contain time buckets stats pipe `stats by (_time:step)`", expr)
}
} }
default: default:
return fmt.Errorf("unknown datasource type=%q", t.Name) return fmt.Errorf("unknown datasource type=%q", t.Name)

View File

@@ -5,7 +5,6 @@ import (
"errors" "errors"
"fmt" "fmt"
"io" "io"
"maps"
"net/http" "net/http"
"net/url" "net/url"
"strings" "strings"
@@ -92,7 +91,9 @@ func (c *Client) Clone() *Client {
ns.extraHeaders = make([]keyValue, len(c.extraHeaders)) ns.extraHeaders = make([]keyValue, len(c.extraHeaders))
copy(ns.extraHeaders, c.extraHeaders) copy(ns.extraHeaders, c.extraHeaders)
} }
maps.Copy(ns.extraParams, c.extraParams) for k, v := range c.extraParams {
ns.extraParams[k] = v
}
return ns return ns
} }
@@ -172,26 +173,22 @@ func (c *Client) Query(ctx context.Context, query string, ts time.Time) (Result,
return Result{}, nil, fmt.Errorf("second attempt: %w", err) return Result{}, nil, fmt.Errorf("second attempt: %w", err)
} }
} }
defer func() { _ = resp.Body.Close() }()
// Process the received response. // Process the received response.
var parseFn func(resp *http.Response) (Result, error) var parseFn func(req *http.Request, resp *http.Response) (Result, error)
switch c.dataSourceType { switch c.dataSourceType {
case datasourcePrometheus: case datasourcePrometheus:
parseFn = parsePrometheusInstantResponse parseFn = parsePrometheusResponse
case datasourceGraphite: case datasourceGraphite:
parseFn = parseGraphiteResponse parseFn = parseGraphiteResponse
case datasourceVLogs: case datasourceVLogs:
parseFn = parseVLogsInstantResponse parseFn = parseVLogsResponse
default: default:
logger.Panicf("BUG: unsupported datasource type %q to parse query response", c.dataSourceType) logger.Panicf("BUG: unsupported datasource type %q to parse query response", c.dataSourceType)
} }
result, err := parseFn(req, resp)
result, err := parseFn(resp) _ = resp.Body.Close()
if err != nil { return result, req, err
return Result{}, nil, fmt.Errorf("error parsing response from %q: %w", req.URL.Redacted(), err)
}
return result, req, nil
} }
// QueryRange executes the given query on the given time range. // QueryRange executes the given query on the given time range.
@@ -232,23 +229,19 @@ func (c *Client) QueryRange(ctx context.Context, query string, start, end time.T
return res, fmt.Errorf("second attempt: %w", err) return res, fmt.Errorf("second attempt: %w", err)
} }
} }
defer func() { _ = resp.Body.Close() }()
// Process the received response. // Process the received response.
var parseFn func(resp *http.Response) (Result, error) var parseFn func(req *http.Request, resp *http.Response) (Result, error)
switch c.dataSourceType { switch c.dataSourceType {
case datasourcePrometheus: case datasourcePrometheus:
parseFn = parsePrometheusRangeResponse parseFn = parsePrometheusResponse
case datasourceVLogs: case datasourceVLogs:
parseFn = parseVLogsRangeResponse parseFn = parseVLogsResponse
default: default:
logger.Panicf("BUG: unsupported datasource type %q to parse query range response", c.dataSourceType) logger.Panicf("BUG: unsupported datasource type %q to parse query range response", c.dataSourceType)
} }
res, err = parseFn(req, resp)
res, err = parseFn(resp) _ = resp.Body.Close()
if err != nil {
return Result{}, fmt.Errorf("error parsing response from %q: %w", req.URL.Redacted(), err)
}
return res, err return res, err
} }

View File

@@ -33,10 +33,10 @@ func (r graphiteResponse) metrics() []Metric {
return ms return ms
} }
func parseGraphiteResponse(resp *http.Response) (Result, error) { func parseGraphiteResponse(req *http.Request, resp *http.Response) (Result, error) {
r := &graphiteResponse{} r := &graphiteResponse{}
if err := json.NewDecoder(resp.Body).Decode(r); err != nil { if err := json.NewDecoder(resp.Body).Decode(r); err != nil {
return Result{}, fmt.Errorf("error parsing graphite metrics: %w", err) return Result{}, fmt.Errorf("error parsing graphite metrics for %s: %w", req.URL.Redacted(), err)
} }
return Result{Data: r.metrics()}, nil return Result{Data: r.metrics()}, nil
} }

View File

@@ -34,7 +34,7 @@ type promResponse struct {
// Stats supported by VictoriaMetrics since v1.90 // Stats supported by VictoriaMetrics since v1.90
Stats struct { Stats struct {
SeriesFetched *string `json:"seriesFetched,omitempty"` SeriesFetched *string `json:"seriesFetched,omitempty"`
} `json:"stats"` } `json:"stats,omitempty"`
// IsPartial supported by VictoriaMetrics // IsPartial supported by VictoriaMetrics
IsPartial *bool `json:"isPartial,omitempty"` IsPartial *bool `json:"isPartial,omitempty"`
} }
@@ -172,26 +172,17 @@ const (
rtVector, rtMatrix, rScalar = "vector", "matrix", "scalar" rtVector, rtMatrix, rScalar = "vector", "matrix", "scalar"
) )
func parsePromResponse(resp *http.Response) (*promResponse, error) { func parsePrometheusResponse(req *http.Request, resp *http.Response) (res Result, err error) {
r := &promResponse{} r := &promResponse{}
if err := json.NewDecoder(resp.Body).Decode(r); err != nil { if err = json.NewDecoder(resp.Body).Decode(r); err != nil {
return nil, fmt.Errorf("failed to decode response: %w", err) return res, fmt.Errorf("error parsing response from %s: %w", req.URL.Redacted(), err)
} }
if r.Status == statusError { if r.Status == statusError {
return nil, fmt.Errorf("response error %q: %s", r.ErrorType, r.Error) return res, fmt.Errorf("response error, query: %s, errorType: %s, error: %s", req.URL.Redacted(), r.ErrorType, r.Error)
} }
if r.Status != statusSuccess { if r.Status != statusSuccess {
return nil, fmt.Errorf("unknown response status %q", r.Status) return res, fmt.Errorf("unknown status: %s, Expected success or error", r.Status)
} }
return r, nil
}
func parsePrometheusInstantResponse(resp *http.Response) (res Result, err error) {
r, err := parsePromResponse(resp)
if err != nil {
return res, fmt.Errorf("failed to parse response: %w", err)
}
var parseFn func() ([]Metric, error) var parseFn func() ([]Metric, error)
switch r.Data.ResultType { switch r.Data.ResultType {
case rtVector: case rtVector:
@@ -200,6 +191,12 @@ func parsePrometheusInstantResponse(resp *http.Response) (res Result, err error)
return res, fmt.Errorf("unmarshal err %w; \n %#v", err, string(r.Data.Result)) return res, fmt.Errorf("unmarshal err %w; \n %#v", err, string(r.Data.Result))
} }
parseFn = pi.metrics parseFn = pi.metrics
case rtMatrix:
var pr promRange
if err := json.Unmarshal(r.Data.Result, &pr.Result); err != nil {
return res, err
}
parseFn = pr.metrics
case rScalar: case rScalar:
var ps promScalar var ps promScalar
if err := json.Unmarshal(r.Data.Result, &ps); err != nil { if err := json.Unmarshal(r.Data.Result, &ps); err != nil {
@@ -209,6 +206,7 @@ func parsePrometheusInstantResponse(resp *http.Response) (res Result, err error)
default: default:
return res, fmt.Errorf("unknown result type %q", r.Data.ResultType) return res, fmt.Errorf("unknown result type %q", r.Data.ResultType)
} }
ms, err := parseFn() ms, err := parseFn()
if err != nil { if err != nil {
return res, err return res, err
@@ -224,34 +222,6 @@ func parsePrometheusInstantResponse(resp *http.Response) (res Result, err error)
return res, nil return res, nil
} }
// parsePrometheusRangeResponse decodes a Prometheus-compatible range query
// response from resp into Result.
//
// The response is decoded via parsePromResponse and must carry the matrix
// result type; any other result type is reported as an error.
// Result.IsPartial is copied from the response as is, while
// Result.SeriesFetched is set only when the response contains
// stats.seriesFetched and it can be converted to int.
func parsePrometheusRangeResponse(resp *http.Response) (res Result, err error) {
	parsed, err := parsePromResponse(resp)
	if err != nil {
		return res, fmt.Errorf("failed to parse response: %w", err)
	}
	// range queries are expected to return matrix only
	if parsed.Data.ResultType != rtMatrix {
		return res, fmt.Errorf("unexpected result type %q; expected result type %q", parsed.Data.ResultType, rtMatrix)
	}

	var matrix promRange
	if err = json.Unmarshal(parsed.Data.Result, &matrix.Result); err != nil {
		return res, err
	}
	series, err := matrix.metrics()
	if err != nil {
		return res, err
	}

	res = Result{Data: series, IsPartial: parsed.IsPartial}
	fetched := parsed.Stats.SeriesFetched
	if fetched == nil {
		// stats are optional, nothing more to fill in
		return res, nil
	}
	n, convErr := strconv.Atoi(*fetched)
	if convErr != nil {
		// res is returned with already parsed data alongside the error
		return res, fmt.Errorf("failed to convert stats.seriesFetched to int: %w", convErr)
	}
	res.SeriesFetched = &n
	return res, nil
}
func (c *Client) setPrometheusInstantReqParams(r *http.Request, query string, timestamp time.Time) { func (c *Client) setPrometheusInstantReqParams(r *http.Request, query string, timestamp time.Time) {
if c.appendTypePrefix { if c.appendTypePrefix {
r.URL.Path += "/prometheus" r.URL.Path += "/prometheus"

View File

@@ -65,23 +65,21 @@ func TestVMInstantQuery(t *testing.T) {
case 3: case 3:
w.Write([]byte(`{"status":"unknown"}`)) w.Write([]byte(`{"status":"unknown"}`))
case 4: case 4:
w.Write([]byte(`{"status":"success","data":{"resultType":"vector"}}`)) w.Write([]byte(`{"status":"success","data":{"resultType":"matrix"}}`))
case 5: case 5:
w.Write([]byte(`{"status":"success","data":{"resultType":"matrix","result":[{"metric":{"__name__":"vm_rows"},"values":[[1583786142,"13763"]]}]}}`))
case 6:
w.Write([]byte(`{"status":"success","data":{"resultType":"vector","result":[{"metric":{"__name__":"vm_rows","foo":"bar"},"value":[1583786142,"13763"]},{"metric":{"__name__":"vm_requests","foo":"baz"},"value":[1583786140,"2000"]}]}}`)) w.Write([]byte(`{"status":"success","data":{"resultType":"vector","result":[{"metric":{"__name__":"vm_rows","foo":"bar"},"value":[1583786142,"13763"]},{"metric":{"__name__":"vm_requests","foo":"baz"},"value":[1583786140,"2000"]}]}}`))
case 7: case 6:
w.Write([]byte(`{"status":"success","data":{"resultType":"scalar","result":[1583786142, "1"]}}`)) w.Write([]byte(`{"status":"success","data":{"resultType":"scalar","result":[1583786142, "1"]}}`))
case 8: case 7:
w.Write([]byte(`{"status":"success","data":{"resultType":"scalar","result":[1583786142, "1"]},"stats":{"seriesFetched": "42"}}`)) w.Write([]byte(`{"status":"success","data":{"resultType":"scalar","result":[1583786142, "1"]},"stats":{"seriesFetched": "42"}}`))
case 9: case 8:
w.Write([]byte(`{"status":"success", "isPartial":true, "data":{"resultType":"scalar","result":[1583786142, "1"]}}`)) w.Write([]byte(`{"status":"success", "isPartial":true, "data":{"resultType":"scalar","result":[1583786142, "1"]}}`))
} }
}) })
mux.HandleFunc("/render", func(w http.ResponseWriter, _ *http.Request) { mux.HandleFunc("/render", func(w http.ResponseWriter, _ *http.Request) {
c++ c++
switch c { switch c {
case 10: case 9:
w.Write([]byte(`[{"target":"constantLine(10)","tags":{"name":"constantLine(10)"},"datapoints":[[10,1611758343],[10,1611758373],[10,1611758403]]}]`)) w.Write([]byte(`[{"target":"constantLine(10)","tags":{"name":"constantLine(10)"},"datapoints":[[10,1611758343],[10,1611758373],[10,1611758403]]}]`))
} }
}) })
@@ -104,9 +102,9 @@ func TestVMInstantQuery(t *testing.T) {
t.Fatalf("failed to parse 'time' query param %q: %s", timeParam, err) t.Fatalf("failed to parse 'time' query param %q: %s", timeParam, err)
} }
switch c { switch c {
case 11: case 10:
w.Write([]byte("[]")) w.Write([]byte("[]"))
case 12: case 11:
w.Write([]byte(`{"status":"success","data":{"resultType":"vector","result":[{"metric":{"__name__":"total","foo":"bar"},"value":[1583786142,"13763"]},{"metric":{"__name__":"total","foo":"baz"},"value":[1583786140,"2000"]}]}}`)) w.Write([]byte(`{"status":"success","data":{"resultType":"vector","result":[{"metric":{"__name__":"total","foo":"bar"},"value":[1583786142,"13763"]},{"metric":{"__name__":"total","foo":"baz"},"value":[1583786140,"2000"]}]}}`))
} }
}) })
@@ -125,7 +123,6 @@ func TestVMInstantQuery(t *testing.T) {
ts := time.Now() ts := time.Now()
expErr := func(query, err string) { expErr := func(query, err string) {
t.Helper()
_, _, gotErr := pq.Query(ctx, query, ts) _, _, gotErr := pq.Query(ctx, query, ts)
if gotErr == nil { if gotErr == nil {
t.Fatalf("expected %q got nil", err) t.Fatalf("expected %q got nil", err)
@@ -138,11 +135,10 @@ func TestVMInstantQuery(t *testing.T) {
expErr(vmQuery, "500") // 0 expErr(vmQuery, "500") // 0
expErr(vmQuery, "error parsing response") // 1 expErr(vmQuery, "error parsing response") // 1
expErr(vmQuery, "response error") // 2 expErr(vmQuery, "response error") // 2
expErr(vmQuery, "unknown response status") // 3 expErr(vmQuery, "unknown status") // 3
expErr(vmQuery, "unexpected end of JSON input") // 4 expErr(vmQuery, "unexpected end of JSON input") // 4
expErr(vmQuery, "unknown result type") // 5
res, _, err := pq.Query(ctx, vmQuery, ts) // 6 - vector res, _, err := pq.Query(ctx, vmQuery, ts) // 5 - vector
if err != nil { if err != nil {
t.Fatalf("unexpected %s", err) t.Fatalf("unexpected %s", err)
} }
@@ -163,7 +159,7 @@ func TestVMInstantQuery(t *testing.T) {
} }
metricsEqual(t, res.Data, expected) metricsEqual(t, res.Data, expected)
res, req, err := pq.Query(ctx, vmQuery, ts) // 7 - scalar res, req, err := pq.Query(ctx, vmQuery, ts) // 6 - scalar
if err != nil { if err != nil {
t.Fatalf("unexpected %s", err) t.Fatalf("unexpected %s", err)
} }
@@ -188,7 +184,7 @@ func TestVMInstantQuery(t *testing.T) {
res.SeriesFetched) res.SeriesFetched)
} }
res, _, err = pq.Query(ctx, vmQuery, ts) // 8 - scalar with stats res, _, err = pq.Query(ctx, vmQuery, ts) // 7 - scalar with stats
if err != nil { if err != nil {
t.Fatalf("unexpected %s", err) t.Fatalf("unexpected %s", err)
} }
@@ -209,7 +205,7 @@ func TestVMInstantQuery(t *testing.T) {
*res.SeriesFetched) *res.SeriesFetched)
} }
res, _, err = pq.Query(ctx, vmQuery, ts) // 9 res, _, err = pq.Query(ctx, vmQuery, ts) // 8
if err != nil { if err != nil {
t.Fatalf("unexpected %s", err) t.Fatalf("unexpected %s", err)
} }
@@ -220,7 +216,7 @@ func TestVMInstantQuery(t *testing.T) {
// test graphite // test graphite
gq := s.BuildWithParams(QuerierParams{DataSourceType: string(datasourceGraphite)}) gq := s.BuildWithParams(QuerierParams{DataSourceType: string(datasourceGraphite)})
res, _, err = gq.Query(ctx, queryRender, ts) // 10 - graphite res, _, err = gq.Query(ctx, queryRender, ts) // 9 - graphite
if err != nil { if err != nil {
t.Fatalf("unexpected %s", err) t.Fatalf("unexpected %s", err)
} }
@@ -240,9 +236,9 @@ func TestVMInstantQuery(t *testing.T) {
vlogs := datasourceVLogs vlogs := datasourceVLogs
pq = s.BuildWithParams(QuerierParams{DataSourceType: string(vlogs), EvaluationInterval: 15 * time.Second}) pq = s.BuildWithParams(QuerierParams{DataSourceType: string(vlogs), EvaluationInterval: 15 * time.Second})
expErr(vlogsQuery, "error parsing response") // 11 expErr(vlogsQuery, "error parsing response") // 10
res, _, err = pq.Query(ctx, vlogsQuery, ts) // 12 res, _, err = pq.Query(ctx, vlogsQuery, ts) // 11
if err != nil { if err != nil {
t.Fatalf("unexpected %s", err) t.Fatalf("unexpected %s", err)
} }
@@ -394,8 +390,6 @@ func TestVMRangeQuery(t *testing.T) {
switch c { switch c {
case 0: case 0:
w.Write([]byte(`{"status":"success","data":{"resultType":"matrix","result":[{"metric":{"__name__":"vm_rows"},"values":[[1583786142,"13763"]]}]}}`)) w.Write([]byte(`{"status":"success","data":{"resultType":"matrix","result":[{"metric":{"__name__":"vm_rows"},"values":[[1583786142,"13763"]]}]}}`))
case 1:
w.Write([]byte(`{"status":"success","data":{"resultType":"vector","result":[1583786142, "1"]}}`))
} }
}) })
mux.HandleFunc("/select/logsql/stats_query_range", func(w http.ResponseWriter, r *http.Request) { mux.HandleFunc("/select/logsql/stats_query_range", func(w http.ResponseWriter, r *http.Request) {
@@ -428,7 +422,7 @@ func TestVMRangeQuery(t *testing.T) {
t.Fatalf("expected 'step' query param to be 60s; got %q instead", step) t.Fatalf("expected 'step' query param to be 60s; got %q instead", step)
} }
switch c { switch c {
case 2: case 1:
w.Write([]byte(`{"status":"success","data":{"resultType":"matrix","result":[{"metric":{"__name__":"total"},"values":[[1583786142,"10"]]}]}}`)) w.Write([]byte(`{"status":"success","data":{"resultType":"matrix","result":[{"metric":{"__name__":"total"},"values":[[1583786142,"10"]]}]}}`))
} }
}) })
@@ -452,13 +446,13 @@ func TestVMRangeQuery(t *testing.T) {
start, end := time.Now().Add(-time.Minute), time.Now() start, end := time.Now().Add(-time.Minute), time.Now()
res, err := pq.QueryRange(ctx, vmQuery, start, end) // case 0 res, err := pq.QueryRange(ctx, vmQuery, start, end)
if err != nil { if err != nil {
t.Fatalf("unexpected %s", err) t.Fatalf("unexpected %s", err)
} }
m := res.Data m := res.Data
if len(m) != 1 { if len(m) != 1 {
t.Fatalf("expected 1 metric got %d in %+v", len(m), m) t.Fatalf("expected 1 metric got %d in %+v", len(m), m)
} }
expected := Metric{ expected := Metric{
Labels: []prompb.Label{{Value: "vm_rows", Name: "__name__"}}, Labels: []prompb.Label{{Value: "vm_rows", Name: "__name__"}},
@@ -469,9 +463,6 @@ func TestVMRangeQuery(t *testing.T) {
t.Fatalf("unexpected metric %+v want %+v", m[0], expected) t.Fatalf("unexpected metric %+v want %+v", m[0], expected)
} }
_, err = pq.QueryRange(ctx, vmQuery, start, end) // case 1
expectError(t, err, "unexpected result type")
// test unsupported graphite // test unsupported graphite
gq := s.BuildWithParams(QuerierParams{DataSourceType: string(datasourceGraphite)}) gq := s.BuildWithParams(QuerierParams{DataSourceType: string(datasourceGraphite)})

View File

@@ -40,28 +40,8 @@ func (c *Client) setVLogsRangeReqParams(r *http.Request, query string, start, en
c.setReqParams(r, query) c.setReqParams(r, query)
} }
func parseVLogsInstantResponse(resp *http.Response) (res Result, err error) { func parseVLogsResponse(req *http.Request, resp *http.Response) (res Result, err error) {
res, err = parsePrometheusInstantResponse(resp) res, err = parsePrometheusResponse(req, resp)
if err != nil {
return Result{}, err
}
for i := range res.Data {
m := &res.Data[i]
for j := range m.Labels {
// reserve the stats func result name with a new label `stats_result` instead of dropping it,
// since there could be multiple stats results in a single query, for instance:
// _time:5m | stats quantile(0.5, request_duration_seconds) p50, quantile(0.9, request_duration_seconds) p90
if m.Labels[j].Name == "__name__" {
m.Labels[j].Name = "stats_result"
break
}
}
}
return
}
func parseVLogsRangeResponse(resp *http.Response) (res Result, err error) {
res, err = parsePrometheusRangeResponse(resp)
if err != nil { if err != nil {
return Result{}, err return Result{}, err
} }

View File

@@ -132,9 +132,12 @@ func (ls Labels) String() string {
// a=[]Label{{Name: "a", Value: "2"}},b=[]Label{{Name: "a", Value: "1"}}, return 1 // a=[]Label{{Name: "a", Value: "2"}},b=[]Label{{Name: "a", Value: "1"}}, return 1
// a=[]Label{{Name: "a", Value: "1"}},b=[]Label{{Name: "a", Value: "1"}}, return 0 // a=[]Label{{Name: "a", Value: "1"}},b=[]Label{{Name: "a", Value: "1"}}, return 0
func LabelCompare(a, b Labels) int { func LabelCompare(a, b Labels) int {
l := min(len(b), len(a)) l := len(a)
if len(b) < l {
l = len(b)
}
for i := range l { for i := 0; i < l; i++ {
if a[i].Name != b[i].Name { if a[i].Name != b[i].Name {
if a[i].Name < b[i].Name { if a[i].Name < b[i].Name {
return -1 return -1

View File

@@ -13,7 +13,7 @@ func BenchmarkPromInstantUnmarshal(b *testing.B) {
// BenchmarkParsePrometheusResponse/Instant_std+fastjson-10 1760 668959 ns/op 280147 B/op 5781 allocs/op // BenchmarkParsePrometheusResponse/Instant_std+fastjson-10 1760 668959 ns/op 280147 B/op 5781 allocs/op
b.Run("Instant std+fastjson", func(b *testing.B) { b.Run("Instant std+fastjson", func(b *testing.B) {
for range b.N { for i := 0; i < b.N; i++ {
var pi promInstant var pi promInstant
err = pi.Unmarshal(data) err = pi.Unmarshal(data)
if err != nil { if err != nil {

View File

@@ -7,6 +7,7 @@ import (
"net/url" "net/url"
"os" "os"
"sort" "sort"
"strconv"
"strings" "strings"
"sync" "sync"
"time" "time"
@@ -76,12 +77,15 @@ absolute path to all .tpl files in root.
`Link to VMUI: -external.alert.source='vmui/#/?g0.expr={{.Expr|queryEscape}}'. `+ `Link to VMUI: -external.alert.source='vmui/#/?g0.expr={{.Expr|queryEscape}}'. `+
`If empty 'vmalert/alert?group_id={{.GroupID}}&alert_id={{.AlertID}}' is used.`) `If empty 'vmalert/alert?group_id={{.GroupID}}&alert_id={{.AlertID}}' is used.`)
externalLabels = flagutil.NewArrayString("external.label", "Optional label in the form 'Name=value' to add to all generated recording rules and alerts. "+ externalLabels = flagutil.NewArrayString("external.label", "Optional label in the form 'Name=value' to add to all generated recording rules and alerts. "+
"In case of conflicts, original labels are kept with prefix 'exported_'.") "In case of conflicts, original labels are kept with prefix `exported_`.")
dryRun = flag.Bool("dryRun", false, "Whether to check only config files without running vmalert. The rules file are validated. The -rule flag must be specified.") dryRun = flag.Bool("dryRun", false, "Whether to check only config files without running vmalert. The rules file are validated. The -rule flag must be specified.")
) )
var extURL *url.URL var (
alertURLGeneratorFn notifier.AlertURLGenerator
extURL *url.URL
)
func main() { func main() {
// Write flags and help message to stdout, since it is easier to grep or pipe. // Write flags and help message to stdout, since it is easier to grep or pipe.
@@ -117,7 +121,7 @@ func main() {
return return
} }
err = notifier.InitAlertURLGeneratorFn(extURL, *externalAlertSource, *validateTemplates) alertURLGeneratorFn, err = getAlertURLGenerator(extURL, *externalAlertSource, *validateTemplates)
if err != nil { if err != nil {
logger.Fatalf("failed to init `external.alert.source`: %s", err) logger.Fatalf("failed to init `external.alert.source`: %s", err)
} }
@@ -159,7 +163,7 @@ func main() {
ctx, cancel := context.WithCancel(context.Background()) ctx, cancel := context.WithCancel(context.Background())
manager, err := newManager(ctx) manager, err := newManager(ctx)
if err != nil { if err != nil {
logger.Fatalf("failed to create manager: %s", err) logger.Fatalf("failed to init: %s", err)
} }
logger.Infof("reading rules configuration file from %q", strings.Join(*rulePath, ";")) logger.Infof("reading rules configuration file from %q", strings.Join(*rulePath, ";"))
groupsCfg, err := config.Parse(*rulePath, validateTplFn, *validateExpressions) groupsCfg, err := config.Parse(*rulePath, validateTplFn, *validateExpressions)
@@ -224,13 +228,14 @@ func newManager(ctx context.Context) (*manager, error) {
labels[s[:n]] = s[n+1:] labels[s[:n]] = s[n+1:]
} }
err = notifier.Init(labels, *externalURL) nts, err := notifier.Init(alertURLGeneratorFn, labels, *externalURL)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to init notifier: %w", err) return nil, fmt.Errorf("failed to init notifier: %w", err)
} }
manager := &manager{ manager := &manager{
groups: make(map[uint64]*rule.Group), groups: make(map[uint64]*rule.Group),
querierBuilder: q, querierBuilder: q,
notifiers: nts,
labels: labels, labels: labels,
} }
rw, err := remotewrite.Init(ctx) rw, err := remotewrite.Init(ctx)
@@ -287,6 +292,35 @@ func getHostnameAsExternalURL(addr string, isSecure bool) (*url.URL, error) {
return url.Parse(fmt.Sprintf("%s%s%s", schema, hname, port)) return url.Parse(fmt.Sprintf("%s%s%s", schema, hname, port))
} }
// getAlertURLGenerator returns a function which builds the source URL for the given alert.
//
// If externalAlertSource is empty, the generated URL has the form
// `<externalURL>/vmalert/alert?<paramGroupID>=<group id>&<paramAlertID>=<alert id>`.
// Otherwise externalAlertSource is treated as a template: it is executed for every
// alert and the result is appended to externalURL after `/`.
//
// If validateTemplate is set, then the externalAlertSource template is validated
// before the generator is returned, and the validation error is returned to the caller.
func getAlertURLGenerator(externalURL *url.URL, externalAlertSource string, validateTemplate bool) (notifier.AlertURLGenerator, error) {
	if externalAlertSource == "" {
		// no custom template - link to the alert page served by vmalert itself
		defaultGenerator := func(a notifier.Alert) string {
			groupID := strconv.FormatUint(a.GroupID, 10)
			alertID := strconv.FormatUint(a.ID, 10)
			return fmt.Sprintf("%s/vmalert/alert?%s=%s&%s=%s", externalURL, paramGroupID, groupID, paramAlertID, alertID)
		}
		return defaultGenerator, nil
	}

	if validateTemplate {
		err := notifier.ValidateTemplates(map[string]string{"tpl": externalAlertSource})
		if err != nil {
			return nil, fmt.Errorf("error validating source template %s: %w", externalAlertSource, err)
		}
	}

	sourceTpl := map[string]string{"tpl": externalAlertSource}
	// the alert source template must not execute queries, so every `query` call fails
	rejectQuery := func(_ string) ([]datasource.Metric, error) {
		return nil, fmt.Errorf("`query` template isn't supported for alert source template")
	}
	templatedGenerator := func(alert notifier.Alert) string {
		rendered, err := alert.ExecTemplate(rejectQuery, alert.Labels, sourceTpl)
		if err != nil {
			// the error is only logged; the URL is still built from whatever was rendered
			logger.Errorf("cannot template alert source: %s", err)
		}
		return fmt.Sprintf("%s/%s", externalURL, rendered["tpl"])
	}
	return templatedGenerator, nil
}
func usage() { func usage() {
const s = ` const s = `
vmalert processes alerts and recording rules. vmalert processes alerts and recording rules.

View File

@@ -49,6 +49,30 @@ func TestGetExternalURL(t *testing.T) {
} }
} }
func TestGetAlertURLGenerator(t *testing.T) {
testAlert := notifier.Alert{GroupID: 42, ID: 2, Value: 4, Labels: map[string]string{"tenant": "baz"}}
u, _ := url.Parse("https://victoriametrics.com/path")
fn, err := getAlertURLGenerator(u, "", false)
if err != nil {
t.Fatalf("unexpected error %s", err)
}
exp := fmt.Sprintf("https://victoriametrics.com/path/vmalert/alert?%s=42&%s=2", paramGroupID, paramAlertID)
if exp != fn(testAlert) {
t.Fatalf("unexpected url want %s, got %s", exp, fn(testAlert))
}
_, err = getAlertURLGenerator(nil, "foo?{{invalid}}", true)
if err == nil {
t.Fatalf("expected template validation error got nil")
}
fn, err = getAlertURLGenerator(u, "foo?query={{$value}}&ds={{ $labels.tenant }}", true)
if err != nil {
t.Fatalf("unexpected error %s", err)
}
if exp := "https://victoriametrics.com/path/foo?query=4&ds=baz"; exp != fn(testAlert) {
t.Fatalf("unexpected url want %s, got %s", exp, fn(testAlert))
}
}
func TestConfigReload(t *testing.T) { func TestConfigReload(t *testing.T) {
originalRulePath := *rulePath originalRulePath := *rulePath
originalExternalURL := extURL originalExternalURL := extURL
@@ -96,10 +120,9 @@ groups:
querierBuilder: &datasource.FakeQuerier{}, querierBuilder: &datasource.FakeQuerier{},
groups: make(map[uint64]*rule.Group), groups: make(map[uint64]*rule.Group),
labels: map[string]string{}, labels: map[string]string{},
notifiers: func() []notifier.Notifier { return []notifier.Notifier{&notifier.FakeNotifier{}} },
rw: &remotewrite.Client{}, rw: &remotewrite.Client{},
} }
_, cleanup := notifier.InitFakeNotifier()
defer cleanup()
syncCh := make(chan struct{}) syncCh := make(chan struct{})
sighupCh := procutil.NewSighupChan() sighupCh := procutil.NewSighupChan()

View File

@@ -3,7 +3,6 @@ package main
import ( import (
"context" "context"
"fmt" "fmt"
"strconv"
"sync" "sync"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config" "github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config"
@@ -17,6 +16,7 @@ import (
// manager controls group states // manager controls group states
type manager struct { type manager struct {
querierBuilder datasource.QuerierBuilder querierBuilder datasource.QuerierBuilder
notifiers func() []notifier.Notifier
rw remotewrite.RWClient rw remotewrite.RWClient
// remote read builder. // remote read builder.
@@ -29,8 +29,25 @@ type manager struct {
groups map[uint64]*rule.Group groups map[uint64]*rule.Group
} }
// groupAPI generates apiGroup object from group by its ID(hash) // ruleAPI generates apiRule object from alert by its ID(hash)
func (m *manager) groupAPI(gID uint64) (*rule.ApiGroup, error) { func (m *manager) ruleAPI(gID, rID uint64) (apiRule, error) {
m.groupsMu.RLock()
defer m.groupsMu.RUnlock()
g, ok := m.groups[gID]
if !ok {
return apiRule{}, fmt.Errorf("can't find group with id %d", gID)
}
for _, rule := range g.Rules {
if rule.ID() == rID {
return ruleToAPI(rule), nil
}
}
return apiRule{}, fmt.Errorf("can't find rule with id %d in group %q", rID, g.Name)
}
// alertAPI generates apiAlert object from alert by its ID(hash)
func (m *manager) alertAPI(gID, aID uint64) (*apiAlert, error) {
m.groupsMu.RLock() m.groupsMu.RLock()
defer m.groupsMu.RUnlock() defer m.groupsMu.RUnlock()
@@ -38,47 +55,13 @@ func (m *manager) groupAPI(gID uint64) (*rule.ApiGroup, error) {
if !ok { if !ok {
return nil, fmt.Errorf("can't find group with id %d", gID) return nil, fmt.Errorf("can't find group with id %d", gID)
} }
return g.ToAPI(), nil
}
// ruleAPI generates apiRule object from alert by its ID(hash)
func (m *manager) ruleAPI(gID, rID uint64) (rule.ApiRule, error) {
m.groupsMu.RLock()
defer m.groupsMu.RUnlock()
group, ok := m.groups[gID]
if !ok {
return rule.ApiRule{}, fmt.Errorf("can't find group with id %d", gID)
}
g := group.ToAPI()
ruleID := strconv.FormatUint(rID, 10)
for _, r := range g.Rules { for _, r := range g.Rules {
if r.ID == ruleID { ar, ok := r.(*rule.AlertingRule)
return r, nil if !ok {
}
}
return rule.ApiRule{}, fmt.Errorf("can't find rule with id %d in group %q", rID, g.Name)
}
// alertAPI generates apiAlert object from alert by its ID(hash)
func (m *manager) alertAPI(gID, aID uint64) (*rule.ApiAlert, error) {
m.groupsMu.RLock()
defer m.groupsMu.RUnlock()
group, ok := m.groups[gID]
if !ok {
return nil, fmt.Errorf("can't find group with id %d", gID)
}
g := group.ToAPI()
for _, r := range g.Rules {
if r.Type != rule.TypeAlerting {
continue continue
} }
alertID := strconv.FormatUint(aID, 10) if apiAlert := alertToAPI(ar, aID); apiAlert != nil {
for _, a := range r.Alerts { return apiAlert, nil
if a.ID == alertID {
return a, nil
}
} }
} }
return nil, fmt.Errorf("can't find alert with id %d in group %q", aID, g.Name) return nil, fmt.Errorf("can't find alert with id %d in group %q", aID, g.Name)
@@ -99,16 +82,17 @@ func (m *manager) close() {
} }
func (m *manager) startGroup(ctx context.Context, g *rule.Group, restore bool) error { func (m *manager) startGroup(ctx context.Context, g *rule.Group, restore bool) error {
m.wg.Add(1)
id := g.GetID() id := g.GetID()
g.Init() g.Init()
m.wg.Go(func() { go func() {
defer m.wg.Done()
if restore { if restore {
g.Start(ctx, m.rw, m.rr) g.Start(ctx, m.notifiers, m.rw, m.rr)
} else { } else {
g.Start(ctx, m.rw, nil) g.Start(ctx, m.notifiers, m.rw, nil)
} }
}) }()
m.groups[id] = g m.groups[id] = g
return nil return nil
} }
@@ -135,7 +119,7 @@ func (m *manager) update(ctx context.Context, groupsCfg []config.Group, restore
if rrPresent && m.rw == nil { if rrPresent && m.rw == nil {
return fmt.Errorf("config contains recording rules but `-remoteWrite.url` isn't set") return fmt.Errorf("config contains recording rules but `-remoteWrite.url` isn't set")
} }
if arPresent && notifier.GetTargets() == nil { if arPresent && m.notifiers == nil {
return fmt.Errorf("config contains alerting rules but neither `-notifier.url` nor `-notifier.config` nor `-notifier.blackhole` aren't set") return fmt.Errorf("config contains alerting rules but neither `-notifier.url` nor `-notifier.config` nor `-notifier.blackhole` aren't set")
} }
@@ -172,15 +156,15 @@ func (m *manager) update(ctx context.Context, groupsCfg []config.Group, restore
if len(toUpdate) > 0 { if len(toUpdate) > 0 {
var wg sync.WaitGroup var wg sync.WaitGroup
for _, item := range toUpdate { for _, item := range toUpdate {
oldG := item.old wg.Add(1)
newG := item.new // cancel evaluation so the Update will be applied as fast as possible.
wg.Go(func() { // it is important to call InterruptEval before the update, because cancel fn
// cancel evaluation so the Update will be applied as fast as possible. // can be re-assigned during the update.
// it is important to call InterruptEval before the update, because cancel fn item.old.InterruptEval()
// can be re-assigned during the update. go func(oldGroup *rule.Group, newGroup *rule.Group) {
oldG.InterruptEval() oldGroup.UpdateWith(newGroup)
oldG.UpdateWith(newG) wg.Done()
}) }(item.old, item.new)
} }
wg.Wait() wg.Wait()
} }

View File

@@ -40,11 +40,10 @@ func TestManagerEmptyRulesDir(t *testing.T) {
// execution of configuration update. // execution of configuration update.
// Should be executed with -race flag // Should be executed with -race flag
func TestManagerUpdateConcurrent(t *testing.T) { func TestManagerUpdateConcurrent(t *testing.T) {
_, cleanup := notifier.InitFakeNotifier()
defer cleanup()
m := &manager{ m := &manager{
groups: make(map[uint64]*rule.Group), groups: make(map[uint64]*rule.Group),
querierBuilder: &datasource.FakeQuerier{}, querierBuilder: &datasource.FakeQuerier{},
notifiers: func() []notifier.Notifier { return []notifier.Notifier{&notifier.FakeNotifier{}} },
} }
paths := []string{ paths := []string{
"config/testdata/dir/rules0-good.rules", "config/testdata/dir/rules0-good.rules",
@@ -65,11 +64,13 @@ func TestManagerUpdateConcurrent(t *testing.T) {
const workers = 500 const workers = 500
const iterations = 10 const iterations = 10
var wg sync.WaitGroup wg := sync.WaitGroup{}
for n := range workers { wg.Add(workers)
wg.Go(func() { for i := 0; i < workers; i++ {
go func(n int) {
defer wg.Done()
r := rand.New(rand.NewSource(int64(n))) r := rand.New(rand.NewSource(int64(n)))
for range iterations { for i := 0; i < iterations; i++ {
rnd := r.Intn(len(paths)) rnd := r.Intn(len(paths))
cfg, err := config.Parse([]string{paths[rnd]}, notifier.ValidateTemplates, true) cfg, err := config.Parse([]string{paths[rnd]}, notifier.ValidateTemplates, true)
if err != nil { // update can fail and this is expected if err != nil { // update can fail and this is expected
@@ -77,7 +78,7 @@ func TestManagerUpdateConcurrent(t *testing.T) {
} }
_ = m.update(context.Background(), cfg, false) _ = m.update(context.Background(), cfg, false)
} }
}) }(i)
} }
wg.Wait() wg.Wait()
} }
@@ -126,9 +127,8 @@ func TestManagerUpdate_Success(t *testing.T) {
m := &manager{ m := &manager{
groups: make(map[uint64]*rule.Group), groups: make(map[uint64]*rule.Group),
querierBuilder: &datasource.FakeQuerier{}, querierBuilder: &datasource.FakeQuerier{},
notifiers: func() []notifier.Notifier { return []notifier.Notifier{&notifier.FakeNotifier{}} },
} }
_, cleanup := notifier.InitFakeNotifier()
defer cleanup()
cfgInit := loadCfg(t, []string{initPath}, true, true) cfgInit := loadCfg(t, []string{initPath}, true, true)
if err := m.update(ctx, cfgInit, false); err != nil { if err := m.update(ctx, cfgInit, false); err != nil {
@@ -259,7 +259,7 @@ func compareGroups(t *testing.T, a, b *rule.Group) {
for i, r := range a.Rules { for i, r := range a.Rules {
got, want := r, b.Rules[i] got, want := r, b.Rules[i]
if a.CreateID() != b.CreateID() { if a.CreateID() != b.CreateID() {
t.Fatalf("expected to have rule %d; got %d", want.ID(), got.ID()) t.Fatalf("expected to have rule %q; got %q", want.ID(), got.ID())
} }
if err := rule.CompareRules(t, want, got); err != nil { if err := rule.CompareRules(t, want, got); err != nil {
t.Fatalf("comparison error: %s", err) t.Fatalf("comparison error: %s", err)
@@ -277,8 +277,7 @@ func TestManagerUpdate_Failure(t *testing.T) {
rw: rw, rw: rw,
} }
if notifiers != nil { if notifiers != nil {
_, cleanup := notifier.InitFakeNotifier() m.notifiers = func() []notifier.Notifier { return notifiers }
defer cleanup()
} }
err := m.update(context.Background(), []config.Group{cfg}, false) err := m.update(context.Background(), []config.Group{cfg}, false)
if err == nil { if err == nil {

View File

@@ -80,15 +80,14 @@ func (as AlertState) String() string {
// AlertTplData is used to execute templating // AlertTplData is used to execute templating
type AlertTplData struct { type AlertTplData struct {
Type string Type string
Labels map[string]string Labels map[string]string
Value float64 Value float64
Expr string Expr string
AlertID uint64 AlertID uint64
GroupID uint64 GroupID uint64
ActiveAt time.Time ActiveAt time.Time
For time.Duration For time.Duration
IsPartial bool
} }
var tplHeaders = []string{ var tplHeaders = []string{
@@ -102,7 +101,6 @@ var tplHeaders = []string{
"{{ $groupID := .GroupID }}", "{{ $groupID := .GroupID }}",
"{{ $activeAt := .ActiveAt }}", "{{ $activeAt := .ActiveAt }}",
"{{ $for := .For }}", "{{ $for := .For }}",
"{{ $isPartial := .IsPartial }}",
} }
// ExecTemplate executes the Alert template for given // ExecTemplate executes the Alert template for given
@@ -168,8 +166,8 @@ func templateAnnotations(annotations map[string]string, data AlertTplData, tmpl
ctmpl, _ := tmpl.Clone() ctmpl, _ := tmpl.Clone()
ctmpl = ctmpl.Option("missingkey=zero") ctmpl = ctmpl.Option("missingkey=zero")
if err := templateAnnotation(&buf, builder.String(), tData, ctmpl, execute); err != nil { if err := templateAnnotation(&buf, builder.String(), tData, ctmpl, execute); err != nil {
r[key] = err.Error() r[key] = text
eg.Add(fmt.Errorf("(key: %q, value: %q): %w", key, text, err)) eg.Add(fmt.Errorf("key %q, template %q: %w", key, text, err))
continue continue
} }
r[key] = buf.String() r[key] = buf.String()
@@ -186,13 +184,13 @@ type tplData struct {
func templateAnnotation(dst io.Writer, text string, data tplData, tpl *textTpl.Template, execute bool) error { func templateAnnotation(dst io.Writer, text string, data tplData, tpl *textTpl.Template, execute bool) error {
tpl, err := tpl.Parse(text) tpl, err := tpl.Parse(text)
if err != nil { if err != nil {
return fmt.Errorf("error parsing template: %w", err) return fmt.Errorf("error parsing annotation template: %w", err)
} }
if !execute { if !execute {
return nil return nil
} }
if err = tpl.Execute(dst, data); err != nil { if err = tpl.Execute(dst, data); err != nil {
return fmt.Errorf("error evaluating template: %w", err) return fmt.Errorf("error evaluating annotation template: %w", err)
} }
return nil return nil
} }

View File

@@ -20,7 +20,7 @@ func TestAlertExecTemplate(t *testing.T) {
) )
extLabels["cluster"] = extCluster extLabels["cluster"] = extCluster
extLabels["dc"] = extDC extLabels["dc"] = extDC
err := Init(extLabels, extURL) _, err := Init(nil, extLabels, extURL)
checkErr(t, err) checkErr(t, err)
f := func(alert *Alert, annotations map[string]string, tplExpected map[string]string) { f := func(alert *Alert, annotations map[string]string, tplExpected map[string]string) {

View File

@@ -3,7 +3,6 @@ package notifier
import ( import (
"bytes" "bytes"
"context" "context"
"errors"
"fmt" "fmt"
"io" "io"
"net/http" "net/http"
@@ -14,6 +13,7 @@ import (
"github.com/VictoriaMetrics/metrics" "github.com/VictoriaMetrics/metrics"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/vmalertutil" "github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/vmalertutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httputil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth" "github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb" "github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel" "github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
@@ -22,11 +22,10 @@ import (
// AlertManager represents integration provider with Prometheus alert manager // AlertManager represents integration provider with Prometheus alert manager
// https://github.com/prometheus/alertmanager // https://github.com/prometheus/alertmanager
type AlertManager struct { type AlertManager struct {
addr *url.URL addr *url.URL
argFunc AlertURLGenerator argFunc AlertURLGenerator
client *http.Client client *http.Client
timeout time.Duration timeout time.Duration
lastError string
authCfg *promauth.Config authCfg *promauth.Config
// stores already parsed RelabelConfigs object // stores already parsed RelabelConfigs object
@@ -72,42 +71,24 @@ func (am AlertManager) Addr() string {
return am.addr.Redacted() return am.addr.Redacted()
} }
func (am *AlertManager) LastError() string {
return am.lastError
}
// Send an alert or resolve message // Send an alert or resolve message
func (am *AlertManager) Send(ctx context.Context, alerts []Alert, alertLabels [][]prompb.Label, headers map[string]string) error { func (am *AlertManager) Send(ctx context.Context, alerts []Alert, headers map[string]string) error {
if len(alerts) != len(alertLabels) {
return fmt.Errorf("mismatched number of alerts and label sets after global alert relabeling")
}
am.metrics.alertsSent.Add(len(alerts)) am.metrics.alertsSent.Add(len(alerts))
startTime := time.Now() startTime := time.Now()
err := am.send(ctx, alerts, alertLabels, headers) err := am.send(ctx, alerts, headers)
am.metrics.alertsSendDuration.UpdateDuration(startTime) am.metrics.alertsSendDuration.UpdateDuration(startTime)
if err != nil { if err != nil {
// the context can be cancelled on graceful shutdown
// or on group update. So no need to handle the error as usual.
if errors.Is(err, context.Canceled) {
return nil
}
am.metrics.alertsSendErrors.Add(len(alerts)) am.metrics.alertsSendErrors.Add(len(alerts))
am.lastError = err.Error()
} else {
am.lastError = ""
} }
return err return err
} }
func (am *AlertManager) send(ctx context.Context, alerts []Alert, alertLabels [][]prompb.Label, headers map[string]string) error { func (am *AlertManager) send(ctx context.Context, alerts []Alert, headers map[string]string) error {
b := &bytes.Buffer{} b := &bytes.Buffer{}
alertsToSend := make([]Alert, 0, len(alerts)) alertsToSend := make([]Alert, 0, len(alerts))
lblss := make([][]prompb.Label, 0, len(alerts)) lblss := make([][]prompb.Label, 0, len(alerts))
for i, a := range alerts { for _, a := range alerts {
lbls := alertLabels[i] lbls := a.applyRelabelingIfNeeded(am.relabelConfigs)
if am.relabelConfigs != nil {
lbls = am.relabelConfigs.Apply(lbls, 0)
}
if len(lbls) == 0 { if len(lbls) == 0 {
continue continue
} }
@@ -171,6 +152,11 @@ const alertManagerPath = "/api/v2/alerts"
func NewAlertManager(alertManagerURL string, fn AlertURLGenerator, authCfg promauth.HTTPClientConfig, func NewAlertManager(alertManagerURL string, fn AlertURLGenerator, authCfg promauth.HTTPClientConfig,
relabelCfg *promrelabel.ParsedConfigs, timeout time.Duration, relabelCfg *promrelabel.ParsedConfigs, timeout time.Duration,
) (*AlertManager, error) { ) (*AlertManager, error) {
if err := httputil.CheckURL(alertManagerURL); err != nil {
return nil, fmt.Errorf("invalid alertmanager URL: %w", err)
}
tls := &promauth.TLSConfig{} tls := &promauth.TLSConfig{}
if authCfg.TLSConfig != nil { if authCfg.TLSConfig != nil {
tls = authCfg.TLSConfig tls = authCfg.TLSConfig

View File

@@ -11,7 +11,6 @@ import (
"time" "time"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth" "github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel" "github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
) )
@@ -146,11 +145,11 @@ func TestAlertManager_Send(t *testing.T) {
t.Fatalf("unexpected error: %s", err) t.Fatalf("unexpected error: %s", err)
} }
if err := am.Send(context.Background(), []Alert{{Labels: map[string]string{"a": "b"}}}, [][]prompb.Label{{{Name: "a", Value: "b"}}}, nil); err == nil { if err := am.Send(context.Background(), []Alert{{Labels: map[string]string{"a": "b"}}}, nil); err == nil {
t.Fatalf("expected connection error got nil") t.Fatalf("expected connection error got nil")
} }
if err := am.Send(context.Background(), []Alert{{Labels: map[string]string{"a": "b"}}}, [][]prompb.Label{{{Name: "a", Value: "b"}}}, nil); err == nil { if err := am.Send(context.Background(), []Alert{{Labels: map[string]string{"a": "b"}}}, nil); err == nil {
t.Fatalf("expected wrong http code error got nil") t.Fatalf("expected wrong http code error got nil")
} }
@@ -161,7 +160,7 @@ func TestAlertManager_Send(t *testing.T) {
End: time.Now().UTC(), End: time.Now().UTC(),
Labels: map[string]string{"alertname": "alert0"}, Labels: map[string]string{"alertname": "alert0"},
Annotations: map[string]string{"a": "b", "c": "d"}, Annotations: map[string]string{"a": "b", "c": "d"},
}}, [][]prompb.Label{{{Name: "alertname", Value: "alert0"}}}, map[string]string{headerKey: "bar"}); err != nil { }}, map[string]string{headerKey: "bar"}); err != nil {
t.Fatalf("unexpected error %s", err) t.Fatalf("unexpected error %s", err)
} }
@@ -175,7 +174,7 @@ func TestAlertManager_Send(t *testing.T) {
Name: "alert2", Name: "alert2",
Labels: map[string]string{"rule": "test", "tenant": "1"}, Labels: map[string]string{"rule": "test", "tenant": "1"},
}, },
}, [][]prompb.Label{{{Name: "rule", Value: "test"}, {Name: "tenant", Value: "0"}}, {{Name: "rule", Value: "test"}, {Name: "tenant", Value: "1"}}}, map[string]string{headerKey: "bar"}); err != nil { }, map[string]string{headerKey: "bar"}); err != nil {
t.Fatalf("unexpected error %s", err) t.Fatalf("unexpected error %s", err)
} }
@@ -188,7 +187,7 @@ func TestAlertManager_Send(t *testing.T) {
Name: "alert2", Name: "alert2",
Labels: map[string]string{}, Labels: map[string]string{},
}, },
}, [][]prompb.Label{{{Name: "rule", Value: "test"}}, {{}}}, map[string]string{}); err != nil { }, map[string]string{}); err != nil {
t.Fatalf("unexpected error %s", err) t.Fatalf("unexpected error %s", err)
} }

View File

@@ -27,9 +27,15 @@ type Config struct {
// PathPrefix is added to URL path before adding alertManagerPath value // PathPrefix is added to URL path before adding alertManagerPath value
PathPrefix string `yaml:"path_prefix,omitempty"` PathPrefix string `yaml:"path_prefix,omitempty"`
ConsulSDConfigs []ConsulSDConfigs `yaml:"consul_sd_configs,omitempty"` // ConsulSDConfigs contains list of settings for service discovery via Consul
DNSSDConfigs []DNSSDConfigs `yaml:"dns_sd_configs,omitempty"` // see https://prometheus.io/docs/prometheus/latest/configuration/configuration/#consul_sd_config
StaticConfigs []StaticConfig `yaml:"static_configs,omitempty"` ConsulSDConfigs []consul.SDConfig `yaml:"consul_sd_configs,omitempty"`
// DNSSDConfigs contains list of settings for service discovery via DNS.
// See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#dns_sd_config
DNSSDConfigs []dns.SDConfig `yaml:"dns_sd_configs,omitempty"`
// StaticConfigs contains list of static targets
StaticConfigs []StaticConfig `yaml:"static_configs,omitempty"`
// HTTPClientConfig contains HTTP configuration for Notifier clients // HTTPClientConfig contains HTTP configuration for Notifier clients
HTTPClientConfig promauth.HTTPClientConfig `yaml:",inline"` HTTPClientConfig promauth.HTTPClientConfig `yaml:",inline"`
@@ -56,29 +62,14 @@ type Config struct {
parsedAlertRelabelConfigs *promrelabel.ParsedConfigs parsedAlertRelabelConfigs *promrelabel.ParsedConfigs
} }
// staticConfig contains list of static targets in the following form: // StaticConfig contains list of static targets in the following form:
// //
// targets: // targets:
// [ - '<host>' ] // [ - '<host>' ]
type StaticConfig struct { type StaticConfig struct {
Targets []string `yaml:"targets"` Targets []string `yaml:"targets"`
// HTTPClientConfig contains HTTP configuration for the Targets // HTTPClientConfig contains HTTP configuration for the Targets
HTTPClientConfig promauth.HTTPClientConfig `yaml:",inline"` HTTPClientConfig promauth.HTTPClientConfig `yaml:",inline"`
AlertRelabelConfigs []promrelabel.RelabelConfig `yaml:"alert_relabel_configs,omitempty"`
}
// ConsulSDConfigs contains list of settings for service discovery via Consul,
// see https://prometheus.io/docs/prometheus/latest/configuration/configuration/#consul_sd_config
type ConsulSDConfigs struct {
consul.SDConfig `yaml:",inline"`
AlertRelabelConfigs []promrelabel.RelabelConfig `yaml:"alert_relabel_configs,omitempty"`
}
// DNSSDConfigs contains list of settings for service discovery via DNS,
// See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#dns_sd_config
type DNSSDConfigs struct {
dns.SDConfig `yaml:",inline"`
AlertRelabelConfigs []promrelabel.RelabelConfig `yaml:"alert_relabel_configs,omitempty"`
} }
// UnmarshalYAML implements the yaml.Unmarshaler interface. // UnmarshalYAML implements the yaml.Unmarshaler interface.
@@ -104,31 +95,6 @@ func (cfg *Config) UnmarshalYAML(unmarshal func(any) error) error {
} }
cfg.parsedAlertRelabelConfigs = arCfg cfg.parsedAlertRelabelConfigs = arCfg
for _, s := range cfg.StaticConfigs {
if len(s.AlertRelabelConfigs) > 0 {
_, err := promrelabel.ParseRelabelConfigs(s.AlertRelabelConfigs)
if err != nil {
return fmt.Errorf("failed to parse alert_relabel_configs in static_config: %w", err)
}
}
}
for _, s := range cfg.ConsulSDConfigs {
if len(s.AlertRelabelConfigs) > 0 {
_, err := promrelabel.ParseRelabelConfigs(s.AlertRelabelConfigs)
if err != nil {
return fmt.Errorf("failed to parse alert_relabel_configs in consul_sd_config: %w", err)
}
}
}
for _, s := range cfg.DNSSDConfigs {
if len(s.AlertRelabelConfigs) > 0 {
_, err := promrelabel.ParseRelabelConfigs(s.AlertRelabelConfigs)
if err != nil {
return fmt.Errorf("failed to parse alert_relabel_configs in dns_sd_config: %w", err)
}
}
}
b, err := yaml.Marshal(cfg) b, err := yaml.Marshal(cfg)
if err != nil { if err != nil {
return fmt.Errorf("failed to marshal configuration for checksum: %w", err) return fmt.Errorf("failed to marshal configuration for checksum: %w", err)

View File

@@ -35,6 +35,4 @@ func TestParseConfig_Failure(t *testing.T) {
f("testdata/unknownFields.bad.yaml", "unknown field") f("testdata/unknownFields.bad.yaml", "unknown field")
f("non-existing-file", "error reading") f("non-existing-file", "error reading")
f("testdata/consul.bad.yaml", "failed to parse alert_relabel_configs in consul_sd_config")
f("testdata/dns.bad.yaml", "failed to parse alert relabeling config")
} }

View File

@@ -8,7 +8,6 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger" "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth" "github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discovery/consul" "github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discovery/consul"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discovery/dns" "github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discovery/dns"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutil" "github.com/VictoriaMetrics/VictoriaMetrics/lib/promutil"
@@ -29,7 +28,11 @@ type configWatcher struct {
targets map[TargetType][]Target targets map[TargetType][]Target
} }
func newWatcher(cfg *Config, gen AlertURLGenerator) (*configWatcher, error) { func newWatcher(path string, gen AlertURLGenerator) (*configWatcher, error) {
cfg, err := parseConfig(path)
if err != nil {
return nil, err
}
cw := &configWatcher{ cw := &configWatcher{
cfg: cfg, cfg: cfg,
wg: sync.WaitGroup{}, wg: sync.WaitGroup{},
@@ -85,15 +88,18 @@ func (cw *configWatcher) reload(path string) error {
return cw.start() return cw.start()
} }
func (cw *configWatcher) add(typeK TargetType, interval time.Duration, targetsFn getTargets) error { func (cw *configWatcher) add(typeK TargetType, interval time.Duration, labelsFn getLabels) error {
targetMetadata, errors := getTargetMetadata(targetsFn, cw.cfg) targetMetadata, errors := getTargetMetadata(labelsFn, cw.cfg)
for _, err := range errors { for _, err := range errors {
return fmt.Errorf("failed to init notifier for %q: %w", typeK, err) return fmt.Errorf("failed to init notifier for %q: %w", typeK, err)
} }
cw.updateTargets(typeK, targetMetadata, cw.cfg, cw.genFn) cw.updateTargets(typeK, targetMetadata, cw.cfg, cw.genFn)
cw.wg.Go(func() { cw.wg.Add(1)
go func() {
defer cw.wg.Done()
ticker := time.NewTicker(interval) ticker := time.NewTicker(interval)
defer ticker.Stop() defer ticker.Stop()
@@ -103,77 +109,62 @@ func (cw *configWatcher) add(typeK TargetType, interval time.Duration, targetsFn
return return
case <-ticker.C: case <-ticker.C:
} }
targetMetadata, errors := getTargetMetadata(targetsFn, cw.cfg) targetMetadata, errors := getTargetMetadata(labelsFn, cw.cfg)
for _, err := range errors { for _, err := range errors {
logger.Errorf("failed to init notifier for %q: %w", typeK, err) logger.Errorf("failed to init notifier for %q: %w", typeK, err)
} }
cw.updateTargets(typeK, targetMetadata, cw.cfg, cw.genFn) cw.updateTargets(typeK, targetMetadata, cw.cfg, cw.genFn)
} }
}) }()
return nil return nil
} }
type targetMetadata struct { func getTargetMetadata(labelsFn getLabels, cfg *Config) (map[string]*promutil.Labels, []error) {
*promutil.Labels metaLabels, err := labelsFn()
alertRelabelConfigs *promrelabel.ParsedConfigs
}
func getTargetMetadata(targetsFn getTargets, cfg *Config) (map[string]targetMetadata, []error) {
metaLabelsList, alertRelabelCfgs, err := targetsFn()
if err != nil { if err != nil {
return nil, []error{fmt.Errorf("failed to get labels: %w", err)} return nil, []error{fmt.Errorf("failed to get labels: %w", err)}
} }
targetMts := make(map[string]targetMetadata, len(metaLabelsList)) targetMetadata := make(map[string]*promutil.Labels, len(metaLabels))
var errors []error var errors []error
duplicates := make(map[string]struct{}) duplicates := make(map[string]struct{})
for i := range metaLabelsList { for _, labels := range metaLabels {
metaLabels := metaLabelsList[i] target := labels.Get("__address__")
alertRelabelCfg := alertRelabelCfgs[i] u, processedLabels, err := parseLabels(target, labels, cfg)
for _, labels := range metaLabels { if err != nil {
target := labels.Get("__address__") errors = append(errors, err)
u, processedLabels, err := parseLabels(target, labels, cfg) continue
if err != nil {
errors = append(errors, err)
continue
}
if len(u) == 0 {
continue
}
// check for duplicated targets
// targets with same address but different alert_relabel_configs are still considered duplicates since it's mostly due to misconfiguration and could cause duplicated notifications.
if _, ok := duplicates[u]; ok {
if !*suppressDuplicateTargetErrors {
logger.Errorf("skipping duplicate target with identical address %q; "+
"make sure service discovery and relabeling is set up properly; "+
"original labels: %s; resulting labels: %s",
u, labels, processedLabels)
}
continue
}
duplicates[u] = struct{}{}
targetMts[u] = targetMetadata{
Labels: processedLabels,
alertRelabelConfigs: alertRelabelCfg,
}
} }
if len(u) == 0 {
continue
}
if _, ok := duplicates[u]; ok { // check for duplicates
if !*suppressDuplicateTargetErrors {
logger.Errorf("skipping duplicate target with identical address %q; "+
"make sure service discovery and relabeling is set up properly; "+
"original labels: %s; resulting labels: %s",
u, labels, processedLabels)
}
continue
}
duplicates[u] = struct{}{}
targetMetadata[u] = processedLabels
} }
return targetMts, errors return targetMetadata, errors
} }
type getTargets func() ([][]*promutil.Labels, []*promrelabel.ParsedConfigs, error) type getLabels func() ([]*promutil.Labels, error)
func (cw *configWatcher) start() error { func (cw *configWatcher) start() error {
if len(cw.cfg.StaticConfigs) > 0 { if len(cw.cfg.StaticConfigs) > 0 {
var targets []Target var targets []Target
for i, cfg := range cw.cfg.StaticConfigs { for _, cfg := range cw.cfg.StaticConfigs {
alertRelabelConfig, _ := promrelabel.ParseRelabelConfigs(cw.cfg.StaticConfigs[i].AlertRelabelConfigs)
httpCfg := mergeHTTPClientConfigs(cw.cfg.HTTPClientConfig, cfg.HTTPClientConfig) httpCfg := mergeHTTPClientConfigs(cw.cfg.HTTPClientConfig, cfg.HTTPClientConfig)
for _, target := range cfg.Targets { for _, target := range cfg.Targets {
address, labels, err := parseLabels(target, nil, cw.cfg) address, labels, err := parseLabels(target, nil, cw.cfg)
if err != nil { if err != nil {
return fmt.Errorf("failed to parse labels for target %q: %w", target, err) return fmt.Errorf("failed to parse labels for target %q: %w", target, err)
} }
notifier, err := NewAlertManager(address, cw.genFn, httpCfg, alertRelabelConfig, cw.cfg.Timeout.Duration()) notifier, err := NewAlertManager(address, cw.genFn, httpCfg, cw.cfg.parsedAlertRelabelConfigs, cw.cfg.Timeout.Duration())
if err != nil { if err != nil {
return fmt.Errorf("failed to init alertmanager for addr %q: %w", address, err) return fmt.Errorf("failed to init alertmanager for addr %q: %w", address, err)
} }
@@ -187,20 +178,17 @@ func (cw *configWatcher) start() error {
} }
if len(cw.cfg.ConsulSDConfigs) > 0 { if len(cw.cfg.ConsulSDConfigs) > 0 {
err := cw.add(TargetConsul, *consul.SDCheckInterval, func() ([][]*promutil.Labels, []*promrelabel.ParsedConfigs, error) { err := cw.add(TargetConsul, *consul.SDCheckInterval, func() ([]*promutil.Labels, error) {
var labels [][]*promutil.Labels var labels []*promutil.Labels
var alertRelabelConfigs []*promrelabel.ParsedConfigs
for i := range cw.cfg.ConsulSDConfigs { for i := range cw.cfg.ConsulSDConfigs {
alertRelabelConfig, _ := promrelabel.ParseRelabelConfigs(cw.cfg.ConsulSDConfigs[i].AlertRelabelConfigs)
sdc := &cw.cfg.ConsulSDConfigs[i] sdc := &cw.cfg.ConsulSDConfigs[i]
targetLabels, err := sdc.GetLabels(cw.cfg.baseDir) targetLabels, err := sdc.GetLabels(cw.cfg.baseDir)
if err != nil { if err != nil {
return nil, nil, fmt.Errorf("got labels err: %w", err) return nil, fmt.Errorf("got labels err: %w", err)
} }
labels = append(labels, targetLabels) labels = append(labels, targetLabels...)
alertRelabelConfigs = append(alertRelabelConfigs, alertRelabelConfig)
} }
return labels, alertRelabelConfigs, nil return labels, nil
}) })
if err != nil { if err != nil {
return fmt.Errorf("failed to start consulSD discovery: %w", err) return fmt.Errorf("failed to start consulSD discovery: %w", err)
@@ -208,21 +196,17 @@ func (cw *configWatcher) start() error {
} }
if len(cw.cfg.DNSSDConfigs) > 0 { if len(cw.cfg.DNSSDConfigs) > 0 {
err := cw.add(TargetDNS, *dns.SDCheckInterval, func() ([][]*promutil.Labels, []*promrelabel.ParsedConfigs, error) { err := cw.add(TargetDNS, *dns.SDCheckInterval, func() ([]*promutil.Labels, error) {
var labels [][]*promutil.Labels var labels []*promutil.Labels
var alertRelabelConfigs []*promrelabel.ParsedConfigs
for i := range cw.cfg.DNSSDConfigs { for i := range cw.cfg.DNSSDConfigs {
alertRelabelConfig, _ := promrelabel.ParseRelabelConfigs(cw.cfg.DNSSDConfigs[i].AlertRelabelConfigs)
sdc := &cw.cfg.DNSSDConfigs[i] sdc := &cw.cfg.DNSSDConfigs[i]
targetLabels, err := sdc.GetLabels(cw.cfg.baseDir) targetLabels, err := sdc.GetLabels(cw.cfg.baseDir)
if err != nil { if err != nil {
return nil, nil, fmt.Errorf("got labels err: %w", err) return nil, fmt.Errorf("got labels err: %w", err)
} }
labels = append(labels, targetLabels) labels = append(labels, targetLabels...)
alertRelabelConfigs = append(alertRelabelConfigs, alertRelabelConfig)
} }
return labels, alertRelabelConfigs, nil return labels, nil
}) })
if err != nil { if err != nil {
return fmt.Errorf("failed to start DNSSD discovery: %w", err) return fmt.Errorf("failed to start DNSSD discovery: %w", err)
@@ -256,30 +240,30 @@ func (cw *configWatcher) setTargets(key TargetType, targets []Target) {
cw.targetsMu.Unlock() cw.targetsMu.Unlock()
} }
func (cw *configWatcher) updateTargets(key TargetType, targetMts map[string]targetMetadata, cfg *Config, genFn AlertURLGenerator) { func (cw *configWatcher) updateTargets(key TargetType, targetMetadata map[string]*promutil.Labels, cfg *Config, genFn AlertURLGenerator) {
cw.targetsMu.Lock() cw.targetsMu.Lock()
defer cw.targetsMu.Unlock() defer cw.targetsMu.Unlock()
oldTargets := cw.targets[key] oldTargets := cw.targets[key]
var updatedTargets []Target var updatedTargets []Target
for _, ot := range oldTargets { for _, ot := range oldTargets {
if _, ok := targetMts[ot.Addr()]; !ok { if _, ok := targetMetadata[ot.Addr()]; !ok {
// if target not exists in currentTargets, close it // if target not exists in currentTargets, close it
ot.Close() ot.Close()
} else { } else {
updatedTargets = append(updatedTargets, ot) updatedTargets = append(updatedTargets, ot)
delete(targetMts, ot.Addr()) delete(targetMetadata, ot.Addr())
} }
} }
// create new resources for the new targets // create new resources for the new targets
for addr, metadata := range targetMts { for addr, labels := range targetMetadata {
am, err := NewAlertManager(addr, genFn, cfg.HTTPClientConfig, metadata.alertRelabelConfigs, cfg.Timeout.Duration()) am, err := NewAlertManager(addr, genFn, cfg.HTTPClientConfig, cfg.parsedAlertRelabelConfigs, cfg.Timeout.Duration())
if err != nil { if err != nil {
logger.Errorf("failed to init %s notifier with addr %q: %w", key, addr, err) logger.Errorf("failed to init %s notifier with addr %q: %w", key, addr, err)
continue continue
} }
updatedTargets = append(updatedTargets, Target{ updatedTargets = append(updatedTargets, Target{
Notifier: am, Notifier: am,
Labels: metadata.Labels, Labels: labels,
}) })
} }

View File

@@ -7,7 +7,6 @@ import (
"net/http/httptest" "net/http/httptest"
"os" "os"
"sync" "sync"
"sync/atomic"
"testing" "testing"
"time" "time"
@@ -29,11 +28,7 @@ static_configs:
- localhost:9093 - localhost:9093
- localhost:9094 - localhost:9094
`) `)
cfg, err := parseConfig(f.Name()) cw, err := newWatcher(f.Name(), nil)
if err != nil {
t.Fatalf("failed to parse config: %s", err)
}
cw, err := newWatcher(cfg, nil)
if err != nil { if err != nil {
t.Fatalf("failed to start config watcher: %s", err) t.Fatalf("failed to start config watcher: %s", err)
} }
@@ -88,64 +83,33 @@ consul_sd_configs:
- server: %s - server: %s
services: services:
- alertmanager - alertmanager
- server: %s `, consulSDServer.URL))
services:
- alertmanager
alert_relabel_configs:
- target_label: "foo"
replacement: "tar"
`, consulSDServer.URL, consulSDServer.URL))
cfg, err := parseConfig(consulSDFile.Name()) cw, err := newWatcher(consulSDFile.Name(), nil)
if err != nil {
t.Fatalf("failed to parse config: %s", err)
}
cw, err := newWatcher(cfg, nil)
if err != nil { if err != nil {
t.Fatalf("failed to start config watcher: %s", err) t.Fatalf("failed to start config watcher: %s", err)
} }
defer cw.mustStop() defer cw.mustStop()
if len(cw.notifiers()) != 3 { if len(cw.notifiers()) != 2 {
t.Fatalf("expected to get 3 notifiers; got %d", len(cw.notifiers())) t.Fatalf("expected to get 2 notifiers; got %d", len(cw.notifiers()))
} }
expAddr1 := fmt.Sprintf("https://%s/proxy/api/v2/alerts", fakeConsulService1) expAddr1 := fmt.Sprintf("https://%s/proxy/api/v2/alerts", fakeConsulService1)
expAddr2 := fmt.Sprintf("https://%s/proxy/api/v2/alerts", fakeConsulService2) expAddr2 := fmt.Sprintf("https://%s/proxy/api/v2/alerts", fakeConsulService2)
expAddr3 := fmt.Sprintf("https://%s/proxy/api/v2/alerts", fakeConsulService3)
n1, n2, n3 := cw.notifiers()[0], cw.notifiers()[1], cw.notifiers()[2] n1, n2 := cw.notifiers()[0], cw.notifiers()[1]
if n1.Addr() != expAddr1 { if n1.Addr() != expAddr1 {
t.Fatalf("exp address %q; got %q", expAddr1, n1.Addr()) t.Fatalf("exp address %q; got %q", expAddr1, n1.Addr())
} }
if n2.Addr() != expAddr2 { if n2.Addr() != expAddr2 {
t.Fatalf("exp address %q; got %q", expAddr2, n2.Addr()) t.Fatalf("exp address %q; got %q", expAddr2, n2.Addr())
} }
if n3.Addr() != expAddr3 {
t.Fatalf("exp address %q; got %q", expAddr3, n3.Addr())
}
if n1.(*AlertManager).relabelConfigs.String() != "" {
t.Fatalf("unexpected relabel configs: %q", n1.(*AlertManager).relabelConfigs.String())
}
if n2.(*AlertManager).relabelConfigs.String() != "" {
t.Fatalf("unexpected relabel configs: %q", n2.(*AlertManager).relabelConfigs.String())
}
if n3.(*AlertManager).relabelConfigs.String() != "- target_label: foo\n replacement: tar\n" {
t.Fatalf("unexpected relabel configs: %q", n3.(*AlertManager).relabelConfigs.String())
}
f := func() bool { return len(cw.notifiers()) == 1 } f := func() bool { return len(cw.notifiers()) == 1 }
if !waitFor(f, time.Second) { if !waitFor(f, time.Second) {
t.Fatalf("expected to get 1 notifiers; got %d", len(cw.notifiers())) t.Fatalf("expected to get 1 notifiers; got %d", len(cw.notifiers()))
} }
n3 = cw.notifiers()[0]
if n3.Addr() != expAddr3 {
t.Fatalf("exp address %q; got %q", expAddr3, n3.Addr())
}
if n3.(*AlertManager).relabelConfigs.String() != "- target_label: foo\n replacement: tar\n" {
t.Fatalf("unexpected relabel configs: %q", n3.(*AlertManager).relabelConfigs.String())
}
} }
// TestConfigWatcherReloadConcurrent supposed to test concurrent // TestConfigWatcherReloadConcurrent supposed to test concurrent
@@ -200,11 +164,7 @@ consul_sd_configs:
"unknownFields.bad.yaml", "unknownFields.bad.yaml",
} }
cfg, err := parseConfig(paths[0]) cw, err := newWatcher(paths[0], nil)
if err != nil {
t.Fatalf("failed to parse config: %s", err)
}
cw, err := newWatcher(cfg, nil)
if err != nil { if err != nil {
t.Fatalf("failed to start config watcher: %s", err) t.Fatalf("failed to start config watcher: %s", err)
} }
@@ -212,16 +172,18 @@ consul_sd_configs:
const workers = 500 const workers = 500
const iterations = 10 const iterations = 10
var wg sync.WaitGroup wg := sync.WaitGroup{}
for n := range workers { wg.Add(workers)
wg.Go(func() { for i := 0; i < workers; i++ {
go func(n int) {
defer wg.Done()
r := rand.New(rand.NewSource(int64(n))) r := rand.New(rand.NewSource(int64(n)))
for range iterations { for i := 0; i < iterations; i++ {
rnd := r.Intn(len(paths)) rnd := r.Intn(len(paths))
_ = cw.reload(paths[rnd]) // update can fail and this is expected _ = cw.reload(paths[rnd]) // update can fail and this is expected
_ = cw.notifiers() _ = cw.notifiers()
} }
}) }(i)
} }
wg.Wait() wg.Wait()
} }
@@ -240,11 +202,10 @@ func checkErr(t *testing.T, err error) {
const ( const (
fakeConsulService1 = "127.0.0.1:9093" fakeConsulService1 = "127.0.0.1:9093"
fakeConsulService2 = "127.0.0.1:9095" fakeConsulService2 = "127.0.0.1:9095"
fakeConsulService3 = "127.0.0.1:9097"
) )
func newFakeConsulServer() *httptest.Server { func newFakeConsulServer() *httptest.Server {
var requestCount atomic.Int32 requestCount := 0
mux := http.NewServeMux() mux := http.NewServeMux()
mux.HandleFunc("/v1/agent/self", func(rw http.ResponseWriter, _ *http.Request) { mux.HandleFunc("/v1/agent/self", func(rw http.ResponseWriter, _ *http.Request) {
rw.Write([]byte(`{"Config": {"Datacenter": "dc1"}}`)) rw.Write([]byte(`{"Config": {"Datacenter": "dc1"}}`))
@@ -259,7 +220,7 @@ func newFakeConsulServer() *httptest.Server {
}`)) }`))
}) })
mux.HandleFunc("/v1/health/service/alertmanager", func(rw http.ResponseWriter, _ *http.Request) { mux.HandleFunc("/v1/health/service/alertmanager", func(rw http.ResponseWriter, _ *http.Request) {
if requestCount.Load() == 0 { if requestCount == 0 {
rw.Header().Set("X-Consul-Index", "1") rw.Header().Set("X-Consul-Index", "1")
rw.Write([]byte(` rw.Write([]byte(`
[ [
@@ -399,7 +360,7 @@ func newFakeConsulServer() *httptest.Server {
} }
]`)) ]`))
} }
requestCount.Add(1) requestCount++
}) })
return httptest.NewServer(mux) return httptest.NewServer(mux)

View File

@@ -5,8 +5,6 @@ import (
"fmt" "fmt"
"sync" "sync"
"time" "time"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
) )
// FakeNotifier is a mock notifier // FakeNotifier is a mock notifier
@@ -17,32 +15,14 @@ type FakeNotifier struct {
counter int counter int
} }
// InitFakeNotifier initializes global notifier to FakeNotifier,
// and returns a cleanup function to restore the original getActiveNotifiers.
func InitFakeNotifier() (*FakeNotifier, func()) {
originalGetActiveNotifiers := getActiveNotifiers
fn := &FakeNotifier{}
getActiveNotifiers = func() []Notifier {
return []Notifier{fn}
}
return fn, func() {
getActiveNotifiers = originalGetActiveNotifiers
}
}
// Close does nothing // Close does nothing
func (*FakeNotifier) Close() {} func (*FakeNotifier) Close() {}
// LastError returns last error message
func (*FakeNotifier) LastError() string {
return ""
}
// Addr returns "" // Addr returns ""
func (*FakeNotifier) Addr() string { return "" } func (*FakeNotifier) Addr() string { return "" }
// Send sets alerts and increases counter // Send sets alerts and increases counter
func (fn *FakeNotifier) Send(_ context.Context, alerts []Alert, _ [][]prompb.Label, _ map[string]string) error { func (fn *FakeNotifier) Send(_ context.Context, alerts []Alert, _ map[string]string) error {
fn.Lock() fn.Lock()
defer fn.Unlock() defer fn.Unlock()
fn.counter += len(alerts) fn.counter += len(alerts)

View File

@@ -1,22 +1,14 @@
package notifier package notifier
import ( import (
"context"
"flag" "flag"
"fmt" "fmt"
"net/url" "net/url"
"strconv"
"strings" "strings"
"sync"
"time" "time"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil" "github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httputil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth" "github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutil" "github.com/VictoriaMetrics/VictoriaMetrics/lib/promutil"
) )
@@ -65,61 +57,11 @@ var (
sendTimeout = flagutil.NewArrayDuration("notifier.sendTimeout", 10*time.Second, "Timeout when sending alerts to the corresponding -notifier.url") sendTimeout = flagutil.NewArrayDuration("notifier.sendTimeout", 10*time.Second, "Timeout when sending alerts to the corresponding -notifier.url")
) )
// AlertURLGeneratorFn returns a URL to the passed alert object. // cw holds a configWatcher for configPath configuration file
// Call InitAlertURLGeneratorFn before using this function. // configWatcher provides a list of Notifier objects discovered
var AlertURLGeneratorFn AlertURLGenerator // from static config or via service discovery.
// cw is not nil only if configPath is provided.
// InitAlertURLGeneratorFn populates AlertURLGeneratorFn var cw *configWatcher
func InitAlertURLGeneratorFn(externalURL *url.URL, externalAlertSource string, validateTemplate bool) error {
if externalAlertSource == "" {
AlertURLGeneratorFn = func(a Alert) string {
gID, aID := strconv.FormatUint(a.GroupID, 10), strconv.FormatUint(a.ID, 10)
return fmt.Sprintf("%s/vmalert/alert?%s=%s&%s=%s", externalURL, "group_id", gID, "alert_id", aID)
}
return nil
}
if validateTemplate {
if err := ValidateTemplates(map[string]string{
"tpl": externalAlertSource,
}); err != nil {
return fmt.Errorf("error validating source template %s: %w", externalAlertSource, err)
}
}
m := map[string]string{
"tpl": externalAlertSource,
}
AlertURLGeneratorFn = func(alert Alert) string {
qFn := func(_ string) ([]datasource.Metric, error) {
return nil, fmt.Errorf("`query` template isn't supported for alert source template")
}
templated, err := alert.ExecTemplate(qFn, alert.Labels, m)
if err != nil {
logger.Errorf("cannot template alert source: %s", err)
}
return fmt.Sprintf("%s/%s", externalURL, templated["tpl"])
}
return nil
}
var (
// getActiveNotifiers returns the current list of Notifier objects.
getActiveNotifiers func() []Notifier
// globalRelabelCfg stores the parsed alert relabeling config from the config file if there is
globalRelabelCfg *promrelabel.ParsedConfigs
// cw holds a configWatcher for configPath configuration file
// configWatcher provides a list of Notifier objects discovered
// from static config or via service discovery.
// cw is not nil only if configPath is provided.
cw *configWatcher
// externalLabels is a global variable for holding external labels configured via flags
// It is supposed to be inited via Init function only.
externalLabels map[string]string
// externalURL is a global variable for holding external URL value configured via flag
// It is supposed to be inited via Init function only.
externalURL string
)
// Reload checks the changes in configPath configuration file // Reload checks the changes in configPath configuration file
// and applies changes if any. // and applies changes if any.
@@ -130,62 +72,66 @@ func Reload() error {
return cw.reload(*configPath) return cw.reload(*configPath)
} }
var staticNotifiersFn func() []Notifier
var (
// externalLabels is a global variable for holding external labels configured via flags
// It is supposed to be inited via Init function only.
externalLabels map[string]string
// externalURL is a global variable for holding external URL value configured via flag
// It is supposed to be inited via Init function only.
externalURL string
)
// Init returns a function for retrieving actual list of Notifier objects.
// Init works in two mods: // Init works in two mods:
// - configuration via flags (for backward compatibility). Is always static // - configuration via flags (for backward compatibility). Is always static
// and don't support live reloads. // and don't support live reloads.
// - configuration via file. Supports live reloads and service discovery. // - configuration via file. Supports live reloads and service discovery.
// //
// Init returns an error if both mods are used. // Init returns an error if both mods are used.
func Init(extLabels map[string]string, extURL string) error { func Init(gen AlertURLGenerator, extLabels map[string]string, extURL string) (func() []Notifier, error) {
externalURL = extURL externalURL = extURL
externalLabels = extLabels externalLabels = extLabels
_, err := url.Parse(externalURL) _, err := url.Parse(externalURL)
if err != nil { if err != nil {
return fmt.Errorf("failed to parse external URL: %w", err) return nil, fmt.Errorf("failed to parse external URL: %w", err)
} }
if *blackHole { if *blackHole {
if len(*addrs) > 0 || *configPath != "" { if len(*addrs) > 0 || *configPath != "" {
return fmt.Errorf("only one of -notifier.blackhole, -notifier.url and -notifier.config flags must be specified") return nil, fmt.Errorf("only one of -notifier.blackhole, -notifier.url and -notifier.config flags must be specified")
} }
notifier := newBlackHoleNotifier() notifier := newBlackHoleNotifier()
getActiveNotifiers = func() []Notifier { staticNotifiersFn = func() []Notifier {
return []Notifier{notifier} return []Notifier{notifier}
} }
return nil return staticNotifiersFn, nil
} }
if *configPath == "" && len(*addrs) == 0 { if *configPath == "" && len(*addrs) == 0 {
return nil return nil, nil
} }
if *configPath != "" && len(*addrs) > 0 { if *configPath != "" && len(*addrs) > 0 {
return fmt.Errorf("only one of -notifier.config or -notifier.url flags must be specified") return nil, fmt.Errorf("only one of -notifier.config or -notifier.url flags must be specified")
} }
if len(*addrs) > 0 { if len(*addrs) > 0 {
notifiers, err := notifiersFromFlags(AlertURLGeneratorFn) notifiers, err := notifiersFromFlags(gen)
if err != nil { if err != nil {
return fmt.Errorf("failed to create notifier from flag values: %w", err) return nil, fmt.Errorf("failed to create notifier from flag values: %w", err)
} }
getActiveNotifiers = func() []Notifier { staticNotifiersFn = func() []Notifier {
return notifiers return notifiers
} }
return nil return staticNotifiersFn, nil
} }
cfg, err := parseConfig(*configPath) cw, err = newWatcher(*configPath, gen)
if err != nil { if err != nil {
return err return nil, fmt.Errorf("failed to init config watcher: %w", err)
} }
if cfg.AlertRelabelConfigs != nil { return cw.notifiers, nil
globalRelabelCfg = cfg.parsedAlertRelabelConfigs
}
cw, err = newWatcher(cfg, AlertURLGeneratorFn)
if err != nil {
return fmt.Errorf("failed to init config watcher: %w", err)
}
getActiveNotifiers = cw.notifiers
return nil
} }
// InitSecretFlags must be called after flag.Parse and before any logging // InitSecretFlags must be called after flag.Parse and before any logging
@@ -229,9 +175,6 @@ func notifiersFromFlags(gen AlertURLGenerator) ([]Notifier, error) {
Headers: []string{headers.GetOptionalArg(i)}, Headers: []string{headers.GetOptionalArg(i)},
} }
if err := httputil.CheckURL(addr); err != nil {
return nil, fmt.Errorf("invalid notifier.url %q: %w", addr, err)
}
addr = strings.TrimSuffix(addr, "/") addr = strings.TrimSuffix(addr, "/")
am, err := NewAlertManager(addr+alertManagerPath, gen, authCfg, nil, sendTimeout.GetOptionalArg(i)) am, err := NewAlertManager(addr+alertManagerPath, gen, authCfg, nil, sendTimeout.GetOptionalArg(i))
if err != nil { if err != nil {
@@ -263,58 +206,23 @@ const (
// GetTargets returns list of static or discovered targets // GetTargets returns list of static or discovered targets
// via notifier configuration. // via notifier configuration.
//
// Must be called after Init.
func GetTargets() map[TargetType][]Target { func GetTargets() map[TargetType][]Target {
if getActiveNotifiers == nil { var targets = make(map[TargetType][]Target)
return nil
if staticNotifiersFn != nil {
for _, ns := range staticNotifiersFn() {
targets[TargetStatic] = append(targets[TargetStatic], Target{
Notifier: ns,
})
}
} }
targets := make(map[TargetType][]Target)
// use cached targets from configWatcher instead of getActiveNotifiers for the extra target labels
if cw != nil { if cw != nil {
cw.targetsMu.RLock() cw.targetsMu.RLock()
for key, ns := range cw.targets { for key, ns := range cw.targets {
targets[key] = append(targets[key], ns...) targets[key] = append(targets[key], ns...)
} }
cw.targetsMu.RUnlock() cw.targetsMu.RUnlock()
return targets
}
// static notifiers don't have labels
for _, ns := range getActiveNotifiers() {
targets[TargetStatic] = append(targets[TargetStatic], Target{
Notifier: ns,
})
} }
return targets return targets
} }
// Send sends alerts to all active notifiers
func Send(ctx context.Context, alerts []Alert, notifierHeaders map[string]string) chan error {
alertsToSend := make([]Alert, 0, len(alerts))
lblss := make([][]prompb.Label, 0, len(alerts))
// apply global relabel config first without modifying original alerts in alerts
for _, a := range alerts {
lbls := a.applyRelabelingIfNeeded(globalRelabelCfg)
if len(lbls) == 0 {
continue
}
alertsToSend = append(alertsToSend, a)
lblss = append(lblss, lbls)
}
wg := sync.WaitGroup{}
activeNotifiers := getActiveNotifiers()
errCh := make(chan error, len(activeNotifiers))
defer close(errCh)
for i := range activeNotifiers {
nt := activeNotifiers[i]
wg.Go(func() {
if err := nt.Send(ctx, alertsToSend, lblss, notifierHeaders); err != nil {
errCh <- fmt.Errorf("failed to send alerts to addr %q: %w", nt.Addr(), err)
}
})
}
wg.Wait()
return errCh
}

View File

@@ -1,17 +1,9 @@
package notifier package notifier
import ( import (
"context"
"encoding/json"
"fmt"
"net/http"
"net/http/httptest"
"net/url"
"os"
"testing" "testing"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil" "github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
) )
func TestInit(t *testing.T) { func TestInit(t *testing.T) {
@@ -20,13 +12,14 @@ func TestInit(t *testing.T) {
*addrs = flagutil.ArrayString{"127.0.0.1", "127.0.0.2"} *addrs = flagutil.ArrayString{"127.0.0.1", "127.0.0.2"}
err := Init(nil, "") fn, err := Init(nil, nil, "")
if err != nil { if err != nil {
t.Fatalf("%s", err) t.Fatalf("%s", err)
} }
if len(getActiveNotifiers()) != 2 { nfs := fn()
t.Fatalf("expected to get 2 notifiers; got %d", len(getActiveNotifiers())) if len(nfs) != 2 {
t.Fatalf("expected to get 2 notifiers; got %d", len(nfs))
} }
targets := GetTargets() targets := GetTargets()
@@ -55,22 +48,19 @@ func TestInitNegative(t *testing.T) {
*blackHole = oldBlackHole *blackHole = oldBlackHole
}() }()
f := func(path string, addr []string, bh bool) { f := func(path, addr string, bh bool) {
*configPath = path *configPath = path
*addrs = flagutil.ArrayString(addr) *addrs = flagutil.ArrayString{addr}
*blackHole = bh *blackHole = bh
if err := Init(nil, ""); err == nil { if _, err := Init(nil, nil, ""); err == nil {
t.Fatalf("expected to get error; got nil instead") t.Fatalf("expected to get error; got nil instead")
} }
} }
// *configPath, *addrs and *blackhole are mutually exclusive // *configPath, *addrs and *blackhole are mutually exclusive
f("/dummy/path", []string{"127.0.0.1"}, false) f("/dummy/path", "127.0.0.1", false)
f("/dummy/path", []string{}, true) f("/dummy/path", "", true)
f("", []string{"127.0.0.1"}, true) f("", "127.0.0.1", true)
// addr cannot be ""
f("", []string{""}, false)
f("", []string{"127.0.0.1", ""}, false)
} }
func TestBlackHole(t *testing.T) { func TestBlackHole(t *testing.T) {
@@ -79,13 +69,14 @@ func TestBlackHole(t *testing.T) {
*blackHole = true *blackHole = true
err := Init(nil, "") fn, err := Init(nil, nil, "")
if err != nil { if err != nil {
t.Fatalf("%s", err) t.Fatalf("%s", err)
} }
if len(getActiveNotifiers()) != 1 { nfs := fn()
t.Fatalf("expected to get 1 notifier; got %d", len(getActiveNotifiers())) if len(nfs) != 1 {
t.Fatalf("expected to get 1 notifier; got %d", len(nfs))
} }
targets := GetTargets() targets := GetTargets()
@@ -100,114 +91,3 @@ func TestBlackHole(t *testing.T) {
t.Fatalf("expected to get \"blackhole\"; got %q instead", nf1.Addr()) t.Fatalf("expected to get \"blackhole\"; got %q instead", nf1.Addr())
} }
} }
func TestGetAlertURLGenerator(t *testing.T) {
oldAlertURLGeneratorFn := AlertURLGeneratorFn
defer func() { AlertURLGeneratorFn = oldAlertURLGeneratorFn }()
testAlert := Alert{GroupID: 42, ID: 2, Value: 4, Labels: map[string]string{"tenant": "baz"}}
u, _ := url.Parse("https://victoriametrics.com/path")
err := InitAlertURLGeneratorFn(u, "", false)
if err != nil {
t.Fatalf("unexpected error %s", err)
}
exp := fmt.Sprintf("https://victoriametrics.com/path/vmalert/alert?%s=42&%s=2", "group_id", "alert_id")
if exp != AlertURLGeneratorFn(testAlert) {
t.Fatalf("unexpected url want %s, got %s", exp, AlertURLGeneratorFn(testAlert))
}
err = InitAlertURLGeneratorFn(nil, "foo?{{invalid}}", true)
if err == nil {
t.Fatalf("expected template validation error got nil")
}
err = InitAlertURLGeneratorFn(u, "foo?query={{$value}}&ds={{ $labels.tenant }}", true)
if err != nil {
t.Fatalf("unexpected error %s", err)
}
if exp := "https://victoriametrics.com/path/foo?query=4&ds=baz"; exp != AlertURLGeneratorFn(testAlert) {
t.Fatalf("unexpected url want %s, got %s", exp, AlertURLGeneratorFn(testAlert))
}
}
func TestSendAlerts(t *testing.T) {
oldAlertURLGeneratorFn := AlertURLGeneratorFn
defer func() { AlertURLGeneratorFn = oldAlertURLGeneratorFn }()
AlertURLGeneratorFn = func(alert Alert) string {
return ""
}
mux := http.NewServeMux()
mux.HandleFunc("/", func(_ http.ResponseWriter, _ *http.Request) {
t.Fatalf("should not be called")
})
mux.HandleFunc(alertManagerPath, func(w http.ResponseWriter, r *http.Request) {
var a []struct {
Labels map[string]string `json:"labels"`
}
if err := json.NewDecoder(r.Body).Decode(&a); err != nil {
t.Fatalf("can not unmarshal data into alert %s", err)
}
if len(a) != 2 {
t.Fatalf("expected 2 alert in array got %d", len(a))
}
if len(a[0].Labels) != 4 {
t.Fatalf("expected 4 labels got %d", len(a[0].Labels))
}
if a[0].Labels["env"] != "prod" {
t.Fatalf("expected env label to be prod during relabeling, got %s", a[0].Labels["env"])
}
if a[0].Labels["c"] != "baz" {
t.Fatalf("expected c label to be baz during relabeling, got %s", a[0].Labels["c"])
}
if len(a[1].Labels) != 1 {
t.Fatalf("expected 1 labels got %d", len(a[1].Labels))
}
})
srv := httptest.NewServer(mux)
defer srv.Close()
f, err := os.CreateTemp("", "")
if err != nil {
t.Fatal(err)
}
defer fs.MustRemovePath(f.Name())
rawConfig := `
static_configs:
- targets:
- %s
alert_relabel_configs:
- source_labels: [b]
target_label: "c"
alert_relabel_configs:
- source_labels: [a]
target_label: "b"
- target_label: "env"
replacement: "prod"
`
config := fmt.Sprintf(rawConfig, srv.URL+alertManagerPath)
writeToFile(f.Name(), config)
oldConfigPath := configPath
defer func() { configPath = oldConfigPath }()
*configPath = f.Name()
err = Init(nil, "")
if err != nil {
t.Fatalf("unexpected error when parse notifier config: %s", err)
}
firingAlerts := []Alert{
{
Name: "alert1",
Labels: map[string]string{"a": "baz"},
},
{
Name: "alert2",
Labels: map[string]string{},
},
}
errG := Send(context.Background(), firingAlerts, nil)
for err := range errG {
if err != nil {
t.Errorf("unexpected error when sending alerts: %s", err)
}
}
}

View File

@@ -1,21 +1,15 @@
package notifier package notifier
import ( import "context"
"context"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
)
// Notifier is a common interface for alert manager provider // Notifier is a common interface for alert manager provider
type Notifier interface { type Notifier interface {
// Send sends the given list of alerts. // Send sends the given list of alerts.
// Returns an error if fails to send the alerts. // Returns an error if fails to send the alerts.
// Must unblock if the given ctx is cancelled. // Must unblock if the given ctx is cancelled.
Send(ctx context.Context, alerts []Alert, alertLabels [][]prompb.Label, notifierHeaders map[string]string) error Send(ctx context.Context, alerts []Alert, notifierHeaders map[string]string) error
// Addr returns address where alerts are sent. // Addr returns address where alerts are sent.
Addr() string Addr() string
// LastError returns error, that occured during last attempt to send data
LastError() string
// Close is a destructor for the Notifier // Close is a destructor for the Notifier
Close() Close()
} }

View File

@@ -1,10 +1,6 @@
package notifier package notifier
import ( import "context"
"context"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
)
// blackHoleNotifier is a Notifier stub, used when no notifications need // blackHoleNotifier is a Notifier stub, used when no notifications need
// to be sent. // to be sent.
@@ -14,7 +10,7 @@ type blackHoleNotifier struct {
} }
// Send will send no notifications, but increase the metric. // Send will send no notifications, but increase the metric.
func (bh *blackHoleNotifier) Send(_ context.Context, alerts []Alert, _ [][]prompb.Label, _ map[string]string) error { //nolint:revive func (bh *blackHoleNotifier) Send(_ context.Context, alerts []Alert, _ map[string]string) error { //nolint:revive
bh.metrics.alertsSent.Add(len(alerts)) bh.metrics.alertsSent.Add(len(alerts))
return nil return nil
} }
@@ -29,11 +25,6 @@ func (bh *blackHoleNotifier) Close() {
bh.metrics.close() bh.metrics.close()
} }
// LastError return last notifier's error
func (bh *blackHoleNotifier) LastError() string {
return ""
}
// newBlackHoleNotifier creates a new blackHoleNotifier // newBlackHoleNotifier creates a new blackHoleNotifier
func newBlackHoleNotifier() *blackHoleNotifier { func newBlackHoleNotifier() *blackHoleNotifier {
address := "blackhole" address := "blackhole"

View File

@@ -5,7 +5,6 @@ import (
"testing" "testing"
"time" "time"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
metricset "github.com/VictoriaMetrics/metrics" metricset "github.com/VictoriaMetrics/metrics"
) )
@@ -17,7 +16,7 @@ func TestBlackHoleNotifier_Send(t *testing.T) {
Start: time.Now().UTC(), Start: time.Now().UTC(),
End: time.Now().UTC(), End: time.Now().UTC(),
Annotations: map[string]string{"a": "b", "c": "d", "e": "f"}, Annotations: map[string]string{"a": "b", "c": "d", "e": "f"},
}}, [][]prompb.Label{{}}, nil); err != nil { }}, nil); err != nil {
t.Fatalf("unexpected error %s", err) t.Fatalf("unexpected error %s", err)
} }
@@ -35,7 +34,7 @@ func TestBlackHoleNotifier_Close(t *testing.T) {
Start: time.Now().UTC(), Start: time.Now().UTC(),
End: time.Now().UTC(), End: time.Now().UTC(),
Annotations: map[string]string{"a": "b", "c": "d", "e": "f"}, Annotations: map[string]string{"a": "b", "c": "d", "e": "f"},
}}, [][]prompb.Label{{}}, nil); err != nil { }}, nil); err != nil {
t.Fatalf("unexpected error %s", err) t.Fatalf("unexpected error %s", err)
} }

View File

@@ -1,19 +0,0 @@
consul_sd_configs:
- server: localhost:8500
scheme: http
services:
- alertmanager
alert_relabel_configs:
- action: keep
source_labels: [env]
regex: "prod"
- server: localhost:8500
services:
- consul
alert_relabel_configs:
- action: keep
source_labels: [env]
regex: "(abc"
alert_relabel_configs:
- target_label: "foo"
replacement: "aaa"

View File

@@ -1,13 +0,0 @@
dns_sd_configs:
- names:
- cloudflare.com
type: 'A'
port: 9093
relabel_configs:
- source_labels: [__meta_dns_name]
replacement: '${1}'
target_label: dns_name
alert_relabel_configs:
- action: keep
source_labels: [env]
regex: "(abc"

View File

@@ -2,19 +2,12 @@ static_configs:
- targets: - targets:
- localhost:9093 - localhost:9093
- localhost:9095 - localhost:9095
alert_relabel_configs:
- action: keep
source_labels: [env]
regex: "static"
consul_sd_configs: consul_sd_configs:
- server: localhost:8500 - server: localhost:8500
scheme: http scheme: http
services: services:
- alertmanager - alertmanager
alert_relabel_configs:
- action: keep
source_labels: [env]
regex: "consul"
- server: localhost:8500 - server: localhost:8500
services: services:
- consul - consul
@@ -24,10 +17,6 @@ dns_sd_configs:
- cloudflare.com - cloudflare.com
type: 'A' type: 'A'
port: 9093 port: 9093
alert_relabel_configs:
- action: keep
source_labels: [env]
regex: "dns"
relabel_configs: relabel_configs:
- source_labels: [__meta_consul_tags] - source_labels: [__meta_consul_tags]
@@ -36,4 +25,4 @@ relabel_configs:
target_label: __scheme__ target_label: __scheme__
- source_labels: [__meta_dns_name] - source_labels: [__meta_dns_name]
replacement: '${1}' replacement: '${1}'
target_label: dns_name target_label: dns_name

View File

@@ -1,14 +1,22 @@
headers:
- 'CustomHeader: foo'
static_configs: static_configs:
- targets: - targets:
- http://192.168.0.101:9093 - localhost:9093
alert_relabel_configs: - localhost:9095
- target_label: "foo" - https://localhost:9093/test/api/v2/alerts
replacement: "aaa" basic_auth:
username: foo
password: bar
- targets: - targets:
- http://192.168.0.101:9093 - localhost:9096
alert_relabel_configs: - localhost:9097
- target_label: "foo" basic_auth:
replacement: "ccc" username: foo
password: baz
alert_relabel_configs:
- target_label: "foo"
replacement: "aaa"

View File

@@ -1,19 +0,0 @@
package notifier
type (
	// ApiNotifier is the WEB-view representation of a Notifier configuration:
	// the notifier kind together with every target that belongs to it.
	ApiNotifier struct {
		// Kind holds the Notifier type.
		Kind TargetType `json:"kind"`
		// Targets lists the targets of this Notifier.
		Targets []*ApiTarget `json:"targets"`
	}

	// ApiTarget is the WEB-view representation of a single Notifier target.
	ApiTarget struct {
		// Address is the URL notifications are sent to.
		Address string `json:"address"`
		// Labels are the labels added to each sent notification.
		Labels map[string]string `json:"labels"`
		// LastError holds the error faced while sending to the notifier.
		LastError string `json:"lastError"`
	}
)

View File

@@ -14,9 +14,9 @@ import (
) )
var ( var (
addr = flag.String("remoteRead.url", "", "Optional URL to datasource compatible with MetricsQL. It can be single node VictoriaMetrics or vmselect. "+ addr = flag.String("remoteRead.url", "", "Optional URL to datasource compatible with MetricsQL. It can be single node VictoriaMetrics or vmselect."+
"Remote read is used to restore alerts state. "+ "Remote read is used to restore alerts state."+
"This configuration makes sense only if vmalert was configured with '-remoteWrite.url' before and has been successfully persisted its state. "+ "This configuration makes sense only if `vmalert` was configured with `remoteWrite.url` before and has been successfully persisted its state. "+
"Supports address in the form of IP address with a port (e.g., http://127.0.0.1:8428) or DNS SRV record. "+ "Supports address in the form of IP address with a port (e.g., http://127.0.0.1:8428) or DNS SRV record. "+
"See also '-remoteRead.disablePathAppend', '-remoteRead.showURL'.") "See also '-remoteRead.disablePathAppend', '-remoteRead.showURL'.")

View File

@@ -113,7 +113,7 @@ func NewClient(ctx context.Context, cfg Config) (*Client, error) {
input: make(chan prompb.TimeSeries, cfg.MaxQueueSize), input: make(chan prompb.TimeSeries, cfg.MaxQueueSize),
} }
for range cc { for i := 0; i < cc; i++ {
c.run(ctx) c.run(ctx)
} }
return c, nil return c, nil
@@ -173,8 +173,9 @@ func (c *Client) run(ctx context.Context) {
cancel() cancel()
} }
c.wg.Add(1)
c.wg.Go(func() { go func() {
defer c.wg.Done()
defer ticker.Stop() defer ticker.Stop()
for { for {
select { select {
@@ -196,7 +197,7 @@ func (c *Client) run(ctx context.Context) {
} }
} }
} }
}) }()
} }
var ( var (
@@ -238,10 +239,8 @@ func (c *Client) flush(ctx context.Context, wr *prompb.WriteRequest) {
defer func() { defer func() {
sendDuration.Add(time.Since(timeStart).Seconds()) sendDuration.Add(time.Since(timeStart).Seconds())
}() }()
attempts := 0
L: L:
for { for attempts := 0; ; attempts++ {
err := c.send(ctx, b) err := c.send(ctx, b)
if err != nil && (errors.Is(err, io.EOF) || netutil.IsTrivialNetworkError(err)) { if err != nil && (errors.Is(err, io.EOF) || netutil.IsTrivialNetworkError(err)) {
// Something in the middle between client and destination might be closing // Something in the middle between client and destination might be closing
@@ -283,7 +282,6 @@ L:
time.Sleep(retryInterval) time.Sleep(retryInterval)
retryInterval *= 2 retryInterval *= 2
attempts++
} }
rwErrors.Inc() rwErrors.Inc()

View File

@@ -44,7 +44,7 @@ func TestClient_Push(t *testing.T) {
r := rand.New(rand.NewSource(1)) r := rand.New(rand.NewSource(1))
const rowsN = int(1e4) const rowsN = int(1e4)
for range rowsN { for i := 0; i < rowsN; i++ {
s := prompb.TimeSeries{ s := prompb.TimeSeries{
Samples: []prompb.Sample{{ Samples: []prompb.Sample{{
Value: r.Float64(), Value: r.Float64(),
@@ -102,7 +102,7 @@ func TestClient_run_maxBatchSizeDuringShutdown(t *testing.T) {
} }
// push time series to the client. // push time series to the client.
for range pushCnt { for i := 0; i < pushCnt; i++ {
if err = rwClient.Push(prompb.TimeSeries{}); err != nil { if err = rwClient.Push(prompb.TimeSeries{}); err != nil {
t.Fatalf("cannot time series to the client: %s", err) t.Fatalf("cannot time series to the client: %s", err)
} }

View File

@@ -22,7 +22,7 @@ func TestDebugClient_Push(t *testing.T) {
const rowsN = 100 const rowsN = 100
var sent int var sent int
for i := range rowsN { for i := 0; i < rowsN; i++ {
s := prompb.TimeSeries{ s := prompb.TimeSeries{
Samples: []prompb.Sample{{ Samples: []prompb.Sample{{
Value: float64(i), Value: float64(i),

View File

@@ -2,7 +2,6 @@ package rule
import ( import (
"context" "context"
"errors"
"fmt" "fmt"
"hash/fnv" "hash/fnv"
"math" "math"
@@ -188,54 +187,6 @@ func (ar *AlertingRule) ID() uint64 {
return ar.RuleID return ar.RuleID
} }
// ToAPI builds the ApiRule representation of ar.
//
// Evaluation details (time, duration, samples, fetched series, error) are
// taken from the most recent entry of the rule state. Health is "ok" unless
// that entry carries an error, in which case it is "err" and LastError is set.
// State is "inactive" when the rule has no alerts; otherwise it is the firing
// state if at least one alert is firing and the pending state if none is.
func (ar *AlertingRule) ToAPI() ApiRule {
	state := ar.state
	last := state.getLast()

	apiRule := ApiRule{
		Type:              TypeAlerting,
		DatasourceType:    ar.Type.String(),
		Name:              ar.Name,
		Query:             ar.Expr,
		Duration:          ar.For.Seconds(),
		KeepFiringFor:     ar.KeepFiringFor.Seconds(),
		Labels:            ar.Labels,
		Annotations:       ar.Annotations,
		LastEvaluation:    last.Time,
		EvaluationTime:    last.Duration.Seconds(),
		Health:            "ok",
		State:             "inactive",
		Alerts:            ar.AlertsToAPI(),
		LastSamples:       last.Samples,
		LastSeriesFetched: last.SeriesFetched,
		MaxUpdates:        state.size(),
		Updates:           state.getAll(),
		Debug:             ar.Debug,

		// IDs are encoded as strings to avoid rounding in JSON.
		ID:        fmt.Sprintf("%d", ar.ID()),
		GroupID:   fmt.Sprintf("%d", ar.GroupID),
		GroupName: ar.GroupName,
		File:      ar.File,
	}

	if evalErr := last.Err; evalErr != nil {
		apiRule.LastError = evalErr.Error()
		apiRule.Health = "err"
	}

	// A rule without alerts keeps the "inactive" state set above.
	if len(apiRule.Alerts) == 0 {
		return apiRule
	}

	// Satisfy the apiRule.State logic: pending by default,
	// promoted to firing as soon as a single firing alert is found.
	firing := notifier.StateFiring.String()
	apiRule.State = notifier.StatePending.String()
	for i := range apiRule.Alerts {
		if apiRule.Alerts[i].State == firing {
			apiRule.State = firing
			break
		}
	}
	return apiRule
}
// GetAlerts returns active alerts of rule // GetAlerts returns active alerts of rule
func (ar *AlertingRule) GetAlerts() []*notifier.Alert { func (ar *AlertingRule) GetAlerts() []*notifier.Alert {
ar.alertsMu.RLock() ar.alertsMu.RLock()
@@ -247,6 +198,16 @@ func (ar *AlertingRule) GetAlerts() []*notifier.Alert {
return alerts return alerts
} }
// GetAlert looks up the alert stored under the given id.
// It returns nil when no alert is registered for id, including
// the case when the alerts map hasn't been initialized yet.
func (ar *AlertingRule) GetAlert(id uint64) *notifier.Alert {
	ar.alertsMu.RLock()
	// Indexing a nil map is safe and yields the zero value (nil),
	// so an uninitialized map needs no separate check.
	found := ar.alerts[id]
	ar.alertsMu.RUnlock()
	return found
}
func (ar *AlertingRule) logDebugf(at time.Time, a *notifier.Alert, format string, args ...any) { func (ar *AlertingRule) logDebugf(at time.Time, a *notifier.Alert, format string, args ...any) {
if !ar.Debug { if !ar.Debug {
return return
@@ -312,11 +273,6 @@ type labelSet struct {
// On k conflicts in origin set, the original value is preferred and copied // On k conflicts in origin set, the original value is preferred and copied
// to processed with `exported_%k` key. The copy happens only if passed v isn't equal to origin[k] value. // to processed with `exported_%k` key. The copy happens only if passed v isn't equal to origin[k] value.
func (ls *labelSet) add(k, v string) { func (ls *labelSet) add(k, v string) {
// do not add label with empty value, since it has no meaning.
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9984
if v == "" {
return
}
ls.processed[k] = v ls.processed[k] = v
ov, ok := ls.origin[k] ov, ok := ls.origin[k]
if !ok { if !ok {
@@ -346,13 +302,14 @@ func (ar *AlertingRule) toLabels(m datasource.Metric, qFn templates.QueryFn) (*l
ls.processed[l.Name] = l.Value ls.processed[l.Name] = l.Value
} }
// labels only support limited templating variables,
// including `labels`, `value` and `expr`, to avoid breaking alert states or causing cardinality issue with results
extraLabels, err := notifier.ExecTemplate(qFn, ar.Labels, notifier.AlertTplData{ extraLabels, err := notifier.ExecTemplate(qFn, ar.Labels, notifier.AlertTplData{
Labels: ls.origin, Labels: ls.origin,
Value: m.Values[0], Value: m.Values[0],
Expr: ar.Expr, Expr: ar.Expr,
}) })
if err != nil {
return nil, fmt.Errorf("failed to expand labels: %w", err)
}
for k, v := range extraLabels { for k, v := range extraLabels {
ls.add(k, v) ls.add(k, v)
} }
@@ -363,7 +320,7 @@ func (ar *AlertingRule) toLabels(m datasource.Metric, qFn templates.QueryFn) (*l
if !*disableAlertGroupLabel && ar.GroupName != "" { if !*disableAlertGroupLabel && ar.GroupName != "" {
ls.add(alertGroupNameLabel, ar.GroupName) ls.add(alertGroupNameLabel, ar.GroupName)
} }
return ls, err return ls, nil
} }
// execRange executes alerting rule on the given time range similarly to exec. // execRange executes alerting rule on the given time range similarly to exec.
@@ -384,12 +341,16 @@ func (ar *AlertingRule) execRange(ctx context.Context, start, end time.Time) ([]
return []datasource.Metric{{Timestamps: []int64{0}, Values: []float64{math.NaN()}}}, nil return []datasource.Metric{{Timestamps: []int64{0}, Values: []float64{math.NaN()}}}, nil
} }
for _, s := range res.Data { for _, s := range res.Data {
ls, err := ar.expandLabelTemplates(s, qFn) ls, err := ar.expandLabelTemplates(s)
if err != nil { if err != nil {
return nil, err return nil, err
} }
alertID := hash(ls.processed) alertID := hash(ls.processed)
a := ar.newAlert(s, time.Time{}, ls.processed, nil) // initial alert as, err := ar.expandAnnotationTemplates(s, qFn, time.Time{}, ls)
if err != nil {
return nil, err
}
a := ar.newAlert(s, time.Time{}, ls.processed, as) // initial alert
prevT := time.Time{} prevT := time.Time{}
for i := range s.Values { for i := range s.Values {
@@ -405,6 +366,8 @@ func (ar *AlertingRule) execRange(ctx context.Context, start, end time.Time) ([]
// reset to Pending if there are gaps > EvalInterval between DPs // reset to Pending if there are gaps > EvalInterval between DPs
a.State = notifier.StatePending a.State = notifier.StatePending
a.ActiveAt = at a.ActiveAt = at
// re-template the annotations as active timestamp is changed
a.Annotations, _ = ar.expandAnnotationTemplates(s, qFn, at, ls)
a.Start = time.Time{} a.Start = time.Time{}
} else if at.Sub(a.ActiveAt) >= ar.For && a.State != notifier.StateFiring { } else if at.Sub(a.ActiveAt) >= ar.For && a.State != notifier.StateFiring {
a.State = notifier.StateFiring a.State = notifier.StateFiring
@@ -450,7 +413,7 @@ func (ar *AlertingRule) exec(ctx context.Context, ts time.Time, limit int) ([]pr
defer func() { defer func() {
ar.state.add(curState) ar.state.add(curState)
if curState.Err != nil && !errors.Is(curState.Err, context.Canceled) { if curState.Err != nil {
ar.metrics.errors.Inc() ar.metrics.errors.Inc()
} }
}() }()
@@ -459,8 +422,7 @@ func (ar *AlertingRule) exec(ctx context.Context, ts time.Time, limit int) ([]pr
return nil, fmt.Errorf("failed to execute query %q: %w", ar.Expr, err) return nil, fmt.Errorf("failed to execute query %q: %w", ar.Expr, err)
} }
isPartial := isPartialResponse(res) ar.logDebugf(ts, nil, "query returned %d series (elapsed: %s, isPartial: %t)", curState.Samples, curState.Duration, isPartialResponse(res))
ar.logDebugf(ts, nil, "query returned %d series (elapsed: %s, isPartial: %t)", curState.Samples, curState.Duration, isPartial)
qFn := func(query string) ([]datasource.Metric, error) { qFn := func(query string) ([]datasource.Metric, error) {
res, _, err := ar.q.Query(ctx, query, ts) res, _, err := ar.q.Query(ctx, query, ts)
return res.Data, err return res.Data, err
@@ -472,11 +434,10 @@ func (ar *AlertingRule) exec(ctx context.Context, ts time.Time, limit int) ([]pr
expandedLabels := make([]*labelSet, len(res.Data)) expandedLabels := make([]*labelSet, len(res.Data))
expandedAnnotations := make([]map[string]string, len(res.Data)) expandedAnnotations := make([]map[string]string, len(res.Data))
for i, m := range res.Data { for i, m := range res.Data {
ls, err := ar.expandLabelTemplates(m, qFn) ls, err := ar.expandLabelTemplates(m)
if err != nil { if err != nil {
// only set error in current state, but do not break alert processing
curState.Err = err curState.Err = err
logger.Errorf("got templating error in rule %s: %q", ar.Name, err) return nil, curState.Err
} }
at := ts at := ts
alertID := hash(ls.processed) alertID := hash(ls.processed)
@@ -486,11 +447,10 @@ func (ar *AlertingRule) exec(ctx context.Context, ts time.Time, limit int) ([]pr
at = a.ActiveAt at = a.ActiveAt
} }
} }
as, err := ar.expandAnnotationTemplates(m, qFn, at, ls, isPartial) as, err := ar.expandAnnotationTemplates(m, qFn, at, ls)
if err != nil { if err != nil {
// only set error in current state, but do not break alert processing
curState.Err = err curState.Err = err
logger.Errorf("got templating error in rule %s: %q", ar.Name, err) return nil, curState.Err
} }
expandedLabels[i] = ls expandedLabels[i] = ls
expandedAnnotations[i] = as expandedAnnotations[i] = as
@@ -596,29 +556,31 @@ func (ar *AlertingRule) exec(ctx context.Context, ts time.Time, limit int) ([]pr
return append(tss, ar.toTimeSeries(ts.Unix())...), nil return append(tss, ar.toTimeSeries(ts.Unix())...), nil
} }
func (ar *AlertingRule) expandLabelTemplates(m datasource.Metric, qFn templates.QueryFn) (*labelSet, error) { func (ar *AlertingRule) expandLabelTemplates(m datasource.Metric) (*labelSet, error) {
qFn := func(_ string) ([]datasource.Metric, error) {
return nil, fmt.Errorf("`query` template isn't supported in rule label")
}
ls, err := ar.toLabels(m, qFn) ls, err := ar.toLabels(m, qFn)
if err != nil { if err != nil {
return ls, fmt.Errorf("failed to expand label templates: %s", err) return nil, fmt.Errorf("failed to expand label templates: %s", err)
} }
return ls, nil return ls, nil
} }
func (ar *AlertingRule) expandAnnotationTemplates(m datasource.Metric, qFn templates.QueryFn, activeAt time.Time, ls *labelSet, isPartial bool) (map[string]string, error) { func (ar *AlertingRule) expandAnnotationTemplates(m datasource.Metric, qFn templates.QueryFn, activeAt time.Time, ls *labelSet) (map[string]string, error) {
tplData := notifier.AlertTplData{ tplData := notifier.AlertTplData{
Value: m.Values[0], Value: m.Values[0],
Type: ar.Type.String(), Type: ar.Type.String(),
Labels: ls.origin, Labels: ls.origin,
Expr: ar.Expr, Expr: ar.Expr,
AlertID: hash(ls.processed), AlertID: hash(ls.processed),
GroupID: ar.GroupID, GroupID: ar.GroupID,
ActiveAt: activeAt, ActiveAt: activeAt,
For: ar.For, For: ar.For,
IsPartial: isPartial,
} }
as, err := notifier.ExecTemplate(qFn, ar.Annotations, tplData) as, err := notifier.ExecTemplate(qFn, ar.Annotations, tplData)
if err != nil { if err != nil {
return as, fmt.Errorf("failed to expand annotation templates: %s", err) return nil, fmt.Errorf("failed to expand annotation templates: %s", err)
} }
return as, nil return as, nil
} }
@@ -818,9 +780,7 @@ func (ar *AlertingRule) restore(ctx context.Context, q datasource.Querier, ts ti
expr := fmt.Sprintf("default_rollup(%s{%s%s}[%ds])", expr := fmt.Sprintf("default_rollup(%s{%s%s}[%ds])",
alertForStateMetricName, nameStr, labelsFilter, int(lookback.Seconds())) alertForStateMetricName, nameStr, labelsFilter, int(lookback.Seconds()))
// query ALERTS_FOR_STATE at `ts-1s` instead `ts` to avoid retrieving data written in the current run, res, _, err := q.Query(ctx, expr, ts)
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10335
res, _, err := q.Query(ctx, expr, ts.Add(-1*time.Second))
if err != nil { if err != nil {
return fmt.Errorf("failed to execute restore query %q: %w ", expr, err) return fmt.Errorf("failed to execute restore query %q: %w ", expr, err)
} }

View File

@@ -1,106 +0,0 @@
//go:build synctest
package rule
import (
"context"
"strings"
"testing"
"testing/synctest"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
)
// TestAlertingRule_ActiveAtPreservedInAnnotations ensures that the fix for
// https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9543 is preserved
// while allowing query templates in labels (https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9783)
//
// The rule is evaluated twice at different timestamps against the same series.
// After the second evaluation the alert must keep the ActiveAt of the first one,
// and the `description` annotation rendered from `$activeAt` must still show
// the first timestamp rather than the second.
func TestAlertingRule_ActiveAtPreservedInAnnotations(t *testing.T) {
// wrap into synctest because of time manipulations
synctest.Test(t, func(t *testing.T) {
fq := &datasource.FakeQuerier{}
ar := &AlertingRule{
Name: "TestActiveAtPreservation",
Labels: map[string]string{
"test_query_in_label": `{{ "static_value" }}`,
},
Annotations: map[string]string{
"description": "Alert active since {{ $activeAt }}",
},
alerts: make(map[uint64]*notifier.Alert),
q: fq,
state: &ruleState{
entries: make([]StateEntry, 10),
},
}
// The label template above is a static string and doesn't call `query`,
// so the only data fq needs to serve is the series for the rule expression itself.
// Add a metric that should trigger the alert
fq.Add(metricWithValueAndLabels(t, 1, "instance", "server1"))
// First execution - creates new alert
ts1 := time.Now()
_, err := ar.exec(context.TODO(), ts1, 0)
if err != nil {
t.Fatalf("unexpected error on first exec: %s", err)
}
if len(ar.alerts) != 1 {
t.Fatalf("expected 1 alert, got %d", len(ar.alerts))
}
firstAlert := ar.GetAlerts()[0]
// Verify first execution: activeAt should be ts1 and annotation should reflect it
if !firstAlert.ActiveAt.Equal(ts1) {
t.Fatalf("expected activeAt to be %v, got %v", ts1, firstAlert.ActiveAt)
}
// The annotation holds the default string form of the timestamp
// (like "Alert active since 2025-09-30 08:55:13.638551611 -0400 EDT m=+0.002928464"),
// so only its date and time-of-day prefix with seconds precision is matched below.
expectedTimeStr := ts1.Format("2006-01-02 15:04:05")
if !strings.Contains(firstAlert.Annotations["description"], expectedTimeStr) {
t.Fatalf("first exec annotation should contain time %s, got: %s", expectedTimeStr, firstAlert.Annotations["description"])
}
// Second execution - should preserve activeAt in annotation
// Ensure different timestamp with different seconds, since the comparison
// above and below works with seconds precision.
// sleep is non-blocking thanks to synctest
time.Sleep(2 * time.Second)
ts2 := time.Now()
_, err = ar.exec(context.TODO(), ts2, 0)
if err != nil {
t.Fatalf("unexpected error on second exec: %s", err)
}
// Get the alert again (should be the same alert)
if len(ar.alerts) != 1 {
t.Fatalf("expected 1 alert, got %d", len(ar.alerts))
}
secondAlert := ar.GetAlerts()[0]
// Critical test: activeAt should still be ts1, not ts2
if !secondAlert.ActiveAt.Equal(ts1) {
t.Fatalf("activeAt should be preserved as %v, but got %v", ts1, secondAlert.ActiveAt)
}
// Critical test: annotation should still contain ts1 time, not ts2
if !strings.Contains(secondAlert.Annotations["description"], expectedTimeStr) {
t.Fatalf("second exec annotation should still contain original time %s, got: %s", expectedTimeStr, secondAlert.Annotations["description"])
}
// Additional verification: annotation should NOT contain ts2 time
ts2TimeStr := ts2.Format("2006-01-02 15:04:05")
if strings.Contains(secondAlert.Annotations["description"], ts2TimeStr) {
t.Fatalf("annotation should NOT contain new eval time %s, got: %s", ts2TimeStr, secondAlert.Annotations["description"])
}
// Verify that the templated label was expanded to its static value.
// NOTE(review): the label template doesn't invoke `query`, so this covers label
// templating in general rather than query templates specifically.
if firstAlert.Labels["test_query_in_label"] != "static_value" {
t.Fatalf("expected test_query_in_label=static_value, got %s", firstAlert.Labels["test_query_in_label"])
}
})
}

View File

@@ -663,7 +663,7 @@ func TestAlertingRuleExecRange(t *testing.T) {
Name: "for-pending", Name: "for-pending",
Type: config.NewPrometheusType().String(), Type: config.NewPrometheusType().String(),
Labels: map[string]string{"alertname": "for-pending"}, Labels: map[string]string{"alertname": "for-pending"},
Annotations: map[string]string{}, Annotations: map[string]string{"activeAt": "5000"},
State: notifier.StatePending, State: notifier.StatePending,
ActiveAt: time.Unix(5, 0), ActiveAt: time.Unix(5, 0),
Value: 1, Value: 1,
@@ -683,7 +683,7 @@ func TestAlertingRuleExecRange(t *testing.T) {
Name: "for-firing", Name: "for-firing",
Type: config.NewPrometheusType().String(), Type: config.NewPrometheusType().String(),
Labels: map[string]string{"alertname": "for-firing"}, Labels: map[string]string{"alertname": "for-firing"},
Annotations: map[string]string{}, Annotations: map[string]string{"activeAt": "1000"},
State: notifier.StateFiring, State: notifier.StateFiring,
ActiveAt: time.Unix(1, 0), ActiveAt: time.Unix(1, 0),
Start: time.Unix(5, 0), Start: time.Unix(5, 0),
@@ -704,7 +704,7 @@ func TestAlertingRuleExecRange(t *testing.T) {
Name: "for-hold-pending", Name: "for-hold-pending",
Type: config.NewPrometheusType().String(), Type: config.NewPrometheusType().String(),
Labels: map[string]string{"alertname": "for-hold-pending"}, Labels: map[string]string{"alertname": "for-hold-pending"},
Annotations: map[string]string{}, Annotations: map[string]string{"activeAt": "5000"},
State: notifier.StatePending, State: notifier.StatePending,
ActiveAt: time.Unix(5, 0), ActiveAt: time.Unix(5, 0),
Value: 1, Value: 1,
@@ -826,9 +826,12 @@ func TestGroup_Restore(t *testing.T) {
fg := NewGroup(config.Group{Name: "TestRestore", Rules: rules}, fqr, time.Second, nil) fg := NewGroup(config.Group{Name: "TestRestore", Rules: rules}, fqr, time.Second, nil)
fg.Init() fg.Init()
wg := sync.WaitGroup{} wg := sync.WaitGroup{}
wg.Go(func() { wg.Add(1)
fg.Start(context.Background(), nil, fqr) go func() {
}) nts := func() []notifier.Notifier { return []notifier.Notifier{&notifier.FakeNotifier{}} }
fg.Start(context.Background(), nts, nil, fqr)
wg.Done()
}()
fg.Close() fg.Close()
wg.Wait() wg.Wait()
@@ -1119,7 +1122,7 @@ func TestAlertingRuleLimit_Success(t *testing.T) {
} }
func TestAlertingRule_Template(t *testing.T) { func TestAlertingRule_Template(t *testing.T) {
f := func(rule *AlertingRule, metrics []datasource.Metric, isResponsePartial bool, alertsExpected map[uint64]*notifier.Alert) { f := func(rule *AlertingRule, metrics []datasource.Metric, alertsExpected map[uint64]*notifier.Alert) {
t.Helper() t.Helper()
fakeGroup := Group{ fakeGroup := Group{
@@ -1132,7 +1135,6 @@ func TestAlertingRule_Template(t *testing.T) {
entries: make([]StateEntry, 10), entries: make([]StateEntry, 10),
} }
fq.Add(metrics...) fq.Add(metrics...)
fq.SetPartialResponse(isResponsePartial)
if _, err := rule.exec(context.TODO(), time.Now(), 0); err != nil { if _, err := rule.exec(context.TODO(), time.Now(), 0); err != nil {
t.Fatalf("unexpected error: %s", err) t.Fatalf("unexpected error: %s", err)
@@ -1163,7 +1165,7 @@ func TestAlertingRule_Template(t *testing.T) {
}, []datasource.Metric{ }, []datasource.Metric{
metricWithValueAndLabels(t, 1, "instance", "foo"), metricWithValueAndLabels(t, 1, "instance", "foo"),
metricWithValueAndLabels(t, 1, "instance", "bar"), metricWithValueAndLabels(t, 1, "instance", "bar"),
}, false, map[uint64]*notifier.Alert{ }, map[uint64]*notifier.Alert{
hash(map[string]string{alertNameLabel: "common", "region": "east", "instance": "foo"}): { hash(map[string]string{alertNameLabel: "common", "region": "east", "instance": "foo"}): {
Annotations: map[string]string{ Annotations: map[string]string{
"summary": `common: Too high connection number for "foo"`, "summary": `common: Too high connection number for "foo"`,
@@ -1192,14 +1194,14 @@ func TestAlertingRule_Template(t *testing.T) {
"instance": "{{ $labels.instance }}", "instance": "{{ $labels.instance }}",
}, },
Annotations: map[string]string{ Annotations: map[string]string{
"summary": `{{ $labels.__name__ }}: Too high connection number for "{{ $labels.instance }}".{{ if $isPartial }} WARNING: Partial response detected - this alert may be incomplete. Please verify the results manually.{{ end }}`, "summary": `{{ $labels.__name__ }}: Too high connection number for "{{ $labels.instance }}"`,
"description": `{{ $labels.alertname}}: It is {{ $value }} connections for "{{ $labels.instance }}"`, "description": `{{ $labels.alertname}}: It is {{ $value }} connections for "{{ $labels.instance }}"`,
}, },
alerts: make(map[uint64]*notifier.Alert), alerts: make(map[uint64]*notifier.Alert),
}, []datasource.Metric{ }, []datasource.Metric{
metricWithValueAndLabels(t, 2, "__name__", "first", "instance", "foo", alertNameLabel, "override"), metricWithValueAndLabels(t, 2, "__name__", "first", "instance", "foo", alertNameLabel, "override"),
metricWithValueAndLabels(t, 10, "__name__", "second", "instance", "bar", alertNameLabel, "override"), metricWithValueAndLabels(t, 10, "__name__", "second", "instance", "bar", alertNameLabel, "override"),
}, false, map[uint64]*notifier.Alert{ }, map[uint64]*notifier.Alert{
hash(map[string]string{alertNameLabel: "override label", "exported_alertname": "override", "instance": "foo"}): { hash(map[string]string{alertNameLabel: "override label", "exported_alertname": "override", "instance": "foo"}): {
Labels: map[string]string{ Labels: map[string]string{
alertNameLabel: "override label", alertNameLabel: "override label",
@@ -1207,7 +1209,7 @@ func TestAlertingRule_Template(t *testing.T) {
"instance": "foo", "instance": "foo",
}, },
Annotations: map[string]string{ Annotations: map[string]string{
"summary": `first: Too high connection number for "foo".`, "summary": `first: Too high connection number for "foo"`,
"description": `override: It is 2 connections for "foo"`, "description": `override: It is 2 connections for "foo"`,
}, },
}, },
@@ -1218,7 +1220,7 @@ func TestAlertingRule_Template(t *testing.T) {
"instance": "bar", "instance": "bar",
}, },
Annotations: map[string]string{ Annotations: map[string]string{
"summary": `second: Too high connection number for "bar".`, "summary": `second: Too high connection number for "bar"`,
"description": `override: It is 10 connections for "bar"`, "description": `override: It is 10 connections for "bar"`,
}, },
}, },
@@ -1231,7 +1233,7 @@ func TestAlertingRule_Template(t *testing.T) {
"instance": "{{ $labels.instance }}", "instance": "{{ $labels.instance }}",
}, },
Annotations: map[string]string{ Annotations: map[string]string{
"summary": `Alert "{{ $labels.alertname }}({{ $labels.alertgroup }})" for instance {{ $labels.instance }}.{{ if $isPartial }} WARNING: Partial response detected - this alert may be incomplete. Please verify the results manually.{{ end }}`, "summary": `Alert "{{ $labels.alertname }}({{ $labels.alertgroup }})" for instance {{ $labels.instance }}`,
}, },
alerts: make(map[uint64]*notifier.Alert), alerts: make(map[uint64]*notifier.Alert),
}, []datasource.Metric{ }, []datasource.Metric{
@@ -1239,7 +1241,7 @@ func TestAlertingRule_Template(t *testing.T) {
alertNameLabel, "originAlertname", alertNameLabel, "originAlertname",
alertGroupNameLabel, "originGroupname", alertGroupNameLabel, "originGroupname",
"instance", "foo"), "instance", "foo"),
}, true, map[uint64]*notifier.Alert{ }, map[uint64]*notifier.Alert{
hash(map[string]string{ hash(map[string]string{
alertNameLabel: "OriginLabels", alertNameLabel: "OriginLabels",
"exported_alertname": "originAlertname", "exported_alertname": "originAlertname",
@@ -1255,7 +1257,7 @@ func TestAlertingRule_Template(t *testing.T) {
"instance": "foo", "instance": "foo",
}, },
Annotations: map[string]string{ Annotations: map[string]string{
"summary": `Alert "originAlertname(originGroupname)" for instance foo. WARNING: Partial response detected - this alert may be incomplete. Please verify the results manually.`, "summary": `Alert "originAlertname(originGroupname)" for instance foo`,
}, },
}, },
}) })
@@ -1370,10 +1372,8 @@ func TestAlertingRule_ToLabels(t *testing.T) {
ar := &AlertingRule{ ar := &AlertingRule{
Labels: map[string]string{ Labels: map[string]string{
"instance": "override", // this should override instance with new value "instance": "override", // this should override instance with new value
"group": "vmalert", // this shouldn't have effect since value in metric is equal "group": "vmalert", // this shouldn't have effect since value in metric is equal
"invalid_label": "{{ .Values.mustRuntimeFail }}",
"empty_label": "", // this should be dropped
}, },
Expr: "sum(vmalert_alerting_rules_error) by(instance, group, alertname) > 0", Expr: "sum(vmalert_alerting_rules_error) by(instance, group, alertname) > 0",
Name: "AlertingRulesError", Name: "AlertingRulesError",
@@ -1381,11 +1381,10 @@ func TestAlertingRule_ToLabels(t *testing.T) {
} }
expectedOriginLabels := map[string]string{ expectedOriginLabels := map[string]string{
"instance": "0.0.0.0:8800", "instance": "0.0.0.0:8800",
"group": "vmalert", "group": "vmalert",
"alertname": "ConfigurationReloadFailure", "alertname": "ConfigurationReloadFailure",
"alertgroup": "vmalert", "alertgroup": "vmalert",
"invalid_label": `error evaluating template: template: :1:298: executing "" at <.Values.mustRuntimeFail>: can't evaluate field Values in type notifier.tplData`,
} }
expectedProcessedLabels := map[string]string{ expectedProcessedLabels := map[string]string{
@@ -1395,12 +1394,11 @@ func TestAlertingRule_ToLabels(t *testing.T) {
"exported_alertname": "ConfigurationReloadFailure", "exported_alertname": "ConfigurationReloadFailure",
"group": "vmalert", "group": "vmalert",
"alertgroup": "vmalert", "alertgroup": "vmalert",
"invalid_label": `error evaluating template: template: :1:298: executing "" at <.Values.mustRuntimeFail>: can't evaluate field Values in type notifier.tplData`,
} }
ls, err := ar.toLabels(metric, nil) ls, err := ar.toLabels(metric, nil)
if err == nil || !strings.Contains(err.Error(), "error evaluating template") { if err != nil {
t.Fatalf("unexpected error %q", err.Error()) t.Fatalf("unexpected error: %s", err)
} }
if !reflect.DeepEqual(ls.origin, expectedOriginLabels) { if !reflect.DeepEqual(ls.origin, expectedOriginLabels) {
@@ -1431,50 +1429,3 @@ func TestAlertingRuleExec_Partial(t *testing.T) {
t.Fatalf("unexpected error: %s", err) t.Fatalf("unexpected error: %s", err)
} }
} }
// TestAlertingRule_QueryTemplateInLabels checks that a rule whose label value
// is produced by a `query` template can be executed without errors and that
// the resulting alert carries the rendered label.
func TestAlertingRule_QueryTemplateInLabels(t *testing.T) {
	const labelName = "suppress_for_mass_alert"

	querier := &datasource.FakeQuerier{}
	fakeGroup := Group{
		Name: "TestQueryTemplateInLabels",
	}
	rule := &AlertingRule{
		Name: "test_alert",
		Labels: map[string]string{
			labelName: `{{ if (printf "ALERTS{alertname='SomeAlert', alertstate='firing', device='%s'} == 1" $labels.device | query) }}true{{ else }}false{{ end }}`,
		},
		Annotations: map[string]string{
			"summary": "Test alert with query template in labels",
		},
		alerts: make(map[uint64]*notifier.Alert),
		q:      querier,
		state: &ruleState{
			entries: make([]StateEntry, 10),
		},
	}
	rule.GroupID = fakeGroup.GetID()

	// A single series is enough to make the rule produce an alert.
	querier.Add(metricWithValueAndLabels(t, 1, "device", "sda1"))

	if _, err := rule.exec(context.TODO(), time.Now(), 0); err != nil {
		t.Fatalf("unexpected error with query template in labels: %s", err)
	}

	// Exactly one alert must have been created by the evaluation.
	if got := len(rule.alerts); got != 1 {
		t.Fatalf("expected 1 alert, got %d", got)
	}

	rendered, ok := rule.GetAlerts()[0].Labels[labelName]
	if !ok {
		t.Fatalf("expected 'suppress_for_mass_alert' label to exist")
	}

	// Either outcome of the template is acceptable; what matters is that
	// the template was rendered into one of its two possible values.
	switch rendered {
	case "true", "false":
	default:
		t.Fatalf("expected 'suppress_for_mass_alert' label to be 'true' or 'false', got '%s'", rendered)
	}
}

View File

@@ -2,11 +2,11 @@ package rule
import ( import (
"context" "context"
"encoding/json"
"errors" "errors"
"flag" "flag"
"fmt" "fmt"
"hash/fnv" "hash/fnv"
"maps"
"net/url" "net/url"
"sync" "sync"
"time" "time"
@@ -25,14 +25,10 @@ import (
) )
var ( var (
ruleResultsLimit = flag.Int("rule.resultsLimit", 0, "Limits the number of alerts or recording results a single rule can produce. "+
"Can be overridden by the limit option under group if specified. "+
"If exceeded, the rule will be marked with an error and all its results will be discarded. "+
"0 means no limit.")
ruleUpdateEntriesLimit = flag.Int("rule.updateEntriesLimit", 20, "Defines the max number of rule's state updates stored in-memory. "+ ruleUpdateEntriesLimit = flag.Int("rule.updateEntriesLimit", 20, "Defines the max number of rule's state updates stored in-memory. "+
"Rule's updates are available on rule's Details page and are used for debugging purposes. The number of stored updates can be overridden per rule via update_entries_limit param.") "Rule's updates are available on rule's Details page and are used for debugging purposes. The number of stored updates can be overridden per rule via update_entries_limit param.")
resendDelay = flag.Duration("rule.resendDelay", 0, "Minimum amount of time to wait before resending an alert to notifier.") resendDelay = flag.Duration("rule.resendDelay", 0, "MiniMum amount of time to wait before resending an alert to notifier.")
maxResolveDuration = flag.Duration("rule.maxResolveDuration", 0, "Limits the maximum duration for automatic alert expiration, "+ maxResolveDuration = flag.Duration("rule.maxResolveDuration", 0, "Limits the maxiMum duration for automatic alert expiration, "+
"which by default is 4 times evaluationInterval of the parent group") "which by default is 4 times evaluationInterval of the parent group")
evalDelay = flag.Duration("rule.evalDelay", 30*time.Second, "Adjustment of the 'time' parameter for rule evaluation requests to compensate intentional data delay from the datasource. "+ evalDelay = flag.Duration("rule.evalDelay", 30*time.Second, "Adjustment of the 'time' parameter for rule evaluation requests to compensate intentional data delay from the datasource. "+
"Normally, should be equal to '-search.latencyOffset' (cmd-line flag configured for VictoriaMetrics single-node or vmselect). "+ "Normally, should be equal to '-search.latencyOffset' (cmd-line flag configured for VictoriaMetrics single-node or vmselect). "+
@@ -40,8 +36,6 @@ var (
disableAlertGroupLabel = flag.Bool("disableAlertgroupLabel", false, "Whether to disable adding group's Name as label to generated alerts and time series.") disableAlertGroupLabel = flag.Bool("disableAlertgroupLabel", false, "Whether to disable adding group's Name as label to generated alerts and time series.")
remoteReadLookBack = flag.Duration("remoteRead.lookback", time.Hour, "Lookback defines how far to look into past for alerts timeseries. "+ remoteReadLookBack = flag.Duration("remoteRead.lookback", time.Hour, "Lookback defines how far to look into past for alerts timeseries. "+
"For example, if lookback=1h then range from now() to now()-1h will be scanned.") "For example, if lookback=1h then range from now() to now()-1h will be scanned.")
maxStartDelay = flag.Duration("group.maxStartDelay", 5*time.Minute, "Defines the max delay before starting the group evaluation. Group's start is artificially delayed for random duration on interval"+
" [0..min(--group.maxStartDelay, group.interval)]. This helps smoothing out the load on the configured datasource, so evaluations aren't executed too close to each other.")
) )
// Group is an entity for grouping rules // Group is an entity for grouping rules
@@ -98,7 +92,9 @@ type groupMetrics struct {
// set2 has priority over set1. // set2 has priority over set1.
func mergeLabels(groupName, ruleName string, set1, set2 map[string]string) map[string]string { func mergeLabels(groupName, ruleName string, set1, set2 map[string]string) map[string]string {
r := map[string]string{} r := map[string]string{}
maps.Copy(r, set1) for k, v := range set1 {
r[k] = v
}
for k, v := range set2 { for k, v := range set2 {
if prevV, ok := r[k]; ok { if prevV, ok := r[k]; ok {
logger.Infof("label %q=%q for rule %q.%q overwritten with external label %q=%q", logger.Infof("label %q=%q for rule %q.%q overwritten with external label %q=%q",
@@ -116,6 +112,7 @@ func NewGroup(cfg config.Group, qb datasource.QuerierBuilder, defaultInterval ti
Name: cfg.Name, Name: cfg.Name,
File: cfg.File, File: cfg.File,
Interval: cfg.Interval.Duration(), Interval: cfg.Interval.Duration(),
Limit: cfg.Limit,
Concurrency: cfg.Concurrency, Concurrency: cfg.Concurrency,
checksum: cfg.Checksum, checksum: cfg.Checksum,
Params: cfg.Params, Params: cfg.Params,
@@ -132,11 +129,6 @@ func NewGroup(cfg config.Group, qb datasource.QuerierBuilder, defaultInterval ti
if g.Interval == 0 { if g.Interval == 0 {
g.Interval = defaultInterval g.Interval = defaultInterval
} }
if cfg.Limit != nil {
g.Limit = *cfg.Limit
} else {
g.Limit = *ruleResultsLimit
}
if g.Concurrency < 1 { if g.Concurrency < 1 {
g.Concurrency = 1 g.Concurrency = 1
} }
@@ -297,7 +289,7 @@ func (g *Group) InterruptEval() {
} }
} }
// Close stops the group and its rules, unregisters group metrics // Close stops the group and it's rules, unregisters group metrics
func (g *Group) Close() { func (g *Group) Close() {
if g.doneCh == nil { if g.doneCh == nil {
return return
@@ -306,6 +298,10 @@ func (g *Group) Close() {
g.InterruptEval() g.InterruptEval()
<-g.finishedCh <-g.finishedCh
g.closeGroupMetrics()
}
func (g *Group) closeGroupMetrics() {
metrics.UnregisterSet(g.metrics.set, true) metrics.UnregisterSet(g.metrics.set, true)
} }
@@ -331,13 +327,13 @@ func (g *Group) Init() {
} }
// Start starts group's evaluation // Start starts group's evaluation
func (g *Group) Start(ctx context.Context, rw remotewrite.RWClient, rr datasource.QuerierBuilder) { func (g *Group) Start(ctx context.Context, nts func() []notifier.Notifier, rw remotewrite.RWClient, rr datasource.QuerierBuilder) {
defer func() { close(g.finishedCh) }() defer func() { close(g.finishedCh) }()
evalTS := time.Now() evalTS := time.Now()
// sleep random duration to spread group rules evaluation // sleep random duration to spread group rules evaluation
// over maxStartDelay to reduce the load on datasource. // over time in order to reduce load on datasource.
if !SkipRandSleepOnGroupStart { if !SkipRandSleepOnGroupStart {
sleepBeforeStart := g.delayBeforeStart(evalTS, *maxStartDelay) sleepBeforeStart := delayBeforeStart(evalTS, g.GetID(), g.Interval, g.EvalOffset)
g.infof("will start in %v", sleepBeforeStart) g.infof("will start in %v", sleepBeforeStart)
sleepTimer := time.NewTimer(sleepBeforeStart) sleepTimer := time.NewTimer(sleepBeforeStart)
@@ -369,12 +365,13 @@ func (g *Group) Start(ctx context.Context, rw remotewrite.RWClient, rr datasourc
e := &executor{ e := &executor{
Rw: rw, Rw: rw,
Notifiers: nts,
notifierHeaders: g.NotifierHeaders, notifierHeaders: g.NotifierHeaders,
} }
g.infof("started") g.infof("started")
eval := func(ctx context.Context, ts time.Time) time.Time { eval := func(ctx context.Context, ts time.Time) {
g.metrics.iterationTotal.Inc() g.metrics.iterationTotal.Inc()
start := time.Now() start := time.Now()
@@ -382,7 +379,7 @@ func (g *Group) Start(ctx context.Context, rw remotewrite.RWClient, rr datasourc
if len(g.Rules) < 1 { if len(g.Rules) < 1 {
g.metrics.iterationDuration.UpdateDuration(start) g.metrics.iterationDuration.UpdateDuration(start)
g.LastEvaluation = start g.LastEvaluation = start
return ts return
} }
resolveDuration := getResolveDuration(g.Interval, *resendDelay, *maxResolveDuration) resolveDuration := getResolveDuration(g.Interval, *resendDelay, *maxResolveDuration)
@@ -396,7 +393,6 @@ func (g *Group) Start(ctx context.Context, rw remotewrite.RWClient, rr datasourc
} }
g.metrics.iterationDuration.UpdateDuration(start) g.metrics.iterationDuration.UpdateDuration(start)
g.LastEvaluation = start g.LastEvaluation = start
return ts
} }
evalCtx, cancel := context.WithCancel(ctx) evalCtx, cancel := context.WithCancel(ctx)
@@ -405,7 +401,7 @@ func (g *Group) Start(ctx context.Context, rw remotewrite.RWClient, rr datasourc
g.mu.Unlock() g.mu.Unlock()
defer g.evalCancel() defer g.evalCancel()
realEvalTS := eval(evalCtx, evalTS) eval(evalCtx, evalTS)
t := time.NewTicker(g.Interval) t := time.NewTicker(g.Interval)
defer t.Stop() defer t.Stop()
@@ -413,7 +409,7 @@ func (g *Group) Start(ctx context.Context, rw remotewrite.RWClient, rr datasourc
// restore the rules state after the first evaluation // restore the rules state after the first evaluation
// so only active alerts can be restored. // so only active alerts can be restored.
if rr != nil { if rr != nil {
err := g.restore(ctx, rr, realEvalTS, *remoteReadLookBack) err := g.restore(ctx, rr, evalTS, *remoteReadLookBack)
if err != nil { if err != nil {
logger.Errorf("error while restoring ruleState for group %q: %s", g.Name, err) logger.Errorf("error while restoring ruleState for group %q: %s", g.Name, err)
} }
@@ -476,28 +472,32 @@ func (g *Group) UpdateWith(newGroup *Group) {
g.updateCh <- newGroup g.updateCh <- newGroup
} }
// delayBeforeStart returns duration for delaying the evaluation start // DeepCopy returns a deep copy of group
// based on given ts and Group settings. The delay can't exceed maxDelay. func (g *Group) DeepCopy() *Group {
// maxDelay is ignored if g.EvalOffset != nil. g.mu.RLock()
// data, _ := json.Marshal(g)
// Delaying is important to smooth out the load on the datasource when all groups start at the same time. g.mu.RUnlock()
// delayBeforeStart calculates delay based on Group ID, so all groups will start at different moments of time. newG := Group{}
func (g *Group) delayBeforeStart(ts time.Time, maxDelay time.Duration) time.Duration { _ = json.Unmarshal(data, &newG)
if g.EvalOffset != nil { newG.Rules = g.Rules
// if offset is specified, ignore the maxDelay and return a duration aligned with offset newG.id = g.id
currentOffsetPoint := ts.Truncate(g.Interval).Add(*g.EvalOffset) return &newG
}
// if offset is specified, delayBeforeStart returns a duration to help aligning timestamp with offset;
// otherwise, it returns a random duration between [0..interval] based on group key.
func delayBeforeStart(ts time.Time, key uint64, interval time.Duration, offset *time.Duration) time.Duration {
if offset != nil {
currentOffsetPoint := ts.Truncate(interval).Add(*offset)
if currentOffsetPoint.Before(ts) { if currentOffsetPoint.Before(ts) {
// wait until the next offset point // wait until the next offset point
return currentOffsetPoint.Add(g.Interval).Sub(ts) return currentOffsetPoint.Add(interval).Sub(ts)
} }
return currentOffsetPoint.Sub(ts) return currentOffsetPoint.Sub(ts)
} }
// otherwise, return a random duration between [0..min(interval, maxDelay)] based on group ID
// artificially limit interval, so groups with big intervals could start sooner.
interval := min(g.Interval, maxDelay)
var randSleep time.Duration var randSleep time.Duration
randSleep = time.Duration(float64(interval) * (float64(g.GetID()) / (1 << 64))) randSleep = time.Duration(float64(interval) * (float64(key) / (1 << 64)))
sleepOffset := time.Duration(ts.UnixNano() % interval.Nanoseconds()) sleepOffset := time.Duration(ts.UnixNano() % interval.Nanoseconds())
if randSleep < sleepOffset { if randSleep < sleepOffset {
randSleep += interval randSleep += interval
@@ -559,13 +559,15 @@ func (g *Group) Replay(start, end time.Time, rw remotewrite.RWClient, maxDataPoi
if !disableProgressBar { if !disableProgressBar {
bar = pb.StartNew(iterations * len(g.Rules)) bar = pb.StartNew(iterations * len(g.Rules))
} }
for i := range g.Rules { for _, r := range g.Rules {
rule := g.Rules[i]
sem <- struct{}{} sem <- struct{}{}
wg.Go(func() { wg.Add(1)
res <- replayRuleRange(rule, ri, bar, rw, replayRuleRetryAttempts, ruleEvaluationConcurrency) go func(r Rule, ri rangeIterator) {
// pass ri as a copy, so it can be modified within the replayRuleRange
res <- replayRuleRange(r, ri, bar, rw, replayRuleRetryAttempts, ruleEvaluationConcurrency)
<-sem <-sem
}) wg.Done()
}(r, ri)
} }
wg.Wait() wg.Wait()
@@ -595,10 +597,10 @@ func replayRuleRange(r Rule, ri rangeIterator, bar *pb.ProgressBar, rw remotewri
res := make(chan int, int(ri.end.Sub(ri.start)/ri.step)+1) res := make(chan int, int(ri.end.Sub(ri.start)/ri.step)+1)
for ri.next() { for ri.next() {
sem <- struct{}{} sem <- struct{}{}
start := ri.s wg.Add(1)
end := ri.e
wg.Go(func() { go func(s, e time.Time) {
n, err := replayRule(r, start, end, rw, replayRuleRetryAttempts) n, err := replayRule(r, s, e, rw, replayRuleRetryAttempts)
if err != nil { if err != nil {
logger.Fatalf("rule %q: %s", r, err) logger.Fatalf("rule %q: %s", r, err)
} }
@@ -607,7 +609,8 @@ func replayRuleRange(r Rule, ri rangeIterator, bar *pb.ProgressBar, rw remotewri
} }
res <- n res <- n
<-sem <-sem
}) wg.Done()
}(ri.s, ri.e)
} }
wg.Wait() wg.Wait()
close(res) close(res)
@@ -621,9 +624,10 @@ func replayRuleRange(r Rule, ri rangeIterator, bar *pb.ProgressBar, rw remotewri
} }
// ExecOnce evaluates all the rules under group for once with given timestamp. // ExecOnce evaluates all the rules under group for once with given timestamp.
func (g *Group) ExecOnce(ctx context.Context, rw remotewrite.RWClient, evalTS time.Time) chan error { func (g *Group) ExecOnce(ctx context.Context, nts func() []notifier.Notifier, rw remotewrite.RWClient, evalTS time.Time) chan error {
e := &executor{ e := &executor{
Rw: rw, Rw: rw,
Notifiers: nts,
notifierHeaders: g.NotifierHeaders, notifierHeaders: g.NotifierHeaders,
} }
if len(g.Rules) < 1 { if len(g.Rules) < 1 {
@@ -698,6 +702,7 @@ func (g *Group) getEvalDelay() time.Duration {
// executor contains group's notify and rw configs // executor contains group's notify and rw configs
type executor struct { type executor struct {
Notifiers func() []notifier.Notifier
notifierHeaders map[string]string notifierHeaders map[string]string
Rw remotewrite.RWClient Rw remotewrite.RWClient
@@ -718,13 +723,14 @@ func (e *executor) execConcurrently(ctx context.Context, rules []Rule, ts time.T
sem := make(chan struct{}, concurrency) sem := make(chan struct{}, concurrency)
go func() { go func() {
wg := sync.WaitGroup{} wg := sync.WaitGroup{}
for i := range rules { for _, r := range rules {
rule := rules[i]
sem <- struct{}{} sem <- struct{}{}
wg.Go(func() { wg.Add(1)
res <- e.exec(ctx, rule, ts, resolveDuration, limit) go func(r Rule) {
res <- e.exec(ctx, r, ts, resolveDuration, limit)
<-sem <-sem
}) wg.Done()
}(r)
} }
wg.Wait() wg.Wait()
close(res) close(res)
@@ -753,7 +759,6 @@ func (e *executor) exec(ctx context.Context, r Rule, ts time.Time, resolveDurati
return fmt.Errorf("rule %q: failed to execute: %w", r, err) return fmt.Errorf("rule %q: failed to execute: %w", r, err)
} }
var errG vmalertutil.ErrGroup
if e.Rw != nil { if e.Rw != nil {
pushToRW := func(tss []prompb.TimeSeries) error { pushToRW := func(tss []prompb.TimeSeries) error {
var lastErr error var lastErr error
@@ -765,26 +770,31 @@ func (e *executor) exec(ctx context.Context, r Rule, ts time.Time, resolveDurati
return lastErr return lastErr
} }
if err := pushToRW(tss); err != nil { if err := pushToRW(tss); err != nil {
errG.Add(err) return err
} }
} }
ar, ok := r.(*AlertingRule) ar, ok := r.(*AlertingRule)
if !ok { if !ok {
return errG.Err() return nil
} }
alerts := ar.alertsToSend(resolveDuration, *resendDelay) alerts := ar.alertsToSend(resolveDuration, *resendDelay)
if len(alerts) < 1 { if len(alerts) < 1 {
return errG.Err() return nil
} }
notifierErr := notifier.Send(ctx, alerts, e.notifierHeaders) wg := sync.WaitGroup{}
for err := range notifierErr { errGr := new(vmalertutil.ErrGroup)
if err != nil { for _, nt := range e.Notifiers() {
errG.Add(fmt.Errorf("rule %q: notifier failure: %w", r, err)) wg.Add(1)
} go func(nt notifier.Notifier) {
if err := nt.Send(ctx, alerts, e.notifierHeaders); err != nil {
errGr.Add(fmt.Errorf("rule %q: failed to send alerts to addr %q: %w", r, nt.Addr(), err))
}
wg.Done()
}(nt)
} }
wg.Wait()
return errG.Err() return errGr.Err()
} }

View File

@@ -262,7 +262,7 @@ func TestUpdateDuringRandSleep(t *testing.T) {
updateCh: make(chan *Group), updateCh: make(chan *Group),
} }
g.Init() g.Init()
go g.Start(context.Background(), nil, nil) go g.Start(context.Background(), nil, nil, nil)
rule1 := AlertingRule{ rule1 := AlertingRule{
Name: "jobDown", Name: "jobDown",
@@ -346,8 +346,7 @@ func TestGroupStart(t *testing.T) {
} }
fs := &datasource.FakeQuerier{} fs := &datasource.FakeQuerier{}
fn, cleanup := notifier.InitFakeNotifier() fn := &notifier.FakeNotifier{}
defer cleanup()
const evalInterval = time.Millisecond const evalInterval = time.Millisecond
g := NewGroup(groups[0], fs, evalInterval, map[string]string{"cluster": "east-1"}) g := NewGroup(groups[0], fs, evalInterval, map[string]string{"cluster": "east-1"})
@@ -396,7 +395,7 @@ func TestGroupStart(t *testing.T) {
fs.Add(m2) fs.Add(m2)
g.Init() g.Init()
go func() { go func() {
g.Start(context.Background(), nil, fs) g.Start(context.Background(), func() []notifier.Notifier { return []notifier.Notifier{fn} }, nil, fs)
close(finished) close(finished)
}() }()
@@ -405,8 +404,7 @@ func TestGroupStart(t *testing.T) {
var cur uint64 var cur uint64
prev := g.metrics.iterationTotal.Get() prev := g.metrics.iterationTotal.Get()
i := 0 for i := 0; ; i++ {
for {
if i > 40 { if i > 40 {
t.Fatalf("group wasn't able to perform %d evaluations during %d eval intervals", n, i) t.Fatalf("group wasn't able to perform %d evaluations during %d eval intervals", n, i)
} }
@@ -415,7 +413,6 @@ func TestGroupStart(t *testing.T) {
return return
} }
time.Sleep(interval) time.Sleep(interval)
i++
} }
} }
@@ -475,10 +472,15 @@ func TestFaultyNotifier(t *testing.T) {
r := newTestAlertingRule("instant", 0) r := newTestAlertingRule("instant", 0)
r.q = fq r.q = fq
fn, cleanup := notifier.InitFakeNotifier() fn := &notifier.FakeNotifier{}
defer cleanup() e := &executor{
Notifiers: func() []notifier.Notifier {
e := &executor{} return []notifier.Notifier{
&notifier.FaultyNotifier{},
fn,
}
},
}
delay := 5 * time.Second delay := 5 * time.Second
ctx, cancel := context.WithTimeout(context.Background(), delay) ctx, cancel := context.WithTimeout(context.Background(), delay)
defer cancel() defer cancel()
@@ -551,7 +553,7 @@ func TestCloseWithEvalInterruption(t *testing.T) {
g := NewGroup(groups[0], fq, evalInterval, nil) g := NewGroup(groups[0], fq, evalInterval, nil)
g.Init() g.Init()
go g.Start(context.Background(), nil, nil) go g.Start(context.Background(), nil, nil, nil)
time.Sleep(evalInterval * 20) time.Sleep(evalInterval * 20)
@@ -569,10 +571,9 @@ func TestCloseWithEvalInterruption(t *testing.T) {
func TestGroupStartDelay(t *testing.T) { func TestGroupStartDelay(t *testing.T) {
g := &Group{} g := &Group{}
g.id = uint64(math.MaxUint64 / 10)
// interval of 5min and key generate a static delay of 30s // interval of 5min and key generate a static delay of 30s
g.Interval = time.Minute * 5 g.Interval = time.Minute * 5
maxDelay := time.Minute * 5 key := uint64(math.MaxUint64 / 10)
f := func(atS, expS string) { f := func(atS, expS string) {
t.Helper() t.Helper()
@@ -584,7 +585,7 @@ func TestGroupStartDelay(t *testing.T) {
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
delay := g.delayBeforeStart(at, maxDelay) delay := delayBeforeStart(at, key, g.Interval, g.EvalOffset)
gotStart := at.Add(delay) gotStart := at.Add(delay)
if expTS != gotStart { if expTS != gotStart {
t.Fatalf("expected to get %v; got %v instead", expTS, gotStart) t.Fatalf("expected to get %v; got %v instead", expTS, gotStart)
@@ -605,15 +606,6 @@ func TestGroupStartDelay(t *testing.T) {
f("2023-01-01T00:01:00.000+00:00", "2023-01-01T00:03:00.000+00:00") f("2023-01-01T00:01:00.000+00:00", "2023-01-01T00:03:00.000+00:00")
f("2023-01-01T00:03:30.000+00:00", "2023-01-01T00:08:00.000+00:00") f("2023-01-01T00:03:30.000+00:00", "2023-01-01T00:08:00.000+00:00")
f("2023-01-01T00:08:00.000+00:00", "2023-01-01T00:08:00.000+00:00") f("2023-01-01T00:08:00.000+00:00", "2023-01-01T00:08:00.000+00:00")
maxDelay = time.Minute * 1
g.EvalOffset = nil
// test group with maxDelay, and offset disabled
f("2023-01-01T00:00:00.000+00:00", "2023-01-01T00:00:06.000+00:00")
f("2023-01-01T00:00:01.000+00:00", "2023-01-01T00:00:06.000+00:00")
f("2023-01-01T00:00:06.100+00:00", "2023-01-01T00:01:06.000+00:00")
f("2023-01-01T00:00:11.000+00:00", "2023-01-01T00:01:06.000+00:00")
} }
func TestGetPrometheusReqTimestamp(t *testing.T) { func TestGetPrometheusReqTimestamp(t *testing.T) {

View File

@@ -2,7 +2,6 @@ package rule
import ( import (
"context" "context"
"errors"
"fmt" "fmt"
"strings" "strings"
"time" "time"
@@ -82,37 +81,6 @@ func (rr *RecordingRule) ID() uint64 {
return rr.RuleID return rr.RuleID
} }
// ToAPI returns ApiRule representation of rr
func (rr *RecordingRule) ToAPI() ApiRule {
state := rr.state
lastState := state.getLast()
r := ApiRule{
Type: TypeRecording,
DatasourceType: rr.Type.String(),
Name: rr.Name,
Query: rr.Expr,
Labels: rr.Labels,
LastEvaluation: lastState.Time,
EvaluationTime: lastState.Duration.Seconds(),
Health: "ok",
LastSamples: lastState.Samples,
LastSeriesFetched: lastState.SeriesFetched,
MaxUpdates: state.size(),
Updates: state.getAll(),
// encode as strings to avoid rounding
ID: fmt.Sprintf("%d", rr.ID()),
GroupID: fmt.Sprintf("%d", rr.GroupID),
GroupName: rr.GroupName,
File: rr.File,
}
if lastState.Err != nil {
r.LastError = lastState.Err.Error()
r.Health = "err"
}
return r
}
// NewRecordingRule creates a new RecordingRule // NewRecordingRule creates a new RecordingRule
func NewRecordingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rule) *RecordingRule { func NewRecordingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rule) *RecordingRule {
debug := group.Debug debug := group.Debug
@@ -198,7 +166,7 @@ func (rr *RecordingRule) exec(ctx context.Context, ts time.Time, limit int) ([]p
defer func() { defer func() {
rr.state.add(curState) rr.state.add(curState)
if curState.Err != nil && !errors.Is(curState.Err, context.Canceled) { if curState.Err != nil {
rr.metrics.errors.Inc() rr.metrics.errors.Inc()
} }
}() }()
@@ -237,8 +205,7 @@ func (rr *RecordingRule) exec(ctx context.Context, ts time.Time, limit int) ([]p
Labels: stringToLabels(k), Labels: stringToLabels(k),
Samples: []prompb.Sample{ Samples: []prompb.Sample{
{Value: decimal.StaleNaN, Timestamp: ts.UnixNano() / 1e6}, {Value: decimal.StaleNaN, Timestamp: ts.UnixNano() / 1e6},
}, }})
})
} }
rr.lastEvaluation = curEvaluation rr.lastEvaluation = curEvaluation
return tss, nil return tss, nil
@@ -293,11 +260,6 @@ func (rr *RecordingRule) toTimeSeries(m datasource.Metric) prompb.TimeSeries {
} }
// add extra labels configured by user // add extra labels configured by user
for k := range rr.Labels { for k := range rr.Labels {
// do not add label with empty value, since it has no meaning.
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9984
if rr.Labels[k] == "" {
continue
}
existingLabel := promrelabel.GetLabelByName(m.Labels, k) existingLabel := promrelabel.GetLabelByName(m.Labels, k)
if existingLabel != nil { // there is a conflict between extra and existing label if existingLabel != nil { // there is a conflict between extra and existing label
if existingLabel.Value == rr.Labels[k] { if existingLabel.Value == rr.Labels[k] {

View File

@@ -21,8 +21,6 @@ type Rule interface {
// ID returns unique ID that may be used for // ID returns unique ID that may be used for
// identifying this Rule among others. // identifying this Rule among others.
ID() uint64 ID() uint64
// ToAPI returns ApiRule representation of Rule
ToAPI() ApiRule
// exec executes the rule with given context at the given timestamp and limit. // exec executes the rule with given context at the given timestamp and limit.
// returns an err if number of resulting time series exceeds the limit. // returns an err if number of resulting time series exceeds the limit.
exec(ctx context.Context, ts time.Time, limit int) ([]prompb.TimeSeries, error) exec(ctx context.Context, ts time.Time, limit int) ([]prompb.TimeSeries, error)
@@ -70,6 +68,39 @@ type StateEntry struct {
Curl string `json:"curl"` Curl string `json:"curl"`
} }
// GetLastEntry returns latest stateEntry of rule
func GetLastEntry(r Rule) StateEntry {
if rule, ok := r.(*AlertingRule); ok {
return rule.state.getLast()
}
if rule, ok := r.(*RecordingRule); ok {
return rule.state.getLast()
}
return StateEntry{}
}
// GetRuleStateSize returns size of rule stateEntry
func GetRuleStateSize(r Rule) int {
if rule, ok := r.(*AlertingRule); ok {
return rule.state.size()
}
if rule, ok := r.(*RecordingRule); ok {
return rule.state.size()
}
return 0
}
// GetAllRuleState returns rule entire stateEntries
func GetAllRuleState(r Rule) []StateEntry {
if rule, ok := r.(*AlertingRule); ok {
return rule.state.getAll()
}
if rule, ok := r.(*RecordingRule); ok {
return rule.state.getAll()
}
return []StateEntry{}
}
func (s *ruleState) size() int { func (s *ruleState) size() int {
s.RLock() s.RLock()
defer s.RUnlock() defer s.RUnlock()
@@ -121,7 +152,7 @@ func (s *ruleState) add(e StateEntry) {
func replayRule(r Rule, start, end time.Time, rw remotewrite.RWClient, replayRuleRetryAttempts int) (int, error) { func replayRule(r Rule, start, end time.Time, rw remotewrite.RWClient, replayRuleRetryAttempts int) (int, error) {
var err error var err error
var tss []prompb.TimeSeries var tss []prompb.TimeSeries
for i := range replayRuleRetryAttempts { for i := 0; i < replayRuleRetryAttempts; i++ {
tss, err = r.execRange(context.Background(), start, end) tss, err = r.execRange(context.Background(), start, end)
if err == nil { if err == nil {
break break

View File

@@ -40,7 +40,7 @@ func TestRule_state(t *testing.T) {
} }
var last time.Time var last time.Time
for range stateEntriesN * 2 { for i := 0; i < stateEntriesN*2; i++ {
last = time.Now() last = time.Now()
r.state.add(StateEntry{At: last}) r.state.add(StateEntry{At: last})
} }
@@ -65,15 +65,17 @@ func TestRule_stateConcurrent(_ *testing.T) {
r := &AlertingRule{state: &ruleState{entries: make([]StateEntry, 20)}} r := &AlertingRule{state: &ruleState{entries: make([]StateEntry, 20)}}
const workers = 50 const workers = 50
const iterations = 100 const iterations = 100
var wg sync.WaitGroup wg := sync.WaitGroup{}
for range workers { wg.Add(workers)
wg.Go(func() { for i := 0; i < workers; i++ {
for range iterations { go func() {
defer wg.Done()
for i := 0; i < iterations; i++ {
r.state.add(StateEntry{At: time.Now()}) r.state.add(StateEntry{At: time.Now()})
r.state.getAll() r.state.getAll()
r.state.getLast() r.state.getLast()
} }
}) }()
} }
wg.Wait() wg.Wait()
} }

View File

@@ -19,13 +19,13 @@ func CompareRules(t *testing.T, a, b Rule) error {
case *AlertingRule: case *AlertingRule:
br, ok := b.(*AlertingRule) br, ok := b.(*AlertingRule)
if !ok { if !ok {
return fmt.Errorf("rule %d supposed to be of type AlertingRule", b.ID()) return fmt.Errorf("rule %q supposed to be of type AlertingRule", b.ID())
} }
return compareAlertingRules(t, v, br) return compareAlertingRules(t, v, br)
case *RecordingRule: case *RecordingRule:
br, ok := b.(*RecordingRule) br, ok := b.(*RecordingRule)
if !ok { if !ok {
return fmt.Errorf("rule %d supposed to be of type RecordingRule", b.ID()) return fmt.Errorf("rule %q supposed to be of type RecordingRule", b.ID())
} }
return compareRecordingRules(t, v, br) return compareRecordingRules(t, v, br)
default: default:

View File

@@ -34,12 +34,11 @@ body {
padding-top: 4.5rem; padding-top: 4.5rem;
} }
.vm-group { .group-items {
cursor: pointer; cursor: pointer;
padding: 5px; padding: 5px;
margin-top: 5px; margin-top: 5px;
position: relative; position: relative;
display: none;
} }
.btn svg, .dropdown-item svg { .btn svg, .dropdown-item svg {
@@ -56,22 +55,14 @@ body {
height: 38px; height: 38px;
} }
.vm-item:not(.vm-found) { .group-items:not(:has(.sub-item:not(.d-none))) {
display: none; display: none !important;
} }
.vm-group:has(.vm-item:is(.vm-found)), .vm-group:is(.vm-found) { .group-items:hover {
display: flex;
}
.vm-group:hover {
background-color: #f8f9fa!important; background-color: #f8f9fa!important;
} }
.vm-group:is(.vm-found) .vm-item {
display: table-row;
}
.table { .table {
table-layout: fixed; table-layout: fixed;
} }
@@ -120,9 +111,3 @@ textarea.curl-area {
.w-60 { .w-60 {
width: 60%; width: 60%;
} }
.annotations {
white-space: pre-wrap;
color: gray;
word-wrap: break-word;
}

View File

@@ -65,34 +65,32 @@ function getParamURL(key) {
return url.searchParams.get(key) return url.searchParams.get(key)
} }
function matchText(search, item) {
const text = item.innerText.toLowerCase();
return text.indexOf(search) >= 0;
}
function filterRules(searchPhrase) { function filterRules(searchPhrase) {
document.querySelectorAll('.vm-group').forEach((group) => { document.querySelectorAll('.sub-items').forEach((rules) => {
if (!searchPhrase) { let found = false;
group.classList.add('vm-found'); rules.querySelectorAll('.sub-item').forEach((rule) => {
return; if (searchPhrase) {
} const ruleName = rule.innerText.toLowerCase();
for (const item of group.querySelectorAll('.vm-group-search')) { const matches = []
if (matchText(searchPhrase, item)) { const hasValue = ruleName.indexOf(searchPhrase) >= 0;
group.classList.add('vm-found'); rule.querySelectorAll('.label').forEach((label) => {
return; const text = label.innerText.toLowerCase();
if (text.indexOf(searchPhrase) >= 0) {
matches.push(text);
}
});
if (!matches.length && !hasValue) {
rule.classList.add('d-none');
return;
}
} }
} rule.classList.remove('d-none');
group.classList.remove('vm-found'); found = true;
for (const item of group.querySelectorAll('.vm-item')) { });
if (matchText(searchPhrase, item)) { if (found && searchPhrase || !searchPhrase) {
item.classList.add('vm-found'); rules.classList.remove('d-none');
continue; } else {
} rules.classList.add('d-none');
if (Array.from(item.querySelectorAll('.label')).find(l => matchText(searchPhrase, l))) {
item.classList.add('vm-found');
continue;
}
item.classList.remove('vm-found');
} }
}); });
} }

View File

@@ -485,12 +485,6 @@ func templateFuncs() textTpl.FuncMap {
/* Helpers */ /* Helpers */
// now returns the Unix timestamp in seconds at the time of the template evaluation.
// For example: {{ (now | toTime).Sub $activeAt }} will return the duration the alert has been active.
"now": func() float64 {
return float64(time.Now().Unix())
},
// Converts a list of objects to a map with keys arg0, arg1 etc. // Converts a list of objects to a map with keys arg0, arg1 etc.
// This is intended to allow multiple arguments to be passed to templates. // This is intended to allow multiple arguments to be passed to templates.
"args": func(args ...any) map[string]any { "args": func(args ...any) map[string]any {

View File

@@ -45,7 +45,7 @@ func (eg *ErrGroup) Error() string {
return "" return ""
} }
var b strings.Builder var b strings.Builder
fmt.Fprintf(&b, "errors(%d): \n", len(eg.errs)) fmt.Fprintf(&b, "errors(%d): ", len(eg.errs))
for i, err := range eg.errs { for i, err := range eg.errs {
b.WriteString(err.Error()) b.WriteString(err.Error())
if i != len(eg.errs)-1 { if i != len(eg.errs)-1 {

View File

@@ -30,8 +30,8 @@ func TestErrGroup(t *testing.T) {
} }
f(nil, "") f(nil, "")
f([]error{errors.New("timeout")}, "errors(1): \ntimeout") f([]error{errors.New("timeout")}, "errors(1): timeout")
f([]error{errors.New("timeout"), errors.New("deadline")}, "errors(2): \ntimeout\ndeadline") f([]error{errors.New("timeout"), errors.New("deadline")}, "errors(2): timeout\ndeadline")
} }
// TestErrGroupConcurrent supposed to test concurrent // TestErrGroupConcurrent supposed to test concurrent
@@ -42,7 +42,7 @@ func TestErrGroupConcurrent(_ *testing.T) {
const writersN = 4 const writersN = 4
payload := make(chan error, writersN) payload := make(chan error, writersN)
for range writersN { for i := 0; i < writersN; i++ {
go func() { go func() {
for err := range payload { for err := range payload {
eg.Add(err) eg.Add(err)
@@ -51,7 +51,7 @@ func TestErrGroupConcurrent(_ *testing.T) {
} }
const iterations = 500 const iterations = 500
for i := range iterations { for i := 0; i < iterations; i++ {
payload <- fmt.Errorf("error %d", i) payload <- fmt.Errorf("error %d", i)
if i%10 == 0 { if i%10 == 0 {
_ = eg.Err() _ = eg.Err()

View File

@@ -29,9 +29,7 @@ var (
{"api/v1/rules", "list all loaded groups and rules"}, {"api/v1/rules", "list all loaded groups and rules"},
{"api/v1/alerts", "list all active alerts"}, {"api/v1/alerts", "list all active alerts"},
{"api/v1/notifiers", "list all notifiers"}, {"api/v1/notifiers", "list all notifiers"},
{fmt.Sprintf("api/v1/alert?%s=<int>&%s=<int>", rule.ParamGroupID, rule.ParamAlertID), "get alert status by group and alert ID"}, {fmt.Sprintf("api/v1/alert?%s=<int>&%s=<int>", paramGroupID, paramAlertID), "get alert status by group and alert ID"},
{fmt.Sprintf("api/v1/rule?%s=<int>&%s=<int>", rule.ParamGroupID, rule.ParamRuleID), "get rule status by group and rule ID"},
{fmt.Sprintf("api/v1/group?%s=<int>", rule.ParamGroupID), "get group status by group ID"},
} }
systemLinks = [][2]string{ systemLinks = [][2]string{
{"vmalert/groups", "UI"}, {"vmalert/groups", "UI"},
@@ -47,8 +45,8 @@ var (
{Name: "Docs", URL: "https://docs.victoriametrics.com/victoriametrics/vmalert/"}, {Name: "Docs", URL: "https://docs.victoriametrics.com/victoriametrics/vmalert/"},
} }
ruleTypeMap = map[string]string{ ruleTypeMap = map[string]string{
"alert": rule.TypeAlerting, "alert": ruleTypeAlerting,
"record": rule.TypeRecording, "record": ruleTypeRecording,
} }
) )
@@ -114,7 +112,7 @@ func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
case "/rules": case "/rules":
// Grafana makes an extra request to `/rules` // Grafana makes an extra request to `/rules`
// handler in addition to `/api/v1/rules` calls in alerts UI // handler in addition to `/api/v1/rules` calls in alerts UI
var data []*rule.ApiGroup var data []*apiGroup
rf, err := newRulesFilter(r) rf, err := newRulesFilter(r)
if err != nil { if err != nil {
httpserver.Errorf(w, r, "%s", err) httpserver.Errorf(w, r, "%s", err)
@@ -180,14 +178,14 @@ func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
w.Write(data) w.Write(data)
return true return true
case "/vmalert/api/v1/rule", "/api/v1/rule": case "/vmalert/api/v1/rule", "/api/v1/rule":
apiRule, err := rh.getRule(r) rule, err := rh.getRule(r)
if err != nil { if err != nil {
httpserver.Errorf(w, r, "%s", err) httpserver.Errorf(w, r, "%s", err)
return true return true
} }
rwu := rule.ApiRuleWithUpdates{ rwu := apiRuleWithUpdates{
ApiRule: apiRule, apiRule: rule,
StateUpdates: apiRule.Updates, StateUpdates: rule.Updates,
} }
data, err := json.Marshal(rwu) data, err := json.Marshal(rwu)
if err != nil { if err != nil {
@@ -197,20 +195,6 @@ func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
w.Header().Set("Content-Type", "application/json") w.Header().Set("Content-Type", "application/json")
w.Write(data) w.Write(data)
return true return true
case "/vmalert/api/v1/group", "/api/v1/group":
group, err := rh.getGroup(r)
if err != nil {
httpserver.Errorf(w, r, "%s", err)
return true
}
data, err := json.Marshal(group)
if err != nil {
httpserver.Errorf(w, r, "failed to marshal group: %s", err)
return true
}
w.Header().Set("Content-Type", "application/json")
w.Write(data)
return true
case "/-/reload": case "/-/reload":
if !httpserver.CheckAuthFlag(w, r, reloadAuthKey) { if !httpserver.CheckAuthFlag(w, r, reloadAuthKey) {
return true return true
@@ -225,42 +209,30 @@ func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
} }
} }
func (rh *requestHandler) getGroup(r *http.Request) (*rule.ApiGroup, error) { func (rh *requestHandler) getRule(r *http.Request) (apiRule, error) {
groupID, err := strconv.ParseUint(r.FormValue(rule.ParamGroupID), 10, 64) groupID, err := strconv.ParseUint(r.FormValue(paramGroupID), 10, 64)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to read %q param: %w", rule.ParamGroupID, err) return apiRule{}, fmt.Errorf("failed to read %q param: %w", paramGroupID, err)
} }
obj, err := rh.m.groupAPI(groupID) ruleID, err := strconv.ParseUint(r.FormValue(paramRuleID), 10, 64)
if err != nil { if err != nil {
return nil, errResponse(err, http.StatusNotFound) return apiRule{}, fmt.Errorf("failed to read %q param: %w", paramRuleID, err)
}
return obj, nil
}
func (rh *requestHandler) getRule(r *http.Request) (rule.ApiRule, error) {
groupID, err := strconv.ParseUint(r.FormValue(rule.ParamGroupID), 10, 64)
if err != nil {
return rule.ApiRule{}, fmt.Errorf("failed to read %q param: %w", rule.ParamGroupID, err)
}
ruleID, err := strconv.ParseUint(r.FormValue(rule.ParamRuleID), 10, 64)
if err != nil {
return rule.ApiRule{}, fmt.Errorf("failed to read %q param: %w", rule.ParamRuleID, err)
} }
obj, err := rh.m.ruleAPI(groupID, ruleID) obj, err := rh.m.ruleAPI(groupID, ruleID)
if err != nil { if err != nil {
return rule.ApiRule{}, errResponse(err, http.StatusNotFound) return apiRule{}, errResponse(err, http.StatusNotFound)
} }
return obj, nil return obj, nil
} }
func (rh *requestHandler) getAlert(r *http.Request) (*rule.ApiAlert, error) { func (rh *requestHandler) getAlert(r *http.Request) (*apiAlert, error) {
groupID, err := strconv.ParseUint(r.FormValue(rule.ParamGroupID), 10, 64) groupID, err := strconv.ParseUint(r.FormValue(paramGroupID), 10, 64)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to read %q param: %w", rule.ParamGroupID, err) return nil, fmt.Errorf("failed to read %q param: %w", paramGroupID, err)
} }
alertID, err := strconv.ParseUint(r.FormValue(rule.ParamAlertID), 10, 64) alertID, err := strconv.ParseUint(r.FormValue(paramAlertID), 10, 64)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to read %q param: %w", rule.ParamAlertID, err) return nil, fmt.Errorf("failed to read %q param: %w", paramAlertID, err)
} }
a, err := rh.m.alertAPI(groupID, alertID) a, err := rh.m.alertAPI(groupID, alertID)
if err != nil { if err != nil {
@@ -272,7 +244,7 @@ func (rh *requestHandler) getAlert(r *http.Request) (*rule.ApiAlert, error) {
type listGroupsResponse struct { type listGroupsResponse struct {
Status string `json:"status"` Status string `json:"status"`
Data struct { Data struct {
Groups []*rule.ApiGroup `json:"groups"` Groups []*apiGroup `json:"groups"`
} `json:"data"` } `json:"data"`
} }
@@ -338,19 +310,19 @@ func (rf *rulesFilter) matchesGroup(group *rule.Group) bool {
return true return true
} }
func (rh *requestHandler) groups(rf *rulesFilter) []*rule.ApiGroup { func (rh *requestHandler) groups(rf *rulesFilter) []*apiGroup {
rh.m.groupsMu.RLock() rh.m.groupsMu.RLock()
defer rh.m.groupsMu.RUnlock() defer rh.m.groupsMu.RUnlock()
groups := make([]*rule.ApiGroup, 0) groups := make([]*apiGroup, 0)
for _, group := range rh.m.groups { for _, group := range rh.m.groups {
if !rf.matchesGroup(group) { if !rf.matchesGroup(group) {
continue continue
} }
g := group.ToAPI() g := groupToAPI(group)
// the returned list should always be non-nil // the returned list should always be non-nil
// https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4221 // https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4221
filteredRules := make([]rule.ApiRule, 0) filteredRules := make([]apiRule, 0)
for _, rule := range g.Rules { for _, rule := range g.Rules {
if rf.ruleType != "" && rf.ruleType != rule.Type { if rf.ruleType != "" && rf.ruleType != rule.Type {
continue continue
@@ -378,7 +350,7 @@ func (rh *requestHandler) groups(rf *rulesFilter) []*rule.ApiGroup {
groups = append(groups, g) groups = append(groups, g)
} }
// sort list of groups for deterministic output // sort list of groups for deterministic output
slices.SortFunc(groups, func(a, b *rule.ApiGroup) int { slices.SortFunc(groups, func(a, b *apiGroup) int {
if a.Name != b.Name { if a.Name != b.Name {
return strings.Compare(a.Name, b.Name) return strings.Compare(a.Name, b.Name)
} }
@@ -403,32 +375,32 @@ func (rh *requestHandler) listGroups(rf *rulesFilter) ([]byte, error) {
type listAlertsResponse struct { type listAlertsResponse struct {
Status string `json:"status"` Status string `json:"status"`
Data struct { Data struct {
Alerts []*rule.ApiAlert `json:"alerts"` Alerts []*apiAlert `json:"alerts"`
} `json:"data"` } `json:"data"`
} }
func (rh *requestHandler) groupAlerts() []rule.GroupAlerts { func (rh *requestHandler) groupAlerts() []groupAlerts {
rh.m.groupsMu.RLock() rh.m.groupsMu.RLock()
defer rh.m.groupsMu.RUnlock() defer rh.m.groupsMu.RUnlock()
var gAlerts []rule.GroupAlerts var gAlerts []groupAlerts
for _, group := range rh.m.groups { for _, g := range rh.m.groups {
var alerts []*rule.ApiAlert var alerts []*apiAlert
g := group.ToAPI()
for _, r := range g.Rules { for _, r := range g.Rules {
if r.Type != rule.TypeAlerting { a, ok := r.(*rule.AlertingRule)
if !ok {
continue continue
} }
alerts = append(alerts, r.Alerts...) alerts = append(alerts, ruleToAPIAlert(a)...)
} }
if len(alerts) > 0 { if len(alerts) > 0 {
gAlerts = append(gAlerts, rule.GroupAlerts{ gAlerts = append(gAlerts, groupAlerts{
Group: g, Group: groupToAPI(g),
Alerts: alerts, Alerts: alerts,
}) })
} }
} }
slices.SortFunc(gAlerts, func(a, b rule.GroupAlerts) int { slices.SortFunc(gAlerts, func(a, b groupAlerts) int {
return strings.Compare(a.Group.Name, b.Group.Name) return strings.Compare(a.Group.Name, b.Group.Name)
}) })
return gAlerts return gAlerts
@@ -439,22 +411,22 @@ func (rh *requestHandler) listAlerts(rf *rulesFilter) ([]byte, error) {
defer rh.m.groupsMu.RUnlock() defer rh.m.groupsMu.RUnlock()
lr := listAlertsResponse{Status: "success"} lr := listAlertsResponse{Status: "success"}
lr.Data.Alerts = make([]*rule.ApiAlert, 0) lr.Data.Alerts = make([]*apiAlert, 0)
for _, group := range rh.m.groups { for _, group := range rh.m.groups {
if !rf.matchesGroup(group) { if !rf.matchesGroup(group) {
continue continue
} }
g := group.ToAPI() for _, r := range group.Rules {
for _, r := range g.Rules { a, ok := r.(*rule.AlertingRule)
if r.Type != rule.TypeAlerting { if !ok {
continue continue
} }
lr.Data.Alerts = append(lr.Data.Alerts, r.Alerts...) lr.Data.Alerts = append(lr.Data.Alerts, ruleToAPIAlert(a)...)
} }
} }
// sort list of alerts for deterministic output // sort list of alerts for deterministic output
slices.SortFunc(lr.Data.Alerts, func(a, b *rule.ApiAlert) int { slices.SortFunc(lr.Data.Alerts, func(a, b *apiAlert) int {
return strings.Compare(a.ID, b.ID) return strings.Compare(a.ID, b.ID)
}) })
@@ -471,7 +443,7 @@ func (rh *requestHandler) listAlerts(rf *rulesFilter) ([]byte, error) {
type listNotifiersResponse struct { type listNotifiersResponse struct {
Status string `json:"status"` Status string `json:"status"`
Data struct { Data struct {
Notifiers []*notifier.ApiNotifier `json:"notifiers"` Notifiers []*apiNotifier `json:"notifiers"`
} `json:"data"` } `json:"data"`
} }
@@ -479,20 +451,19 @@ func (rh *requestHandler) listNotifiers() ([]byte, error) {
targets := notifier.GetTargets() targets := notifier.GetTargets()
lr := listNotifiersResponse{Status: "success"} lr := listNotifiersResponse{Status: "success"}
lr.Data.Notifiers = make([]*notifier.ApiNotifier, 0) lr.Data.Notifiers = make([]*apiNotifier, 0)
for protoName, protoTargets := range targets { for protoName, protoTargets := range targets {
nr := &notifier.ApiNotifier{ notifier := &apiNotifier{
Kind: protoName, Kind: string(protoName),
Targets: make([]*notifier.ApiTarget, 0, len(protoTargets)), Targets: make([]*apiTarget, 0, len(protoTargets)),
} }
for _, target := range protoTargets { for _, target := range protoTargets {
nr.Targets = append(nr.Targets, &notifier.ApiTarget{ notifier.Targets = append(notifier.Targets, &apiTarget{
Address: target.Addr(), Address: target.Addr(),
Labels: target.Labels.ToMap(), Labels: target.Labels.ToMap(),
LastError: target.LastError(),
}) })
} }
lr.Data.Notifiers = append(lr.Data.Notifiers, nr) lr.Data.Notifiers = append(lr.Data.Notifiers, notifier)
} }
b, err := json.Marshal(lr) b, err := json.Marshal(lr)

View File

@@ -8,8 +8,6 @@
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/tpl" "github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/tpl"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/vmalertutil" "github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/vmalertutil"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier" "github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/rule"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
) %} ) %}
{% func Controls(prefix, currentIcon, currentText string, icons, filters map[string]string, search bool) %} {% func Controls(prefix, currentIcon, currentText string, icons, filters map[string]string, search bool) %}
@@ -79,8 +77,6 @@
{% func Welcome(r *http.Request) %} {% func Welcome(r *http.Request) %}
{%= tpl.Header(r, navItems, "vmalert", getLastConfigError()) %} {%= tpl.Header(r, navItems, "vmalert", getLastConfigError()) %}
<p> <p>
Version {%s buildinfo.Version %} <br>
API:<br> API:<br>
{% for _, p := range apiLinks %} {% for _, p := range apiLinks %}
{%code p, doc := p[0], p[1] %} {%code p, doc := p[0], p[1] %}
@@ -97,7 +93,7 @@
{%= tpl.Footer(r) %} {%= tpl.Footer(r) %}
{% endfunc %} {% endfunc %}
{% func ListGroups(r *http.Request, groups []*rule.ApiGroup, filter string) %} {% func ListGroups(r *http.Request, groups []*apiGroup, filter string) %}
{%code {%code
prefix := vmalertutil.Prefix(r.URL.Path) prefix := vmalertutil.Prefix(r.URL.Path)
filters := map[string]string{ filters := map[string]string{
@@ -117,17 +113,14 @@
{%= Controls(prefix, currentIcon, currentText, icons, filters, true) %} {%= Controls(prefix, currentIcon, currentText, icons, filters, true) %}
{% if len(groups) > 0 %} {% if len(groups) > 0 %}
{% for _, g := range groups %} {% for _, g := range groups %}
<div id="group-{%s g.ID %}" class="w-100 border-0 flex-column vm-group{% if g.Unhealthy > 0 %} alert-danger{% endif %}"> <div id="group-{%s g.ID %}" class="d-flex w-100 border-0 flex-column group-items{% if g.Unhealthy > 0 %} alert-danger{% endif %}">
<span class="d-flex justify-content-between"> <span class="d-flex justify-content-between">
<a <a href="#group-{%s g.ID %}">{%s g.Name %}{% if g.Type != "prometheus" %} ({%s g.Type %}){% endif %} (every {%f.0 g.Interval %}s) #</a>
class="vm-group-search"
href="#group-{%s g.ID %}"
>{%s g.Name %}{% if g.Type != "prometheus" %} ({%s g.Type %}){% endif %} (every {%f.0 g.Interval %}s) #</a>
<span <span
class="flex-grow-1 d-flex justify-content-end" class="flex-grow-1 d-flex justify-content-end"
role="button" role="button"
data-bs-toggle="collapse" data-bs-toggle="collapse"
data-bs-target="#item-{%s g.ID %}" data-bs-target="#sub-{%s g.ID %}"
> >
<span class="d-flex gap-2"> <span class="d-flex gap-2">
{% if g.Unhealthy > 0 %}<span class="badge bg-danger" title="Number of rules with status Error">{%d g.Unhealthy %}</span> {% endif %} {% if g.Unhealthy > 0 %}<span class="badge bg-danger" title="Number of rules with status Error">{%d g.Unhealthy %}</span> {% endif %}
@@ -140,9 +133,9 @@
class="d-flex flex-column row-gap-2 mb-2" class="d-flex flex-column row-gap-2 mb-2"
role="button" role="button"
data-bs-toggle="collapse" data-bs-toggle="collapse"
data-bs-target="#item-{%s g.ID %}" data-bs-target="#sub-{%s g.ID %}"
> >
<span class="fs-6 text-start vm-group-search w-100 fw-lighter">{%s g.File %}</span> <span class="fs-6 text-start w-100 fw-lighter">{%s g.File %}</span>
{% if len(g.Params) > 0 %} {% if len(g.Params) > 0 %}
<span class="fs-6 text-start w-100 d-flex justify-content-between fw-lighter"> <span class="fs-6 text-start w-100 d-flex justify-content-between fw-lighter">
<span>Extra params</span> <span>Extra params</span>
@@ -164,7 +157,7 @@
</span> </span>
{% endif %} {% endif %}
</span> </span>
<div class="collapse" id="item-{%s g.ID %}"> <div class="collapse sub-items" id="sub-{%s g.ID %}">
<table class="table table-striped table-hover table-sm"> <table class="table table-striped table-hover table-sm">
<thead> <thead>
<tr> <tr>
@@ -175,7 +168,7 @@
</thead> </thead>
<tbody> <tbody>
{% for _, r := range g.Rules %} {% for _, r := range g.Rules %}
<tr class="vm-item{% if r.LastError != "" %} alert-danger{% endif %}"> <tr class="sub-item{% if r.LastError != "" %} alert-danger{% endif %}">
<td> <td>
<div class="row"> <div class="row">
<div class="col-12 mb-2"> <div class="col-12 mb-2">
@@ -212,12 +205,7 @@
</div> </div>
</td> </td>
<td class="text-center">{%d r.LastSamples %}</td> <td class="text-center">{%d r.LastSamples %}</td>
<td class="text-center">{% if r.LastEvaluation.IsZero() %} <td class="text-center">{%f.3 time.Since(r.LastEvaluation).Seconds() %}s ago</td>
Never
{% else %}
{%f.3 time.Since(r.LastEvaluation).Seconds() %}s ago
{% endif %}
</td>
</tr> </tr>
{% endfor %} {% endfor %}
</tbody> </tbody>
@@ -234,7 +222,7 @@
{% endfunc %} {% endfunc %}
{% func ListAlerts(r *http.Request, groupAlerts []rule.GroupAlerts) %} {% func ListAlerts(r *http.Request, groupAlerts []groupAlerts) %}
{%code prefix := vmalertutil.Prefix(r.URL.Path) %} {%code prefix := vmalertutil.Prefix(r.URL.Path) %}
{%= tpl.Header(r, navItems, "Alerts", getLastConfigError()) %} {%= tpl.Header(r, navItems, "Alerts", getLastConfigError()) %}
{%= Controls(prefix, "", "", nil, nil, true) %} {%= Controls(prefix, "", "", nil, nil, true) %}
@@ -243,7 +231,7 @@
{%code {%code
g := ga.Group g := ga.Group
var keys []string var keys []string
alertsByRule := make(map[string][]*rule.ApiAlert) alertsByRule := make(map[string][]*apiAlert)
for _, alert := range ga.Alerts { for _, alert := range ga.Alerts {
if len(alertsByRule[alert.RuleID]) < 1 { if len(alertsByRule[alert.RuleID]) < 1 {
keys = append(keys, alert.RuleID) keys = append(keys, alert.RuleID)
@@ -252,14 +240,14 @@
} }
sort.Strings(keys) sort.Strings(keys)
%} %}
<div class="w-100 flex-column vm-group alert-danger"> <div class="d-flex w-100 flex-column group-items alert-danger">
<span id="group-{%s g.ID %}" class="d-flex justify-content-between"> <span id="group-{%s g.ID %}" class="d-flex justify-content-between">
<a href="#group-{%s g.ID %}">{%s g.Name %}{% if g.Type != "prometheus" %} ({%s g.Type %}){% endif %}</a> <a href="#group-{%s g.ID %}">{%s g.Name %}{% if g.Type != "prometheus" %} ({%s g.Type %}){% endif %}</a>
<span <span
class="flex-grow-1 d-flex justify-content-end" class="flex-grow-1 d-flex justify-content-end"
role="button" role="button"
data-bs-toggle="collapse" data-bs-toggle="collapse"
data-bs-target="#item-{%s g.ID %}" data-bs-target="#sub-{%s g.ID %}"
> >
<span class="badge bg-danger" title="Number of active alerts">{%d len(ga.Alerts) %}</span> <span class="badge bg-danger" title="Number of active alerts">{%d len(ga.Alerts) %}</span>
</span> </span>
@@ -269,10 +257,10 @@
class="fs-6 text-start w-100 fw-lighter" class="fs-6 text-start w-100 fw-lighter"
role="button" role="button"
data-bs-toggle="collapse" data-bs-toggle="collapse"
data-bs-target="#item-{%s g.ID %}" data-bs-target="#sub-{%s g.ID %}"
>{%s g.File %}</span> >{%s g.File %}</span>
</span> </span>
<div class="collapse" id="item-{%s g.ID %}"> <div class="collapse sub-items" id="sub-{%s g.ID %}">
{% for _, ruleID := range keys %} {% for _, ruleID := range keys %}
{%code {%code
defaultAR := alertsByRule[ruleID][0] defaultAR := alertsByRule[ruleID][0]
@@ -283,7 +271,7 @@
sort.Strings(labelKeys) sort.Strings(labelKeys)
%} %}
<br> <br>
<div class="vm-item"> <div class="sub-item">
<b>alert:</b> {%s defaultAR.Name %} ({%d len(alertsByRule[ruleID]) %}) <b>alert:</b> {%s defaultAR.Name %} ({%d len(alertsByRule[ruleID]) %})
| <span><a target="_blank" href="{%s defaultAR.SourceLink %}">Source</a></span> | <span><a target="_blank" href="{%s defaultAR.SourceLink %}">Source</a></span>
<br> <br>
@@ -348,20 +336,20 @@
typeK, ns := keys[i], targets[notifier.TargetType(keys[i])] typeK, ns := keys[i], targets[notifier.TargetType(keys[i])]
count := len(ns) count := len(ns)
%} %}
<div class="w-100 flex-column vm-group"> <div class="d-flex w-100 flex-column group-items">
<span class="d-flex justify-content-between" id="group-{%s typeK %}"> <span class="d-flex justify-content-between" id="group-{%s typeK %}">
<a href="#group-{%s typeK %}">{%s typeK %} ({%d count %})</a> <a href="#group-{%s typeK %}">{%s typeK %} ({%d count %})</a>
<span <span
class="flex-grow-1" class="flex-grow-1"
role="button" role="button"
data-bs-toggle="collapse" data-bs-toggle="collapse"
data-bs-target="#item-{%s typeK %}" data-bs-target="#sub-{%s typeK %}"
></span> ></span>
</span> </span>
<div id="item-{%s typeK %}" class="collapse show"> <div id="sub-{%s typeK %}" class="collapse show sub-items">
<table class="table table-striped table-hover table-sm"> <table class="table table-striped table-hover table-sm">
<thead> <thead>
<tr class="vm-item"> <tr class="sub-item">
<th scope="col">Labels</th> <th scope="col">Labels</th>
<th scope="col">Address</th> <th scope="col">Address</th>
</tr> </tr>
@@ -390,7 +378,7 @@
{%= tpl.Footer(r) %} {%= tpl.Footer(r) %}
{% endfunc %} {% endfunc %}
{% func Alert(r *http.Request, alert *rule.ApiAlert) %} {% func Alert(r *http.Request, alert *apiAlert) %}
{%code prefix := vmalertutil.Prefix(r.URL.Path) %} {%code prefix := vmalertutil.Prefix(r.URL.Path) %}
{%= tpl.Header(r, navItems, "", getLastConfigError()) %} {%= tpl.Header(r, navItems, "", getLastConfigError()) %}
{%code {%code
@@ -446,7 +434,7 @@
<div class="col"> <div class="col">
{% for _, k := range annotationKeys %} {% for _, k := range annotationKeys %}
<b>{%s k %}:</b><br> <b>{%s k %}:</b><br>
<p class="annotations">{%s alert.Annotations[k] %}</p> <p>{%s alert.Annotations[k] %}</p>
{% endfor %} {% endfor %}
</div> </div>
</div> </div>
@@ -476,7 +464,7 @@
{% endfunc %} {% endfunc %}
{% func RuleDetails(r *http.Request, rule rule.ApiRule) %} {% func RuleDetails(r *http.Request, rule apiRule) %}
{%code prefix := vmalertutil.Prefix(r.URL.Path) %} {%code prefix := vmalertutil.Prefix(r.URL.Path) %}
{%= tpl.Header(r, navItems, "", getLastConfigError()) %} {%= tpl.Header(r, navItems, "", getLastConfigError()) %}
{%code {%code
@@ -560,7 +548,7 @@
<div class="col"> <div class="col">
{% for _, k := range annotationKeys %} {% for _, k := range annotationKeys %}
<b>{%s k %}:</b><br> <b>{%s k %}:</b><br>
<p class="annotations">{%s rule.Annotations[k] %}</p> <p>{%s rule.Annotations[k] %}</p>
{% endfor %} {% endfor %}
</div> </div>
</div> </div>
@@ -605,11 +593,11 @@
<table class="table table-striped table-hover table-sm"> <table class="table table-striped table-hover table-sm">
<thead> <thead>
<tr> <tr>
<th scope="col" title="The time when the rule was executed">Updated at</th> <th scope="col" title="The time when event was created">Updated at</th>
<th scope="col" class="w-10 text-center" title="How many series expression returns. Each series will represent an alert.">Series returned</th> <th scope="col" class="w-10 text-center" title="How many series expression returns. Each series will represent an alert.">Series returned</th>
{% if seriesFetchedEnabled %}<th scope="col" class="w-10 text-center" title="How many series were scanned by datasource during the evaluation">Series fetched</th>{% endif %} {% if seriesFetchedEnabled %}<th scope="col" class="w-10 text-center" title="How many series were scanned by datasource during the evaluation">Series fetched</th>{% endif %}
<th scope="col" class="w-10 text-center" title="How many seconds request took">Duration</th> <th scope="col" class="w-10 text-center" title="How many seconds request took">Duration</th>
<th scope="col" class="text-center" title="The time used in execution query request">Execution timestamp</th> <th scope="col" class="text-center" title="Time used for rule execution">Executed at</th>
<th scope="col" class="text-center" title="cURL command with request example">cURL</th> <th scope="col" class="text-center" title="cURL command with request example">cURL</th>
</tr> </tr>
</thead> </thead>
@@ -661,7 +649,7 @@
<span class="badge bg-warning text-dark" title="This firing state is kept because of `keep_firing_for`">stabilizing</span> <span class="badge bg-warning text-dark" title="This firing state is kept because of `keep_firing_for`">stabilizing</span>
{% endfunc %} {% endfunc %}
{% func seriesFetchedWarn(prefix string, r rule.ApiRule) %} {% func seriesFetchedWarn(prefix string, r apiRule) %}
{% if isNoMatch(r) %} {% if isNoMatch(r) %}
<svg <svg
data-bs-toggle="tooltip" data-bs-toggle="tooltip"
@@ -675,7 +663,7 @@
{% endfunc %} {% endfunc %}
{%code {%code
func isNoMatch (r rule.ApiRule) bool { func isNoMatch (r apiRule) bool {
return r.LastSamples == 0 && r.LastSeriesFetched != nil && *r.LastSeriesFetched == 0 return r.LastSamples == 0 && r.LastSeriesFetched != nil && *r.LastSeriesFetched == 0
} }
%} %}

File diff suppressed because it is too large Load Diff

View File

@@ -23,12 +23,8 @@ func TestHandler(t *testing.T) {
Timestamps: []int64{0}, Timestamps: []int64{0},
}) })
m := &manager{groups: map[uint64]*rule.Group{}} m := &manager{groups: map[uint64]*rule.Group{}}
_, cleanup := notifier.InitFakeNotifier()
defer cleanup()
var ar *rule.AlertingRule var ar *rule.AlertingRule
var rr *rule.RecordingRule var rr *rule.RecordingRule
var groupIDs []uint64
for _, dsType := range []string{"prometheus", "", "graphite"} { for _, dsType := range []string{"prometheus", "", "graphite"} {
g := rule.NewGroup(config.Group{ g := rule.NewGroup(config.Group{
Name: "group", Name: "group",
@@ -48,10 +44,8 @@ func TestHandler(t *testing.T) {
}, fq, 1*time.Minute, nil) }, fq, 1*time.Minute, nil)
ar = g.Rules[0].(*rule.AlertingRule) ar = g.Rules[0].(*rule.AlertingRule)
rr = g.Rules[1].(*rule.RecordingRule) rr = g.Rules[1].(*rule.RecordingRule)
g.ExecOnce(context.Background(), nil, time.Time{}) g.ExecOnce(context.Background(), func() []notifier.Notifier { return nil }, nil, time.Time{})
id := g.CreateID() m.groups[g.CreateID()] = g
m.groups[id] = g
groupIDs = append(groupIDs, id)
} }
rh := &requestHandler{m: m} rh := &requestHandler{m: m}
@@ -88,22 +82,22 @@ func TestHandler(t *testing.T) {
}) })
t.Run("/vmalert/rule", func(t *testing.T) { t.Run("/vmalert/rule", func(t *testing.T) {
a := ar.ToAPI() a := ruleToAPI(ar)
getResp(t, ts.URL+"/vmalert/"+a.WebLink(), nil, 200) getResp(t, ts.URL+"/vmalert/"+a.WebLink(), nil, 200)
r := rr.ToAPI() r := ruleToAPI(rr)
getResp(t, ts.URL+"/vmalert/"+r.WebLink(), nil, 200) getResp(t, ts.URL+"/vmalert/"+r.WebLink(), nil, 200)
}) })
t.Run("/vmalert/alert", func(t *testing.T) { t.Run("/vmalert/alert", func(t *testing.T) {
alerts := ar.AlertsToAPI() alerts := ruleToAPIAlert(ar)
for _, a := range alerts { for _, a := range alerts {
getResp(t, ts.URL+"/vmalert/"+a.WebLink(), nil, 200) getResp(t, ts.URL+"/vmalert/"+a.WebLink(), nil, 200)
} }
}) })
t.Run("/vmalert/rule?badParam", func(t *testing.T) { t.Run("/vmalert/rule?badParam", func(t *testing.T) {
params := fmt.Sprintf("?%s=0&%s=1", rule.ParamGroupID, rule.ParamRuleID) params := fmt.Sprintf("?%s=0&%s=1", paramGroupID, paramRuleID)
getResp(t, ts.URL+"/vmalert/rule"+params, nil, 404) getResp(t, ts.URL+"/vmalert/rule"+params, nil, 404)
params = fmt.Sprintf("?%s=1&%s=0", rule.ParamGroupID, rule.ParamRuleID) params = fmt.Sprintf("?%s=1&%s=0", paramGroupID, paramRuleID)
getResp(t, ts.URL+"/vmalert/rule"+params, nil, 404) getResp(t, ts.URL+"/vmalert/rule"+params, nil, 404)
}) })
@@ -130,14 +124,14 @@ func TestHandler(t *testing.T) {
} }
}) })
t.Run("/api/v1/alert?alertID&groupID", func(t *testing.T) { t.Run("/api/v1/alert?alertID&groupID", func(t *testing.T) {
expAlert := rule.NewAlertAPI(ar, ar.GetAlerts()[0]) expAlert := newAlertAPI(ar, ar.GetAlerts()[0])
alert := &rule.ApiAlert{} alert := &apiAlert{}
getResp(t, ts.URL+"/"+expAlert.APILink(), alert, 200) getResp(t, ts.URL+"/"+expAlert.APILink(), alert, 200)
if !reflect.DeepEqual(alert, expAlert) { if !reflect.DeepEqual(alert, expAlert) {
t.Fatalf("expected %v is equal to %v", alert, expAlert) t.Fatalf("expected %v is equal to %v", alert, expAlert)
} }
alert = &rule.ApiAlert{} alert = &apiAlert{}
getResp(t, ts.URL+"/vmalert/"+expAlert.APILink(), alert, 200) getResp(t, ts.URL+"/vmalert/"+expAlert.APILink(), alert, 200)
if !reflect.DeepEqual(alert, expAlert) { if !reflect.DeepEqual(alert, expAlert) {
t.Fatalf("expected %v is equal to %v", alert, expAlert) t.Fatalf("expected %v is equal to %v", alert, expAlert)
@@ -145,16 +139,16 @@ func TestHandler(t *testing.T) {
}) })
t.Run("/api/v1/alert?badParams", func(t *testing.T) { t.Run("/api/v1/alert?badParams", func(t *testing.T) {
params := fmt.Sprintf("?%s=0&%s=1", rule.ParamGroupID, rule.ParamAlertID) params := fmt.Sprintf("?%s=0&%s=1", paramGroupID, paramAlertID)
getResp(t, ts.URL+"/api/v1/alert"+params, nil, 404) getResp(t, ts.URL+"/api/v1/alert"+params, nil, 404)
getResp(t, ts.URL+"/vmalert/api/v1/alert"+params, nil, 404) getResp(t, ts.URL+"/vmalert/api/v1/alert"+params, nil, 404)
params = fmt.Sprintf("?%s=1&%s=0", rule.ParamGroupID, rule.ParamAlertID) params = fmt.Sprintf("?%s=1&%s=0", paramGroupID, paramAlertID)
getResp(t, ts.URL+"/api/v1/alert"+params, nil, 404) getResp(t, ts.URL+"/api/v1/alert"+params, nil, 404)
getResp(t, ts.URL+"/vmalert/api/v1/alert"+params, nil, 404) getResp(t, ts.URL+"/vmalert/api/v1/alert"+params, nil, 404)
// bad request, alertID is missing // bad request, alertID is missing
params = fmt.Sprintf("?%s=1", rule.ParamGroupID) params = fmt.Sprintf("?%s=1", paramGroupID)
getResp(t, ts.URL+"/api/v1/alert"+params, nil, 400) getResp(t, ts.URL+"/api/v1/alert"+params, nil, 400)
getResp(t, ts.URL+"/vmalert/api/v1/alert"+params, nil, 400) getResp(t, ts.URL+"/vmalert/api/v1/alert"+params, nil, 400)
}) })
@@ -173,42 +167,27 @@ func TestHandler(t *testing.T) {
} }
}) })
t.Run("/api/v1/rule?ruleID&groupID", func(t *testing.T) { t.Run("/api/v1/rule?ruleID&groupID", func(t *testing.T) {
expRule := ar.ToAPI() expRule := ruleToAPI(ar)
gotRule := rule.ApiRule{} gotRule := apiRule{}
getResp(t, ts.URL+"/"+expRule.APILink(), &gotRule, 200) getResp(t, ts.URL+"/"+expRule.APILink(), &gotRule, 200)
if expRule.ID != gotRule.ID { if expRule.ID != gotRule.ID {
t.Fatalf("expected to get Rule %q; got %q instead", expRule.ID, gotRule.ID) t.Fatalf("expected to get Rule %q; got %q instead", expRule.ID, gotRule.ID)
} }
gotRule = rule.ApiRule{} gotRule = apiRule{}
getResp(t, ts.URL+"/vmalert/"+expRule.APILink(), &gotRule, 200) getResp(t, ts.URL+"/vmalert/"+expRule.APILink(), &gotRule, 200)
if expRule.ID != gotRule.ID { if expRule.ID != gotRule.ID {
t.Fatalf("expected to get Rule %q; got %q instead", expRule.ID, gotRule.ID) t.Fatalf("expected to get Rule %q; got %q instead", expRule.ID, gotRule.ID)
} }
gotRuleWithUpdates := rule.ApiRuleWithUpdates{} gotRuleWithUpdates := apiRuleWithUpdates{}
getResp(t, ts.URL+"/"+expRule.APILink(), &gotRuleWithUpdates, 200) getResp(t, ts.URL+"/"+expRule.APILink(), &gotRuleWithUpdates, 200)
if len(gotRuleWithUpdates.StateUpdates) < 1 { if len(gotRuleWithUpdates.StateUpdates) < 1 {
t.Fatalf("expected %+v to have state updates field not empty", gotRuleWithUpdates.StateUpdates) t.Fatalf("expected %+v to have state updates field not empty", gotRuleWithUpdates.StateUpdates)
} }
}) })
t.Run("/api/v1/group?groupID", func(t *testing.T) {
id := groupIDs[0]
g := m.groups[id]
expGroup := g.ToAPI()
gotGroup := rule.ApiGroup{}
getResp(t, ts.URL+"/"+expGroup.APILink(), &gotGroup, 200)
if expGroup.ID != gotGroup.ID {
t.Fatalf("expected to get Group %q; got %q instead", expGroup.ID, gotGroup.ID)
}
gotGroup = rule.ApiGroup{}
getResp(t, ts.URL+"/vmalert/"+expGroup.APILink(), &gotGroup, 200)
if expGroup.ID != gotGroup.ID {
t.Fatalf("expected to get Group %q; got %q instead", expGroup.ID, gotGroup.ID)
}
})
t.Run("/api/v1/rules&filters", func(t *testing.T) { t.Run("/api/v1/rules&filters", func(t *testing.T) {
check := func(url string, statusCode, expGroups, expRules int) { check := func(url string, statusCode, expGroups, expRules int) {

View File

@@ -1,4 +1,4 @@
package rule package main
import ( import (
"fmt" "fmt"
@@ -8,28 +8,79 @@ import (
"time" "time"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier" "github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/rule"
) )
const ( const (
// ParamGroupID is group id key in url parameter // ParamGroupID is group id key in url parameter
ParamGroupID = "group_id" paramGroupID = "group_id"
// ParamAlertID is alert id key in url parameter // ParamAlertID is alert id key in url parameter
ParamAlertID = "alert_id" paramAlertID = "alert_id"
// ParamRuleID is rule id key in url parameter // ParamRuleID is rule id key in url parameter
ParamRuleID = "rule_id" paramRuleID = "rule_id"
// TypeRecording is a RecordingRule type
TypeRecording = "recording"
// TypeAlerting is an AlertingRule type
TypeAlerting = "alerting"
) )
// ApiGroup represents a Group for web view type apiNotifier struct {
type ApiGroup struct { Kind string `json:"kind"`
Targets []*apiTarget `json:"targets"`
}
type apiTarget struct {
Address string `json:"address"`
Labels map[string]string `json:"labels"`
}
// apiAlert represents a notifier.AlertingRule state
// for WEB view
// https://github.com/prometheus/compliance/blob/main/alert_generator/specification.md#get-apiv1rules
type apiAlert struct {
State string `json:"state"`
Name string `json:"name"`
Value string `json:"value"`
Labels map[string]string `json:"labels,omitempty"`
Annotations map[string]string `json:"annotations"`
ActiveAt time.Time `json:"activeAt"`
// Additional fields
// ID is an unique Alert's ID within a group
ID string `json:"id"`
// RuleID is an unique Rule's ID within a group
RuleID string `json:"rule_id"`
// GroupID is an unique Group's ID
GroupID string `json:"group_id"`
// Expression contains the PromQL/MetricsQL expression
// for Rule's evaluation
Expression string `json:"expression"`
// SourceLink contains a link to a system which should show
// why Alert was generated
SourceLink string `json:"source"`
// Restored shows whether Alert's state was restored on restart
Restored bool `json:"restored"`
// Stabilizing shows when firing state is kept because of
// `keep_firing_for` instead of real alert
Stabilizing bool `json:"stabilizing"`
}
// WebLink returns a link to the alert which can be used in UI.
func (aa *apiAlert) WebLink() string {
return fmt.Sprintf("alert?%s=%s&%s=%s",
paramGroupID, aa.GroupID, paramAlertID, aa.ID)
}
// APILink returns a link to the alert's JSON representation.
func (aa *apiAlert) APILink() string {
return fmt.Sprintf("api/v1/alert?%s=%s&%s=%s",
paramGroupID, aa.GroupID, paramAlertID, aa.ID)
}
// apiGroup represents Group for web view
// https://github.com/prometheus/compliance/blob/main/alert_generator/specification.md#get-apiv1rules
type apiGroup struct {
// Name is the group name as present in the config // Name is the group name as present in the config
Name string `json:"name"` Name string `json:"name"`
// Rules contains both recording and alerting rules // Rules contains both recording and alerting rules
Rules []ApiRule `json:"rules"` Rules []apiRule `json:"rules"`
// Interval is the Group's evaluation interval in float seconds as present in the file. // Interval is the Group's evaluation interval in float seconds as present in the file.
Interval float64 `json:"interval"` Interval float64 `json:"interval"`
// LastEvaluation is the timestamp of the last time the Group was executed // LastEvaluation is the timestamp of the last time the Group was executed
@@ -65,20 +116,15 @@ type ApiGroup struct {
NoMatch int NoMatch int
} }
// APILink returns a link to the group's JSON representation. // groupAlerts represents a group of alerts for WEB view
func (ag *ApiGroup) APILink() string { type groupAlerts struct {
return fmt.Sprintf("api/v1/group?%s=%s", ParamGroupID, ag.ID) Group *apiGroup
Alerts []*apiAlert
} }
// GroupAlerts represents a Group with its Alerts for web view // apiRule represents a Rule for web view
type GroupAlerts struct {
Group *ApiGroup
Alerts []*ApiAlert
}
// ApiRule represents a Rule for web view
// see https://github.com/prometheus/compliance/blob/main/alert_generator/specification.md#get-apiv1rules // see https://github.com/prometheus/compliance/blob/main/alert_generator/specification.md#get-apiv1rules
type ApiRule struct { type apiRule struct {
// State must be one of these under following scenarios // State must be one of these under following scenarios
// "pending": at least 1 alert in the rule in pending state and no other alert in firing ruleState. // "pending": at least 1 alert in the rule in pending state and no other alert in firing ruleState.
// "firing": at least 1 alert in the rule in firing state. // "firing": at least 1 alert in the rule in firing state.
@@ -100,7 +146,7 @@ type ApiRule struct {
// LastEvaluation is the timestamp of the last time the rule was executed // LastEvaluation is the timestamp of the last time the rule was executed
LastEvaluation time.Time `json:"lastEvaluation"` LastEvaluation time.Time `json:"lastEvaluation"`
// Alerts is the list of all the alerts in this rule that are currently pending or firing // Alerts is the list of all the alerts in this rule that are currently pending or firing
Alerts []*ApiAlert `json:"alerts,omitempty"` Alerts []*apiAlert `json:"alerts,omitempty"`
// Health is the health of rule evaluation. // Health is the health of rule evaluation.
// It MUST be one of "ok", "err", "unknown" // It MUST be one of "ok", "err", "unknown"
Health string `json:"health"` Health string `json:"health"`
@@ -131,87 +177,143 @@ type ApiRule struct {
// MaxUpdates is the max number of recorded ruleStateEntry objects // MaxUpdates is the max number of recorded ruleStateEntry objects
MaxUpdates int `json:"max_updates_entries"` MaxUpdates int `json:"max_updates_entries"`
// Updates contains the ordered list of recorded ruleStateEntry objects // Updates contains the ordered list of recorded ruleStateEntry objects
Updates []StateEntry `json:"-"` Updates []rule.StateEntry `json:"-"`
} }
// ApiAlert represents a notifier.AlertingRule state // apiRuleWithUpdates represents apiRule but with extra fields for marshalling
// for WEB view type apiRuleWithUpdates struct {
// https://github.com/prometheus/compliance/blob/main/alert_generator/specification.md#get-apiv1rules apiRule
type ApiAlert struct {
State string `json:"state"`
Name string `json:"name"`
Value string `json:"value"`
Labels map[string]string `json:"labels,omitempty"`
Annotations map[string]string `json:"annotations"`
ActiveAt time.Time `json:"activeAt"`
// Additional fields
// ID is an unique Alert's ID within a group
ID string `json:"id"`
// RuleID is an unique Rule's ID within a group
RuleID string `json:"rule_id"`
// GroupID is an unique Group's ID
GroupID string `json:"group_id"`
// Expression contains the PromQL/MetricsQL expression
// for Rule's evaluation
Expression string `json:"expression"`
// SourceLink contains a link to a system which should show
// why Alert was generated
SourceLink string `json:"source"`
// Restored shows whether Alert's state was restored on restart
Restored bool `json:"restored"`
// Stabilizing shows when firing state is kept because of
// `keep_firing_for` instead of real alert
Stabilizing bool `json:"stabilizing"`
}
// WebLink returns a link to the alert which can be used in UI.
func (aa *ApiAlert) WebLink() string {
return fmt.Sprintf("alert?%s=%s&%s=%s",
ParamGroupID, aa.GroupID, ParamAlertID, aa.ID)
}
// APILink returns a link to the alert's JSON representation.
func (aa *ApiAlert) APILink() string {
return fmt.Sprintf("api/v1/alert?%s=%s&%s=%s",
ParamGroupID, aa.GroupID, ParamAlertID, aa.ID)
}
// ApiRuleWithUpdates represents ApiRule but with extra fields for marshalling
type ApiRuleWithUpdates struct {
ApiRule
// Updates contains the ordered list of recorded ruleStateEntry objects // Updates contains the ordered list of recorded ruleStateEntry objects
StateUpdates []StateEntry `json:"updates,omitempty"` StateUpdates []rule.StateEntry `json:"updates,omitempty"`
} }
// APILink returns a link to the rule's JSON representation. // APILink returns a link to the rule's JSON representation.
func (ar ApiRule) APILink() string { func (ar apiRule) APILink() string {
return fmt.Sprintf("api/v1/rule?%s=%s&%s=%s", return fmt.Sprintf("api/v1/rule?%s=%s&%s=%s",
ParamGroupID, ar.GroupID, ParamRuleID, ar.ID) paramGroupID, ar.GroupID, paramRuleID, ar.ID)
} }
// WebLink returns a link to the alert which can be used in UI. // WebLink returns a link to the alert which can be used in UI.
func (ar ApiRule) WebLink() string { func (ar apiRule) WebLink() string {
return fmt.Sprintf("rule?%s=%s&%s=%s", return fmt.Sprintf("rule?%s=%s&%s=%s",
ParamGroupID, ar.GroupID, ParamRuleID, ar.ID) paramGroupID, ar.GroupID, paramRuleID, ar.ID)
} }
// AlertsToAPI returns list of ApiAlert objects from existing alerts func ruleToAPI(r any) apiRule {
func (ar *AlertingRule) AlertsToAPI() []*ApiAlert { if ar, ok := r.(*rule.AlertingRule); ok {
var alerts []*ApiAlert return alertingToAPI(ar)
}
if rr, ok := r.(*rule.RecordingRule); ok {
return recordingToAPI(rr)
}
return apiRule{}
}
const (
ruleTypeRecording = "recording"
ruleTypeAlerting = "alerting"
)
func recordingToAPI(rr *rule.RecordingRule) apiRule {
lastState := rule.GetLastEntry(rr)
r := apiRule{
Type: ruleTypeRecording,
DatasourceType: rr.Type.String(),
Name: rr.Name,
Query: rr.Expr,
Labels: rr.Labels,
LastEvaluation: lastState.Time,
EvaluationTime: lastState.Duration.Seconds(),
Health: "ok",
LastSamples: lastState.Samples,
LastSeriesFetched: lastState.SeriesFetched,
MaxUpdates: rule.GetRuleStateSize(rr),
Updates: rule.GetAllRuleState(rr),
// encode as strings to avoid rounding
ID: fmt.Sprintf("%d", rr.ID()),
GroupID: fmt.Sprintf("%d", rr.GroupID),
GroupName: rr.GroupName,
File: rr.File,
}
if lastState.Err != nil {
r.LastError = lastState.Err.Error()
r.Health = "err"
}
return r
}
// alertingToAPI returns Rule representation in form of apiRule
func alertingToAPI(ar *rule.AlertingRule) apiRule {
lastState := rule.GetLastEntry(ar)
r := apiRule{
Type: ruleTypeAlerting,
DatasourceType: ar.Type.String(),
Name: ar.Name,
Query: ar.Expr,
Duration: ar.For.Seconds(),
KeepFiringFor: ar.KeepFiringFor.Seconds(),
Labels: ar.Labels,
Annotations: ar.Annotations,
LastEvaluation: lastState.Time,
EvaluationTime: lastState.Duration.Seconds(),
Health: "ok",
State: "inactive",
Alerts: ruleToAPIAlert(ar),
LastSamples: lastState.Samples,
LastSeriesFetched: lastState.SeriesFetched,
MaxUpdates: rule.GetRuleStateSize(ar),
Updates: rule.GetAllRuleState(ar),
Debug: ar.Debug,
// encode as strings to avoid rounding in JSON
ID: fmt.Sprintf("%d", ar.ID()),
GroupID: fmt.Sprintf("%d", ar.GroupID),
GroupName: ar.GroupName,
File: ar.File,
}
if lastState.Err != nil {
r.LastError = lastState.Err.Error()
r.Health = "err"
}
// satisfy apiRule.State logic
if len(r.Alerts) > 0 {
r.State = notifier.StatePending.String()
stateFiring := notifier.StateFiring.String()
for _, a := range r.Alerts {
if a.State == stateFiring {
r.State = stateFiring
break
}
}
}
return r
}
// ruleToAPIAlert generates list of apiAlert objects from existing alerts
func ruleToAPIAlert(ar *rule.AlertingRule) []*apiAlert {
var alerts []*apiAlert
for _, a := range ar.GetAlerts() { for _, a := range ar.GetAlerts() {
if a.State == notifier.StateInactive { if a.State == notifier.StateInactive {
continue continue
} }
alerts = append(alerts, NewAlertAPI(ar, a)) alerts = append(alerts, newAlertAPI(ar, a))
} }
return alerts return alerts
} }
// alertToAPI generates apiAlert object from alert by its id(hash)
func alertToAPI(ar *rule.AlertingRule, id uint64) *apiAlert {
a := ar.GetAlert(id)
if a == nil {
return nil
}
return newAlertAPI(ar, a)
}
// NewAlertAPI creates apiAlert for notifier.Alert // NewAlertAPI creates apiAlert for notifier.Alert
func NewAlertAPI(ar *AlertingRule, a *notifier.Alert) *ApiAlert { func newAlertAPI(ar *rule.AlertingRule, a *notifier.Alert) *apiAlert {
aa := &ApiAlert{ aa := &apiAlert{
// encode as strings to avoid rounding // encode as strings to avoid rounding
ID: fmt.Sprintf("%d", a.ID), ID: fmt.Sprintf("%d", a.ID),
GroupID: fmt.Sprintf("%d", a.GroupID), GroupID: fmt.Sprintf("%d", a.GroupID),
@@ -226,8 +328,8 @@ func NewAlertAPI(ar *AlertingRule, a *notifier.Alert) *ApiAlert {
Restored: a.Restored, Restored: a.Restored,
Value: strconv.FormatFloat(a.Value, 'f', -1, 32), Value: strconv.FormatFloat(a.Value, 'f', -1, 32),
} }
if notifier.AlertURLGeneratorFn != nil { if alertURLGeneratorFn != nil {
aa.SourceLink = notifier.AlertURLGeneratorFn(*a) aa.SourceLink = alertURLGeneratorFn(*a)
} }
if a.State == notifier.StateFiring && !a.KeepFiringSince.IsZero() { if a.State == notifier.StateFiring && !a.KeepFiringSince.IsZero() {
aa.Stabilizing = true aa.Stabilizing = true
@@ -235,11 +337,9 @@ func NewAlertAPI(ar *AlertingRule, a *notifier.Alert) *ApiAlert {
return aa return aa
} }
// ToAPI returns ApiGroup representation of g func groupToAPI(g *rule.Group) *apiGroup {
func (g *Group) ToAPI() *ApiGroup { g = g.DeepCopy()
g.mu.RLock() ag := apiGroup{
defer g.mu.RUnlock()
ag := ApiGroup{
// encode as string to avoid rounding // encode as string to avoid rounding
ID: strconv.FormatUint(g.GetID(), 10), ID: strconv.FormatUint(g.GetID(), 10),
Name: g.Name, Name: g.Name,
@@ -259,9 +359,9 @@ func (g *Group) ToAPI() *ApiGroup {
if g.EvalDelay != nil { if g.EvalDelay != nil {
ag.EvalDelay = g.EvalDelay.Seconds() ag.EvalDelay = g.EvalDelay.Seconds()
} }
ag.Rules = make([]ApiRule, 0) ag.Rules = make([]apiRule, 0)
for _, r := range g.Rules { for _, r := range g.Rules {
ag.Rules = append(ag.Rules, r.ToAPI()) ag.Rules = append(ag.Rules, ruleToAPI(r))
} }
return &ag return &ag
} }

View File

@@ -1,4 +1,4 @@
package rule package main
import ( import (
"fmt" "fmt"
@@ -8,6 +8,7 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config" "github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource" "github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/rule"
) )
func TestRecordingToApi(t *testing.T) { func TestRecordingToApi(t *testing.T) {
@@ -16,7 +17,7 @@ func TestRecordingToApi(t *testing.T) {
Values: []float64{1}, Timestamps: []int64{0}, Values: []float64{1}, Timestamps: []int64{0},
}) })
entriesLimit := 44 entriesLimit := 44
g := NewGroup(config.Group{ g := rule.NewGroup(config.Group{
Name: "group", Name: "group",
File: "rules.yaml", File: "rules.yaml",
Concurrency: 1, Concurrency: 1,
@@ -30,24 +31,24 @@ func TestRecordingToApi(t *testing.T) {
}, },
}, },
}, fq, 1*time.Minute, nil) }, fq, 1*time.Minute, nil)
rr := g.Rules[0].(*RecordingRule) rr := g.Rules[0].(*rule.RecordingRule)
expectedRes := ApiRule{ expectedRes := apiRule{
Name: "record_name", Name: "record_name",
Query: "up", Query: "up",
Labels: map[string]string{"label": "value"}, Labels: map[string]string{"label": "value"},
Health: "ok", Health: "ok",
Type: TypeRecording, Type: ruleTypeRecording,
DatasourceType: "prometheus", DatasourceType: "prometheus",
ID: "1248", ID: "1248",
GroupID: fmt.Sprintf("%d", g.CreateID()), GroupID: fmt.Sprintf("%d", g.CreateID()),
GroupName: "group", GroupName: "group",
File: "rules.yaml", File: "rules.yaml",
MaxUpdates: 44, MaxUpdates: 44,
Updates: make([]StateEntry, 0), Updates: make([]rule.StateEntry, 0),
} }
res := rr.ToAPI() res := recordingToAPI(rr)
if !reflect.DeepEqual(res, expectedRes) { if !reflect.DeepEqual(res, expectedRes) {
t.Fatalf("expected to have: \n%v;\ngot: \n%v", expectedRes, res) t.Fatalf("expected to have: \n%v;\ngot: \n%v", expectedRes, res)

View File

@@ -27,9 +27,6 @@ vmauth-linux-ppc64le-prod:
vmauth-linux-386-prod: vmauth-linux-386-prod:
APP_NAME=vmauth $(MAKE) app-via-docker-linux-386 APP_NAME=vmauth $(MAKE) app-via-docker-linux-386
vmauth-linux-s390x-prod:
APP_NAME=vmauth $(MAKE) app-via-docker-linux-s390x
vmauth-darwin-amd64-prod: vmauth-darwin-amd64-prod:
APP_NAME=vmauth $(MAKE) app-via-docker-darwin-amd64 APP_NAME=vmauth $(MAKE) app-via-docker-darwin-amd64

View File

@@ -4,7 +4,6 @@ import (
"bytes" "bytes"
"context" "context"
"encoding/base64" "encoding/base64"
"errors"
"flag" "flag"
"fmt" "fmt"
"math" "math"
@@ -42,9 +41,6 @@ var (
"See https://docs.victoriametrics.com/victoriametrics/vmauth/#load-balancing for details") "See https://docs.victoriametrics.com/victoriametrics/vmauth/#load-balancing for details")
defaultLoadBalancingPolicy = flag.String("loadBalancingPolicy", "least_loaded", "The default load balancing policy to use for backend urls specified inside url_prefix section. "+ defaultLoadBalancingPolicy = flag.String("loadBalancingPolicy", "least_loaded", "The default load balancing policy to use for backend urls specified inside url_prefix section. "+
"Supported policies: least_loaded, first_available. See https://docs.victoriametrics.com/victoriametrics/vmauth/#load-balancing") "Supported policies: least_loaded, first_available. See https://docs.victoriametrics.com/victoriametrics/vmauth/#load-balancing")
defaultMergeQueryArgs = flagutil.NewArrayString("mergeQueryArgs", "An optional list of client query arg names, which must be merged with args at backend urls. "+
"The rest of client query args are replaced by the corresponding query args from backend urls for security reasons; "+
"see https://docs.victoriametrics.com/victoriametrics/vmauth/#query-args-handling")
discoverBackendIPsGlobal = flag.Bool("discoverBackendIPs", false, "Whether to discover backend IPs via periodic DNS queries to hostnames specified in url_prefix. "+ discoverBackendIPsGlobal = flag.Bool("discoverBackendIPs", false, "Whether to discover backend IPs via periodic DNS queries to hostnames specified in url_prefix. "+
"This may be useful when url_prefix points to a hostname with dynamically scaled instances behind it. See https://docs.victoriametrics.com/victoriametrics/vmauth/#discovering-backend-ips") "This may be useful when url_prefix points to a hostname with dynamically scaled instances behind it. See https://docs.victoriametrics.com/victoriametrics/vmauth/#discovering-backend-ips")
discoverBackendIPsInterval = flag.Duration("discoverBackendIPsInterval", 10*time.Second, "The interval for re-discovering backend IPs if -discoverBackendIPs command-line flag is set. "+ discoverBackendIPsInterval = flag.Duration("discoverBackendIPsInterval", 10*time.Second, "The interval for re-discovering backend IPs if -discoverBackendIPs command-line flag is set. "+
@@ -65,11 +61,10 @@ type AuthConfig struct {
type UserInfo struct { type UserInfo struct {
Name string `yaml:"name,omitempty"` Name string `yaml:"name,omitempty"`
BearerToken string `yaml:"bearer_token,omitempty"` BearerToken string `yaml:"bearer_token,omitempty"`
JWT *JWTConfig `yaml:"jwt,omitempty"` AuthToken string `yaml:"auth_token,omitempty"`
AuthToken string `yaml:"auth_token,omitempty"` Username string `yaml:"username,omitempty"`
Username string `yaml:"username,omitempty"` Password string `yaml:"password,omitempty"`
Password string `yaml:"password,omitempty"`
URLPrefix *URLPrefix `yaml:"url_prefix,omitempty"` URLPrefix *URLPrefix `yaml:"url_prefix,omitempty"`
DiscoverBackendIPs *bool `yaml:"discover_backend_ips,omitempty"` DiscoverBackendIPs *bool `yaml:"discover_backend_ips,omitempty"`
@@ -80,7 +75,6 @@ type UserInfo struct {
DefaultURL *URLPrefix `yaml:"default_url,omitempty"` DefaultURL *URLPrefix `yaml:"default_url,omitempty"`
RetryStatusCodes []int `yaml:"retry_status_codes,omitempty"` RetryStatusCodes []int `yaml:"retry_status_codes,omitempty"`
LoadBalancingPolicy string `yaml:"load_balancing_policy,omitempty"` LoadBalancingPolicy string `yaml:"load_balancing_policy,omitempty"`
MergeQueryArgs []string `yaml:"merge_query_args,omitempty"`
DropSrcPathPrefixParts *int `yaml:"drop_src_path_prefix_parts,omitempty"` DropSrcPathPrefixParts *int `yaml:"drop_src_path_prefix_parts,omitempty"`
TLSCAFile string `yaml:"tls_ca_file,omitempty"` TLSCAFile string `yaml:"tls_ca_file,omitempty"`
TLSCertFile string `yaml:"tls_cert_file,omitempty"` TLSCertFile string `yaml:"tls_cert_file,omitempty"`
@@ -96,8 +90,6 @@ type UserInfo struct {
rt http.RoundTripper rt http.RoundTripper
requests *metrics.Counter requests *metrics.Counter
requestErrors *metrics.Counter
backendRequests *metrics.Counter
backendErrors *metrics.Counter backendErrors *metrics.Counter
requestsDuration *metrics.Summary requestsDuration *metrics.Summary
} }
@@ -109,29 +101,13 @@ type HeadersConf struct {
KeepOriginalHost *bool `yaml:"keep_original_host,omitempty"` KeepOriginalHost *bool `yaml:"keep_original_host,omitempty"`
} }
func (ui *UserInfo) beginConcurrencyLimit(ctx context.Context) error { func (ui *UserInfo) beginConcurrencyLimit() error {
select { select {
case ui.concurrencyLimitCh <- struct{}{}: case ui.concurrencyLimitCh <- struct{}{}:
return nil return nil
default: default:
// The number of concurrently executed requests for the given user equals the limt. ui.concurrencyLimitReached.Inc()
// Wait until some of the currently executed requests are finished, so the current request could be executed. return fmt.Errorf("cannot handle more than %d concurrent requests from user %s", ui.getMaxConcurrentRequests(), ui.name())
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10078
select {
case ui.concurrencyLimitCh <- struct{}{}:
return nil
case <-ctx.Done():
err := ctx.Err()
if errors.Is(err, context.DeadlineExceeded) {
// The current request couldn't be executed until the request timeout.
ui.concurrencyLimitReached.Inc()
return fmt.Errorf("cannot start executing the request during -maxQueueDuration=%s because %d concurrent requests from the user %s are executed",
*maxQueueDuration, ui.getMaxConcurrentRequests(), ui.name())
}
return fmt.Errorf("cannot start executing the request because %d concurrent requests from the user %s are executed: %w",
ui.getMaxConcurrentRequests(), ui.name(), err)
}
} }
} }
@@ -147,28 +123,6 @@ func (ui *UserInfo) getMaxConcurrentRequests() int {
return mcr return mcr
} }
func (ui *UserInfo) stopHealthChecks() {
if ui == nil {
return
}
if ui.URLPrefix != nil {
bus := ui.URLPrefix.bus.Load()
bus.stopHealthChecks()
}
if ui.DefaultURL != nil {
bus := ui.DefaultURL.bus.Load()
bus.stopHealthChecks()
}
for i := range ui.URLMaps {
um := &ui.URLMaps[i]
if um.URLPrefix != nil {
bus := um.URLPrefix.bus.Load()
bus.stopHealthChecks()
}
}
}
// Header is `Name: Value` http header, which must be added to the proxied request. // Header is `Name: Value` http header, which must be added to the proxied request.
type Header struct { type Header struct {
Name string Name string
@@ -228,11 +182,6 @@ type URLMap struct {
// LoadBalancingPolicy is load balancing policy among UrlPrefix backends. // LoadBalancingPolicy is load balancing policy among UrlPrefix backends.
LoadBalancingPolicy string `yaml:"load_balancing_policy,omitempty"` LoadBalancingPolicy string `yaml:"load_balancing_policy,omitempty"`
// MergeQueryArgs is a list of client query args, which must be merged with the existing backend query args.
//
// The rest of client query args are replaced with the corresponding backend query args for security reasons.
MergeQueryArgs []string `yaml:"merge_query_args,omitempty"`
// DropSrcPathPrefixParts is the number of `/`-delimited request path prefix parts to drop before proxying the request to backend. // DropSrcPathPrefixParts is the number of `/`-delimited request path prefix parts to drop before proxying the request to backend.
DropSrcPathPrefixParts *int `yaml:"drop_src_path_prefix_parts,omitempty"` DropSrcPathPrefixParts *int `yaml:"drop_src_path_prefix_parts,omitempty"`
} }
@@ -279,7 +228,7 @@ func (qa *QueryArg) MarshalYAML() (any, error) {
return qa.sOriginal, nil return qa.sOriginal, nil
} }
// URLPrefix represents the `url_prefix` from auth config. // URLPrefix represents passed `url_prefix`
type URLPrefix struct { type URLPrefix struct {
// requests are re-tried on other backend urls for these http response status codes // requests are re-tried on other backend urls for these http response status codes
retryStatusCodes []int retryStatusCodes []int
@@ -287,11 +236,6 @@ type URLPrefix struct {
// load balancing policy used // load balancing policy used
loadBalancingPolicy string loadBalancingPolicy string
// the list of client query args, which must be merged with backend query args.
//
// By default backend query args replace all the client query args for security reasons.
mergeQueryArgs []string
// how many request path prefix parts to drop before routing the request to backendURL // how many request path prefix parts to drop before routing the request to backendURL
dropSrcPathPrefixParts int dropSrcPathPrefixParts int
@@ -304,7 +248,7 @@ type URLPrefix struct {
// the list of backend urls // the list of backend urls
// //
// the list can be dynamically updated if `discover_backend_ips` option is set. // the list can be dynamically updated if `discover_backend_ips` option is set.
bus atomic.Pointer[backendURLs] bus atomic.Pointer[[]*backendURL]
// if this option is set, then backend ips for busOriginal are periodically re-discovered and put to bus. // if this option is set, then backend ips for busOriginal are periodically re-discovered and put to bus.
discoverBackendIPs bool discoverBackendIPs bool
@@ -328,91 +272,21 @@ func (up *URLPrefix) setLoadBalancingPolicy(loadBalancingPolicy string) error {
} }
} }
type backendURLs struct {
healthChecksContext context.Context
healthChecksCancel func()
healthChecksWG sync.WaitGroup
bus []*backendURL
}
func newBackendURLs() *backendURLs {
ctx, cancel := context.WithCancel(context.Background())
return &backendURLs{
healthChecksContext: ctx,
healthChecksCancel: cancel,
}
}
func (bus *backendURLs) add(u *url.URL) {
bus.bus = append(bus.bus, &backendURL{
url: u,
healthCheckContext: bus.healthChecksContext,
healthCheckWG: &bus.healthChecksWG,
})
}
func (bus *backendURLs) stopHealthChecks() {
bus.healthChecksCancel()
bus.healthChecksWG.Wait()
}
type backendURL struct { type backendURL struct {
broken atomic.Bool brokenDeadline atomic.Uint64
healthCheckContext context.Context
healthCheckWG *sync.WaitGroup
concurrentRequests atomic.Int32 concurrentRequests atomic.Int32
url *url.URL url *url.URL
} }
func (bu *backendURL) isBroken() bool { func (bu *backendURL) isBroken() bool {
return bu.broken.Load() ct := fasttime.UnixTimestamp()
return ct < bu.brokenDeadline.Load()
} }
func (bu *backendURL) setBroken() { func (bu *backendURL) setBroken() {
if bu.broken.CompareAndSwap(false, true) { deadline := fasttime.UnixTimestamp() + uint64((*failTimeout).Seconds())
bu.healthCheckWG.Go(func() { bu.brokenDeadline.Store(deadline)
bu.runHealthCheck()
bu.broken.Store(false)
})
}
}
func (bu *backendURL) runHealthCheck() {
port := bu.url.Port()
if port == "" {
port = "80"
}
addr := net.JoinHostPort(bu.url.Hostname(), port)
t := time.NewTicker(*failTimeout)
defer t.Stop()
for {
select {
case <-t.C:
// Verify network connectivity via TCP dial before marking backend healthy.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9997
ctx, cancel := context.WithTimeout(bu.healthCheckContext, time.Second)
c, err := netutil.Dialer.DialContext(ctx, "tcp", addr)
cancel()
if err != nil {
if errors.Is(bu.healthCheckContext.Err(), context.Canceled) {
return
}
logger.Warnf("ignoring the backend at %s for %s because of dial error: %s", addr, *failTimeout, err)
continue
}
_ = c.Close()
return
case <-bu.healthCheckContext.Done():
return
}
}
} }
func (bu *backendURL) get() { func (bu *backendURL) get() {
@@ -424,8 +298,8 @@ func (bu *backendURL) put() {
} }
func (up *URLPrefix) getBackendsCount() int { func (up *URLPrefix) getBackendsCount() int {
bus := up.bus.Load() pbus := up.bus.Load()
return len(bus.bus) return len(*pbus)
} }
// getBackendURL returns the backendURL depending on the load balance policy. // getBackendURL returns the backendURL depending on the load balance policy.
@@ -436,15 +310,16 @@ func (up *URLPrefix) getBackendsCount() int {
func (up *URLPrefix) getBackendURL() *backendURL { func (up *URLPrefix) getBackendURL() *backendURL {
up.discoverBackendAddrsIfNeeded() up.discoverBackendAddrsIfNeeded()
bus := up.bus.Load() pbus := up.bus.Load()
if len(bus.bus) == 0 { bus := *pbus
if len(bus) == 0 {
return nil return nil
} }
if up.loadBalancingPolicy == "first_available" { if up.loadBalancingPolicy == "first_available" {
return getFirstAvailableBackendURL(bus.bus) return getFirstAvailableBackendURL(bus)
} }
return getLeastLoadedBackendURL(bus.bus, &up.n) return getLeastLoadedBackendURL(bus, &up.n)
} }
func (up *URLPrefix) discoverBackendAddrsIfNeeded() { func (up *URLPrefix) discoverBackendAddrsIfNeeded() {
@@ -518,24 +393,25 @@ func (up *URLPrefix) discoverBackendAddrsIfNeeded() {
cancel() cancel()
// generate new backendURLs for the resolved IPs // generate new backendURLs for the resolved IPs
busNew := newBackendURLs() var busNew []*backendURL
for _, bu := range up.busOriginal { for _, bu := range up.busOriginal {
host := bu.Hostname() host := bu.Hostname()
for _, addr := range hostToAddrs[host] { for _, addr := range hostToAddrs[host] {
buCopy := *bu buCopy := *bu
buCopy.Host = addr buCopy.Host = addr
busNew.add(&buCopy) busNew = append(busNew, &backendURL{
url: &buCopy,
})
} }
} }
bus := up.bus.Load() pbus := up.bus.Load()
if areEqualBackendURLs(bus.bus, busNew.bus) { if areEqualBackendURLs(*pbus, busNew) {
return return
} }
// Store new backend urls // Store new backend urls
up.bus.Store(busNew) up.bus.Store(&busNew)
bus.stopHealthChecks()
} }
func areEqualBackendURLs(a, b []*backendURL) bool { func areEqualBackendURLs(a, b []*backendURL) bool {
@@ -566,66 +442,53 @@ func getFirstAvailableBackendURL(bus []*backendURL) *backendURL {
for i := 1; i < len(bus); i++ { for i := 1; i < len(bus); i++ {
if !bus[i].isBroken() { if !bus[i].isBroken() {
bu = bus[i] bu = bus[i]
bu.get() break
return bu
} }
} }
return nil bu.get()
return bu
} }
// getLeastLoadedBackendURL returns a non-broken backendURL with the lowest number of concurrent requests. // getLeastLoadedBackendURL returns the backendURL with the minimum number of concurrent requests.
// //
// backendURL.put() must be called on the returned backendURL after the request is complete. // backendURL.put() must be called on the returned backendURL after the request is complete.
func getLeastLoadedBackendURL(bus []*backendURL, atomicCounter *atomic.Uint32) *backendURL { func getLeastLoadedBackendURL(bus []*backendURL, atomicCounter *atomic.Uint32) *backendURL {
if len(bus) == 1 { if len(bus) == 1 {
// Fast path - return the only backend url. // Fast path - return the only backend url.
bu := bus[0] bu := bus[0]
if bu.isBroken() {
return nil
}
bu.get() bu.get()
return bu return bu
} }
// Slow path - select other backend urls. // Slow path - select other backend urls.
n := atomicCounter.Add(1) - 1 n := atomicCounter.Add(1) - 1
for i := range uint32(len(bus)) { for i := uint32(0); i < uint32(len(bus)); i++ {
idx := (n + i) % uint32(len(bus)) idx := (n + i) % uint32(len(bus))
bu := bus[idx] bu := bus[idx]
if bu.isBroken() { if bu.isBroken() {
continue continue
} }
if bu.concurrentRequests.Load() == 0 {
// The Load() in front of CompareAndSwap() avoids CAS overhead for items with values bigger than 0. // Fast path - return the backend with zero concurrently executed requests.
if bu.concurrentRequests.Load() == 0 && bu.concurrentRequests.CompareAndSwap(0, 1) { // Do not use CompareAndSwap() instead of Load(), since it is much slower on systems with many CPU cores.
atomicCounter.CompareAndSwap(n+1, idx+1) bu.concurrentRequests.Add(1)
// There is no need in the call bu.get(), because we already incremented bu.concrrentRequests above.
return bu return bu
} }
} }
// Slow path - return the backend with the minimum number of concurrently executed requests. // Slow path - return the backend with the minimum number of concurrently executed requests.
buMinIdx := n % uint32(len(bus)) buMin := bus[n%uint32(len(bus))]
minRequests := bus[buMinIdx].concurrentRequests.Load() minRequests := buMin.concurrentRequests.Load()
for i := uint32(1); i < uint32(len(bus)); i++ { for _, bu := range bus {
idx := (n + i) % uint32(len(bus))
bu := bus[idx]
if bu.isBroken() { if bu.isBroken() {
continue continue
} }
if n := bu.concurrentRequests.Load(); n < minRequests || buMin.isBroken() {
reqs := bu.concurrentRequests.Load() buMin = bu
if reqs < minRequests || bus[buMinIdx].isBroken() { minRequests = n
buMinIdx = idx
minRequests = reqs
} }
} }
buMin := bus[buMinIdx]
if buMin.isBroken() {
return nil
}
buMin.get() buMin.get()
atomicCounter.CompareAndSwap(n+1, buMinIdx+1)
return buMin return buMin
} }
@@ -742,9 +605,11 @@ func initAuthConfig() {
configTimestamp.Set(fasttime.UnixTimestamp()) configTimestamp.Set(fasttime.UnixTimestamp())
stopCh = make(chan struct{}) stopCh = make(chan struct{})
authConfigWG.Go(func() { authConfigWG.Add(1)
go func() {
defer authConfigWG.Done()
authConfigReloader(sighupCh) authConfigReloader(sighupCh)
}) }()
} }
func stopAuthConfig() { func stopAuthConfig() {
@@ -800,9 +665,6 @@ var (
// authUsers contains the currently loaded auth users // authUsers contains the currently loaded auth users
authUsers atomic.Pointer[map[string]*UserInfo] authUsers atomic.Pointer[map[string]*UserInfo]
// jwt authentication cache
jwtAuthCache atomic.Pointer[jwtCache]
authConfigWG sync.WaitGroup authConfigWG sync.WaitGroup
stopCh chan struct{} stopCh chan struct{}
) )
@@ -819,7 +681,7 @@ func reloadAuthConfig() (bool, error) {
ok, err := reloadAuthConfigData(data) ok, err := reloadAuthConfigData(data)
if err != nil { if err != nil {
return false, fmt.Errorf("failed to parse -auth.config=%q: %w", *authConfigPath, err) return false, fmt.Errorf("failed to pars -auth.config=%q: %w", *authConfigPath, err)
} }
if !ok { if !ok {
return false, nil return false, nil
@@ -842,14 +704,6 @@ func reloadAuthConfigData(data []byte) (bool, error) {
return false, fmt.Errorf("failed to parse auth config: %w", err) return false, fmt.Errorf("failed to parse auth config: %w", err)
} }
jui, err := parseJWTUsers(ac)
if err != nil {
return false, fmt.Errorf("failed to parse JWT users from auth config: %w", err)
}
jwtc := &jwtCache{
users: jui,
}
m, err := parseAuthConfigUsers(ac) m, err := parseAuthConfigUsers(ac)
if err != nil { if err != nil {
return false, fmt.Errorf("failed to parse users from auth config: %w", err) return false, fmt.Errorf("failed to parse users from auth config: %w", err)
@@ -857,11 +711,6 @@ func reloadAuthConfigData(data []byte) (bool, error) {
acPrev := authConfig.Load() acPrev := authConfig.Load()
if acPrev != nil { if acPrev != nil {
acPrev.UnauthorizedUser.stopHealthChecks()
for i := range acPrev.Users {
acPrev.Users[i].stopHealthChecks()
}
metrics.UnregisterSet(acPrev.ms, true) metrics.UnregisterSet(acPrev.ms, true)
} }
metrics.RegisterSet(ac.ms) metrics.RegisterSet(ac.ms)
@@ -869,7 +718,6 @@ func reloadAuthConfigData(data []byte) (bool, error) {
authConfig.Store(ac) authConfig.Store(ac)
authConfigData.Store(&data) authConfigData.Store(&data)
authUsers.Store(&m) authUsers.Store(&m)
jwtAuthCache.Store(jwtc)
return true, nil return true, nil
} }
@@ -894,9 +742,6 @@ func parseAuthConfig(data []byte) (*AuthConfig, error) {
if ui.BearerToken != "" { if ui.BearerToken != "" {
return nil, fmt.Errorf("field bearer_token can't be specified for unauthorized_user section") return nil, fmt.Errorf("field bearer_token can't be specified for unauthorized_user section")
} }
if ui.JWT != nil {
return nil, fmt.Errorf("field jwt can't be specified for unauthorized_user section")
}
if ui.AuthToken != "" { if ui.AuthToken != "" {
return nil, fmt.Errorf("field auth_token can't be specified for unauthorized_user section") return nil, fmt.Errorf("field auth_token can't be specified for unauthorized_user section")
} }
@@ -912,8 +757,6 @@ func parseAuthConfig(data []byte) (*AuthConfig, error) {
return nil, fmt.Errorf("cannot parse metric_labels for unauthorized_user: %w", err) return nil, fmt.Errorf("cannot parse metric_labels for unauthorized_user: %w", err)
} }
ui.requests = ac.ms.NewCounter(`vmauth_unauthorized_user_requests_total` + metricLabels) ui.requests = ac.ms.NewCounter(`vmauth_unauthorized_user_requests_total` + metricLabels)
ui.requestErrors = ac.ms.NewCounter(`vmauth_unauthorized_user_request_errors_total` + metricLabels)
ui.backendRequests = ac.ms.NewCounter(`vmauth_unauthorized_user_request_backend_requests_total` + metricLabels)
ui.backendErrors = ac.ms.NewCounter(`vmauth_unauthorized_user_request_backend_errors_total` + metricLabels) ui.backendErrors = ac.ms.NewCounter(`vmauth_unauthorized_user_request_backend_errors_total` + metricLabels)
ui.requestsDuration = ac.ms.NewSummary(`vmauth_unauthorized_user_request_duration_seconds` + metricLabels) ui.requestsDuration = ac.ms.NewSummary(`vmauth_unauthorized_user_request_duration_seconds` + metricLabels)
ui.concurrencyLimitCh = make(chan struct{}, ui.getMaxConcurrentRequests()) ui.concurrencyLimitCh = make(chan struct{}, ui.getMaxConcurrentRequests())
@@ -943,17 +786,10 @@ func parseAuthConfigUsers(ac *AuthConfig) (map[string]*UserInfo, error) {
} }
for i := range uis { for i := range uis {
ui := &uis[i] ui := &uis[i]
// users with jwt tokens are parsed by parseJWTUsers function.
// the function also checks that users with jwt tokens do not have auth tokens, bearer tokens, usernames and passwords.
if ui.JWT != nil {
continue
}
ats, err := getAuthTokens(ui.AuthToken, ui.BearerToken, ui.Username, ui.Password) ats, err := getAuthTokens(ui.AuthToken, ui.BearerToken, ui.Username, ui.Password)
if err != nil { if err != nil {
return nil, err return nil, err
} }
for _, at := range ats { for _, at := range ats {
if uiOld := byAuthToken[at]; uiOld != nil { if uiOld := byAuthToken[at]; uiOld != nil {
return nil, fmt.Errorf("duplicate auth token=%q found for username=%q, name=%q; the previous one is set for username=%q, name=%q", return nil, fmt.Errorf("duplicate auth token=%q found for username=%q, name=%q; the previous one is set for username=%q, name=%q",
@@ -969,8 +805,6 @@ func parseAuthConfigUsers(ac *AuthConfig) (map[string]*UserInfo, error) {
return nil, fmt.Errorf("cannot parse metric_labels: %w", err) return nil, fmt.Errorf("cannot parse metric_labels: %w", err)
} }
ui.requests = ac.ms.GetOrCreateCounter(`vmauth_user_requests_total` + metricLabels) ui.requests = ac.ms.GetOrCreateCounter(`vmauth_user_requests_total` + metricLabels)
ui.requestErrors = ac.ms.GetOrCreateCounter(`vmauth_user_request_errors_total` + metricLabels)
ui.backendRequests = ac.ms.GetOrCreateCounter(`vmauth_user_request_backend_requests_total` + metricLabels)
ui.backendErrors = ac.ms.GetOrCreateCounter(`vmauth_user_request_backend_errors_total` + metricLabels) ui.backendErrors = ac.ms.GetOrCreateCounter(`vmauth_user_request_backend_errors_total` + metricLabels)
ui.requestsDuration = ac.ms.GetOrCreateSummary(`vmauth_user_request_duration_seconds` + metricLabels) ui.requestsDuration = ac.ms.GetOrCreateSummary(`vmauth_user_request_duration_seconds` + metricLabels)
mcr := ui.getMaxConcurrentRequests() mcr := ui.getMaxConcurrentRequests()
@@ -1022,7 +856,6 @@ func (ui *UserInfo) getMetricLabels() (string, error) {
func (ui *UserInfo) initURLs() error { func (ui *UserInfo) initURLs() error {
retryStatusCodes := defaultRetryStatusCodes.Values() retryStatusCodes := defaultRetryStatusCodes.Values()
loadBalancingPolicy := *defaultLoadBalancingPolicy loadBalancingPolicy := *defaultLoadBalancingPolicy
mergeQueryArgs := *defaultMergeQueryArgs
dropSrcPathPrefixParts := 0 dropSrcPathPrefixParts := 0
discoverBackendIPs := *discoverBackendIPsGlobal discoverBackendIPs := *discoverBackendIPsGlobal
if ui.RetryStatusCodes != nil { if ui.RetryStatusCodes != nil {
@@ -1031,9 +864,6 @@ func (ui *UserInfo) initURLs() error {
if ui.LoadBalancingPolicy != "" { if ui.LoadBalancingPolicy != "" {
loadBalancingPolicy = ui.LoadBalancingPolicy loadBalancingPolicy = ui.LoadBalancingPolicy
} }
if len(ui.MergeQueryArgs) != 0 {
mergeQueryArgs = ui.MergeQueryArgs
}
if ui.DropSrcPathPrefixParts != nil { if ui.DropSrcPathPrefixParts != nil {
dropSrcPathPrefixParts = *ui.DropSrcPathPrefixParts dropSrcPathPrefixParts = *ui.DropSrcPathPrefixParts
} }
@@ -1041,18 +871,16 @@ func (ui *UserInfo) initURLs() error {
discoverBackendIPs = *ui.DiscoverBackendIPs discoverBackendIPs = *ui.DiscoverBackendIPs
} }
up := ui.URLPrefix if ui.URLPrefix != nil {
if up != nil { if err := ui.URLPrefix.sanitizeAndInitialize(); err != nil {
if err := up.sanitizeAndInitialize(); err != nil {
return err return err
} }
up.retryStatusCodes = retryStatusCodes ui.URLPrefix.retryStatusCodes = retryStatusCodes
up.dropSrcPathPrefixParts = dropSrcPathPrefixParts ui.URLPrefix.dropSrcPathPrefixParts = dropSrcPathPrefixParts
up.discoverBackendIPs = discoverBackendIPs ui.URLPrefix.discoverBackendIPs = discoverBackendIPs
if err := up.setLoadBalancingPolicy(loadBalancingPolicy); err != nil { if err := ui.URLPrefix.setLoadBalancingPolicy(loadBalancingPolicy); err != nil {
return err return err
} }
up.mergeQueryArgs = mergeQueryArgs
} }
if ui.DefaultURL != nil { if ui.DefaultURL != nil {
if err := ui.DefaultURL.sanitizeAndInitialize(); err != nil { if err := ui.DefaultURL.sanitizeAndInitialize(); err != nil {
@@ -1071,7 +899,6 @@ func (ui *UserInfo) initURLs() error {
} }
rscs := retryStatusCodes rscs := retryStatusCodes
lbp := loadBalancingPolicy lbp := loadBalancingPolicy
mqa := mergeQueryArgs
dsp := dropSrcPathPrefixParts dsp := dropSrcPathPrefixParts
dbd := discoverBackendIPs dbd := discoverBackendIPs
if e.RetryStatusCodes != nil { if e.RetryStatusCodes != nil {
@@ -1080,9 +907,6 @@ func (ui *UserInfo) initURLs() error {
if e.LoadBalancingPolicy != "" { if e.LoadBalancingPolicy != "" {
lbp = e.LoadBalancingPolicy lbp = e.LoadBalancingPolicy
} }
if len(e.MergeQueryArgs) != 0 {
mqa = e.MergeQueryArgs
}
if e.DropSrcPathPrefixParts != nil { if e.DropSrcPathPrefixParts != nil {
dsp = *e.DropSrcPathPrefixParts dsp = *e.DropSrcPathPrefixParts
} }
@@ -1093,7 +917,6 @@ func (ui *UserInfo) initURLs() error {
if err := e.URLPrefix.setLoadBalancingPolicy(lbp); err != nil { if err := e.URLPrefix.setLoadBalancingPolicy(lbp); err != nil {
return err return err
} }
e.URLPrefix.mergeQueryArgs = mqa
e.URLPrefix.dropSrcPathPrefixParts = dsp e.URLPrefix.dropSrcPathPrefixParts = dsp
e.URLPrefix.discoverBackendIPs = dbd e.URLPrefix.discoverBackendIPs = dbd
} }
@@ -1205,11 +1028,13 @@ func (up *URLPrefix) sanitizeAndInitialize() error {
} }
// Initialize up.bus // Initialize up.bus
bus := newBackendURLs() bus := make([]*backendURL, len(up.busOriginal))
for _, bu := range up.busOriginal { for i, bu := range up.busOriginal {
bus.add(bu) bus[i] = &backendURL{
url: bu,
}
} }
up.bus.Store(bus) up.bus.Store(&bus)
return nil return nil
} }

View File

@@ -280,7 +280,7 @@ users:
} }
func TestParseAuthConfigSuccess(t *testing.T) { func TestParseAuthConfigSuccess(t *testing.T) {
f := func(s string, expectedAuthConfig map[string]*UserInfo, expectedUnauthorizedUserConfig *UserInfo) { f := func(s string, expectedAuthConfig map[string]*UserInfo) {
t.Helper() t.Helper()
ac, err := parseAuthConfig([]byte(s)) ac, err := parseAuthConfig([]byte(s))
if err != nil { if err != nil {
@@ -294,19 +294,15 @@ func TestParseAuthConfigSuccess(t *testing.T) {
if err := areEqualConfigs(m, expectedAuthConfig); err != nil { if err := areEqualConfigs(m, expectedAuthConfig); err != nil {
t.Fatal(err) t.Fatal(err)
} }
if err := areEqualConfigs(ac.UnauthorizedUser, expectedUnauthorizedUserConfig); err != nil {
t.Fatal(err)
}
} }
insecureSkipVerifyTrue := true insecureSkipVerifyTrue := true
// Empty config // Empty config
f(``, map[string]*UserInfo{}, nil) f(``, map[string]*UserInfo{})
// Empty users // Empty users
f(`users: []`, map[string]*UserInfo{}, nil) f(`users: []`, map[string]*UserInfo{})
// Single user // Single user
f(` f(`
@@ -324,7 +320,7 @@ users:
MaxConcurrentRequests: 5, MaxConcurrentRequests: 5,
TLSInsecureSkipVerify: &insecureSkipVerifyTrue, TLSInsecureSkipVerify: &insecureSkipVerifyTrue,
}, },
}, nil) })
// Single user with auth_token // Single user with auth_token
f(` f(`
@@ -348,7 +344,7 @@ users:
TLSCertFile: "foo/baz", TLSCertFile: "foo/baz",
TLSKeyFile: "foo/foo", TLSKeyFile: "foo/foo",
}, },
}, nil) })
// Multiple url_prefix entries // Multiple url_prefix entries
insecureSkipVerifyFalse := false insecureSkipVerifyFalse := false
@@ -363,7 +359,6 @@ users:
tls_insecure_skip_verify: false tls_insecure_skip_verify: false
retry_status_codes: [500, 501] retry_status_codes: [500, 501]
load_balancing_policy: first_available load_balancing_policy: first_available
merge_query_args: [foo, bar]
drop_src_path_prefix_parts: 1 drop_src_path_prefix_parts: 1
discover_backend_ips: true discover_backend_ips: true
`, map[string]*UserInfo{ `, map[string]*UserInfo{
@@ -377,11 +372,10 @@ users:
TLSInsecureSkipVerify: &insecureSkipVerifyFalse, TLSInsecureSkipVerify: &insecureSkipVerifyFalse,
RetryStatusCodes: []int{500, 501}, RetryStatusCodes: []int{500, 501},
LoadBalancingPolicy: "first_available", LoadBalancingPolicy: "first_available",
MergeQueryArgs: []string{"foo", "bar"}, DropSrcPathPrefixParts: intp(1),
DropSrcPathPrefixParts: new(1),
DiscoverBackendIPs: &discoverBackendIPsTrue, DiscoverBackendIPs: &discoverBackendIPsTrue,
}, },
}, nil) })
// Multiple users // Multiple users
f(` f(`
@@ -399,7 +393,7 @@ users:
Username: "bar", Username: "bar",
URLPrefix: mustParseURL("https://bar/x/"), URLPrefix: mustParseURL("https://bar/x/"),
}, },
}, nil) })
// non-empty URLMap // non-empty URLMap
sharedUserInfo := &UserInfo{ sharedUserInfo := &UserInfo{
@@ -449,7 +443,7 @@ users:
`, map[string]*UserInfo{ `, map[string]*UserInfo{
getHTTPAuthBearerToken("foo"): sharedUserInfo, getHTTPAuthBearerToken("foo"): sharedUserInfo,
getHTTPAuthBasicToken("foo", ""): sharedUserInfo, getHTTPAuthBasicToken("foo", ""): sharedUserInfo,
}, nil) })
// Multiple users with the same name - this should work, since these users have different passwords // Multiple users with the same name - this should work, since these users have different passwords
f(` f(`
@@ -471,7 +465,7 @@ users:
Password: "bar", Password: "bar",
URLPrefix: mustParseURL("https://bar/x"), URLPrefix: mustParseURL("https://bar/x"),
}, },
}, nil) })
// with default url // with default url
keepOriginalHost := true keepOriginalHost := true
@@ -487,8 +481,6 @@ users:
- "foo: bar" - "foo: bar"
- "xxx: y" - "xxx: y"
keep_original_host: true keep_original_host: true
load_balancing_policy: first_available
merge_query_args: [foo, bar]
default_url: default_url:
- http://default1/select/0/prometheus - http://default1/select/0/prometheus
- http://default2/select/0/prometheus - http://default2/select/0/prometheus
@@ -513,8 +505,6 @@ users:
}, },
KeepOriginalHost: &keepOriginalHost, KeepOriginalHost: &keepOriginalHost,
}, },
LoadBalancingPolicy: "first_available",
MergeQueryArgs: []string{"foo", "bar"},
}, },
}, },
DefaultURL: mustParseURLs([]string{ DefaultURL: mustParseURLs([]string{
@@ -542,8 +532,6 @@ users:
}, },
KeepOriginalHost: &keepOriginalHost, KeepOriginalHost: &keepOriginalHost,
}, },
LoadBalancingPolicy: "first_available",
MergeQueryArgs: []string{"foo", "bar"},
}, },
}, },
DefaultURL: mustParseURLs([]string{ DefaultURL: mustParseURLs([]string{
@@ -551,7 +539,7 @@ users:
"http://default2/select/0/prometheus", "http://default2/select/0/prometheus",
}), }),
}, },
}, nil) })
// With metric_labels // With metric_labels
f(` f(`
@@ -603,40 +591,7 @@ users:
}, },
}, },
}, },
}, nil)
// unauthorized_user
f(`
unauthorized_user:
merge_query_args: [extra_filters]
url_map:
- src_paths: ["/select/.+"]
url_prefix: 'http://victoria-logs:9428/?extra_filters={env="prod"}'
`, nil, &UserInfo{
MergeQueryArgs: []string{"extra_filters"},
URLMaps: []URLMap{
{
SrcPaths: getRegexs([]string{"/select/.+"}),
URLPrefix: mustParseURL(`http://victoria-logs:9428/?extra_filters={env="prod"}`),
},
},
}) })
// skip user info with jwt, it is parsed by parseJWTUsers
f(`
users:
- username: foo
password: bar
url_prefix: http://aaa:343/bbb
- jwt: {skip_verify: true}
url_prefix: http://aaa:343/bbb
`, map[string]*UserInfo{
getHTTPAuthBasicToken("foo", "bar"): {
Username: "foo",
Password: "bar",
URLPrefix: mustParseURL("http://aaa:343/bbb"),
},
}, nil)
} }
func TestParseAuthConfigPassesTLSVerificationConfig(t *testing.T) { func TestParseAuthConfigPassesTLSVerificationConfig(t *testing.T) {
@@ -768,12 +723,10 @@ func TestGetLeastLoadedBackendURL(t *testing.T) {
}) })
up.loadBalancingPolicy = "least_loaded" up.loadBalancingPolicy = "least_loaded"
pbus := up.bus.Load()
bus := pbus.bus
fn := func(ns ...int) { fn := func(ns ...int) {
t.Helper() t.Helper()
pbus := up.bus.Load()
bus := *pbus
for i, b := range bus { for i, b := range bus {
got := int(b.concurrentRequests.Load()) got := int(b.concurrentRequests.Load())
exp := ns[i] exp := ns[i]
@@ -785,52 +738,45 @@ func TestGetLeastLoadedBackendURL(t *testing.T) {
up.getBackendURL() up.getBackendURL()
fn(1, 0, 0) fn(1, 0, 0)
up.getBackendURL() up.getBackendURL()
fn(1, 1, 0) fn(1, 1, 0)
up.getBackendURL() up.getBackendURL()
fn(1, 1, 1) fn(1, 1, 1)
bus[1].put() up.getBackendURL()
bus[2].put() up.getBackendURL()
fn(1, 0, 0) fn(2, 2, 1)
bus := up.bus.Load()
pbus := *bus
pbus[0].concurrentRequests.Add(2)
pbus[2].concurrentRequests.Add(5)
fn(4, 2, 6)
up.getBackendURL() up.getBackendURL()
fn(1, 1, 0) fn(4, 3, 6)
bus[1].put()
up.getBackendURL() up.getBackendURL()
fn(1, 0, 1) fn(4, 4, 6)
up.getBackendURL()
fn(4, 5, 6)
up.getBackendURL()
fn(5, 5, 6)
up.getBackendURL()
fn(6, 5, 6)
up.getBackendURL()
fn(6, 6, 6)
up.getBackendURL()
fn(6, 6, 7)
up.getBackendURL() up.getBackendURL()
up.getBackendURL() up.getBackendURL()
fn(1, 1, 2) fn(7, 7, 7)
bus[0].concurrentRequests.Add(2)
bus[2].concurrentRequests.Add(2)
fn(3, 1, 4)
up.getBackendURL()
fn(3, 2, 4)
up.getBackendURL()
fn(3, 3, 4)
up.getBackendURL()
fn(4, 3, 4)
up.getBackendURL()
fn(4, 4, 4)
bus[0].put()
bus[2].put()
up.getBackendURL()
fn(3, 4, 4)
up.getBackendURL()
fn(4, 4, 4)
} }
func TestBrokenBackend(t *testing.T) { func TestBrokenBackend(t *testing.T) {
@@ -841,13 +787,13 @@ func TestBrokenBackend(t *testing.T) {
}) })
up.loadBalancingPolicy = "least_loaded" up.loadBalancingPolicy = "least_loaded"
pbus := up.bus.Load() pbus := up.bus.Load()
bus := pbus.bus bus := *pbus
// explicitly mark one of the backends as broken // explicitly mark one of the backends as broken
bus[1].setBroken() bus[1].setBroken()
// broken backend should never return while there are healthy backends // broken backend should never return while there are healthy backends
for range int(1e3) { for i := 0; i < 1e3; i++ {
b := up.getBackendURL() b := up.getBackendURL()
if b.isBroken() { if b.isBroken() {
t.Fatalf("unexpected broken backend %q", b.url) t.Fatalf("unexpected broken backend %q", b.url)
@@ -864,7 +810,7 @@ func TestDiscoverBackendIPsWithIPV6(t *testing.T) {
up.discoverBackendAddrsIfNeeded() up.discoverBackendAddrsIfNeeded()
pbus := up.bus.Load() pbus := up.bus.Load()
bus := pbus.bus bus := *pbus
if len(bus) != 1 { if len(bus) != 1 {
t.Fatalf("expected url list to be of size 1; got %d instead", len(bus)) t.Fatalf("expected url list to be of size 1; got %d instead", len(bus))
@@ -938,7 +884,7 @@ func removeMetrics(m map[string]*UserInfo) {
} }
} }
func areEqualConfigs(a, b any) error { func areEqualConfigs(a, b map[string]*UserInfo) error {
aData, err := yaml.Marshal(a) aData, err := yaml.Marshal(a)
if err != nil { if err != nil {
return fmt.Errorf("cannot marshal a: %w", err) return fmt.Errorf("cannot marshal a: %w", err)
@@ -958,14 +904,16 @@ func mustParseURL(u string) *URLPrefix {
} }
func mustParseURLs(us []string) *URLPrefix { func mustParseURLs(us []string) *URLPrefix {
bus := newBackendURLs() bus := make([]*backendURL, len(us))
urls := make([]*url.URL, len(us)) urls := make([]*url.URL, len(us))
for i, u := range us { for i, u := range us {
pu, err := url.Parse(u) pu, err := url.Parse(u)
if err != nil { if err != nil {
panic(fmt.Errorf("BUG: cannot parse %q: %w", u, err)) panic(fmt.Errorf("BUG: cannot parse %q: %w", u, err))
} }
bus.add(pu) bus[i] = &backendURL{
url: pu,
}
urls[i] = pu urls[i] = pu
} }
up := &URLPrefix{} up := &URLPrefix{}
@@ -974,11 +922,15 @@ func mustParseURLs(us []string) *URLPrefix {
} else { } else {
up.vOriginal = us up.vOriginal = us
} }
up.bus.Store(bus) up.bus.Store(&bus)
up.busOriginal = urls up.busOriginal = urls
return up return up
} }
func intp(n int) *int {
return &n
}
func mustNewRegex(s string) *Regex { func mustNewRegex(s string) *Regex {
var re Regex var re Regex
if err := yaml.Unmarshal([]byte(s), &re); err != nil { if err := yaml.Unmarshal([]byte(s), &re); err != nil {

View File

@@ -1,156 +0,0 @@
package main
import (
"fmt"
"os"
"strings"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/jwt"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
)
// jwtCache holds the users that authenticate via JWT tokens.
//
// It is populated from the result of parseJWTUsers and is read
// by getUserInfoByJWTToken when matching incoming auth tokens.
type jwtCache struct {
// users contains the UserInfo entries from AuthConfig which have the jwt section (JWTConfig) set.
users []*UserInfo
}
// JWTConfig is the `jwt` section of a user entry in the auth config.
//
// parseJWTUsers requires at least one of PublicKeys, PublicKeyFiles or SkipVerify to be set.
type JWTConfig struct {
// PublicKeys contains inline public keys; every entry is parsed with jwt.ParseKey.
PublicKeys []string `yaml:"public_keys,omitempty"`
// PublicKeyFiles contains paths to files with public keys;
// every file is read and its contents are parsed with jwt.ParseKey.
PublicKeyFiles []string `yaml:"public_key_files,omitempty"`
// SkipVerify makes getUserInfoByJWTToken accept a parsed, non-expired token
// without verifying it against verifierPool.
SkipVerify bool `yaml:"skip_verify,omitempty"`
// verifierPool is built by parseJWTUsers from PublicKeys and PublicKeyFiles.
// It remains nil when no keys are configured (i.e. only skip_verify is set).
verifierPool *jwt.VerifierPool
}
// parseJWTUsers initializes and returns the users from ac.Users which have the jwt section set.
//
// Users without the jwt section are skipped. For every JWT user the function:
//   - rejects the config if auth_token, bearer_token, username or password is set alongside jwt;
//   - rejects the config if neither public keys, public key files nor skip_verify=true are set;
//   - builds the verifier pool from the inline public keys and the public key files;
//   - initializes backend urls, per-user metrics, the concurrency limiter and the HTTP RoundTripper.
//
// The returned pointers refer to the elements of ac.Users, so the initialized state
// is visible through ac.Users as well.
//
// An error is returned if more than one JWT user is configured.
func parseJWTUsers(ac *AuthConfig) ([]*UserInfo, error) {
	jui := make([]*UserInfo, 0, len(ac.Users))
	for i := range ac.Users {
		// Use a pointer to the slice element instead of a per-iteration copy of UserInfo.
		// This keeps the initialized user identical to ac.Users[i]
		// and matches how parseAuthConfigUsers walks the same slice.
		ui := &ac.Users[i]
		jwtToken := ui.JWT
		if jwtToken == nil {
			continue
		}
		if ui.AuthToken != "" || ui.BearerToken != "" || ui.Username != "" || ui.Password != "" {
			return nil, fmt.Errorf("auth_token, bearer_token, username and password cannot be specified if jwt is set")
		}
		if len(jwtToken.PublicKeys) == 0 && len(jwtToken.PublicKeyFiles) == 0 && !jwtToken.SkipVerify {
			return nil, fmt.Errorf("jwt must contain at least a single public key, public_key_files or have skip_verify=true")
		}

		if len(jwtToken.PublicKeys) > 0 || len(jwtToken.PublicKeyFiles) > 0 {
			keys := make([]any, 0, len(jwtToken.PublicKeys)+len(jwtToken.PublicKeyFiles))

			// Inline keys. The error from jwt.ParseKey is returned as is.
			for _, publicKey := range jwtToken.PublicKeys {
				k, err := jwt.ParseKey([]byte(publicKey))
				if err != nil {
					return nil, err
				}
				keys = append(keys, k)
			}

			// Keys stored in files. Errors are annotated with the file path.
			for _, filePath := range jwtToken.PublicKeyFiles {
				keyData, err := os.ReadFile(filePath)
				if err != nil {
					return nil, fmt.Errorf("cannot read public key from file %q: %w", filePath, err)
				}
				k, err := jwt.ParseKey(keyData)
				if err != nil {
					return nil, fmt.Errorf("cannot parse public key from file %q: %w", filePath, err)
				}
				keys = append(keys, k)
			}

			vp, err := jwt.NewVerifierPool(keys)
			if err != nil {
				return nil, err
			}
			jwtToken.verifierPool = vp
		}

		if err := ui.initURLs(); err != nil {
			return nil, err
		}

		metricLabels, err := ui.getMetricLabels()
		if err != nil {
			return nil, fmt.Errorf("cannot parse metric_labels: %w", err)
		}
		ui.requests = ac.ms.GetOrCreateCounter(`vmauth_user_requests_total` + metricLabels)
		ui.requestErrors = ac.ms.GetOrCreateCounter(`vmauth_user_request_errors_total` + metricLabels)
		ui.backendRequests = ac.ms.GetOrCreateCounter(`vmauth_user_request_backend_requests_total` + metricLabels)
		ui.backendErrors = ac.ms.GetOrCreateCounter(`vmauth_user_request_backend_errors_total` + metricLabels)
		ui.requestsDuration = ac.ms.GetOrCreateSummary(`vmauth_user_request_duration_seconds` + metricLabels)

		mcr := ui.getMaxConcurrentRequests()
		ui.concurrencyLimitCh = make(chan struct{}, mcr)
		ui.concurrencyLimitReached = ac.ms.GetOrCreateCounter(`vmauth_user_concurrent_requests_limit_reached_total` + metricLabels)
		// The gauges read the limiter channel lazily, so they always report the current state.
		_ = ac.ms.GetOrCreateGauge(`vmauth_user_concurrent_requests_capacity`+metricLabels, func() float64 {
			return float64(cap(ui.concurrencyLimitCh))
		})
		_ = ac.ms.GetOrCreateGauge(`vmauth_user_concurrent_requests_current`+metricLabels, func() float64 {
			return float64(len(ui.concurrencyLimitCh))
		})

		rt, err := newRoundTripper(ui.TLSCAFile, ui.TLSCertFile, ui.TLSKeyFile, ui.TLSServerName, ui.TLSInsecureSkipVerify)
		if err != nil {
			return nil, fmt.Errorf("cannot initialize HTTP RoundTripper: %w", err)
		}
		ui.rt = rt

		jui = append(jui, ui)
	}

	// The limitation will be lifted once claim-based matching is implemented.
	if len(jui) > 1 {
		return nil, fmt.Errorf("multiple users with JWT tokens are not supported; found %d users", len(jui))
	}

	return jui, nil
}
// getUserInfoByJWTToken returns the JWT user matching one of the given auth tokens.
//
// It returns nil if no JWT users are configured, or if none of ats is a parsable,
// non-expired JWT token accepted by a configured JWT user.
//
// A user with skip_verify set accepts any parsable, non-expired token.
// Otherwise the token must pass verification against the user's verifier pool.
// Rejected tokens are logged only when -logInvalidAuthTokens is set.
func getUserInfoByJWTToken(ats []string) *UserInfo {
	// Guard against a nil cache, so the function is safe to call
	// before the first auth config has been stored.
	js := jwtAuthCache.Load()
	if js == nil || len(js.users) == 0 {
		return nil
	}

	for _, at := range ats {
		// Cheap pre-filter: only tokens with exactly two dots are handed to the JWT parser.
		if strings.Count(at, ".") != 2 {
			continue
		}

		at = strings.TrimPrefix(at, `http_auth:`)

		tkn, err := jwt.NewToken(at, true)
		if err != nil {
			if *logInvalidAuthTokens {
				logger.Infof("cannot parse jwt token: %s", err)
			}
			continue
		}
		if tkn.IsExpired(time.Now()) {
			if *logInvalidAuthTokens {
				logger.Infof("jwt token is expired")
			}
			continue
		}

		for _, ui := range js.users {
			if ui.JWT.SkipVerify {
				return ui
			}

			if err := ui.JWT.verifierPool.Verify(tkn); err != nil {
				if *logInvalidAuthTokens {
					logger.Infof("cannot verify jwt token: %s", err)
				}
				continue
			}

			return ui
		}
	}

	return nil
}

View File

@@ -1,304 +0,0 @@
package main
import (
"fmt"
"os"
"path/filepath"
"testing"
)
// TestJWTParseAuthConfigFailure verifies that invalid jwt configurations are rejected.
//
// Every case must fail either in parseAuthConfig or in parseJWTUsers,
// and the error message must be equal to the expected one.
func TestJWTParseAuthConfigFailure(t *testing.T) {
validRSAPublicKey := `-----BEGIN PUBLIC KEY-----
MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAiX7oPWKOWRQsGFEWvwZO
mL2PYsdYUsu9nr0qtPCjxQHUJgLfT3rdKlvKpPFYv7ZmKnqTncg36Wz9uiYmWJ7e
IB5Z+fko8kVIMzarCqVvpAJDzYF/pUii68xvuYoK3L9TIOAeyCXv+prwnr2IH+Mw
9AONzWbRrYoO74XyTE9vMU5qmI/L1VPk+PR8lqPOSptLvzsfoaIk2ED4yK2nRB+6
st+k4nccPqbErqHc8aiXnXfugfnr6b+NPFYUzKsDqkymGOokVijrI8B3jNw6c6Do
zphk+D3wgLsXYHfMcZbXIMqffqm/aB8Qg88OpFOkQ3rd2p6R9+hacnZkfkn3Phiw
yQIDAQAB
-----END PUBLIC KEY-----
`
// ECDSA with the P-521 curve
validECDSAPublicKey := `-----BEGIN PUBLIC KEY-----
MIGbMBAGByqGSM49AgEGBSuBBAAjA4GGAAQAU9RmtkCRuYTKCyvLlDn5DtBZOHSe
QTa5j9q/oQVpCKqcXVFrH5dgh0GL+P/ZhkeuowPzCZqntGf0+7wPt9OxSJcADVJm
dv92m540MXss8zdHf5qtE0gsu2Ved0R7Z8a8QwGZ/1mYZ+kFGGbdQTlSvRqDySTq
XOtclIk1uhc03oL9nOQ=
-----END PUBLIC KEY-----
`
// f parses the config s and expects an error with the exact message expErr.
// The error may come either from parseAuthConfig or from parseJWTUsers.
f := func(s string, expErr string) {
t.Helper()
ac, err := parseAuthConfig([]byte(s))
if err != nil {
if expErr != err.Error() {
t.Fatalf("unexpected error; got %q; want %q", err.Error(), expErr)
}
return
}
users, err := parseJWTUsers(ac)
if err != nil {
if expErr != err.Error() {
t.Fatalf("unexpected error; got %q; want %q", err.Error(), expErr)
}
return
}
t.Fatalf("expecting non-nil error; got %v", users)
}
// unauthorized_user cannot be used with jwt
f(`
unauthorized_user:
jwt: {skip_verify: true}
url_prefix: http://foo.bar
`, `field jwt can't be specified for unauthorized_user section`)
// username and jwt in a single config
f(`
users:
- username: foo
jwt: {skip_verify: true}
url_prefix: http://foo.bar
`, `auth_token, bearer_token, username and password cannot be specified if jwt is set`)
// bearer_token and jwt in a single config
f(`
users:
- bearer_token: foo
jwt: {skip_verify: true}
url_prefix: http://foo.bar
`, `auth_token, bearer_token, username and password cannot be specified if jwt is set`)
// auth_token and jwt in a single config
f(`
users:
- auth_token: "Foo token"
jwt: {skip_verify: true}
url_prefix: http://foo.bar
`, `auth_token, bearer_token, username and password cannot be specified if jwt is set`)
// jwt public_keys or skip_verify must be set, part 1: empty jwt section
f(`
users:
- jwt: {}
url_prefix: http://foo.bar
`, `jwt must contain at least a single public key, public_key_files or have skip_verify=true`)
// jwt public_keys or skip_verify must be set, part 2: null public_keys
f(`
users:
- jwt: {public_keys: null}
url_prefix: http://foo.bar
`, `jwt must contain at least a single public key, public_key_files or have skip_verify=true`)
// jwt public_keys or skip_verify must be set, part 3: empty public_keys list
f(`
users:
- jwt: {public_keys: []}
url_prefix: http://foo.bar
`, `jwt must contain at least a single public key, public_key_files or have skip_verify=true`)
// jwt public_keys, public_key_files or skip_verify must be set: empty public_key_files list
f(`
users:
- jwt: {public_key_files: []}
url_prefix: http://foo.bar
`, `jwt must contain at least a single public key, public_key_files or have skip_verify=true`)
// invalid public key, part 1: empty key
f(`
users:
- jwt: {public_keys: [""]}
url_prefix: http://foo.bar
`, `failed to parse key "": failed to decode PEM block containing public key`)
// invalid public key, part 2: non-PEM key
f(`
users:
- jwt: {public_keys: ["invalid"]}
url_prefix: http://foo.bar
`, `failed to parse key "invalid": failed to decode PEM block containing public key`)
// invalid public key, part 3: an invalid key listed after valid keys
f(fmt.Sprintf(`
users:
- jwt:
public_keys:
- %q
- %q
- "invalid"
url_prefix: http://foo.bar
`, validRSAPublicKey, validECDSAPublicKey), `failed to parse key "invalid": failed to decode PEM block containing public key`)
// several jwt users, each with a valid public key, are not supported
f(fmt.Sprintf(`
users:
- jwt:
public_keys:
- %q
url_prefix: http://foo.bar
- jwt:
public_keys:
- %q
url_prefix: http://foo.bar
`, validRSAPublicKey, validECDSAPublicKey), `multiple users with JWT tokens are not supported; found 2 users`)
// public key file doesn't exist
f(`
users:
- jwt:
public_key_files:
- /path/to/nonexistent/file.pem
url_prefix: http://foo.bar
`, "cannot read public key from file \"/path/to/nonexistent/file.pem\": open /path/to/nonexistent/file.pem: no such file or directory")
// public key file exists, but contains invalid PEM data
publicKeyFile := filepath.Join(t.TempDir(), "a_public_key.pem")
if err := os.WriteFile(publicKeyFile, []byte(`invalidPEM`), 0o644); err != nil {
t.Fatalf("failed to write public key file: %s", err)
}
f(`
users:
- jwt:
public_key_files:
- `+publicKeyFile+`
url_prefix: http://foo.bar
`, "cannot parse public key from file \""+publicKeyFile+"\": failed to parse key \"invalidPEM\": failed to decode PEM block containing public key")
}
// TestJWTParseAuthConfigSuccess verifies that valid jwt configurations are accepted
// by parseAuthConfig and parseJWTUsers, and that the verifier pool is initialized
// only for users which do not have skip_verify set.
func TestJWTParseAuthConfigSuccess(t *testing.T) {
validRSAPublicKey := `-----BEGIN PUBLIC KEY-----
MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAiX7oPWKOWRQsGFEWvwZO
mL2PYsdYUsu9nr0qtPCjxQHUJgLfT3rdKlvKpPFYv7ZmKnqTncg36Wz9uiYmWJ7e
IB5Z+fko8kVIMzarCqVvpAJDzYF/pUii68xvuYoK3L9TIOAeyCXv+prwnr2IH+Mw
9AONzWbRrYoO74XyTE9vMU5qmI/L1VPk+PR8lqPOSptLvzsfoaIk2ED4yK2nRB+6
st+k4nccPqbErqHc8aiXnXfugfnr6b+NPFYUzKsDqkymGOokVijrI8B3jNw6c6Do
zphk+D3wgLsXYHfMcZbXIMqffqm/aB8Qg88OpFOkQ3rd2p6R9+hacnZkfkn3Phiw
yQIDAQAB
-----END PUBLIC KEY-----
`
// ECDSA with the P-521 curve
validECDSAPublicKey := `-----BEGIN PUBLIC KEY-----
MIGbMBAGByqGSM49AgEGBSuBBAAjA4GGAAQAU9RmtkCRuYTKCyvLlDn5DtBZOHSe
QTa5j9q/oQVpCKqcXVFrH5dgh0GL+P/ZhkeuowPzCZqntGf0+7wPt9OxSJcADVJm
dv92m540MXss8zdHf5qtE0gsu2Ved0R7Z8a8QwGZ/1mYZ+kFGGbdQTlSvRqDySTq
XOtclIk1uhc03oL9nOQ=
-----END PUBLIC KEY-----
`
// f parses the config s and checks every returned JWT user:
// the jwt section must be set, the verifier pool must be nil for skip_verify=true
// and non-nil otherwise.
f := func(s string) {
t.Helper()
ac, err := parseAuthConfig([]byte(s))
if err != nil {
t.Fatalf("unexpected error: %s", err)
}
jui, err := parseJWTUsers(ac)
if err != nil {
t.Fatalf("unexpected error: %s", err)
}
for _, ui := range jui {
if ui.JWT == nil {
t.Fatalf("unexpected nil JWTConfig")
}
if ui.JWT.SkipVerify {
if ui.JWT.verifierPool != nil {
t.Fatalf("unexpected non-nil verifier pool for skip_verify=true")
}
continue
}
if ui.JWT.verifierPool == nil {
t.Fatalf("unexpected nil verifier pool for non-empty public keys")
}
}
}
// single inline RSA public key
f(fmt.Sprintf(`
users:
- jwt:
public_keys:
- %q
url_prefix: http://foo.bar
`, validRSAPublicKey))
// single inline ECDSA public key
f(fmt.Sprintf(`
users:
- jwt:
public_keys:
- %q
url_prefix: http://foo.bar
`, validECDSAPublicKey))
// multiple inline public keys of different types
f(fmt.Sprintf(`
users:
- jwt:
public_keys:
- %q
- %q
url_prefix: http://foo.bar
`, validRSAPublicKey, validECDSAPublicKey))
// skip_verify without public keys
f(`
users:
- jwt:
skip_verify: true
url_prefix: http://foo.bar
`)
// combined with other auth methods
f(`
users:
- username: foo
password: bar
url_prefix: http://foo.bar
- jwt:
skip_verify: true
url_prefix: http://foo.bar
- bearer_token: foo
url_prefix: http://foo.bar
`)
// Prepare files with valid public keys for the public_key_files cases below.
rsaKeyFile := filepath.Join(t.TempDir(), "rsa_public_key.pem")
if err := os.WriteFile(rsaKeyFile, []byte(validRSAPublicKey), 0o644); err != nil {
t.Fatalf("failed to write RSA key file: %s", err)
}
ecdsaKeyFile := filepath.Join(t.TempDir(), "ecdsa_public_key.pem")
if err := os.WriteFile(ecdsaKeyFile, []byte(validECDSAPublicKey), 0o644); err != nil {
t.Fatalf("failed to write ECDSA key file: %s", err)
}
// Test single public key file
f(fmt.Sprintf(`
users:
- jwt:
public_key_files:
- %q
url_prefix: http://foo.bar
`, rsaKeyFile))
// Test multiple public key files
f(fmt.Sprintf(`
users:
- jwt:
public_key_files:
- %q
- %q
url_prefix: http://foo.bar
`, rsaKeyFile, ecdsaKeyFile))
// Test combined inline keys and files
f(fmt.Sprintf(`
users:
- jwt:
public_keys:
- %q
public_key_files:
- %q
url_prefix: http://foo.bar
`, validECDSAPublicKey, rsaKeyFile))
}

View File

@@ -24,7 +24,6 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil" "github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver" "github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httputil" "github.com/VictoriaMetrics/VictoriaMetrics/lib/httputil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/ioutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger" "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil" "github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil" "github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
@@ -41,38 +40,22 @@ var (
useProxyProtocol = flagutil.NewArrayBool("httpListenAddr.useProxyProtocol", "Whether to use proxy protocol for connections accepted at the corresponding -httpListenAddr . "+ useProxyProtocol = flagutil.NewArrayBool("httpListenAddr.useProxyProtocol", "Whether to use proxy protocol for connections accepted at the corresponding -httpListenAddr . "+
"See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt . "+ "See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt . "+
"With enabled proxy protocol http server cannot serve regular /metrics endpoint. Use -pushmetrics.url for metrics pushing") "With enabled proxy protocol http server cannot serve regular /metrics endpoint. Use -pushmetrics.url for metrics pushing")
maxIdleConnsPerBackend = flag.Int("maxIdleConnsPerBackend", 100, "The maximum number of idle connections vmauth can open per each backend host") maxIdleConnsPerBackend = flag.Int("maxIdleConnsPerBackend", 100, "The maximum number of idle connections vmauth can open per each backend host. "+
idleConnTimeout = flag.Duration("idleConnTimeout", 50*time.Second, "The timeout for HTTP keep-alive connections to backend services. "+ "See also -maxConcurrentRequests")
idleConnTimeout = flag.Duration("idleConnTimeout", 50*time.Second, "The timeout for HTTP keep-alive connections to backend services. "+
"It is recommended setting this value to values smaller than -http.idleConnTimeout set at backend services") "It is recommended setting this value to values smaller than -http.idleConnTimeout set at backend services")
responseTimeout = flag.Duration("responseTimeout", 5*time.Minute, "The timeout for receiving a response from backend") responseTimeout = flag.Duration("responseTimeout", 5*time.Minute, "The timeout for receiving a response from backend")
maxConcurrentRequests = flag.Int("maxConcurrentRequests", 1000, "The maximum number of concurrent requests vmauth can process. Other requests are rejected with "+
requestBufferSize = flagutil.NewBytes("requestBufferSize", 32*1024, "The size of the buffer for reading the request body before proxying the request to backends. "+ "'429 Too Many Requests' http status code. See also -maxConcurrentPerUserRequests and -maxIdleConnsPerBackend command-line options")
"This allows reducing the comsumption of backend resources when processing requests from clients connected via slow networks. "+ maxConcurrentPerUserRequests = flag.Int("maxConcurrentPerUserRequests", 300, "The maximum number of concurrent requests vmauth can process per each configured user. "+
"Set to 0 to disable request buffering. See https://docs.victoriametrics.com/victoriametrics/vmauth/#request-body-buffering") "Other requests are rejected with '429 Too Many Requests' http status code. See also -maxConcurrentRequests command-line option and max_concurrent_requests option "+
maxRequestBodySizeToRetry = flagutil.NewBytes("maxRequestBodySizeToRetry", 16*1024, "The maximum request body size to buffer in memory for potential retries at other backends. "+ "in per-user config")
"Request bodies larger than this size cannot be retried if the backend fails. Zero or negative value disables request body buffering and retries. "+
"See also -requestBufferSize")
maxConcurrentRequests = flag.Int("maxConcurrentRequests", 1000, "The maximum number of concurrent requests vmauth can process simultaneously. "+
"Requests exceeding this limit are queued for up to -maxQueueDuration and then rejected with '429 Too Many Requests' http status code if the limit is still reached. "+
"This protects vmauth itself from overloading and out-of-memory (OOM) failures. See also -maxConcurrentPerUserRequests "+
"and https://docs.victoriametrics.com/victoriametrics/vmauth/#concurrency-limiting")
maxConcurrentPerUserRequests = flag.Int("maxConcurrentPerUserRequests", 100, "The maximum number of concurrent requests vmauth can process per each configured user. "+
"Requests exceeding this limit are queued for up to -maxQueueDuration and then rejected with '429 Too Many Requests' http status code if the limit is still reached. "+
"This provides fairness and isolation between users, preventing a single user from consuming all the available resources. "+
"It works in conjunction with -maxConcurrentRequests, which sets the global limit across all users. "+
"This default can be overridden for individual users via max_concurrent_requests option in per-user config. "+
"See https://docs.victoriametrics.com/victoriametrics/vmauth/#concurrency-limiting")
maxQueueDuration = flag.Duration("maxQueueDuration", 10*time.Second, "The maximum duration to wait before rejecting incoming requests if concurrency limit "+
"specified via -maxConcurrentRequests or -maxConcurrentPerUserRequests command-line flags is reached. "+
"Requests are rejected with '429 Too Many Requests' http status code if the limit is still reached after the -maxQueueDuration duration. "+
"This allows graceful handling of short spikes in concurrent requests. See https://docs.victoriametrics.com/victoriametrics/vmauth/#concurrency-limiting")
reloadAuthKey = flagutil.NewPassword("reloadAuthKey", "Auth key for /-/reload http endpoint. It must be passed via authKey query arg. It overrides -httpAuth.*") reloadAuthKey = flagutil.NewPassword("reloadAuthKey", "Auth key for /-/reload http endpoint. It must be passed via authKey query arg. It overrides -httpAuth.*")
logInvalidAuthTokens = flag.Bool("logInvalidAuthTokens", false, "Whether to log requests with invalid auth tokens. "+ logInvalidAuthTokens = flag.Bool("logInvalidAuthTokens", false, "Whether to log requests with invalid auth tokens. "+
`Such requests are always counted at vmauth_http_request_errors_total{reason="invalid_auth_token"} metric, which is exposed at /metrics page`) `Such requests are always counted at vmauth_http_request_errors_total{reason="invalid_auth_token"} metric, which is exposed at /metrics page`)
failTimeout = flag.Duration("failTimeout", 3*time.Second, "Sets a delay period for load balancing to skip a malfunctioning backend") failTimeout = flag.Duration("failTimeout", 3*time.Second, "Sets a delay period for load balancing to skip a malfunctioning backend")
maxRequestBodySizeToRetry = flagutil.NewBytes("maxRequestBodySizeToRetry", 16*1024, "The maximum request body size, which can be cached and re-tried at other backends. "+
"Bigger values may require more memory. Zero or negative value disables caching of request body. This may be useful when proxying data ingestion requests")
backendTLSInsecureSkipVerify = flag.Bool("backend.tlsInsecureSkipVerify", false, "Whether to skip TLS verification when connecting to backends over HTTPS. "+ backendTLSInsecureSkipVerify = flag.Bool("backend.tlsInsecureSkipVerify", false, "Whether to skip TLS verification when connecting to backends over HTTPS. "+
"See https://docs.victoriametrics.com/victoriametrics/vmauth/#backend-tls-setup") "See https://docs.victoriametrics.com/victoriametrics/vmauth/#backend-tls-setup")
backendTLSCAFile = flag.String("backend.TLSCAFile", "", "Optional path to TLS root CA file, which is used for TLS verification when connecting to backends over HTTPS. "+ backendTLSCAFile = flag.String("backend.TLSCAFile", "", "Optional path to TLS root CA file, which is used for TLS verification when connecting to backends over HTTPS. "+
@@ -168,6 +151,7 @@ func requestHandlerWithInternalRoutes(w http.ResponseWriter, r *http.Request) bo
} }
func requestHandler(w http.ResponseWriter, r *http.Request) bool { func requestHandler(w http.ResponseWriter, r *http.Request) bool {
ats := getAuthTokensFromRequest(r) ats := getAuthTokensFromRequest(r)
if len(ats) == 0 { if len(ats) == 0 {
// Process requests for unauthorized users // Process requests for unauthorized users
@@ -181,32 +165,29 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
return true return true
} }
if ui := getUserInfoByAuthTokens(ats); ui != nil { ui := getUserInfoByAuthTokens(ats)
processUserRequest(w, r, ui) if ui == nil {
return true uu := authConfig.Load().UnauthorizedUser
} if uu != nil {
if ui := getUserInfoByJWTToken(ats); ui != nil { processUserRequest(w, r, uu)
processUserRequest(w, r, ui) return true
return true
}
uu := authConfig.Load().UnauthorizedUser
if uu != nil {
processUserRequest(w, r, uu)
return true
}
invalidAuthTokenRequests.Inc()
if *logInvalidAuthTokens {
err := fmt.Errorf("cannot authorize request with auth tokens %q", ats)
err = &httpserver.ErrorWithStatusCode{
Err: err,
StatusCode: http.StatusUnauthorized,
} }
httpserver.Errorf(w, r, "%s", err)
} else { invalidAuthTokenRequests.Inc()
http.Error(w, "Unauthorized", http.StatusUnauthorized) if *logInvalidAuthTokens {
err := fmt.Errorf("cannot authorize request with auth tokens %q", ats)
err = &httpserver.ErrorWithStatusCode{
Err: err,
StatusCode: http.StatusUnauthorized,
}
httpserver.Errorf(w, r, "%s", err)
} else {
http.Error(w, "Unauthorized", http.StatusUnauthorized)
}
return true
} }
processUserRequest(w, r, ui)
return true return true
} }
@@ -227,124 +208,26 @@ func processUserRequest(w http.ResponseWriter, r *http.Request, ui *UserInfo) {
ui.requests.Inc() ui.requests.Inc()
ctx, cancel := context.WithTimeout(r.Context(), *maxQueueDuration) // Limit the concurrency of requests to backends
defer cancel()
// Acquire global concurrency limit.
if err := beginConcurrencyLimit(ctx); err != nil {
handleConcurrencyLimitError(w, r, err)
return
}
defer endConcurrencyLimit()
// Set read deadline for reading the initial chunk for the request body.
rc := http.NewResponseController(w)
deadline, ok := ctx.Deadline()
if !ok {
logger.Panicf("BUG: expecting valid deadline for the context")
}
if err := rc.SetReadDeadline(deadline); err != nil {
logger.Panicf("BUG: cannot set read deadline: %s", err)
}
// Read the initial chunk for the request body.
userName := ui.name()
if userName == "" {
userName = "unauthorized"
}
bb, err := bufferRequestBody(ctx, r.Body, userName)
if err != nil {
httpserver.Errorf(w, r, "%s", err)
return
}
r.Body = bb
// Disable the read deadline for the rest of the request body.
if err := rc.SetReadDeadline(time.Time{}); err != nil {
logger.Panicf("BUG: cannot reset read deadline: %s", err)
}
// Acquire concurrency limit for the given user.
if err := ui.beginConcurrencyLimit(ctx); err != nil {
handleConcurrencyLimitError(w, r, err)
return
}
defer ui.endConcurrencyLimit()
// Process the request.
processRequest(w, r, ui)
}
func beginConcurrencyLimit(ctx context.Context) error {
concurrencyLimitOnce.Do(concurrencyLimitInit) concurrencyLimitOnce.Do(concurrencyLimitInit)
select { select {
case concurrencyLimitCh <- struct{}{}: case concurrencyLimitCh <- struct{}{}:
return nil if err := ui.beginConcurrencyLimit(); err != nil {
handleConcurrencyLimitError(w, r, err)
<-concurrencyLimitCh
return
}
default: default:
// The -maxConcurrentRequests are executed. Wait until some of the requests are finished, concurrentRequestsLimitReached.Inc()
// so the current request could be executed. err := fmt.Errorf("cannot serve more than -maxConcurrentRequests=%d concurrent requests", cap(concurrencyLimitCh))
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10078 handleConcurrencyLimitError(w, r, err)
select { return
case concurrencyLimitCh <- struct{}{}:
return nil
case <-ctx.Done():
err := ctx.Err()
if errors.Is(err, context.DeadlineExceeded) {
// The current request couldn't be executed until the request timeout.
concurrentRequestsLimitReached.Inc()
return fmt.Errorf("cannot start executing the request during -maxQueueDuration=%s because -maxConcurrentRequests=%d concurrent requests are executed",
*maxQueueDuration, cap(concurrencyLimitCh))
}
return fmt.Errorf("cannot start executing the request because -maxConcurrentRequests=%d concurrent requests are executed: %w", cap(concurrencyLimitCh), err)
}
} }
} processRequest(w, r, ui)
ui.endConcurrencyLimit()
func endConcurrencyLimit() {
<-concurrencyLimitCh <-concurrencyLimitCh
} }
// bufferRequestBody reads the initial part of the request body from r into memory
// and returns a reader that serves the buffered bytes first.
//
// The number of buffered bytes is capped by the larger of -requestBufferSize
// and -maxRequestBodySizeToRetry. When that cap is not positive, r is returned as is.
// A nil r (a request without a body, such as GET) yields a nil reader and a nil error.
//
// Any read failure is reported as a 400 Bad Request error. If ctx has hit its deadline
// by the time the read fails, the failure is counted as a slow-client rejection
// and the error message names userName.
func bufferRequestBody(ctx context.Context, r io.ReadCloser, userName string) (io.ReadCloser, error) {
	if r == nil {
		// There is no body to buffer, e.g. for a GET request.
		return nil, nil
	}

	limit := max(requestBufferSize.IntN(), maxRequestBodySizeToRetry.IntN())
	if limit <= 0 {
		return r, nil
	}

	limited := ioutil.GetLimitedReader(r, int64(limit))
	defer ioutil.PutLimitedReader(limited)

	startTime := time.Now()
	data, readErr := io.ReadAll(limited)
	bufferRequestBodyDuration.UpdateDuration(startTime)
	if readErr == nil {
		return newBufferedBody(r, data, limit), nil
	}

	// Distinguish a client that was too slow to send the body from any other read failure.
	var cause error
	if errors.Is(ctx.Err(), context.DeadlineExceeded) {
		rejectSlowClientRequests.Inc()
		elapsed := time.Since(startTime)
		cause = fmt.Errorf("reject request from the user %s because the request body couldn't be read in -maxQueueDuration=%s; read %d bytes in %s",
			userName, *maxQueueDuration, len(data), elapsed.Truncate(time.Second))
	} else {
		cause = fmt.Errorf("cannot read request body: %w", readErr)
	}
	return nil, &httpserver.ErrorWithStatusCode{
		Err:        cause,
		StatusCode: http.StatusBadRequest,
	}
}
func processRequest(w http.ResponseWriter, r *http.Request, ui *UserInfo) { func processRequest(w http.ResponseWriter, r *http.Request, ui *UserInfo) {
u := normalizeURL(r.URL) u := normalizeURL(r.URL)
up, hc := ui.getURLPrefixAndHeaders(u, r.Host, r.Header) up, hc := ui.getURLPrefixAndHeaders(u, r.Host, r.Header)
@@ -370,26 +253,28 @@ func processRequest(w http.ResponseWriter, r *http.Request, ui *UserInfo) {
isDefault = true isDefault = true
} }
rtb := newReadTrackingBody(r.Body, maxRequestBodySizeToRetry.IntN())
r.Body = rtb
maxAttempts := up.getBackendsCount() maxAttempts := up.getBackendsCount()
for range maxAttempts { for i := 0; i < maxAttempts; i++ {
bu := up.getBackendURL() bu := up.getBackendURL()
if bu == nil { if bu == nil {
break break
} }
targetURL := bu.url targetURL := bu.url
// Don't change path and add request_path query param for default route.
if isDefault { if isDefault {
// Don't change path and add request_path query param for default route.
query := targetURL.Query() query := targetURL.Query()
query.Set("request_path", u.String()) query.Set("request_path", u.String())
targetURL.RawQuery = query.Encode() targetURL.RawQuery = query.Encode()
} else { } else { // Update path for regular routes.
// Update path for regular routes. targetURL = mergeURLs(targetURL, u, up.dropSrcPathPrefixParts)
targetURL = mergeURLs(targetURL, u, up.dropSrcPathPrefixParts, up.mergeQueryArgs)
} }
wasLocalRetry := false wasLocalRetry := false
again: again:
ok, needLocalRetry := tryProcessingRequest(w, r, targetURL, hc, up.retryStatusCodes, ui, bu) ok, needLocalRetry := tryProcessingRequest(w, r, targetURL, hc, up.retryStatusCodes, ui)
if needLocalRetry && !wasLocalRetry { if needLocalRetry && !wasLocalRetry {
wasLocalRetry = true wasLocalRetry = true
goto again goto again
@@ -399,20 +284,17 @@ func processRequest(w http.ResponseWriter, r *http.Request, ui *UserInfo) {
if ok { if ok {
return return
} }
bu.setBroken() bu.setBroken()
ui.backendErrors.Inc()
} }
err := &httpserver.ErrorWithStatusCode{ err := &httpserver.ErrorWithStatusCode{
Err: fmt.Errorf("all the %d backends for the user %q are unavailable for proxying the request - check previous WARN logs to see the exact error for each failed backend", up.getBackendsCount(), ui.name()), Err: fmt.Errorf("all the %d backends for the user %q are unavailable", up.getBackendsCount(), ui.name()),
StatusCode: http.StatusBadGateway, StatusCode: http.StatusBadGateway,
} }
httpserver.Errorf(w, r, "%s", err) httpserver.Errorf(w, r, "%s", err)
ui.requestErrors.Inc() ui.backendErrors.Inc()
} }
func tryProcessingRequest(w http.ResponseWriter, r *http.Request, targetURL *url.URL, hc HeadersConf, retryStatusCodes []int, ui *UserInfo, bu *backendURL) (bool, bool) { func tryProcessingRequest(w http.ResponseWriter, r *http.Request, targetURL *url.URL, hc HeadersConf, retryStatusCodes []int, ui *UserInfo) (bool, bool) {
ui.backendRequests.Inc()
req := sanitizeRequestHeaders(r) req := sanitizeRequestHeaders(r)
req.URL = targetURL req.URL = targetURL
@@ -426,19 +308,21 @@ func tryProcessingRequest(w http.ResponseWriter, r *http.Request, targetURL *url
} }
} }
bb, bbOK := req.Body.(*bufferedBody) rtb, rtbOK := req.Body.(*readTrackingBody)
canRetry := !bbOK || bb.canRetry()
res, err := ui.rt.RoundTrip(req) res, err := ui.rt.RoundTrip(req)
if errors.Is(r.Context().Err(), context.Canceled) {
// Do not retry canceled requests.
clientCanceledRequests.Inc()
return true, false
}
if err != nil { if err != nil {
if !canRetry { if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) {
// Do not retry canceled or timed out requests
remoteAddr := httpserver.GetQuotedRemoteAddr(r)
requestURI := httpserver.GetRequestURI(r)
logger.Warnf("remoteAddr: %s; requestURI: %s; error when proxying response body from %s: %s", remoteAddr, requestURI, targetURL, err)
if errors.Is(err, context.DeadlineExceeded) {
// Timed out request must be counted as errors, since this usually means that the backend is slow.
ui.backendErrors.Inc()
}
return false, false
}
if !rtbOK || !rtb.canRetry() {
// Request body cannot be re-sent to another backend. Return the error to the client then. // Request body cannot be re-sent to another backend. Return the error to the client then.
err = &httpserver.ErrorWithStatusCode{ err = &httpserver.ErrorWithStatusCode{
Err: fmt.Errorf("cannot proxy the request to %s: %w", targetURL, err), Err: fmt.Errorf("cannot proxy the request to %s: %w", targetURL, err),
@@ -446,51 +330,41 @@ func tryProcessingRequest(w http.ResponseWriter, r *http.Request, targetURL *url
} }
httpserver.Errorf(w, r, "%s", err) httpserver.Errorf(w, r, "%s", err)
ui.backendErrors.Inc() ui.backendErrors.Inc()
ui.requestErrors.Inc()
bu.setBroken()
return true, false return true, false
} }
if netutil.IsTrivialNetworkError(err) { if netutil.IsTrivialNetworkError(err) {
// Retry request at the same backend on trivial network errors, such as proxy idle timeout misconfiguration or socket close by OS // Retry request at the same backend on trivial network errors, such as proxy idle timeout misconfiguration or socket close by OS
if bbOK {
bb.resetReader()
}
return false, true return false, true
} }
// Retry the request at another backend // Retry the request if its body wasn't read yet. This usually means that the backend isn't reachable.
remoteAddr := httpserver.GetQuotedRemoteAddr(r) remoteAddr := httpserver.GetQuotedRemoteAddr(r)
requestURI := httpserver.GetRequestURI(r) // NOTE: do not use httpserver.GetRequestURI
logger.Warnf("remoteAddr: %s; requestURI: %s; request to %s failed: %s, retrying the request at another backend", remoteAddr, requestURI, targetURL, err) // it explicitly reads request body, which may fail retries.
if bbOK { logger.Warnf("remoteAddr: %s; requestURI: %s; retrying the request to %s because of response error: %s", remoteAddr, req.URL, targetURL, err)
bb.resetReader()
}
return false, false return false, false
} }
if slices.Contains(retryStatusCodes, res.StatusCode) { if slices.Contains(retryStatusCodes, res.StatusCode) {
if !canRetry { _ = res.Body.Close()
if !rtbOK || !rtb.canRetry() {
// If we get an error from the retry_status_codes list, but cannot execute retry, // If we get an error from the retry_status_codes list, but cannot execute retry,
// we consider such a request an error as well. // we consider such a request an error as well.
err := &httpserver.ErrorWithStatusCode{ err := &httpserver.ErrorWithStatusCode{
Err: fmt.Errorf("got response status code=%d from %s, but cannot retry the request at another backend, because the request body has been already consumed", Err: fmt.Errorf("got response status code=%d from %s, but cannot retry the request on another backend, because the request has been already consumed",
res.StatusCode, targetURL), res.StatusCode, targetURL),
StatusCode: http.StatusServiceUnavailable, StatusCode: http.StatusServiceUnavailable,
} }
httpserver.Errorf(w, r, "%s", err) httpserver.Errorf(w, r, "%s", err)
ui.backendErrors.Inc() ui.backendErrors.Inc()
ui.requestErrors.Inc()
return true, false return true, false
} }
// Retry requests at other backends if it matches retryStatusCodes. // Retry requests at other backends if it matches retryStatusCodes.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4893 // See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4893
remoteAddr := httpserver.GetQuotedRemoteAddr(r) remoteAddr := httpserver.GetQuotedRemoteAddr(r)
requestURI := httpserver.GetRequestURI(r) // NOTE: do not use httpserver.GetRequestURI
logger.Warnf("remoteAddr: %s; requestURI: %s; request to %s failed, retrying the request at another backend because response status code=%d belongs to retry_status_codes=%d", // it explicitly reads request body, which may fail retries.
remoteAddr, requestURI, targetURL, res.StatusCode, retryStatusCodes) logger.Warnf("remoteAddr: %s; requestURI: %s; retrying the request to %s because response status code=%d belongs to retry_status_codes=%d",
if bbOK { remoteAddr, req.URL, targetURL, res.StatusCode, retryStatusCodes)
bb.resetReader()
}
return false, false return false, false
} }
removeHopHeaders(res.Header) removeHopHeaders(res.Header)
@@ -498,61 +372,20 @@ func tryProcessingRequest(w http.ResponseWriter, r *http.Request, targetURL *url
updateHeadersByConfig(w.Header(), hc.ResponseHeaders) updateHeadersByConfig(w.Header(), hc.ResponseHeaders)
w.WriteHeader(res.StatusCode) w.WriteHeader(res.StatusCode)
err = copyStreamToClient(w, res.Body) copyBuf := copyBufPool.Get()
copyBuf.B = bytesutil.ResizeNoCopyNoOverallocate(copyBuf.B, 16*1024)
_, err = io.CopyBuffer(w, res.Body, copyBuf.B)
copyBufPool.Put(copyBuf)
_ = res.Body.Close() _ = res.Body.Close()
if errors.Is(r.Context().Err(), context.Canceled) {
// Do not retry canceled requests.
clientCanceledRequests.Inc()
return true, false
}
if err != nil && !netutil.IsTrivialNetworkError(err) { if err != nil && !netutil.IsTrivialNetworkError(err) {
remoteAddr := httpserver.GetQuotedRemoteAddr(r) remoteAddr := httpserver.GetQuotedRemoteAddr(r)
requestURI := httpserver.GetRequestURI(r) requestURI := httpserver.GetRequestURI(r)
logger.Warnf("remoteAddr: %s; requestURI: %s; error when proxying response body from %s: %s", remoteAddr, requestURI, targetURL, err) logger.Warnf("remoteAddr: %s; requestURI: %s; error when proxying response body from %s: %s", remoteAddr, requestURI, targetURL, err)
ui.requestErrors.Inc()
return true, false return true, false
} }
return true, false return true, false
} }
// copyStreamToClient streams everything readable from backend into client,
// flushing client after every successful write.
//
// It returns nil once backend reports io.EOF. A write failure on client or any
// other read failure on backend is returned wrapped with a short description.
//
// client must implement net/http.Flusher, and must report writing the whole
// chunk on success; violating either is treated as a bug and triggers logger.Panicf.
func copyStreamToClient(client io.Writer, backend io.Reader) error {
	pooled := copyBufPool.Get()
	pooled.B = bytesutil.ResizeNoCopyNoOverallocate(pooled.B, 16*1024)
	defer copyBufPool.Put(pooled)
	chunk := pooled.B

	flusher, isFlusher := client.(http.Flusher)
	if !isFlusher {
		logger.Panicf("BUG: client must implement net/http.Flusher interface; got %T", client)
	}

	for {
		nRead, readErr := backend.Read(chunk)
		if nRead > 0 {
			nWritten, writeErr := client.Write(chunk[:nRead])
			if writeErr != nil {
				return fmt.Errorf("cannot write data to client: %w", writeErr)
			}
			if nWritten != nRead {
				logger.Panicf("BUG: unexpected number of bytes written returned by client.Write; got %d; want %d", nWritten, nRead)
			}
			// Push the freshly read data to the client right away
			// in order to reduce delays for data propagation.
			// See https://github.com/VictoriaMetrics/VictoriaLogs/issues/667
			flusher.Flush()
		}

		switch {
		case readErr == nil:
			continue
		case readErr == io.EOF:
			return nil
		default:
			return fmt.Errorf("cannot read data from backend: %w", readErr)
		}
	}
}
var copyBufPool bytesutil.ByteBufferPool var copyBufPool bytesutil.ByteBufferPool
func copyHeader(dst, src http.Header) { func copyHeader(dst, src http.Header) {
@@ -639,10 +472,6 @@ var (
configReloadRequests = metrics.NewCounter(`vmauth_http_requests_total{path="/-/reload"}`) configReloadRequests = metrics.NewCounter(`vmauth_http_requests_total{path="/-/reload"}`)
invalidAuthTokenRequests = metrics.NewCounter(`vmauth_http_request_errors_total{reason="invalid_auth_token"}`) invalidAuthTokenRequests = metrics.NewCounter(`vmauth_http_request_errors_total{reason="invalid_auth_token"}`)
missingRouteRequests = metrics.NewCounter(`vmauth_http_request_errors_total{reason="missing_route"}`) missingRouteRequests = metrics.NewCounter(`vmauth_http_request_errors_total{reason="missing_route"}`)
clientCanceledRequests = metrics.NewCounter(`vmauth_http_request_errors_total{reason="client_canceled"}`)
rejectSlowClientRequests = metrics.NewCounter(`vmauth_http_request_errors_total{reason="reject_slow_client"}`)
bufferRequestBodyDuration = metrics.NewSummary(`vmauth_buffer_request_body_duration_seconds`)
) )
func newRoundTripper(caFileOpt, certFileOpt, keyFileOpt, serverNameOpt string, insecureSkipVerifyP *bool) (http.RoundTripper, error) { func newRoundTripper(caFileOpt, certFileOpt, keyFileOpt, serverNameOpt string, insecureSkipVerifyP *bool) (http.RoundTripper, error) {
@@ -726,13 +555,6 @@ func handleMissingAuthorizationError(w http.ResponseWriter) {
} }
func handleConcurrencyLimitError(w http.ResponseWriter, r *http.Request, err error) { func handleConcurrencyLimitError(w http.ResponseWriter, r *http.Request, err error) {
if errors.Is(r.Context().Err(), context.Canceled) {
// Do not return any response for the request canceled by the client,
// since the connection to the client is already closed.
clientCanceledRequests.Inc()
return
}
w.Header().Add("Retry-After", "10") w.Header().Add("Retry-After", "10")
err = &httpserver.ErrorWithStatusCode{ err = &httpserver.ErrorWithStatusCode{
Err: err, Err: err,
@@ -741,76 +563,120 @@ func handleConcurrencyLimitError(w http.ResponseWriter, r *http.Request, err err
httpserver.Errorf(w, r, "%s", err) httpserver.Errorf(w, r, "%s", err)
} }
// bufferedBody serves two purposes: // readTrackingBody must be obtained via getReadTrackingBody()
// 1. Enables request retries when the body size does not exceed maxBodySize type readTrackingBody struct {
// by fully buffering the body in memory. // maxBodySize is the maximum body size to cache in buf.
// 2. Prevents slow clients from reducing effective server capacity by
// buffering the request body before acquiring a per-user concurrency slot.
//
// See bufferRequestBody for details on how bufferedBody is used.
type bufferedBody struct {
// r contains reader for reading the data after buf is read.
// //
// r is nil if buf contains all the data. // Bigger bodies cannot be retried.
maxBodySize int
// r contains reader for initial data reading
r io.ReadCloser r io.ReadCloser
// buf contains the initial buffer read from r. // buf is a buffer for data read from r. Buf size is limited by maxBodySize.
// If more than maxBodySize is read from r, then cannotRetry is set to true.
buf []byte buf []byte
// bufOffset is the offset at buf for already read bytes. // readBuf points to the cached data at buf, which must be read in the next call to Read().
bufOffset int readBuf []byte
// cannotRetry is set to true after Close() call on non-nil r. // cannotRetry is set to true when more than maxBodySize bytes are read from r.
// In this case the read data cannot fit buf, so it cannot be re-read from buf.
cannotRetry bool cannotRetry bool
// bufComplete is set to true when buf contains complete request body read from r.
bufComplete bool
} }
func newBufferedBody(r io.ReadCloser, buf []byte, maxBufSize int) *bufferedBody { func newReadTrackingBody(r io.ReadCloser, maxBodySize int) *readTrackingBody {
// Do not use sync.Pool here, since http.RoundTrip may still use request body after return. // do not use sync.Pool there
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/8051 // since http.RoundTrip may still use request body after return
// See this issue for details https://github.com/VictoriaMetrics/VictoriaMetrics/issues/8051
if len(buf) < maxBufSize { rtb := &readTrackingBody{}
// Read the full request body into buf. if maxBodySize < 0 {
r = nil maxBodySize = 0
} }
rtb.maxBodySize = maxBodySize
return &bufferedBody{ if r == nil {
r: r, // This is GET request without request body
buf: buf, r = (*zeroReader)(nil)
} }
rtb.r = r
return rtb
} }
// Read implements io.Reader interface. type zeroReader struct{}
func (bb *bufferedBody) Read(p []byte) (int, error) {
if bb.cannotRetry {
return 0, fmt.Errorf("cannot read already closed body")
}
if bb.bufOffset < len(bb.buf) {
n := copy(p, bb.buf[bb.bufOffset:])
bb.bufOffset += n
return n, nil
}
if bb.r == nil {
return 0, io.EOF
}
return bb.r.Read(p)
}
func (bb *bufferedBody) canRetry() bool { func (r *zeroReader) Read(_ []byte) (int, error) {
return bb.r == nil return 0, io.EOF
} }
func (r *zeroReader) Close() error {
// Close implements io.Closer interface.
func (bb *bufferedBody) Close() error {
bb.resetReader()
if bb.r != nil {
bb.cannotRetry = true
return bb.r.Close()
}
return nil return nil
} }
func (bb *bufferedBody) resetReader() { // Read implements io.Reader interface.
bb.bufOffset = 0 func (rtb *readTrackingBody) Read(p []byte) (int, error) {
if len(rtb.readBuf) > 0 {
n := copy(p, rtb.readBuf)
rtb.readBuf = rtb.readBuf[n:]
return n, nil
}
if rtb.r == nil {
if rtb.bufComplete {
return 0, io.EOF
}
return 0, fmt.Errorf("cannot read client request body after closing client reader")
}
n, err := rtb.r.Read(p)
if rtb.cannotRetry {
return n, err
}
if len(rtb.buf)+n > rtb.maxBodySize {
rtb.cannotRetry = true
return n, err
}
rtb.buf = append(rtb.buf, p[:n]...)
if err == io.EOF {
rtb.bufComplete = true
}
return n, err
}
func (rtb *readTrackingBody) canRetry() bool {
if rtb.cannotRetry {
return false
}
if rtb.bufComplete {
return true
}
return rtb.r != nil
}
// Close implements io.Closer interface.
func (rtb *readTrackingBody) Close() error {
if !rtb.cannotRetry {
rtb.readBuf = rtb.buf
} else {
rtb.readBuf = nil
}
// Close rtb.r only if the request body is completely read or if it is too big.
// http.Roundtrip performs body.Close call even without any Read calls,
// so this hack allows us to reuse request body.
if rtb.bufComplete || rtb.cannotRetry {
if rtb.r == nil {
return nil
}
err := rtb.r.Close()
rtb.r = nil
return err
}
return nil
} }
func debugInfo(u *url.URL, r *http.Request) string { func debugInfo(u *url.URL, r *http.Request) string {

Some files were not shown because too many files have changed in this diff Show More