Compare commits

..

1 Commits

Author SHA1 Message Date
Max Kotliar
e9261be945 lib/promutil: Weak pointer based labels compressor 2025-08-20 20:02:11 +03:00
4382 changed files with 272505 additions and 415604 deletions

View File

@@ -5,7 +5,7 @@ body:
- type: textarea - type: textarea
id: describe-the-component id: describe-the-component
attributes: attributes:
label: Is your question related to a specific component? label: Is your question request related to a specific component?
placeholder: | placeholder: |
VictoriaMetrics, vmagent, vmalert, vmui, etc... VictoriaMetrics, vmagent, vmalert, vmui, etc...
validations: validations:

23
.github/copilot-instructions.md vendored Normal file
View File

@@ -0,0 +1,23 @@
# Project Overview
VictoriaMetrics is a fast, cost-saving, and scalable solution for monitoring and managing time series data. It delivers high performance and reliability, making it an ideal choice for businesses of all sizes.
## Folder Structure
- `/app`: Contains the compilable binaries.
- `/lib`: Contains reusable Go libraries.
- `/docs/victoriametrics`: Contains documentation for the project.
- `/apptest/tests`: Contains integration tests.
## Libraries and Frameworks
- Backend: Golang, no framework. Use third-party libraries sparingly.
- Frontend: React.
## Code review guidelines
Ensure the feature or bugfix includes a changelog entry in /docs/victoriametrics/changelog/CHANGELOG.md.
Verify the entry is under the ## tip section and matches the structure and style of existing entries.
Chore-only changes may be omitted from the changelog.

View File

@@ -4,8 +4,6 @@ updates:
directory: "/" directory: "/"
schedule: schedule:
interval: "daily" interval: "daily"
cooldown:
default-days: 21
- package-ecosystem: "gomod" - package-ecosystem: "gomod"
directory: "/" directory: "/"
schedule: schedule:
@@ -25,8 +23,6 @@ updates:
directory: "/" directory: "/"
schedule: schedule:
interval: "daily" interval: "daily"
cooldown:
default-days: 21
- package-ecosystem: "npm" - package-ecosystem: "npm"
directory: "/app/vmui/packages/vmui" directory: "/app/vmui/packages/vmui"
schedule: schedule:

View File

@@ -1,3 +1,10 @@
**PLEASE REMOVE LINE BELOW BEFORE SUBMITTING** ### Describe Your Changes
Before creating the PR, make sure you have read and followed the [VictoriaMetrics contributing guidelines](https://docs.victoriametrics.com/victoriametrics/contributing/#pull-request-checklist). Please provide a brief description of the changes you made. Be as specific as possible to help others understand the purpose and impact of your modifications.
### Checklist
The following checks are **mandatory**:
- [ ] My change adheres to [VictoriaMetrics contributing guidelines](https://docs.victoriametrics.com/victoriametrics/contributing/#pull-request-checklist).
- [ ] My change adheres to [VictoriaMetrics development goals](https://docs.victoriametrics.com/victoriametrics/goals/).

View File

@@ -1,48 +0,0 @@
#!/usr/bin/env sh
# Lints changelog modifications of the current branch: every non-indented line
# added relative to ${GIT_REMOTE}/${GITHUB_BASE_REF} must be located inside the
# "## tip" section of the changelog file.
#
# Environment:
#   GITHUB_BASE_REF  base branch to diff against (default: master)
#   GIT_REMOTE       remote holding the base branch (default: origin)
#
# Exit status:
#   0  no additions, or all additions are inside the "## tip" section
#   1  the "## tip" section is missing, or an addition is outside of it
set -e

CHANGELOG_FILE="docs/victoriametrics/changelog/CHANGELOG.md"
GITHUB_BASE_REF=${GITHUB_BASE_REF:-"master"}
GIT_REMOTE=${GIT_REMOTE:-"origin"}

# Keep the diff in a variable so no temporary diff.txt is left in the work tree.
DIFF=$(git diff "${GIT_REMOTE}/${GITHUB_BASE_REF}"...HEAD -- "$CHANGELOG_FILE")

# Drop everything before the first hunk header: the "+++ b/<path>" file header
# starts with "+" as well and must not be mistaken for an added line.
HUNKS=$(printf '%s\n' "$DIFF" | sed -n '/^@@/,$p')

if ! printf '%s\n' "$HUNKS" | grep "^+" > /dev/null; then
  echo "No additions in CHANGELOG.md"
  exit 0
fi

# Added lines whose first character is not whitespace, with the diff marker removed.
# [^[:space:]] is the POSIX spelling of the GNU-only \S.
ADDED_LINES=$(printf '%s\n' "$HUNKS" | grep '^+[^[:space:]]' | sed 's/^+//')

START_TIP=$(grep -n "^## tip" "$CHANGELOG_FILE" | head -1 | cut -d: -f1)
if [ -z "$START_TIP" ]; then
  echo "ERROR: ${CHANGELOG_FILE} does not contain a ## tip section"
  exit 1
fi

# The tip section ends at the next second-level header, or at the end of the file.
END_TIP=$(awk "NR>$START_TIP && /^## / {print NR; exit}" "${CHANGELOG_FILE}")
if [ -z "$END_TIP" ]; then
  END_TIP=$(wc -l < "$CHANGELOG_FILE")
fi

BAD=0
while IFS= read -r line; do
  # The here-document yields a single empty line when nothing was collected;
  # an empty pattern would match every line of the file.
  [ -n "$line" ] || continue

  # Find the exact line inside the file and get its line numbers.
  # -x: whole-line match only; -e: keeps lines starting with "-" from being parsed as options.
  MATCHES=$(grep -n -F -x -e "$line" "$CHANGELOG_FILE" | cut -d: -f1)
  for m in $MATCHES; do
    if [ "$m" -lt "$START_TIP" ] || [ "$m" -gt "$END_TIP" ]; then
      echo "'$line' on line ${m} is outside ## tip section (lines ${START_TIP}-${END_TIP})"
      BAD=1
    fi
  done
done << EOF
$ADDED_LINES
EOF

if [ "$BAD" -ne 0 ]; then
  echo "CHANGELOG modifications must be placed inside the ## tip section."
  exit 1
fi

echo "CHANGELOG modifications are valid."

View File

@@ -22,7 +22,8 @@ on:
- '!app/vmui/**' - '!app/vmui/**'
- '.github/workflows/build.yml' - '.github/workflows/build.yml'
permissions: {} permissions:
contents: read
concurrency: concurrency:
cancel-in-progress: true cancel-in-progress: true
@@ -31,8 +32,6 @@ concurrency:
jobs: jobs:
build: build:
name: ${{ matrix.os }}-${{ matrix.arch }} name: ${{ matrix.os }}-${{ matrix.arch }}
permissions:
contents: read
runs-on: ubuntu-latest runs-on: ubuntu-latest
strategy: strategy:
fail-fast: false fail-fast: false
@@ -48,8 +47,6 @@ jobs:
arch: arm arch: arm
- os: linux - os: linux
arch: ppc64le arch: ppc64le
- os: linux
arch: s390x
- os: darwin - os: darwin
arch: amd64 arch: amd64
- os: darwin - os: darwin
@@ -58,24 +55,21 @@ jobs:
arch: amd64 arch: amd64
- os: openbsd - os: openbsd
arch: amd64 arch: amd64
- os: netbsd
arch: amd64
- os: windows - os: windows
arch: amd64 arch: amd64
steps: steps:
- name: Code checkout - name: Code checkout
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 uses: actions/checkout@v5
- name: Setup Go - name: Setup Go
id: go id: go
uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6.4.0 uses: actions/setup-go@v5
with: with:
cache-dependency-path: | cache-dependency-path: |
go.sum go.sum
Makefile Makefile
app/**/Makefile app/**/Makefile
go-version-file: 'go.mod' go-version: stable
- run: go version
- name: Build victoria-metrics for ${{ matrix.os }}-${{ matrix.arch }} - name: Build victoria-metrics for ${{ matrix.os }}-${{ matrix.arch }}
run: make victoria-metrics-${{ matrix.os }}-${{ matrix.arch }} run: make victoria-metrics-${{ matrix.os }}-${{ matrix.arch }}

View File

@@ -1,23 +0,0 @@
# Runs the changelog linter on pull requests that touch the changelog file.
name: 'changelog-linter'

on:
  pull_request:
    paths:
      - "docs/victoriametrics/changelog/CHANGELOG.md"

# No permissions by default; the job below requests only what it needs.
permissions: {}

jobs:
  tip-lint:
    permissions:
      contents: read
    runs-on: 'ubuntu-latest'
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          # needed for proper diff
          fetch-depth: 0

      - name: 'Validate that changelog changes are under ## tip'
        env:
          # Passed through the environment instead of being interpolated into the
          # script text, so the branch name is never parsed as shell code.
          BASE_REF: ${{ github.base_ref }}
        run: |
          GITHUB_BASE_REF="$BASE_REF" ./.github/scripts/lint-changelog-tip.sh

View File

@@ -1,51 +0,0 @@
# Fails a pull request early when any of its commits carries no signature header.
name: check-commit-signed

on:
  pull_request:

# No permissions by default; the job below requests only what it needs.
permissions: {}

jobs:
  check-commit-signed:
    permissions:
      contents: read
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          fetch-depth: 0 # we need full history for commit verification

      - name: Check commit signatures
        env:
          # Event data is passed through the environment rather than interpolated
          # into the script text, so it is never parsed as shell code.
          EVENT_NAME: ${{ github.event_name }}
          BASE_SHA: ${{ github.event.pull_request.base.sha }}
          HEAD_SHA: ${{ github.event.pull_request.head.sha }}
        run: |
          if [ "$EVENT_NAME" != "pull_request" ]; then
            echo "Not a PR event, skipping signature check"
            exit 0
          fi

          RANGE="${BASE_SHA}..${HEAD_SHA}"
          echo "Checking commits in PR range: $RANGE"

          # List the commits once and reuse the result below.
          commits=$(git rev-list "$RANGE")
          if [ -z "$commits" ]; then
            echo "No new commits in this PR, skipping signature check"
            exit 0
          fi

          # Check raw commit objects for a "gpgsig" header as a fast early signal for
          # contributors. Both GPG and SSH signatures use this header.
          # This avoids relying on %G? which returns N for SSH commits.
          # This check is not a security enforcement — unsigned commits cannot be merged
          # anyway due to the GitHub repository merge policy.
          unsigned=""
          for sha in $commits; do
            # Only the header block (everything before the first empty line) is
            # inspected, so a commit message line starting with "gpgsig" does not count.
            if [ -z "$(git cat-file commit "$sha" | sed -n '/^$/q;/^gpgsig/p')" ]; then
              unsigned="$unsigned $sha"
            fi
          done

          if [ -n "$unsigned" ]; then
            echo "Found unsigned commits:"
            echo "$unsigned"
            exit 1
          fi

          echo "All commits in PR are signed (GPG or SSH)"

View File

@@ -6,37 +6,33 @@ on:
pull_request: pull_request:
paths: paths:
- 'vendor' - 'vendor'
permissions:
permissions: {} contents: read
jobs: jobs:
build: build:
name: Build name: Build
permissions:
contents: read
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
- name: Code checkout - name: Code checkout
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 uses: actions/checkout@master
- name: Setup Go - name: Setup Go
id: go id: go
uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6.4.0 uses: actions/setup-go@v5
with: with:
go-version-file: 'go.mod' go-version: stable
cache: false cache: false
- run: go version
- name: Cache Go artifacts - name: Cache Go artifacts
uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5 uses: actions/cache@v4
with: with:
path: | path: |
~/.cache/go-build ~/.cache/go-build
~/go/pkg/mod ~/go/pkg/mod
~/go/bin ~/go/bin
key: go-artifacts-${{ runner.os }}-check-licenses-${{ steps.go.outputs.go-version }}-${{ hashFiles('go.sum', 'Makefile', 'app/**/Makefile') }} key: go-artifacts-${{ runner.os }}-check-licenses-${{ steps.go.outputs.go-version }}-${{ hashFiles('go.sum', 'Makefile', 'app/**/Makefile') }}
restore-keys: go-artifacts-${{ runner.os }}-check-licenses-${{ steps.go.outputs.go-version }}- restore-keys: go-artifacts-${{ runner.os }}-check-licenses-
- name: Check License - name: Check License
run: make check-licenses run: make check-licenses

View File

@@ -18,8 +18,6 @@ concurrency:
cancel-in-progress: true cancel-in-progress: true
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
permissions: {}
jobs: jobs:
analyze: analyze:
name: Analyze name: Analyze
@@ -31,35 +29,34 @@ jobs:
steps: steps:
- name: Checkout repository - name: Checkout repository
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 uses: actions/checkout@v5
- name: Set up Go - name: Set up Go
id: go id: go
uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6.4.0 uses: actions/setup-go@v5
with: with:
cache: false cache: false
go-version-file: 'go.mod' go-version: stable
- run: go version
- name: Cache Go artifacts - name: Cache Go artifacts
uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5 uses: actions/cache@v4
with: with:
path: | path: |
~/.cache/go-build ~/.cache/go-build
~/go/bin ~/go/bin
~/go/pkg/mod ~/go/pkg/mod
key: go-artifacts-${{ runner.os }}-codeql-analyze-${{ steps.go.outputs.go-version }}-${{ hashFiles('go.sum', 'Makefile', 'app/**/Makefile') }} key: go-artifacts-${{ runner.os }}-codeql-analyze-${{ steps.go.outputs.go-version }}-${{ hashFiles('go.sum', 'Makefile', 'app/**/Makefile') }}
restore-keys: go-artifacts-${{ runner.os }}-codeql-analyze-${{ steps.go.outputs.go-version }}- restore-keys: go-artifacts-${{ runner.os }}-codeql-analyze-
- name: Initialize CodeQL - name: Initialize CodeQL
uses: github/codeql-action/init@e46ed2cbd01164d986452f91f178727624ae40d7 # v4.35.3 uses: github/codeql-action/init@v3
with: with:
languages: go languages: go
- name: Autobuild - name: Autobuild
uses: github/codeql-action/autobuild@e46ed2cbd01164d986452f91f178727624ae40d7 # v4.35.3 uses: github/codeql-action/autobuild@v3
- name: Perform CodeQL Analysis - name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@e46ed2cbd01164d986452f91f178727624ae40d7 # v4.35.3 uses: github/codeql-action/analyze@v3
with: with:
category: 'language:go' category: 'language:go'

View File

@@ -7,30 +7,28 @@ on:
- 'docs/**' - 'docs/**'
- '.github/workflows/docs.yaml' - '.github/workflows/docs.yaml'
workflow_dispatch: {} workflow_dispatch: {}
permissions:
permissions: {} contents: read # This is required for actions/checkout and to commit back image update
deployments: write
jobs: jobs:
build: build:
name: Build name: Build
permissions:
contents: read
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
- name: Code checkout - name: Code checkout
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 uses: actions/checkout@v5
with: with:
path: __vm path: __vm
- name: Checkout private code - name: Checkout private code
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 uses: actions/checkout@v5
with: with:
repository: VictoriaMetrics/vmdocs repository: VictoriaMetrics/vmdocs
token: ${{ secrets.VM_BOT_GH_TOKEN }} token: ${{ secrets.VM_BOT_GH_TOKEN }}
path: __vm-docs path: __vm-docs
- name: Import GPG key - name: Import GPG key
uses: crazy-max/ghaction-import-gpg@2dc316deee8e90f13e1a351ab510b4d5bc0c82cd # v7.0.0 uses: crazy-max/ghaction-import-gpg@v6
id: import-gpg id: import-gpg
with: with:
gpg_private_key: ${{ secrets.VM_BOT_GPG_PRIVATE_KEY }} gpg_private_key: ${{ secrets.VM_BOT_GPG_PRIVATE_KEY }}

View File

@@ -18,7 +18,8 @@ on:
- 'go.*' - 'go.*'
- '.github/workflows/main.yml' - '.github/workflows/main.yml'
permissions: {} permissions:
contents: read
concurrency: concurrency:
cancel-in-progress: true cancel-in-progress: true
@@ -28,32 +29,29 @@ concurrency:
jobs: jobs:
lint: lint:
name: lint name: lint
permissions:
contents: read
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
- name: Code checkout - name: Code checkout
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 uses: actions/checkout@v5
- name: Setup Go - name: Setup Go
id: go id: go
uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6.4.0 uses: actions/setup-go@v5
with: with:
cache-dependency-path: | cache-dependency-path: |
go.sum go.sum
Makefile Makefile
app/**/Makefile app/**/Makefile
go-version-file: 'go.mod' go-version: stable
- run: go version
- name: Cache golangci-lint - name: Cache golangci-lint
uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5 uses: actions/cache@v4
with: with:
path: | path: |
~/.cache/golangci-lint ~/.cache/golangci-lint
~/go/bin ~/go/bin
key: golangci-lint-${{ runner.os }}-${{ steps.go.outputs.go-version }}-${{ hashFiles('.golangci.yml') }} key: golangci-lint-${{ runner.os }}-${{ hashFiles('.golangci.yml') }}
- name: Run check-all - name: Run check-all
run: | run: |
@@ -62,55 +60,54 @@ jobs:
unit: unit:
name: unit name: unit
permissions:
contents: read
runs-on: ubuntu-latest runs-on: ubuntu-latest
strategy: strategy:
matrix: matrix:
scenario: scenario:
- 'test' - 'test-full'
- 'test-386' - 'test-full-386'
- 'test-pure' - 'test-pure'
steps: steps:
- name: Code checkout - name: Code checkout
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 uses: actions/checkout@v5
- name: Setup Go - name: Setup Go
id: go id: go
uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6.4.0 uses: actions/setup-go@v5
with: with:
cache-dependency-path: | cache-dependency-path: |
go.sum go.sum
Makefile Makefile
app/**/Makefile app/**/Makefile
go-version-file: 'go.mod' go-version: stable
- run: go version
- name: Run tests - name: Run tests
run: make ${{ matrix.scenario}} run: GOGC=10 make ${{ matrix.scenario}}
apptest: - name: Publish coverage
name: apptest uses: codecov/codecov-action@v5
permissions: with:
contents: read files: ./coverage.txt
runs-on: apptest
integration:
name: integration
runs-on: ubuntu-latest
steps: steps:
- name: Code checkout - name: Code checkout
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 uses: actions/checkout@v5
- name: Setup Go - name: Setup Go
id: go id: go
uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6.4.0 uses: actions/setup-go@v5
with: with:
cache-dependency-path: | cache-dependency-path: |
go.sum go.sum
Makefile Makefile
app/**/Makefile app/**/Makefile
go-version-file: 'go.mod' go-version: stable
- run: go version
- name: Run app tests - name: Run integration tests
run: make apptest run: make integration-test

View File

@@ -16,7 +16,11 @@ on:
- 'app/vmui/packages/vmui/**' - 'app/vmui/packages/vmui/**'
- '.github/workflows/vmui.yml' - '.github/workflows/vmui.yml'
permissions: {} permissions:
contents: read
packages: read
pull-requests: read
checks: write
concurrency: concurrency:
cancel-in-progress: true cancel-in-progress: true
@@ -25,51 +29,41 @@ concurrency:
jobs: jobs:
vmui-checks: vmui-checks:
name: VMUI Checks (lint, test, typecheck) name: VMUI Checks (lint, test, typecheck)
permissions:
checks: write
contents: read
pull-requests: read
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
- name: Code checkout - name: Code checkout
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 uses: actions/checkout@v5
- name: Cache node_modules - name: Setup Node
id: cache uses: actions/setup-node@v4
uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
with: with:
path: app/vmui/packages/vmui/node_modules node-version: '24.x'
key: vmui-deps-${{ runner.os }}-${{ hashFiles('app/vmui/packages/vmui/package-lock.json', 'app/vmui/Dockerfile-build') }}
restore-keys: |
vmui-deps-${{ runner.os }}-
- name: Install dependencies - name: Cache node-modules
if: steps.cache.outputs.cache-hit != 'true' uses: actions/cache@v4
run: make vmui-install with:
path: |
app/vmui/packages/vmui/node_modules
key: vmui-artifacts-${{ runner.os }}-${{ hashFiles('package-lock.json') }}
restore-keys: vmui-artifacts-${{ runner.os }}-
- name: Run lint - name: Run lint
id: lint id: lint
run: make vmui-lint run: make vmui-lint
continue-on-error: true continue-on-error: true
env:
VMUI_SKIP_INSTALL: true
- name: Run tests - name: Run tests
id: test id: test
run: make vmui-test run: make vmui-test
continue-on-error: true continue-on-error: true
env:
VMUI_SKIP_INSTALL: true
- name: Run typecheck - name: Run typecheck
id: typecheck id: typecheck
run: make vmui-typecheck run: make vmui-typecheck
continue-on-error: true continue-on-error: true
env:
VMUI_SKIP_INSTALL: true
- name: Annotate Code Linting Results - name: Annotate Code Linting Results
uses: ataylorme/eslint-annotate-action@d57a1193d4c59cbfbf3f86c271f42612f9dbd9e9 # 3.0.0 uses: ataylorme/eslint-annotate-action@v3
with: with:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
report-json: app/vmui/packages/vmui/vmui-lint-report.json report-json: app/vmui/packages/vmui/vmui-lint-report.json

View File

@@ -175,7 +175,7 @@
END OF TERMS AND CONDITIONS END OF TERMS AND CONDITIONS
Copyright 2019-2026 VictoriaMetrics, Inc. Copyright 2019-2025 VictoriaMetrics, Inc.
Licensed under the Apache License, Version 2.0 (the "License"); Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License. you may not use this file except in compliance with the License.

View File

@@ -17,7 +17,7 @@ EXTRA_GO_BUILD_TAGS ?=
GO_BUILDINFO = -X '$(PKG_PREFIX)/lib/buildinfo.Version=$(APP_NAME)-$(DATEINFO_TAG)-$(BUILDINFO_TAG)' GO_BUILDINFO = -X '$(PKG_PREFIX)/lib/buildinfo.Version=$(APP_NAME)-$(DATEINFO_TAG)-$(BUILDINFO_TAG)'
TAR_OWNERSHIP ?= --owner=1000 --group=1000 TAR_OWNERSHIP ?= --owner=1000 --group=1000
GOLANGCI_LINT_VERSION := 2.9.0 GOLANGCI_LINT_VERSION := 2.4.0
.PHONY: $(MAKECMDGOALS) .PHONY: $(MAKECMDGOALS)
@@ -125,15 +125,6 @@ vmutils-linux-ppc64le: \
vmrestore-linux-ppc64le \ vmrestore-linux-ppc64le \
vmctl-linux-ppc64le vmctl-linux-ppc64le
vmutils-linux-s390x: \
vmagent-linux-s390x \
vmalert-linux-s390x \
vmalert-tool-linux-s390x \
vmauth-linux-s390x \
vmbackup-linux-s390x \
vmrestore-linux-s390x \
vmctl-linux-s390x
vmutils-darwin-amd64: \ vmutils-darwin-amd64: \
vmagent-darwin-amd64 \ vmagent-darwin-amd64 \
vmalert-darwin-amd64 \ vmalert-darwin-amd64 \
@@ -266,7 +257,6 @@ release-victoria-metrics: \
release-victoria-metrics-linux-amd64 \ release-victoria-metrics-linux-amd64 \
release-victoria-metrics-linux-arm \ release-victoria-metrics-linux-arm \
release-victoria-metrics-linux-arm64 \ release-victoria-metrics-linux-arm64 \
release-victoria-metrics-linux-s390x \
release-victoria-metrics-darwin-amd64 \ release-victoria-metrics-darwin-amd64 \
release-victoria-metrics-darwin-arm64 \ release-victoria-metrics-darwin-arm64 \
release-victoria-metrics-freebsd-amd64 \ release-victoria-metrics-freebsd-amd64 \
@@ -285,9 +275,6 @@ release-victoria-metrics-linux-arm:
release-victoria-metrics-linux-arm64: release-victoria-metrics-linux-arm64:
GOOS=linux GOARCH=arm64 $(MAKE) release-victoria-metrics-goos-goarch GOOS=linux GOARCH=arm64 $(MAKE) release-victoria-metrics-goos-goarch
release-victoria-metrics-linux-s390x:
GOOS=linux GOARCH=s390x $(MAKE) release-victoria-metrics-goos-goarch
release-victoria-metrics-darwin-amd64: release-victoria-metrics-darwin-amd64:
GOOS=darwin GOARCH=amd64 $(MAKE) release-victoria-metrics-goos-goarch GOOS=darwin GOARCH=amd64 $(MAKE) release-victoria-metrics-goos-goarch
@@ -327,7 +314,6 @@ release-vmutils: \
release-vmutils-linux-amd64 \ release-vmutils-linux-amd64 \
release-vmutils-linux-arm64 \ release-vmutils-linux-arm64 \
release-vmutils-linux-arm \ release-vmutils-linux-arm \
release-vmutils-linux-s390x \
release-vmutils-darwin-amd64 \ release-vmutils-darwin-amd64 \
release-vmutils-darwin-arm64 \ release-vmutils-darwin-arm64 \
release-vmutils-freebsd-amd64 \ release-vmutils-freebsd-amd64 \
@@ -346,9 +332,6 @@ release-vmutils-linux-arm64:
release-vmutils-linux-arm: release-vmutils-linux-arm:
GOOS=linux GOARCH=arm $(MAKE) release-vmutils-goos-goarch GOOS=linux GOARCH=arm $(MAKE) release-vmutils-goos-goarch
release-vmutils-linux-s390x:
GOOS=linux GOARCH=s390x $(MAKE) release-vmutils-goos-goarch
release-vmutils-darwin-amd64: release-vmutils-darwin-amd64:
GOOS=darwin GOARCH=amd64 $(MAKE) release-vmutils-goos-goarch GOOS=darwin GOARCH=amd64 $(MAKE) release-vmutils-goos-goarch
@@ -435,7 +418,7 @@ release-vmutils-windows-goarch: \
vmctl-windows-$(GOARCH)-prod.exe vmctl-windows-$(GOARCH)-prod.exe
pprof-cpu: pprof-cpu:
go tool pprof -trim_path=github.com/VictoriaMetrics/VictoriaMetrics $(PPROF_FILE) go tool pprof -trim_path=github.com/VictoriaMetrics/VictoriaMetrics@ $(PPROF_FILE)
fmt: fmt:
gofmt -l -w -s ./lib gofmt -l -w -s ./lib
@@ -443,7 +426,7 @@ fmt:
gofmt -l -w -s ./apptest gofmt -l -w -s ./apptest
vet: vet:
go vet -tags 'synctest' ./lib/... GOEXPERIMENT=synctest go vet ./lib/...
go vet ./app/... go vet ./app/...
go vet ./apptest/... go vet ./apptest/...
@@ -452,55 +435,39 @@ check-all: fmt vet golangci-lint govulncheck
clean-checkers: remove-golangci-lint remove-govulncheck clean-checkers: remove-golangci-lint remove-govulncheck
test: test:
go test -tags 'synctest' ./lib/... ./app/... GOEXPERIMENT=synctest go test ./lib/... ./app/...
test-race: test-race:
go test -tags 'synctest' -race ./lib/... ./app/... GOEXPERIMENT=synctest go test -race ./lib/... ./app/...
test-386:
GOARCH=386 go test -tags 'synctest' ./lib/... ./app/...
test-pure: test-pure:
CGO_ENABLED=0 go test -tags 'synctest' ./lib/... ./app/... GOEXPERIMENT=synctest CGO_ENABLED=0 go test ./lib/... ./app/...
test-full: test-full:
go test -tags 'synctest' -coverprofile=coverage.txt -covermode=atomic ./lib/... ./app/... GOEXPERIMENT=synctest go test -coverprofile=coverage.txt -covermode=atomic ./lib/... ./app/...
test-full-386: test-full-386:
GOARCH=386 go test -tags 'synctest' -coverprofile=coverage.txt -covermode=atomic ./lib/... ./app/... GOEXPERIMENT=synctest GOARCH=386 go test -coverprofile=coverage.txt -covermode=atomic ./lib/... ./app/...
integration-test:
$(MAKE) apptest
apptest: apptest:
$(MAKE) victoria-metrics-race vmagent-race vmalert-race vmauth-race vmctl-race vmbackup-race vmrestore-race $(MAKE) victoria-metrics vmagent vmalert vmauth vmctl vmbackup vmrestore
go test ./apptest/... -skip="^Test(Cluster|Legacy).*" go test ./apptest/... -skip="^TestCluster.*"
apptest-legacy: victoria-metrics-race vmbackup-race vmrestore-race
OS=$$(uname | tr '[:upper:]' '[:lower:]'); \
ARCH=$$(uname -m | tr '[:upper:]' '[:lower:]' | sed 's/x86_64/amd64/'); \
VERSION=v1.132.0; \
VMSINGLE=victoria-metrics-$${OS}-$${ARCH}-$${VERSION}.tar.gz; \
VMCLUSTER=victoria-metrics-$${OS}-$${ARCH}-$${VERSION}-cluster.tar.gz; \
URL=https://github.com/VictoriaMetrics/VictoriaMetrics/releases/download/$${VERSION}; \
DIR=/tmp/$${VERSION}; \
test -d $${DIR} || (mkdir $${DIR} && \
curl --output-dir /tmp -LO $${URL}/$${VMSINGLE} && tar xzf /tmp/$${VMSINGLE} -C $${DIR} && \
curl --output-dir /tmp -LO $${URL}/$${VMCLUSTER} && tar xzf /tmp/$${VMCLUSTER} -C $${DIR} \
); \
VMSINGLE_V1_132_0_PATH=$${DIR}/victoria-metrics-prod \
VMSTORAGE_V1_132_0_PATH=$${DIR}/vmstorage-prod \
go test ./apptest/tests -run="^TestLegacySingle.*"
benchmark: benchmark:
go test -run=NO_TESTS -bench=. ./lib/... GOEXPERIMENT=synctest go test -bench=. ./lib/...
go test -run=NO_TESTS -bench=. ./app/... go test -bench=. ./app/...
benchmark-pure: benchmark-pure:
CGO_ENABLED=0 go test -run=NO_TESTS -bench=. ./lib/... GOEXPERIMENT=synctest CGO_ENABLED=0 go test -bench=. ./lib/...
CGO_ENABLED=0 go test -run=NO_TESTS -bench=. ./app/... CGO_ENABLED=0 go test -bench=. ./app/...
vendor-update: vendor-update:
go get -u ./lib/... go get -u ./lib/...
go get -u ./app/... go get -u ./app/...
go mod tidy -compat=1.26 go mod tidy -compat=1.24
go mod vendor go mod vendor
app-local: app-local:
@@ -516,15 +483,14 @@ app-local-windows-goarch:
CGO_ENABLED=0 GOOS=windows GOARCH=$(GOARCH) go build $(RACE) -ldflags "$(GO_BUILDINFO)" -tags "$(EXTRA_GO_BUILD_TAGS)" -o bin/$(APP_NAME)-windows-$(GOARCH)$(RACE).exe $(PKG_PREFIX)/app/$(APP_NAME) CGO_ENABLED=0 GOOS=windows GOARCH=$(GOARCH) go build $(RACE) -ldflags "$(GO_BUILDINFO)" -tags "$(EXTRA_GO_BUILD_TAGS)" -o bin/$(APP_NAME)-windows-$(GOARCH)$(RACE).exe $(PKG_PREFIX)/app/$(APP_NAME)
quicktemplate-gen: install-qtc quicktemplate-gen: install-qtc
qtc -dir=lib qtc
qtc -dir=app
install-qtc: install-qtc:
which qtc || go install github.com/valyala/quicktemplate/qtc@latest which qtc || go install github.com/valyala/quicktemplate/qtc@latest
golangci-lint: install-golangci-lint golangci-lint: install-golangci-lint
golangci-lint run --build-tags 'synctest' GOEXPERIMENT=synctest golangci-lint run
install-golangci-lint: install-golangci-lint:
which golangci-lint && (golangci-lint --version | grep -q $(GOLANGCI_LINT_VERSION)) || curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(shell go env GOPATH)/bin v$(GOLANGCI_LINT_VERSION) which golangci-lint && (golangci-lint --version | grep -q $(GOLANGCI_LINT_VERSION)) || curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(shell go env GOPATH)/bin v$(GOLANGCI_LINT_VERSION)
@@ -535,15 +501,6 @@ remove-golangci-lint:
govulncheck: install-govulncheck govulncheck: install-govulncheck
govulncheck ./... govulncheck ./...
govulncheck-docker:
docker run -w $(PWD) -v $(PWD):$(PWD) \
-v govulncheck-gomod-cache:/root/go/pkg/mod \
-v govulncheck-gobuild-cache:/root/.cache/go-build \
-v govulncheck-go-bin:/root/go/bin \
--env="GOCACHE=/root/.cache/go-build" \
--env="GOMODCACHE=/root/go/pkg/mod" \
"$(GO_BUILDER_IMAGE)" /bin/sh -c "which govulncheck || go install golang.org/x/vuln/cmd/govulncheck@latest && govulncheck ./..."
install-govulncheck: install-govulncheck:
which govulncheck || go install golang.org/x/vuln/cmd/govulncheck@latest which govulncheck || go install golang.org/x/vuln/cmd/govulncheck@latest

View File

@@ -1,11 +1,12 @@
# VictoriaMetrics # VictoriaMetrics
[![Latest Release](https://img.shields.io/github/v/release/VictoriaMetrics/VictoriaMetrics?sort=semver&label=&filter=!*-victorialogs&logo=github&labelColor=gray&color=gray&link=https%3A%2F%2Fgithub.com%2FVictoriaMetrics%2FVictoriaMetrics%2Freleases%2Flatest)](https://github.com/VictoriaMetrics/VictoriaMetrics/releases) [![Latest Release](https://img.shields.io/github/v/release/VictoriaMetrics/VictoriaMetrics?sort=semver&label=&filter=!*-victorialogs&logo=github&labelColor=gray&color=gray&link=https%3A%2F%2Fgithub.com%2FVictoriaMetrics%2FVictoriaMetrics%2Freleases%2Flatest)](https://github.com/VictoriaMetrics/VictoriaMetrics/releases)
[![Docker Pulls](https://img.shields.io/docker/pulls/victoriametrics/victoria-metrics?label=&logo=docker&logoColor=white&labelColor=2496ED&color=2496ED&link=https%3A%2F%2Fhub.docker.com%2Fr%2Fvictoriametrics%2Fvictoria-metrics)](https://hub.docker.com/u/victoriametrics) ![Docker Pulls](https://img.shields.io/docker/pulls/victoriametrics/victoria-metrics?label=&logo=docker&logoColor=white&labelColor=2496ED&color=2496ED&link=https%3A%2F%2Fhub.docker.com%2Fr%2Fvictoriametrics%2Fvictoria-metrics)
[![Go Report](https://goreportcard.com/badge/github.com/VictoriaMetrics/VictoriaMetrics?link=https%3A%2F%2Fgoreportcard.com%2Freport%2Fgithub.com%2FVictoriaMetrics%2FVictoriaMetrics)](https://goreportcard.com/report/github.com/VictoriaMetrics/VictoriaMetrics) [![Go Report](https://goreportcard.com/badge/github.com/VictoriaMetrics/VictoriaMetrics?link=https%3A%2F%2Fgoreportcard.com%2Freport%2Fgithub.com%2FVictoriaMetrics%2FVictoriaMetrics)](https://goreportcard.com/report/github.com/VictoriaMetrics/VictoriaMetrics)
[![Build Status](https://github.com/VictoriaMetrics/VictoriaMetrics/actions/workflows/build.yml/badge.svg?branch=master&link=https%3A%2F%2Fgithub.com%2FVictoriaMetrics%2FVictoriaMetrics%2Factions)](https://github.com/VictoriaMetrics/VictoriaMetrics/actions/workflows/build.yml) [![Build Status](https://github.com/VictoriaMetrics/VictoriaMetrics/actions/workflows/main.yml/badge.svg?branch=master&link=https%3A%2F%2Fgithub.com%2FVictoriaMetrics%2FVictoriaMetrics%2Factions)](https://github.com/VictoriaMetrics/VictoriaMetrics/actions/workflows/main.yml)
[![codecov](https://codecov.io/gh/VictoriaMetrics/VictoriaMetrics/branch/master/graph/badge.svg?link=https%3A%2F%2Fcodecov.io%2Fgh%2FVictoriaMetrics%2FVictoriaMetrics)](https://app.codecov.io/gh/VictoriaMetrics/VictoriaMetrics)
[![License](https://img.shields.io/github/license/VictoriaMetrics/VictoriaMetrics?labelColor=green&label=&link=https%3A%2F%2Fgithub.com%2FVictoriaMetrics%2FVictoriaMetrics%2Fblob%2Fmaster%2FLICENSE)](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/LICENSE) [![License](https://img.shields.io/github/license/VictoriaMetrics/VictoriaMetrics?labelColor=green&label=&link=https%3A%2F%2Fgithub.com%2FVictoriaMetrics%2FVictoriaMetrics%2Fblob%2Fmaster%2FLICENSE)](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/LICENSE)
[![Join Slack](https://img.shields.io/badge/Join%20Slack-4A154B?logo=slack)](https://slack.victoriametrics.com) ![Slack](https://img.shields.io/badge/Join-4A154B?logo=slack&link=https%3A%2F%2Fslack.victoriametrics.com)
[![X](https://img.shields.io/twitter/follow/VictoriaMetrics?style=flat&label=Follow&color=black&logo=x&labelColor=black&link=https%3A%2F%2Fx.com%2FVictoriaMetrics)](https://x.com/VictoriaMetrics/) [![X](https://img.shields.io/twitter/follow/VictoriaMetrics?style=flat&label=Follow&color=black&logo=x&labelColor=black&link=https%3A%2F%2Fx.com%2FVictoriaMetrics)](https://x.com/VictoriaMetrics/)
[![Reddit](https://img.shields.io/reddit/subreddit-subscribers/VictoriaMetrics?style=flat&label=Join&labelColor=red&logoColor=white&logo=reddit&link=https%3A%2F%2Fwww.reddit.com%2Fr%2FVictoriaMetrics)](https://www.reddit.com/r/VictoriaMetrics/) [![Reddit](https://img.shields.io/reddit/subreddit-subscribers/VictoriaMetrics?style=flat&label=Join&labelColor=red&logoColor=white&logo=reddit&link=https%3A%2F%2Fwww.reddit.com%2Fr%2FVictoriaMetrics)](https://www.reddit.com/r/VictoriaMetrics/)
@@ -15,21 +16,16 @@
<img src="docs/victoriametrics/logo.webp" width="300" alt="VictoriaMetrics logo"> <img src="docs/victoriametrics/logo.webp" width="300" alt="VictoriaMetrics logo">
</picture> </picture>
VictoriaMetrics is a fast, cost-effective, and scalable solution for monitoring and managing time series data. It delivers high performance and reliability, making it an ideal choice for businesses of all sizes. VictoriaMetrics is a fast, cost-saving, and scalable solution for monitoring and managing time series data. It delivers high performance and reliability, making it an ideal choice for businesses of all sizes.
Here are some resources and information about VictoriaMetrics: Here are some resources and information about VictoriaMetrics:
- **Case studies**: [Grammarly, Roblox, Wix, Spotify,...](https://docs.victoriametrics.com/victoriametrics/casestudies/). - Documentation: [docs.victoriametrics.com](https://docs.victoriametrics.com)
- **Available**: [Binary releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest), Docker images on [Docker Hub](https://hub.docker.com/r/victoriametrics/victoria-metrics/) and [Quay](https://quay.io/repository/victoriametrics/victoria-metrics), [Source code](https://github.com/VictoriaMetrics/VictoriaMetrics). - Case studies: [Grammarly, Roblox, Wix,...](https://docs.victoriametrics.com/victoriametrics/casestudies/).
- **Deployment types**: [Single-node version](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) and [Cluster version](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/) under [Apache License 2.0](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/LICENSE). - Available: [Binary releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest), Docker images on [Docker Hub](https://hub.docker.com/r/victoriametrics/victoria-metrics/) and [Quay](https://quay.io/repository/victoriametrics/victoria-metrics), [Source code](https://github.com/VictoriaMetrics/VictoriaMetrics)
- **Getting started:** Read [key concepts](https://docs.victoriametrics.com/victoriametrics/keyconcepts/) and follow the - Deployment types: [Single-node version](https://docs.victoriametrics.com/), [Cluster version](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/), and [Enterprise version](https://docs.victoriametrics.com/victoriametrics/enterprise/)
[quick start guide](https://docs.victoriametrics.com/victoriametrics/quick-start/). - Changelog: [CHANGELOG](https://docs.victoriametrics.com/victoriametrics/changelog/), and [How to upgrade](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-upgrade-victoriametrics)
- **Community**: [Slack](https://slack.victoriametrics.com/) (join via [Slack Inviter](https://slack.victoriametrics.com/)), [X (Twitter)](https://x.com/VictoriaMetrics), [YouTube](https://www.youtube.com/@VictoriaMetrics). See full list [here](https://docs.victoriametrics.com/victoriametrics/#community-and-contributions). - Community: [Slack](https://slack.victoriametrics.com/), [X (Twitter)](https://x.com/VictoriaMetrics), [LinkedIn](https://www.linkedin.com/company/victoriametrics/), [YouTube](https://www.youtube.com/@VictoriaMetrics)
- **Changelog**: The project evolves fast - check the [CHANGELOG](https://docs.victoriametrics.com/victoriametrics/changelog/) and [How to upgrade](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-upgrade-victoriametrics).
- **Enterprise support:** [Contact us](mailto:info@victoriametrics.com) for commercial support with additional [enterprise features](https://docs.victoriametrics.com/victoriametrics/enterprise/).
- **Enterprise releases:** Enterprise and [long-term support releases (LTS)](https://docs.victoriametrics.com/victoriametrics/lts-releases/) are publicly available and can be evaluated for free
using a [free trial license](https://victoriametrics.com/products/enterprise/trial/).
- **Security:** We have achieved [security certifications](https://victoriametrics.com/security/) for Database Software Development and Software-Based Monitoring Services.
Yes, we open-source both the single-node VictoriaMetrics and the cluster version. Yes, we open-source both the single-node VictoriaMetrics and the cluster version.

View File

@@ -1,4 +1,18 @@
# Security Policy # Security Policy
You can find out about our security policy and VictoriaMetrics version support on the [security page](https://docs.victoriametrics.com/victoriametrics/#security) in the documentation. ## Supported Versions
The following versions of VictoriaMetrics receive regular security fixes:
| Version | Supported |
|---------|--------------------|
| [latest release](https://docs.victoriametrics.com/victoriametrics/changelog/) | :white_check_mark: |
| v1.102.x [LTS line](https://docs.victoriametrics.com/victoriametrics/lts-releases/) | :white_check_mark: |
| v1.110.x [LTS line](https://docs.victoriametrics.com/victoriametrics/lts-releases/) | :white_check_mark: |
| other releases | :x: |
See [this page](https://victoriametrics.com/security/) for more details.
## Reporting a Vulnerability
Please report any security issues to <security@victoriametrics.com>.

View File

@@ -27,9 +27,6 @@ victoria-metrics-linux-ppc64le-prod:
victoria-metrics-linux-386-prod: victoria-metrics-linux-386-prod:
APP_NAME=victoria-metrics $(MAKE) app-via-docker-linux-386 APP_NAME=victoria-metrics $(MAKE) app-via-docker-linux-386
victoria-metrics-linux-s390x-prod:
APP_NAME=victoria-metrics $(MAKE) app-via-docker-linux-s390x
victoria-metrics-darwin-amd64-prod: victoria-metrics-darwin-amd64-prod:
APP_NAME=victoria-metrics $(MAKE) app-via-docker-darwin-amd64 APP_NAME=victoria-metrics $(MAKE) app-via-docker-darwin-amd64

View File

@@ -22,6 +22,7 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil" "github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape" "github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/pushmetrics" "github.com/VictoriaMetrics/VictoriaMetrics/lib/pushmetrics"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
) )
var ( var (
@@ -29,26 +30,23 @@ var (
useProxyProtocol = flagutil.NewArrayBool("httpListenAddr.useProxyProtocol", "Whether to use proxy protocol for connections accepted at the corresponding -httpListenAddr . "+ useProxyProtocol = flagutil.NewArrayBool("httpListenAddr.useProxyProtocol", "Whether to use proxy protocol for connections accepted at the corresponding -httpListenAddr . "+
"See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt . "+ "See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt . "+
"With enabled proxy protocol http server cannot serve regular /metrics endpoint. Use -pushmetrics.url for metrics pushing") "With enabled proxy protocol http server cannot serve regular /metrics endpoint. Use -pushmetrics.url for metrics pushing")
minScrapeInterval = flag.Duration("dedup.minScrapeInterval", 0, "Leave only the last sample in every time series per each discrete interval "+
"equal to -dedup.minScrapeInterval > 0. See also -streamAggr.dedupInterval and https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#deduplication")
dryRun = flag.Bool("dryRun", false, "Whether to check config files without running VictoriaMetrics. The following config files are checked: "+ dryRun = flag.Bool("dryRun", false, "Whether to check config files without running VictoriaMetrics. The following config files are checked: "+
"-promscrape.config, -relabelConfig and -streamAggr.config. Unknown config entries aren't allowed in -promscrape.config by default. "+ "-promscrape.config, -relabelConfig and -streamAggr.config. Unknown config entries aren't allowed in -promscrape.config by default. "+
"This can be changed with -promscrape.config.strictParse=false command-line flag") "This can be changed with -promscrape.config.strictParse=false command-line flag")
inmemoryDataFlushInterval = flag.Duration("inmemoryDataFlushInterval", 5*time.Second, "The interval for guaranteed saving of in-memory data to disk. "+
"The saved data survives unclean shutdowns such as OOM crash, hardware reset, SIGKILL, etc. "+
"Bigger intervals may help increase the lifetime of flash storage with limited write cycles (e.g. Raspberry PI). "+
"Smaller intervals increase disk IO load. Minimum supported value is 1s")
maxIngestionRate = flag.Int("maxIngestionRate", 0, "The maximum number of samples vmsingle can receive per second. Data ingestion is paused when the limit is exceeded. "+ maxIngestionRate = flag.Int("maxIngestionRate", 0, "The maximum number of samples vmsingle can receive per second. Data ingestion is paused when the limit is exceeded. "+
"By default there are no limits on samples ingestion rate.") "By default there are no limits on samples ingestion rate.")
vmselectMaxConcurrentRequests = flag.Int("search.maxConcurrentRequests", getDefaultMaxConcurrentRequests(), "The maximum number of concurrent search requests. "+ finalDedupScheduleInterval = flag.Duration("storage.finalDedupScheduleCheckInterval", time.Hour, "The interval for checking when final deduplication process should be started."+
"It shouldn't be high, since a single request can saturate all the CPU cores, while many concurrently executed requests may require high amounts of memory. "+ "Storage unconditionally adds 25% jitter to the interval value on each check evaluation."+
"See also -search.maxQueueDuration and -search.maxMemoryPerQuery") " Changing the interval to the bigger values may delay downsampling, deduplication for historical data."+
vmselectMaxQueueDuration = flag.Duration("search.maxQueueDuration", 10*time.Second, "The maximum time the request waits for execution when -search.maxConcurrentRequests "+ " See also https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#deduplication")
"limit is reached; see also -search.maxQueryDuration")
) )
// getDefaultMaxConcurrentRequests returns the default value for the
// -search.maxConcurrentRequests flag: twice the number of available CPUs,
// capped at 16.
func getDefaultMaxConcurrentRequests() int {
	// One request is able to keep every CPU core busy, so admitting many more
	// concurrent requests than there are cores only makes them compete for
	// CPU time that is not there.
	const maxDefault = 16

	limit := 2 * cgroup.AvailableCPUs()
	if limit > maxDefault {
		return maxDefault
	}
	return limit
}
func main() { func main() {
// VictoriaMetrics is optimized for reduced memory allocations, // VictoriaMetrics is optimized for reduced memory allocations,
// so it can run with the reduced GOGC in order to reduce the used memory, // so it can run with the reduced GOGC in order to reduce the used memory,
@@ -89,8 +87,14 @@ func main() {
} }
logger.Infof("starting VictoriaMetrics at %q...", listenAddrs) logger.Infof("starting VictoriaMetrics at %q...", listenAddrs)
startTime := time.Now() startTime := time.Now()
vmstorage.Init(*vmselectMaxConcurrentRequests, promql.ResetRollupResultCacheIfNeeded) storage.SetDedupInterval(*minScrapeInterval)
vmselect.Init(*vmselectMaxConcurrentRequests, *vmselectMaxQueueDuration) storage.SetDataFlushInterval(*inmemoryDataFlushInterval)
// The final deduplication check must not run more often than once per hour.
// The error message names the flag exactly as it is registered
// (-storage.finalDedupScheduleCheckInterval), so the operator can find it.
if *finalDedupScheduleInterval < time.Hour {
	logger.Fatalf("-storage.finalDedupScheduleCheckInterval cannot be smaller than 1 hour; got %s", *finalDedupScheduleInterval)
}
storage.SetFinalDedupScheduleInterval(*finalDedupScheduleInterval)
vmstorage.Init(promql.ResetRollupResultCacheIfNeeded)
vmselect.Init()
vminsertcommon.StartIngestionRateLimiter(*maxIngestionRate) vminsertcommon.StartIngestionRateLimiter(*maxIngestionRate)
vminsert.Init() vminsert.Init()
@@ -114,8 +118,8 @@ func main() {
logger.Fatalf("cannot stop the webservice: %s", err) logger.Fatalf("cannot stop the webservice: %s", err)
} }
logger.Infof("successfully shut down the webservice in %.3f seconds", time.Since(startTime).Seconds()) logger.Infof("successfully shut down the webservice in %.3f seconds", time.Since(startTime).Seconds())
vminsertcommon.StopIngestionRateLimiter()
vminsert.Stop() vminsert.Stop()
vminsertcommon.StopIngestionRateLimiter()
vmstorage.Stop() vmstorage.Stop()
vmselect.Stop() vmselect.Stop()
@@ -130,7 +134,6 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
} }
w.Header().Add("Content-Type", "text/html; charset=utf-8") w.Header().Add("Content-Type", "text/html; charset=utf-8")
fmt.Fprintf(w, "<h2>Single-node VictoriaMetrics</h2></br>") fmt.Fprintf(w, "<h2>Single-node VictoriaMetrics</h2></br>")
fmt.Fprintf(w, "Version %s<br>", buildinfo.Version)
fmt.Fprintf(w, "See docs at <a href='https://docs.victoriametrics.com/'>https://docs.victoriametrics.com/</a></br>") fmt.Fprintf(w, "See docs at <a href='https://docs.victoriametrics.com/'>https://docs.victoriametrics.com/</a></br>")
fmt.Fprintf(w, "Useful endpoints:</br>") fmt.Fprintf(w, "Useful endpoints:</br>")
httpserver.WriteAPIHelp(w, [][2]string{ httpserver.WriteAPIHelp(w, [][2]string{
@@ -166,7 +169,7 @@ func usage() {
const s = ` const s = `
victoria-metrics is a time series database and monitoring solution. victoria-metrics is a time series database and monitoring solution.
See the docs at https://docs.victoriametrics.com/victoriametrics/ See the docs at https://docs.victoriametrics.com/
` `
flagutil.Usage(s) flagutil.Usage(s)
} }

View File

@@ -10,11 +10,9 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil" "github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/decimal" "github.com/VictoriaMetrics/VictoriaMetrics/lib/decimal"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger" "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prommetadata"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb" "github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/prometheus" "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/prometheus"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage" "github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage/metricsmetadata"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timeserieslimits" "github.com/VictoriaMetrics/VictoriaMetrics/lib/timeserieslimits"
) )
@@ -29,9 +27,11 @@ var selfScraperWG sync.WaitGroup
func startSelfScraper() { func startSelfScraper() {
selfScraperStopCh = make(chan struct{}) selfScraperStopCh = make(chan struct{})
selfScraperWG.Go(func() { selfScraperWG.Add(1)
go func() {
defer selfScraperWG.Done()
selfScraper(*selfScrapeInterval) selfScraper(*selfScrapeInterval)
}) }()
} }
func stopSelfScraper() { func stopSelfScraper() {
@@ -48,7 +48,6 @@ func selfScraper(scrapeInterval time.Duration) {
var bb bytesutil.ByteBuffer var bb bytesutil.ByteBuffer
var rows prometheus.Rows var rows prometheus.Rows
var metadataRows prometheus.MetadataRows
var mrs []storage.MetricRow var mrs []storage.MetricRow
var labels []prompb.Label var labels []prompb.Label
t := time.NewTicker(scrapeInterval) t := time.NewTicker(scrapeInterval)
@@ -58,12 +57,8 @@ func selfScraper(scrapeInterval time.Duration) {
appmetrics.WritePrometheusMetrics(&bb) appmetrics.WritePrometheusMetrics(&bb)
s := bytesutil.ToUnsafeString(bb.B) s := bytesutil.ToUnsafeString(bb.B)
rows.Reset() rows.Reset()
// Parse metrics and optionally metadata when enabled // VictoriaMetrics components don't expose metadata yet, only need to parse samples
if prommetadata.IsEnabled() { rows.UnmarshalWithErrLogger(s, nil)
rows, metadataRows = prometheus.UnmarshalWithMetadata(rows, metadataRows, s, nil)
} else {
rows.UnmarshalWithErrLogger(s, nil)
}
mrs = mrs[:0] mrs = mrs[:0]
for i := range rows.Rows { for i := range rows.Rows {
r := &rows.Rows[i] r := &rows.Rows[i]
@@ -93,22 +88,9 @@ func selfScraper(scrapeInterval time.Duration) {
mr.Value = r.Value mr.Value = r.Value
} }
} }
if err := vmstorage.VMInsertAPI.WriteRows(mrs); err != nil { if err := vmstorage.AddRows(mrs); err != nil {
logger.Errorf("cannot store self-scraped metrics: %s", err) logger.Errorf("cannot store self-scraped metrics: %s", err)
} }
if len(metadataRows.Rows) > 0 {
mms := make([]metricsmetadata.Row, 0, len(metadataRows.Rows))
for _, mm := range metadataRows.Rows {
mms = append(mms, metricsmetadata.Row{
MetricFamilyName: bytesutil.ToUnsafeBytes(mm.Metric),
Help: bytesutil.ToUnsafeBytes(mm.Help),
Type: mm.Type,
})
}
if err := vmstorage.VMInsertAPI.WriteMetadata(mms); err != nil {
logger.Errorf("cannot store self-scraped metrics metadata: %s", err)
}
}
} }
for { for {
select { select {

View File

@@ -33,13 +33,13 @@ func PopulateTimeTpl(b []byte, tGlobal time.Time) []byte {
} }
switch strings.TrimSpace(parts[0]) { switch strings.TrimSpace(parts[0]) {
case `TIME_S`: case `TIME_S`:
return fmt.Appendf(nil, "%d", t.Unix()) return []byte(fmt.Sprintf("%d", t.Unix()))
case `TIME_MSZ`: case `TIME_MSZ`:
return fmt.Appendf(nil, "%d", t.Unix()*1e3) return []byte(fmt.Sprintf("%d", t.Unix()*1e3))
case `TIME_MS`: case `TIME_MS`:
return fmt.Appendf(nil, "%d", timeToMillis(t)) return []byte(fmt.Sprintf("%d", timeToMillis(t)))
case `TIME_NS`: case `TIME_NS`:
return fmt.Appendf(nil, "%d", t.UnixNano()) return []byte(fmt.Sprintf("%d", t.UnixNano()))
default: default:
log.Fatalf("unknown time pattern %s in %s", parts[0], repl) log.Fatalf("unknown time pattern %s in %s", parts[0], repl)
} }

View File

@@ -27,9 +27,6 @@ vmagent-linux-ppc64le-prod:
vmagent-linux-386-prod: vmagent-linux-386-prod:
APP_NAME=vmagent $(MAKE) app-via-docker-linux-386 APP_NAME=vmagent $(MAKE) app-via-docker-linux-386
vmagent-linux-s390x-prod:
APP_NAME=vmagent $(MAKE) app-via-docker-linux-s390x
vmagent-darwin-amd64-prod: vmagent-darwin-amd64-prod:
APP_NAME=vmagent $(MAKE) app-via-docker-darwin-amd64 APP_NAME=vmagent $(MAKE) app-via-docker-darwin-amd64

View File

@@ -49,11 +49,6 @@ func insertRows(at *auth.Token, sketches []*datadogsketches.Sketch, extraLabels
Name: "__name__", Name: "__name__",
Value: m.Name, Value: m.Name,
}) })
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10557
labels = append(labels, prompb.Label{
Name: "host",
Value: sketch.Host,
})
for _, label := range m.Labels { for _, label := range m.Labels {
labels = append(labels, prompb.Label{ labels = append(labels, prompb.Label{
Name: label.Name, Name: label.Name,
@@ -62,6 +57,9 @@ func insertRows(at *auth.Token, sketches []*datadogsketches.Sketch, extraLabels
} }
for _, tag := range sketch.Tags { for _, tag := range sketch.Tags {
name, value := datadogutil.SplitTag(tag) name, value := datadogutil.SplitTag(tag)
if name == "host" {
name = "exported_host"
}
labels = append(labels, prompb.Label{ labels = append(labels, prompb.Label{
Name: name, Name: name,
Value: value, Value: value,

View File

@@ -27,7 +27,6 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/promremotewrite" "github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/promremotewrite"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite" "github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/vmimport" "github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/vmimport"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/zabbixconnector"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth" "github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo" "github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil" "github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
@@ -75,7 +74,7 @@ var (
"See also -opentsdbHTTPListenAddr.useProxyProtocol") "See also -opentsdbHTTPListenAddr.useProxyProtocol")
opentsdbHTTPUseProxyProtocol = flag.Bool("opentsdbHTTPListenAddr.useProxyProtocol", false, "Whether to use proxy protocol for connections accepted "+ opentsdbHTTPUseProxyProtocol = flag.Bool("opentsdbHTTPListenAddr.useProxyProtocol", false, "Whether to use proxy protocol for connections accepted "+
"at -opentsdbHTTPListenAddr . See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt") "at -opentsdbHTTPListenAddr . See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt")
configAuthKey = flagutil.NewPassword("configAuthKey", "Authorization key for accessing /config and /remotewrite-.*-config pages. It must be passed via authKey query arg. It overrides -httpAuth.*") configAuthKey = flagutil.NewPassword("configAuthKey", "Authorization key for accessing /config page. It must be passed via authKey query arg. It overrides -httpAuth.*")
reloadAuthKey = flagutil.NewPassword("reloadAuthKey", "Auth key for /-/reload http endpoint. It must be passed via authKey query arg. It overrides -httpAuth.*") reloadAuthKey = flagutil.NewPassword("reloadAuthKey", "Auth key for /-/reload http endpoint. It must be passed via authKey query arg. It overrides -httpAuth.*")
dryRun = flag.Bool("dryRun", false, "Whether to check config files without running vmagent. The following files are checked: "+ dryRun = flag.Bool("dryRun", false, "Whether to check config files without running vmagent. The following files are checked: "+
"-promscrape.config, -remoteWrite.relabelConfig, -remoteWrite.urlRelabelConfig, -remoteWrite.streamAggr.config . "+ "-promscrape.config, -remoteWrite.relabelConfig, -remoteWrite.urlRelabelConfig, -remoteWrite.streamAggr.config . "+
@@ -83,9 +82,6 @@ var (
maxLabelsPerTimeseries = flag.Int("maxLabelsPerTimeseries", 0, "The maximum number of labels per time series to be accepted. Series with superfluous labels are ignored. In this case the vm_rows_ignored_total{reason=\"too_many_labels\"} metric at /metrics page is incremented") maxLabelsPerTimeseries = flag.Int("maxLabelsPerTimeseries", 0, "The maximum number of labels per time series to be accepted. Series with superfluous labels are ignored. In this case the vm_rows_ignored_total{reason=\"too_many_labels\"} metric at /metrics page is incremented")
maxLabelNameLen = flag.Int("maxLabelNameLen", 0, "The maximum length of label names in the accepted time series. Series with longer label name are ignored. In this case the vm_rows_ignored_total{reason=\"too_long_label_name\"} metric at /metrics page is incremented") maxLabelNameLen = flag.Int("maxLabelNameLen", 0, "The maximum length of label names in the accepted time series. Series with longer label name are ignored. In this case the vm_rows_ignored_total{reason=\"too_long_label_name\"} metric at /metrics page is incremented")
maxLabelValueLen = flag.Int("maxLabelValueLen", 0, "The maximum length of label values in the accepted time series. Series with longer label value are ignored. In this case the vm_rows_ignored_total{reason=\"too_long_label_value\"} metric at /metrics page is incremented") maxLabelValueLen = flag.Int("maxLabelValueLen", 0, "The maximum length of label values in the accepted time series. Series with longer label value are ignored. In this case the vm_rows_ignored_total{reason=\"too_long_label_value\"} metric at /metrics page is incremented")
enableMultitenancyViaHeaders = flag.Bool("enableMultitenancyViaHeaders", false, "Enables multitenancy via HTTP headers. "+
"See https://docs.victoriametrics.com/victoriametrics/vmagent/#multitenancy")
) )
var ( var (
@@ -118,7 +114,6 @@ func main() {
remotewrite.InitSecretFlags() remotewrite.InitSecretFlags()
buildinfo.Init() buildinfo.Init()
logger.Init() logger.Init()
opentelemetry.Init()
timeserieslimits.Init(*maxLabelsPerTimeseries, *maxLabelNameLen, *maxLabelValueLen) timeserieslimits.Init(*maxLabelsPerTimeseries, *maxLabelNameLen, *maxLabelValueLen)
if promscrape.IsDryRun() { if promscrape.IsDryRun() {
@@ -220,7 +215,7 @@ func getOpenTSDBHTTPInsertHandler() func(req *http.Request) error {
} }
return func(req *http.Request) error { return func(req *http.Request) error {
path := strings.ReplaceAll(req.URL.Path, "//", "/") path := strings.ReplaceAll(req.URL.Path, "//", "/")
at, err := getAuthTokenFromPath(path, req.Header) at, err := getAuthTokenFromPath(path)
if err != nil { if err != nil {
return fmt.Errorf("cannot obtain auth token from path %q: %w", path, err) return fmt.Errorf("cannot obtain auth token from path %q: %w", path, err)
} }
@@ -228,15 +223,8 @@ func getOpenTSDBHTTPInsertHandler() func(req *http.Request) error {
} }
} }
func parsePath(path string, header http.Header) (*httpserver.Path, error) { func getAuthTokenFromPath(path string) (*auth.Token, error) {
if *enableMultitenancyViaHeaders { p, err := httpserver.ParsePath(path)
return httpserver.ParsePathAndHeaders(path, header)
}
return httpserver.ParsePath(path)
}
func getAuthTokenFromPath(path string, header http.Header) (*auth.Token, error) {
p, err := parsePath(path, header)
if err != nil { if err != nil {
return nil, fmt.Errorf("cannot parse multitenant path: %w", err) return nil, fmt.Errorf("cannot parse multitenant path: %w", err)
} }
@@ -256,7 +244,6 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
} }
w.Header().Add("Content-Type", "text/html; charset=utf-8") w.Header().Add("Content-Type", "text/html; charset=utf-8")
fmt.Fprintf(w, "<h2>vmagent</h2>") fmt.Fprintf(w, "<h2>vmagent</h2>")
fmt.Fprintf(w, "Version %s<br>", buildinfo.Version)
fmt.Fprintf(w, "See docs at <a href='https://docs.victoriametrics.com/victoriametrics/vmagent/'>https://docs.victoriametrics.com/victoriametrics/vmagent/</a></br>") fmt.Fprintf(w, "See docs at <a href='https://docs.victoriametrics.com/victoriametrics/vmagent/'>https://docs.victoriametrics.com/victoriametrics/vmagent/</a></br>")
fmt.Fprintf(w, "Useful endpoints:</br>") fmt.Fprintf(w, "Useful endpoints:</br>")
httpserver.WriteAPIHelp(w, [][2]string{ httpserver.WriteAPIHelp(w, [][2]string{
@@ -265,8 +252,6 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
{"metric-relabel-debug", "debug metric relabeling"}, {"metric-relabel-debug", "debug metric relabeling"},
{"api/v1/targets", "advanced information about discovered targets in JSON format"}, {"api/v1/targets", "advanced information about discovered targets in JSON format"},
{"config", "-promscrape.config contents"}, {"config", "-promscrape.config contents"},
{"remotewrite-relabel-config", "-remoteWrite.relabelConfig contents"},
{"remotewrite-url-relabel-config", "-remoteWrite.urlRelabelConfig contents"},
{"metrics", "available service metrics"}, {"metrics", "available service metrics"},
{"flags", "command-line flags"}, {"flags", "command-line flags"},
{"-/reload", "reload configuration"}, {"-/reload", "reload configuration"},
@@ -363,17 +348,6 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
} }
firehose.WriteSuccessResponse(w, r) firehose.WriteSuccessResponse(w, r)
return true return true
case "/zabbixconnector/api/v1/history":
zabbixconnectorHistoryRequests.Inc()
if err := zabbixconnector.InsertHandlerForHTTP(nil, r); err != nil {
zabbixconnectorHistoryErrors.Inc()
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(http.StatusBadRequest)
fmt.Fprintf(w, `{"error":%q}`, err.Error())
return true
}
w.WriteHeader(http.StatusOK)
return true
case "/newrelic": case "/newrelic":
newrelicCheckRequest.Inc() newrelicCheckRequest.Inc()
w.Header().Set("Content-Type", "application/json") w.Header().Set("Content-Type", "application/json")
@@ -503,42 +477,6 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
promscrape.WriteConfigData(&bb) promscrape.WriteConfigData(&bb)
fmt.Fprintf(w, `{"status":"success","data":{"yaml":%s}}`, stringsutil.JSONString(string(bb.B))) fmt.Fprintf(w, `{"status":"success","data":{"yaml":%s}}`, stringsutil.JSONString(string(bb.B)))
return true return true
case "/remotewrite-relabel-config":
if !httpserver.CheckAuthFlag(w, r, configAuthKey) {
return true
}
remoteWriteRelabelConfigRequests.Inc()
w.Header().Set("Content-Type", "text/plain; charset=utf-8")
remotewrite.WriteRelabelConfigData(w)
return true
case "/api/v1/status/remotewrite-relabel-config":
if !httpserver.CheckAuthFlag(w, r, configAuthKey) {
return true
}
remoteWriteStatusRelabelConfigRequests.Inc()
w.Header().Set("Content-Type", "application/json")
var bb bytesutil.ByteBuffer
remotewrite.WriteRelabelConfigData(&bb)
fmt.Fprintf(w, `{"status":"success","data":{"yaml":%s}}`, stringsutil.JSONString(string(bb.B)))
return true
case "/remotewrite-url-relabel-config":
if !httpserver.CheckAuthFlag(w, r, configAuthKey) {
return true
}
remoteWriteURLRelabelConfigRequests.Inc()
w.Header().Set("Content-Type", "text/plain; charset=utf-8")
remotewrite.WriteURLRelabelConfigData(w)
return true
case "/api/v1/status/remotewrite-url-relabel-config":
if !httpserver.CheckAuthFlag(w, r, configAuthKey) {
return true
}
remoteWriteStatusURLRelabelConfigRequests.Inc()
w.Header().Set("Content-Type", "application/json")
var bb bytesutil.ByteBuffer
remotewrite.WriteURLRelabelConfigData(&bb)
fmt.Fprintf(w, `{"status":"success","data":{"yaml":%s}}`, stringsutil.JSONString(string(bb.B)))
return true
case "/prometheus/-/reload", "/-/reload": case "/prometheus/-/reload", "/-/reload":
if !httpserver.CheckAuthFlag(w, r, reloadAuthKey) { if !httpserver.CheckAuthFlag(w, r, reloadAuthKey) {
return true return true
@@ -570,15 +508,14 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
} }
func processMultitenantRequest(w http.ResponseWriter, r *http.Request, path string) bool { func processMultitenantRequest(w http.ResponseWriter, r *http.Request, path string) bool {
p, err := parsePath(path, r.Header) p, err := httpserver.ParsePath(path)
if err != nil { if err != nil {
// Cannot parse multitenant path. Skip it - probably it will be parsed later. // Cannot parse multitenant path. Skip it - probably it will be parsed later.
return false return false
} }
if p.Prefix != "insert" { if p.Prefix != "insert" {
// processMultitenantRequest is called for all unmatched path variants, httpserver.Errorf(w, r, `unsupported multitenant prefix: %q; expected "insert"`, p.Prefix)
// but we should try parsing only /insert prefixed to avoid catching all possible paths. return true
return false
} }
at, err := auth.NewTokenPossibleMultitenant(p.AuthToken) at, err := auth.NewTokenPossibleMultitenant(p.AuthToken)
if err != nil { if err != nil {
@@ -669,17 +606,6 @@ func processMultitenantRequest(w http.ResponseWriter, r *http.Request, path stri
} }
firehose.WriteSuccessResponse(w, r) firehose.WriteSuccessResponse(w, r)
return true return true
case "zabbixconnector/api/v1/history":
zabbixconnectorHistoryRequests.Inc()
if err := zabbixconnector.InsertHandlerForHTTP(at, r); err != nil {
zabbixconnectorHistoryErrors.Inc()
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(http.StatusBadRequest)
fmt.Fprintf(w, `{"error":%q}`, err.Error())
return true
}
w.WriteHeader(http.StatusOK)
return true
case "newrelic": case "newrelic":
newrelicCheckRequest.Inc() newrelicCheckRequest.Inc()
w.Header().Set("Content-Type", "application/json") w.Header().Set("Content-Type", "application/json")
@@ -801,9 +727,6 @@ var (
opentelemetryPushRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/opentelemetry/v1/metrics", protocol="opentelemetry"}`) opentelemetryPushRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/opentelemetry/v1/metrics", protocol="opentelemetry"}`)
opentelemetryPushErrors = metrics.NewCounter(`vmagent_http_request_errors_total{path="/opentelemetry/v1/metrics", protocol="opentelemetry"}`) opentelemetryPushErrors = metrics.NewCounter(`vmagent_http_request_errors_total{path="/opentelemetry/v1/metrics", protocol="opentelemetry"}`)
zabbixconnectorHistoryRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/zabbixconnector/api/v1/history", protocol="zabbixconnector"}`)
zabbixconnectorHistoryErrors = metrics.NewCounter(`vmagent_http_request_errors_total{path="/zabbixconnector/api/v1/history", protocol="zabbixconnector"}`)
newrelicWriteRequests = metrics.NewCounter(`vm_http_requests_total{path="/newrelic/infra/v2/metrics/events/bulk", protocol="newrelic"}`) newrelicWriteRequests = metrics.NewCounter(`vm_http_requests_total{path="/newrelic/infra/v2/metrics/events/bulk", protocol="newrelic"}`)
newrelicWriteErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/newrelic/infra/v2/metrics/events/bulk", protocol="newrelic"}`) newrelicWriteErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/newrelic/infra/v2/metrics/events/bulk", protocol="newrelic"}`)
@@ -824,12 +747,6 @@ var (
promscrapeConfigRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/config"}`) promscrapeConfigRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/config"}`)
promscrapeStatusConfigRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/api/v1/status/config"}`) promscrapeStatusConfigRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/api/v1/status/config"}`)
remoteWriteRelabelConfigRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/remotewrite-relabel-config"}`)
remoteWriteStatusRelabelConfigRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/api/v1/status/remotewrite-relabel-config"}`)
remoteWriteURLRelabelConfigRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/remotewrite-url-relabel-config"}`)
remoteWriteStatusURLRelabelConfigRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/api/v1/status/remotewrite-url-relabel-config"}`)
promscrapeConfigReloadRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/-/reload"}`) promscrapeConfigReloadRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/-/reload"}`)
) )

View File

@@ -78,7 +78,7 @@ func insertRows(at *auth.Token, rows []newrelic.Row, extraLabels []prompb.Label)
if !remotewrite.TryPush(at, &ctx.WriteRequest) { if !remotewrite.TryPush(at, &ctx.WriteRequest) {
return remotewrite.ErrQueueFullHTTPRetry return remotewrite.ErrQueueFullHTTPRetry
} }
rowsInserted.Add(samplesCount) rowsInserted.Add(len(rows))
if at != nil { if at != nil {
rowsTenantInserted.Get(at).Add(samplesCount) rowsTenantInserted.Get(at).Add(samplesCount)
} }

View File

@@ -2,14 +2,13 @@ package opentelemetry
import ( import (
"fmt" "fmt"
"io"
"net/http" "net/http"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common" "github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite" "github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth" "github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prommetadata"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb" "github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/opentelemetry/firehose" "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/opentelemetry/firehose"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/opentelemetry/stream" "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/opentelemetry/stream"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/protoparserutil" "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/protoparserutil"
@@ -25,18 +24,6 @@ var (
rowsPerInsert = metrics.NewHistogram(`vmagent_rows_per_insert{type="opentelemetry"}`) rowsPerInsert = metrics.NewHistogram(`vmagent_rows_per_insert{type="opentelemetry"}`)
) )
// Init prepares the opentelemetry package for use by calling stream.InitDecodeOptions.
//
// Init must be called after flag.Parse and before using the opentelemetry package.
func Init() {
stream.InitDecodeOptions()
}
// InsertHandlerForReader processes metrics from given reader.
//
// The data read from r is parsed by stream.ParseStream using the given encoding;
// every parsed batch of time series and metadata is passed to insertRows
// on behalf of at, without extra labels.
func InsertHandlerForReader(at *auth.Token, r io.Reader, encoding string) error {
	pushBatch := func(tss []prompb.TimeSeries, mms []prompb.MetricMetadata) error {
		return insertRows(at, tss, mms, nil)
	}
	return stream.ParseStream(r, encoding, nil, pushBatch)
}
// InsertHandler processes opentelemetry metrics. // InsertHandler processes opentelemetry metrics.
func InsertHandler(at *auth.Token, req *http.Request) error { func InsertHandler(at *auth.Token, req *http.Request) error {
extraLabels, err := protoparserutil.GetExtraLabels(req) extraLabels, err := protoparserutil.GetExtraLabels(req)
@@ -81,7 +68,17 @@ func insertRows(at *auth.Token, tss []prompb.TimeSeries, mms []prompb.MetricMeta
ctx.WriteRequest.Timeseries = tssDst ctx.WriteRequest.Timeseries = tssDst
var metadataTotal int var metadataTotal int
if prommetadata.IsEnabled() { if promscrape.IsMetadataEnabled() {
var accountID, projectID uint32
if at != nil {
accountID = at.AccountID
projectID = at.ProjectID
for i := range mms {
mm := &mms[i]
mm.AccountID = accountID
mm.ProjectID = projectID
}
}
ctx.WriteRequest.Metadata = mms ctx.WriteRequest.Metadata = mms
metadataTotal = len(mms) metadataTotal = len(mms)
} }

View File

@@ -7,8 +7,8 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite" "github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth" "github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver" "github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prommetadata"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb" "github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/prometheus" "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/prometheus"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/prometheus/stream" "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/prometheus/stream"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/protoparserutil" "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/protoparserutil"
@@ -36,7 +36,7 @@ func InsertHandler(at *auth.Token, req *http.Request) error {
return err return err
} }
encoding := req.Header.Get("Content-Encoding") encoding := req.Header.Get("Content-Encoding")
return stream.Parse(req.Body, defaultTimestamp, encoding, true, prommetadata.IsEnabled(), func(rows []prometheus.Row, mms []prometheus.Metadata) error { return stream.Parse(req.Body, defaultTimestamp, encoding, true, promscrape.IsMetadataEnabled(), func(rows []prometheus.Row, mms []prometheus.Metadata) error {
return insertRows(at, rows, mms, extraLabels) return insertRows(at, rows, mms, extraLabels)
}, func(s string) { }, func(s string) {
httpserver.LogError(req, s) httpserver.LogError(req, s)
@@ -75,6 +75,11 @@ func insertRows(at *auth.Token, rows []prometheus.Row, mms []prometheus.Metadata
Samples: samples[len(samples)-1:], Samples: samples[len(samples)-1:],
}) })
} }
var accountID, projectID uint32
if at != nil {
accountID = at.AccountID
projectID = at.ProjectID
}
for i := range mms { for i := range mms {
mm := &mms[i] mm := &mms[i]
mmsDst = append(mmsDst, prompb.MetricMetadata{ mmsDst = append(mmsDst, prompb.MetricMetadata{
@@ -83,6 +88,8 @@ func insertRows(at *auth.Token, rows []prometheus.Row, mms []prometheus.Metadata
Type: mm.Type, Type: mm.Type,
// there is no unit in Prometheus exposition formats // there is no unit in Prometheus exposition formats
AccountID: accountID,
ProjectID: projectID,
}) })
} }
ctx.WriteRequest.Timeseries = tssDst ctx.WriteRequest.Timeseries = tssDst

View File

@@ -6,8 +6,8 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common" "github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite" "github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth" "github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prommetadata"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb" "github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/promremotewrite/stream" "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/promremotewrite/stream"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/protoparserutil" "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/protoparserutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics" "github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
@@ -71,7 +71,12 @@ func insertRows(at *auth.Token, timeseries []prompb.TimeSeries, mms []prompb.Met
ctx.WriteRequest.Timeseries = tssDst ctx.WriteRequest.Timeseries = tssDst
var metadataTotal int var metadataTotal int
if prommetadata.IsEnabled() { if promscrape.IsMetadataEnabled() {
var accountID, projectID uint32
if at != nil {
accountID = at.AccountID
projectID = at.ProjectID
}
for i := range mms { for i := range mms {
mm := &mms[i] mm := &mms[i]
mmsDst = append(mmsDst, prompb.MetricMetadata{ mmsDst = append(mmsDst, prompb.MetricMetadata{
@@ -80,8 +85,8 @@ func insertRows(at *auth.Token, timeseries []prompb.TimeSeries, mms []prompb.Met
Type: mm.Type, Type: mm.Type,
Unit: mm.Unit, Unit: mm.Unit,
AccountID: mm.AccountID, AccountID: accountID,
ProjectID: mm.ProjectID, ProjectID: projectID,
}) })
} }
ctx.WriteRequest.Metadata = mmsDst ctx.WriteRequest.Metadata = mmsDst

View File

@@ -2,7 +2,6 @@ package remotewrite
import ( import (
"bytes" "bytes"
"context"
"errors" "errors"
"fmt" "fmt"
"io" "io"
@@ -14,18 +13,19 @@ import (
"sync/atomic" "sync/atomic"
"time" "time"
"github.com/VictoriaMetrics/metrics"
"github.com/golang/snappy"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/awsapi" "github.com/VictoriaMetrics/VictoriaMetrics/lib/awsapi"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding" "github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding/zstd"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil" "github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httputil" "github.com/VictoriaMetrics/VictoriaMetrics/lib/httputil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger" "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/persistentqueue" "github.com/VictoriaMetrics/VictoriaMetrics/lib/persistentqueue"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth" "github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/ratelimiter" "github.com/VictoriaMetrics/VictoriaMetrics/lib/ratelimiter"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timerpool"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timeutil" "github.com/VictoriaMetrics/VictoriaMetrics/lib/timeutil"
"github.com/VictoriaMetrics/metrics"
"github.com/golang/snappy"
) )
var ( var (
@@ -60,8 +60,6 @@ var (
"Multiple headers must be delimited by '^^': -remoteWrite.headers='header1:value1^^header2:value2'") "Multiple headers must be delimited by '^^': -remoteWrite.headers='header1:value1^^header2:value2'")
basicAuthUsername = flagutil.NewArrayString("remoteWrite.basicAuth.username", "Optional basic auth username to use for the corresponding -remoteWrite.url") basicAuthUsername = flagutil.NewArrayString("remoteWrite.basicAuth.username", "Optional basic auth username to use for the corresponding -remoteWrite.url")
basicAuthUsernameFile = flagutil.NewArrayString("remoteWrite.basicAuth.usernameFile", "Optional path to basic auth username to use for the corresponding -remoteWrite.url. "+
"The file is re-read every second")
basicAuthPassword = flagutil.NewArrayString("remoteWrite.basicAuth.password", "Optional basic auth password to use for the corresponding -remoteWrite.url") basicAuthPassword = flagutil.NewArrayString("remoteWrite.basicAuth.password", "Optional basic auth password to use for the corresponding -remoteWrite.url")
basicAuthPasswordFile = flagutil.NewArrayString("remoteWrite.basicAuth.passwordFile", "Optional path to basic auth password to use for the corresponding -remoteWrite.url. "+ basicAuthPasswordFile = flagutil.NewArrayString("remoteWrite.basicAuth.passwordFile", "Optional path to basic auth password to use for the corresponding -remoteWrite.url. "+
"The file is re-read every second") "The file is re-read every second")
@@ -205,10 +203,14 @@ func (c *client) init(argIdx, concurrency int, sanitizedURL string) {
c.retriesCount = metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_retries_count_total{url=%q}`, c.sanitizedURL)) c.retriesCount = metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_retries_count_total{url=%q}`, c.sanitizedURL))
c.sendDuration = metrics.GetOrCreateFloatCounter(fmt.Sprintf(`vmagent_remotewrite_send_duration_seconds_total{url=%q}`, c.sanitizedURL)) c.sendDuration = metrics.GetOrCreateFloatCounter(fmt.Sprintf(`vmagent_remotewrite_send_duration_seconds_total{url=%q}`, c.sanitizedURL))
metrics.GetOrCreateGauge(fmt.Sprintf(`vmagent_remotewrite_queues{url=%q}`, c.sanitizedURL), func() float64 { metrics.GetOrCreateGauge(fmt.Sprintf(`vmagent_remotewrite_queues{url=%q}`, c.sanitizedURL), func() float64 {
return float64(concurrency) return float64(*queues)
}) })
for range concurrency { for i := 0; i < concurrency; i++ {
c.wg.Go(c.runWorker) c.wg.Add(1)
go func() {
defer c.wg.Done()
c.runWorker()
}()
} }
logger.Infof("initialized client for -remoteWrite.url=%q", c.sanitizedURL) logger.Infof("initialized client for -remoteWrite.url=%q", c.sanitizedURL)
} }
@@ -226,14 +228,12 @@ func getAuthConfig(argIdx int) (*promauth.Config, error) {
hdrs = strings.Split(headersValue, "^^") hdrs = strings.Split(headersValue, "^^")
} }
username := basicAuthUsername.GetOptionalArg(argIdx) username := basicAuthUsername.GetOptionalArg(argIdx)
usernameFile := basicAuthUsernameFile.GetOptionalArg(argIdx)
password := basicAuthPassword.GetOptionalArg(argIdx) password := basicAuthPassword.GetOptionalArg(argIdx)
passwordFile := basicAuthPasswordFile.GetOptionalArg(argIdx) passwordFile := basicAuthPasswordFile.GetOptionalArg(argIdx)
var basicAuthCfg *promauth.BasicAuthConfig var basicAuthCfg *promauth.BasicAuthConfig
if username != "" || usernameFile != "" || password != "" || passwordFile != "" { if username != "" || password != "" || passwordFile != "" {
basicAuthCfg = &promauth.BasicAuthConfig{ basicAuthCfg = &promauth.BasicAuthConfig{
Username: username, Username: username,
UsernameFile: usernameFile,
Password: promauth.NewSecret(password), Password: promauth.NewSecret(password),
PasswordFile: passwordFile, PasswordFile: passwordFile,
} }
@@ -295,7 +295,7 @@ func getAWSAPIConfig(argIdx int) (*awsapi.Config, error) {
accessKey := awsAccessKey.GetOptionalArg(argIdx) accessKey := awsAccessKey.GetOptionalArg(argIdx)
secretKey := awsSecretKey.GetOptionalArg(argIdx) secretKey := awsSecretKey.GetOptionalArg(argIdx)
service := awsService.GetOptionalArg(argIdx) service := awsService.GetOptionalArg(argIdx)
cfg, err := awsapi.NewConfig(ec2Endpoint, stsEndpoint, region, roleARN, accessKey, secretKey, service, "") cfg, err := awsapi.NewConfig(ec2Endpoint, stsEndpoint, region, roleARN, accessKey, secretKey, service)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@@ -311,6 +311,11 @@ func (c *client) runWorker() {
if !ok { if !ok {
return return
} }
if len(block) == 0 {
// skip empty data blocks from sending
// see https://github.com/VictoriaMetrics/VictoriaMetrics/pull/6241
continue
}
go func() { go func() {
startTime := time.Now() startTime := time.Now()
ch <- c.sendBlock(block) ch <- c.sendBlock(block)
@@ -326,20 +331,15 @@ func (c *client) runWorker() {
c.fq.MustWriteBlockIgnoreDisabledPQ(block) c.fq.MustWriteBlockIgnoreDisabledPQ(block)
return return
case <-c.stopCh: case <-c.stopCh:
// c must be stopped. Wait up to 5 seconds for the in-flight request to complete. // c must be stopped. Wait for a while in the hope the block will be sent.
// If it succeeds, drain the remaining in-memory queue before returning. graceDuration := 5 * time.Second
stopCtx, cancel := context.WithTimeout(context.Background(), time.Second*5)
defer cancel()
select { select {
case ok := <-ch: case ok := <-ch:
if !ok { if !ok {
// Return unsent block to the queue. // Return unsent block to the queue.
c.fq.MustWriteBlockIgnoreDisabledPQ(block) c.fq.MustWriteBlockIgnoreDisabledPQ(block)
} else {
c.drainInMemoryQueue(stopCtx, block[:0])
} }
case <-stopCtx.Done(): case <-time.After(graceDuration):
// Return unsent block to the queue. // Return unsent block to the queue.
c.fq.MustWriteBlockIgnoreDisabledPQ(block) c.fq.MustWriteBlockIgnoreDisabledPQ(block)
} }
@@ -410,7 +410,8 @@ func (c *client) newRequest(url string, body []byte) (*http.Request, error) {
// Otherwise, it tries sending the block to remote storage indefinitely. // Otherwise, it tries sending the block to remote storage indefinitely.
func (c *client) sendBlockHTTP(block []byte) bool { func (c *client) sendBlockHTTP(block []byte) bool {
c.rl.Register(len(block)) c.rl.Register(len(block))
bt := timeutil.NewBackoffTimer(c.retryMinInterval, c.retryMaxInterval) maxRetryDuration := timeutil.AddJitterToDuration(c.retryMaxInterval)
retryDuration := timeutil.AddJitterToDuration(c.retryMinInterval)
retriesCount := 0 retriesCount := 0
again: again:
@@ -419,10 +420,19 @@ again:
c.requestDuration.UpdateDuration(startTime) c.requestDuration.UpdateDuration(startTime)
if err != nil { if err != nil {
c.errorsCount.Inc() c.errorsCount.Inc()
remoteWriteRetryLogger.Warnf("couldn't send a block with size %d bytes to %q: %s; re-sending the block in %s", retryDuration *= 2
len(block), c.sanitizedURL, err, bt.CurrentDelay()) if retryDuration > maxRetryDuration {
if !bt.Wait(c.stopCh) { retryDuration = maxRetryDuration
}
remoteWriteRetryLogger.Warnf("couldn't send a block with size %d bytes to %q: %s; re-sending the block in %.3f seconds",
len(block), c.sanitizedURL, err, retryDuration.Seconds())
t := timerpool.Get(retryDuration)
select {
case <-c.stopCh:
timerpool.Put(t)
return false return false
case <-t.C:
timerpool.Put(t)
} }
c.retriesCount.Inc() c.retriesCount.Inc()
goto again goto again
@@ -453,6 +463,12 @@ again:
// - Real-world implementations of v1 use both 400 and 415 status codes. // - Real-world implementations of v1 use both 400 and 415 status codes.
// See more in research: https://github.com/VictoriaMetrics/VictoriaMetrics/pull/8462#issuecomment-2786918054 // See more in research: https://github.com/VictoriaMetrics/VictoriaMetrics/pull/8462#issuecomment-2786918054
case 415, 400: case 415, 400:
if c.canDowngradeVMProto.Swap(false) {
logger.Infof("received unsupported media type or bad request from remote storage at %q. Downgrading protocol from VictoriaMetrics to Prometheus remote write for all future requests. "+
"See https://docs.victoriametrics.com/victoriametrics/vmagent/#victoriametrics-remote-write-protocol", c.sanitizedURL)
c.useVMProto.Store(false)
}
if encoding.IsZstd(block) { if encoding.IsZstd(block) {
logger.Infof("received unsupported media type or bad request from remote storage at %q. Re-packing the block to Prometheus remote write and retrying."+ logger.Infof("received unsupported media type or bad request from remote storage at %q. Re-packing the block to Prometheus remote write and retrying."+
"See https://docs.victoriametrics.com/victoriametrics/vmagent/#victoriametrics-remote-write-protocol", c.sanitizedURL) "See https://docs.victoriametrics.com/victoriametrics/vmagent/#victoriametrics-remote-write-protocol", c.sanitizedURL)
@@ -471,7 +487,7 @@ again:
goto again goto again
} }
logger.Warnf("failed to repack zstd block (%d bytes) to snappy: %s; The block will be rejected. "+ logger.Warnf("failed to repack zstd block (%s bytes) to snappy: %s; The block will be rejected. "+
"Possible cause: ungraceful shutdown leading to persisted queue corruption.", "Possible cause: ungraceful shutdown leading to persisted queue corruption.",
zstdBlockLen, err) zstdBlockLen, err)
} }
@@ -488,10 +504,7 @@ again:
// Unexpected status code returned // Unexpected status code returned
retriesCount++ retriesCount++
retryAfterHeader := parseRetryAfterHeader(resp.Header.Get("Retry-After")) retryAfterHeader := parseRetryAfterHeader(resp.Header.Get("Retry-After"))
// retryAfterDuration has the highest priority duration retryDuration = getRetryDuration(retryAfterHeader, retryDuration, maxRetryDuration)
if retryAfterHeader > 0 {
bt.SetDelay(retryAfterHeader)
}
// Handle response // Handle response
body, err := io.ReadAll(resp.Body) body, err := io.ReadAll(resp.Body)
@@ -500,44 +513,44 @@ again:
logger.Errorf("cannot read response body from %q during retry #%d: %s", c.sanitizedURL, retriesCount, err) logger.Errorf("cannot read response body from %q during retry #%d: %s", c.sanitizedURL, retriesCount, err)
} else { } else {
logger.Errorf("unexpected status code received after sending a block with size %d bytes to %q during retry #%d: %d; response body=%q; "+ logger.Errorf("unexpected status code received after sending a block with size %d bytes to %q during retry #%d: %d; response body=%q; "+
"re-sending the block in %s", len(block), c.sanitizedURL, retriesCount, statusCode, body, bt.CurrentDelay()) "re-sending the block in %.3f seconds", len(block), c.sanitizedURL, retriesCount, statusCode, body, retryDuration.Seconds())
} }
if !bt.Wait(c.stopCh) { t := timerpool.Get(retryDuration)
select {
case <-c.stopCh:
timerpool.Put(t)
return false return false
case <-t.C:
timerpool.Put(t)
} }
c.retriesCount.Inc() c.retriesCount.Inc()
goto again goto again
} }
// drainInMemoryQueue sends blocks read from the in-memory part of c.fq
// until no more blocks can be read, a send fails or stopCtx is done.
//
// block is reused as the destination buffer for every block read from the queue.
// A block that could not be sent is written back to c.fq before returning.
func (c *client) drainInMemoryQueue(stopCtx context.Context, block []byte) {
	for stopCtx.Err() == nil {
		var hasBlock bool
		block, hasBlock = c.fq.MustReadInMemoryBlock(block[:0])
		if !hasBlock {
			// The in memory queue has already been drained,
			// or persisted queue is being used.
			// In this case it is guaranteed that fq will be empty
			return
		}
		// at this stage c.stopCh should be closed
		// so sendBlock function should not perform retries
		if sent := c.sendBlock(block); !sent {
			c.fq.MustWriteBlockIgnoreDisabledPQ(block)
			return
		}
	}
}
var remoteWriteRejectedLogger = logger.WithThrottler("remoteWriteRejected", 5*time.Second) var remoteWriteRejectedLogger = logger.WithThrottler("remoteWriteRejected", 5*time.Second)
var remoteWriteRetryLogger = logger.WithThrottler("remoteWriteRetry", 5*time.Second) var remoteWriteRetryLogger = logger.WithThrottler("remoteWriteRetry", 5*time.Second)
// getRetryDuration calculates the delay to wait before the next retry.
//
// A positive retryAfterDuration has the highest priority: it is returned
// with jitter added, regardless of retryDuration and maxRetryDuration.
// Otherwise retryDuration is doubled and capped at maxRetryDuration.
//
// Also see: https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6097
func getRetryDuration(retryAfterDuration, retryDuration, maxRetryDuration time.Duration) time.Duration {
	if retryAfterDuration <= 0 {
		// default backoff retry policy: double the delay, bounded by maxRetryDuration
		doubled := 2 * retryDuration
		if doubled > maxRetryDuration {
			return maxRetryDuration
		}
		return doubled
	}
	// the Retry-After value overrides the backoff policy
	return timeutil.AddJitterToDuration(retryAfterDuration)
}
// repackBlockFromZstdToSnappy repacks the given zstd-compressed block to snappy-compressed block. // repackBlockFromZstdToSnappy repacks the given zstd-compressed block to snappy-compressed block.
// //
// The input block may be corrupted, for example, if vmagent was shut down ungracefully and // The input block may be corrupted, for example, if vmagent was shut down ungracefully and
@@ -547,9 +560,9 @@ var remoteWriteRetryLogger = logger.WithThrottler("remoteWriteRetry", 5*time.Sec
// For more details, see: https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9417 // For more details, see: https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9417
func repackBlockFromZstdToSnappy(zstdBlock []byte) ([]byte, error) { func repackBlockFromZstdToSnappy(zstdBlock []byte) ([]byte, error) {
plainBlock := make([]byte, 0, len(zstdBlock)*2) plainBlock := make([]byte, 0, len(zstdBlock)*2)
plainBlock, err := encoding.DecompressZSTD(plainBlock, zstdBlock) plainBlock, err := zstd.Decompress(plainBlock, zstdBlock)
if err != nil { if err != nil {
return nil, err return nil, fmt.Errorf("zstd: decompress: %s", err)
} }
return snappy.Encode(nil, plainBlock), nil return snappy.Encode(nil, plainBlock), nil
@@ -568,20 +581,24 @@ func logBlockRejected(block []byte, sanitizedURL string, resp *http.Response) {
} }
// parseRetryAfterHeader parses `Retry-After` value retrieved from HTTP response header. // parseRetryAfterHeader parses `Retry-After` value retrieved from HTTP response header.
// // retryAfterString should be in either HTTP-date or a number of seconds.
// s should be in either HTTP-date or a number of seconds. // It will return time.Duration(0) if `retryAfterString` does not follow RFC 7231.
// It returns time.Duration(0) if s does not follow RFC 7231. func parseRetryAfterHeader(retryAfterString string) (retryAfterDuration time.Duration) {
func parseRetryAfterHeader(s string) time.Duration { if retryAfterString == "" {
if s == "" { return retryAfterDuration
return 0
} }
defer func() {
v := retryAfterDuration.Seconds()
logger.Infof("'Retry-After: %s' parsed into %.2f second(s)", retryAfterString, v)
}()
// Retry-After could be in "Mon, 02 Jan 2006 15:04:05 GMT" format. // Retry-After could be in "Mon, 02 Jan 2006 15:04:05 GMT" format.
if parsedTime, err := time.Parse(http.TimeFormat, s); err == nil { if parsedTime, err := time.Parse(http.TimeFormat, retryAfterString); err == nil {
return time.Duration(time.Until(parsedTime).Seconds()) * time.Second return time.Duration(time.Until(parsedTime).Seconds()) * time.Second
} }
// Retry-After could be in seconds. // Retry-After could be in seconds.
if seconds, err := strconv.Atoi(s); err == nil { if seconds, err := strconv.Atoi(retryAfterString); err == nil {
return time.Duration(seconds) * time.Second return time.Duration(seconds) * time.Second
} }

View File

@@ -6,12 +6,66 @@ import (
"testing" "testing"
"time" "time"
"github.com/golang/snappy"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding" "github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil" "github.com/golang/snappy"
) )
// TestCalculateRetryDuration verifies the retry durations returned by getRetryDuration
// for the default backoff policy and for explicit retry-after durations.
func TestCalculateRetryDuration(t *testing.T) {
// f calls getRetryDuration n times with the max retry duration set to time.Minute,
// feeding the result of every call into the next one as retryDuration,
// and then checks that the final result is
// 1. >= expectMinDuration minus one second of slack
// 2. <= helper(expectMinDuration), i.e. expectMinDuration + 10% (see timeutil.AddJitterToDuration)
f := func(retryAfterDuration, retryDuration time.Duration, n int, expectMinDuration time.Duration) {
t.Helper()
for i := 0; i < n; i++ {
retryDuration = getRetryDuration(retryAfterDuration, retryDuration, time.Minute)
}
expectMaxDuration := helper(expectMinDuration)
expectMinDuration = expectMinDuration - (1000 * time.Millisecond) // Avoid edge case when calculating time.Until(now)
if retryDuration < expectMinDuration || retryDuration > expectMaxDuration {
t.Fatalf(
"incorrect retry duration, want (ms): [%d, %d], got (ms): %d",
expectMinDuration.Milliseconds(), expectMaxDuration.Milliseconds(),
retryDuration.Milliseconds(),
)
}
}
// Call getRetryDuration once.
{
// default backoff policy
f(0, time.Second, 1, 2*time.Second)
// default backoff policy exceeds the max limit
f(0, 10*time.Minute, 1, time.Minute)
// retry after > default backoff policy
f(10*time.Second, 1*time.Second, 1, 10*time.Second)
// retry after < default backoff policy
f(1*time.Second, 10*time.Second, 1, 1*time.Second)
// retry after is not set (zero), so the default backoff policy applies
f(0, time.Second, 1, 2*time.Second)
}
// Call getRetryDuration multiple times.
{
// default backoff policy 2 times
f(0, time.Second, 2, 4*time.Second)
// default backoff policy 3 times
f(0, time.Second, 3, 8*time.Second)
// default backoff policy N times exceeds the max limit
f(0, time.Second, 10, time.Minute)
// retry after 120s, 1 call
f(120*time.Second, time.Second, 1, 120*time.Second)
// retry after 120s, 2 calls
f(120*time.Second, time.Second, 2, 120*time.Second)
}
}
func TestParseRetryAfterHeader(t *testing.T) { func TestParseRetryAfterHeader(t *testing.T) {
f := func(retryAfterString string, expectResult time.Duration) { f := func(retryAfterString string, expectResult time.Duration) {
t.Helper() t.Helper()
@@ -37,38 +91,14 @@ func TestParseRetryAfterHeader(t *testing.T) {
f(time.Now().Add(10*time.Second).Format("Mon, 02 Jan 2006 15:04:05 FAKETZ"), 0) f(time.Now().Add(10*time.Second).Format("Mon, 02 Jan 2006 15:04:05 FAKETZ"), 0)
} }
func TestInitSecretFlags(t *testing.T) { // helper calculate the max possible time duration calculated by timeutil.AddJitterToDuration.
showRemoteWriteURLOrig := *showRemoteWriteURL func helper(d time.Duration) time.Duration {
defer func() { dv := d / 10
*showRemoteWriteURL = showRemoteWriteURLOrig if dv > 10*time.Second {
flagutil.UnregisterAllSecretFlags() dv = 10 * time.Second
}()
flagutil.UnregisterAllSecretFlags()
*showRemoteWriteURL = false
InitSecretFlags()
if !flagutil.IsSecretFlag("remotewrite.url") {
t.Fatalf("expecting remoteWrite.url to be secret")
}
if !flagutil.IsSecretFlag("remotewrite.headers") {
t.Fatalf("expecting remoteWrite.headers to be secret")
}
if !flagutil.IsSecretFlag("remotewrite.proxyurl") {
t.Fatalf("expecting remoteWrite.proxyURL to be secret")
} }
flagutil.UnregisterAllSecretFlags() return d + dv
*showRemoteWriteURL = true
InitSecretFlags()
if flagutil.IsSecretFlag("remotewrite.url") {
t.Fatalf("remoteWrite.url must remain visible when -remoteWrite.showURL is set")
}
if !flagutil.IsSecretFlag("remotewrite.headers") {
t.Fatalf("expecting remoteWrite.headers to remain secret")
}
if !flagutil.IsSecretFlag("remotewrite.proxyurl") {
t.Fatalf("expecting remoteWrite.proxyURL to remain secret")
}
} }
func TestRepackBlockFromZstdToSnappy(t *testing.T) { func TestRepackBlockFromZstdToSnappy(t *testing.T) {

View File

@@ -48,7 +48,11 @@ func newPendingSeries(fq *persistentqueue.FastQueue, isVMRemoteWrite *atomic.Boo
ps.wr.significantFigures = significantFigures ps.wr.significantFigures = significantFigures
ps.wr.roundDigits = roundDigits ps.wr.roundDigits = roundDigits
ps.stopCh = make(chan struct{}) ps.stopCh = make(chan struct{})
ps.periodicFlusherWG.Go(ps.periodicFlusher) ps.periodicFlusherWG.Add(1)
go func() {
defer ps.periodicFlusherWG.Done()
ps.periodicFlusher()
}()
return &ps return &ps
} }
@@ -211,9 +215,6 @@ func (wr *writeRequest) copyMetadata(dst, src *prompb.MetricMetadata) {
dst.Type = src.Type dst.Type = src.Type
dst.Unit = src.Unit dst.Unit = src.Unit
dst.AccountID = src.AccountID
dst.ProjectID = src.ProjectID
// Pre-allocate memory for all string fields. // Pre-allocate memory for all string fields.
neededBufLen := len(src.MetricFamilyName) + len(src.Help) neededBufLen := len(src.MetricFamilyName) + len(src.Help)
bufLen := len(wr.metadatabuf) bufLen := len(wr.metadatabuf)

View File

@@ -51,9 +51,9 @@ func testPushWriteRequest(t *testing.T, rowsCount, expectedBlockLenProm, expecte
func newTestWriteRequest(seriesCount, labelsCount int) *prompb.WriteRequest { func newTestWriteRequest(seriesCount, labelsCount int) *prompb.WriteRequest {
var wr prompb.WriteRequest var wr prompb.WriteRequest
for i := range seriesCount { for i := 0; i < seriesCount; i++ {
var labels []prompb.Label var labels []prompb.Label
for j := range labelsCount { for j := 0; j < labelsCount; j++ {
labels = append(labels, prompb.Label{ labels = append(labels, prompb.Label{
Name: fmt.Sprintf("label_%d_%d", i, j), Name: fmt.Sprintf("label_%d_%d", i, j),
Value: fmt.Sprintf("value_%d_%d", i, j), Value: fmt.Sprintf("value_%d_%d", i, j),

View File

@@ -3,24 +3,22 @@ package remotewrite
import ( import (
"flag" "flag"
"fmt" "fmt"
"io"
"strconv" "strconv"
"strings" "strings"
"sync" "sync"
"sync/atomic"
"github.com/VictoriaMetrics/metrics"
"gopkg.in/yaml.v2"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime" "github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil" "github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger" "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb" "github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel" "github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
"github.com/VictoriaMetrics/metrics"
) )
var ( var (
unparsedLabelsGlobal = flagutil.NewArrayString("remoteWrite.label", "Optional label in the form 'name=value' to add to all the metrics before sending them to all -remoteWrite.url.") unparsedLabelsGlobal = flagutil.NewArrayString("remoteWrite.label", "Optional label in the form 'name=value' to add to all the metrics before sending them to -remoteWrite.url. "+
"Pass multiple -remoteWrite.label flags in order to add multiple labels to metrics before sending them to remote storage")
relabelConfigPathGlobal = flag.String("remoteWrite.relabelConfig", "", "Optional path to file with relabeling configs, which are applied "+ relabelConfigPathGlobal = flag.String("remoteWrite.relabelConfig", "", "Optional path to file with relabeling configs, which are applied "+
"to all the metrics before sending them to -remoteWrite.url. See also -remoteWrite.urlRelabelConfig. "+ "to all the metrics before sending them to -remoteWrite.url. See also -remoteWrite.urlRelabelConfig. "+
"The path can point either to local file or to http url. "+ "The path can point either to local file or to http url. "+
@@ -34,12 +32,9 @@ var (
"See https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels") "See https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels")
) )
var labelsGlobal []prompb.Label
var ( var (
labelsGlobal []prompb.Label
remoteWriteRelabelConfigData atomic.Pointer[[]byte]
remoteWriteURLRelabelConfigData atomic.Pointer[[]any]
relabelConfigReloads *metrics.Counter relabelConfigReloads *metrics.Counter
relabelConfigReloadErrors *metrics.Counter relabelConfigReloadErrors *metrics.Counter
relabelConfigSuccess *metrics.Gauge relabelConfigSuccess *metrics.Gauge
@@ -72,42 +67,6 @@ func initRelabelConfigs() {
} }
} }
// WriteRelabelConfigData writes -remoteWrite.relabelConfig contents to w
func WriteRelabelConfigData(w io.Writer) {
p := remoteWriteRelabelConfigData.Load()
if p == nil {
// Nothing to write to w
return
}
_, _ = w.Write(*p)
}
// WriteURLRelabelConfigData writes -remoteWrite.urlRelabelConfig contents to w
func WriteURLRelabelConfigData(w io.Writer) {
p := remoteWriteURLRelabelConfigData.Load()
if p == nil {
// Nothing to write to w
return
}
type urlRelabelCfg struct {
Url string `yaml:"url"`
RelabelConfig any `yaml:"relabel_config"`
}
var cs []urlRelabelCfg
for i, url := range *remoteWriteURLs {
cfgData := (*p)[i]
if !*showRemoteWriteURL {
url = fmt.Sprintf("%d:secret-url", i+1)
}
cs = append(cs, urlRelabelCfg{
Url: url,
RelabelConfig: cfgData,
})
}
d, _ := yaml.Marshal(cs)
_, _ = w.Write(d)
}
func reloadRelabelConfigs() { func reloadRelabelConfigs() {
rcs := allRelabelConfigs.Load() rcs := allRelabelConfigs.Load()
if !rcs.isSet() { if !rcs.isSet() {
@@ -131,43 +90,28 @@ func reloadRelabelConfigs() {
func loadRelabelConfigs() (*relabelConfigs, error) { func loadRelabelConfigs() (*relabelConfigs, error) {
var rcs relabelConfigs var rcs relabelConfigs
if *relabelConfigPathGlobal != "" { if *relabelConfigPathGlobal != "" {
global, rawCfg, err := promrelabel.LoadRelabelConfigs(*relabelConfigPathGlobal) global, err := promrelabel.LoadRelabelConfigs(*relabelConfigPathGlobal)
if err != nil { if err != nil {
return nil, fmt.Errorf("cannot load -remoteWrite.relabelConfig=%q: %w", *relabelConfigPathGlobal, err) return nil, fmt.Errorf("cannot load -remoteWrite.relabelConfig=%q: %w", *relabelConfigPathGlobal, err)
} }
remoteWriteRelabelConfigData.Store(&rawCfg)
rcs.global = global rcs.global = global
} }
if len(*relabelConfigPaths) > len(*remoteWriteURLs) { if len(*relabelConfigPaths) > len(*remoteWriteURLs) {
return nil, fmt.Errorf("too many -remoteWrite.urlRelabelConfig args: %d; it mustn't exceed the number of -remoteWrite.url args: %d", return nil, fmt.Errorf("too many -remoteWrite.urlRelabelConfig args: %d; it mustn't exceed the number of -remoteWrite.url args: %d",
len(*relabelConfigPaths), (len(*remoteWriteURLs))) len(*relabelConfigPaths), (len(*remoteWriteURLs)))
} }
var urlRelabelCfgs []any
rcs.perURL = make([]*promrelabel.ParsedConfigs, len(*remoteWriteURLs)) rcs.perURL = make([]*promrelabel.ParsedConfigs, len(*remoteWriteURLs))
for i, path := range *relabelConfigPaths { for i, path := range *relabelConfigPaths {
if len(path) == 0 { if len(path) == 0 {
urlRelabelCfgs = append(urlRelabelCfgs, nil) // Skip empty relabel config.
continue continue
} }
prc, rawCfg, err := promrelabel.LoadRelabelConfigs(path) prc, err := promrelabel.LoadRelabelConfigs(path)
if err != nil { if err != nil {
return nil, fmt.Errorf("cannot load relabel configs from -remoteWrite.urlRelabelConfig=%q: %w", path, err) return nil, fmt.Errorf("cannot load relabel configs from -remoteWrite.urlRelabelConfig=%q: %w", path, err)
} }
rcs.perURL[i] = prc rcs.perURL[i] = prc
var parsedCfg any
_ = yaml.Unmarshal(rawCfg, &parsedCfg)
urlRelabelCfgs = append(urlRelabelCfgs, parsedCfg)
} }
if len(*remoteWriteURLs) > len(*relabelConfigPaths) {
// fill the urlRelabelCfgs with empty relabel configs if not set
for i := len(*relabelConfigPaths); i < len(*remoteWriteURLs); i++ {
urlRelabelCfgs = append(urlRelabelCfgs, nil)
}
}
remoteWriteURLRelabelConfigData.Store(&urlRelabelCfgs)
return &rcs, nil return &rcs, nil
} }
@@ -176,9 +120,19 @@ type relabelConfigs struct {
perURL []*promrelabel.ParsedConfigs perURL []*promrelabel.ParsedConfigs
} }
// isSet indicates whether (global or per-URL) command-line flags is set
func (rcs *relabelConfigs) isSet() bool { func (rcs *relabelConfigs) isSet() bool {
return *relabelConfigPathGlobal != "" || len(*relabelConfigPaths) > 0 if rcs == nil {
return false
}
if rcs.global.Len() > 0 {
return true
}
for _, pc := range rcs.perURL {
if pc.Len() > 0 {
return true
}
}
return false
} }
// initLabelsGlobal must be called after parsing command-line flags. // initLabelsGlobal must be called after parsing command-line flags.

View File

@@ -3,7 +3,6 @@ package remotewrite
import ( import (
"flag" "flag"
"fmt" "fmt"
"math"
"net/http" "net/http"
"net/url" "net/url"
"path/filepath" "path/filepath"
@@ -12,10 +11,6 @@ import (
"sync/atomic" "sync/atomic"
"time" "time"
"github.com/cespare/xxhash/v2"
"github.com/VictoriaMetrics/metrics"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth" "github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bloomfilter" "github.com/VictoriaMetrics/VictoriaMetrics/lib/bloomfilter"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil" "github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
@@ -28,14 +23,14 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/memory" "github.com/VictoriaMetrics/VictoriaMetrics/lib/memory"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/persistentqueue" "github.com/VictoriaMetrics/VictoriaMetrics/lib/persistentqueue"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil" "github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prommetadata"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb" "github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel" "github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutil" "github.com/VictoriaMetrics/VictoriaMetrics/lib/promutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/ratelimiter" "github.com/VictoriaMetrics/VictoriaMetrics/lib/ratelimiter"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/slicesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/streamaggr" "github.com/VictoriaMetrics/VictoriaMetrics/lib/streamaggr"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timeserieslimits" "github.com/VictoriaMetrics/VictoriaMetrics/lib/timeserieslimits"
"github.com/VictoriaMetrics/metrics"
"github.com/cespare/xxhash/v2"
) )
var ( var (
@@ -63,7 +58,7 @@ var (
"See also -remoteWrite.maxDiskUsagePerURL and -remoteWrite.disableOnDiskQueue") "See also -remoteWrite.maxDiskUsagePerURL and -remoteWrite.disableOnDiskQueue")
keepDanglingQueues = flag.Bool("remoteWrite.keepDanglingQueues", false, "Keep persistent queues contents at -remoteWrite.tmpDataPath in case there are no matching -remoteWrite.url. "+ keepDanglingQueues = flag.Bool("remoteWrite.keepDanglingQueues", false, "Keep persistent queues contents at -remoteWrite.tmpDataPath in case there are no matching -remoteWrite.url. "+
"Useful when -remoteWrite.url is changed temporarily and persistent queue files will be needed later on.") "Useful when -remoteWrite.url is changed temporarily and persistent queue files will be needed later on.")
queues = flagutil.NewArrayInt("remoteWrite.queues", cgroup.AvailableCPUs()*2, "The number of concurrent queues to each -remoteWrite.url. Set more queues if default number of queues "+ queues = flag.Int("remoteWrite.queues", cgroup.AvailableCPUs()*2, "The number of concurrent queues to each -remoteWrite.url. Set more queues if default number of queues "+
"isn't enough for sending high volume of collected data to remote storage. "+ "isn't enough for sending high volume of collected data to remote storage. "+
"Default value depends on the number of available CPU cores. It should work fine in most cases since it minimizes resource usage") "Default value depends on the number of available CPU cores. It should work fine in most cases since it minimizes resource usage")
showRemoteWriteURL = flag.Bool("remoteWrite.showURL", false, "Whether to show -remoteWrite.url in the exported metrics. "+ showRemoteWriteURL = flag.Bool("remoteWrite.showURL", false, "Whether to show -remoteWrite.url in the exported metrics. "+
@@ -79,20 +74,15 @@ var (
"writing them to remote storage. "+ "writing them to remote storage. "+
"Examples: -remoteWrite.roundDigits=2 would round 1.236 to 1.24, while -remoteWrite.roundDigits=-1 would round 126.78 to 130. "+ "Examples: -remoteWrite.roundDigits=2 would round 1.236 to 1.24, while -remoteWrite.roundDigits=-1 would round 126.78 to 130. "+
"By default, digits rounding is disabled. Set it to 100 for disabling it for a particular remote storage. "+ "By default, digits rounding is disabled. Set it to 100 for disabling it for a particular remote storage. "+
"This option may be used for improving data compression for the stored metrics. "+ "This option may be used for improving data compression for the stored metrics")
"See also -remoteWrite.significantFigures")
sortLabels = flag.Bool("sortLabels", false, `Whether to sort labels for incoming samples before writing them to all the configured remote storage systems. `+ sortLabels = flag.Bool("sortLabels", false, `Whether to sort labels for incoming samples before writing them to all the configured remote storage systems. `+
`This may be needed for reducing memory usage at remote storage when the order of labels in incoming samples is random. `+ `This may be needed for reducing memory usage at remote storage when the order of labels in incoming samples is random. `+
`For example, if m{k1="v1",k2="v2"} may be sent as m{k2="v2",k1="v1"}`+ `For example, if m{k1="v1",k2="v2"} may be sent as m{k2="v2",k1="v1"}`+
`Enabled sorting for labels can slow down ingestion performance a bit`) `Enabled sorting for labels can slow down ingestion performance a bit`)
maxHourlySeries = flag.Int64("remoteWrite.maxHourlySeries", 0, "The maximum number of unique series vmagent can send to remote storage systems during the last hour. "+ maxHourlySeries = flag.Int("remoteWrite.maxHourlySeries", 0, "The maximum number of unique series vmagent can send to remote storage systems during the last hour. "+
"Excess series are logged and dropped. This can be useful for limiting series cardinality. "+ "Excess series are logged and dropped. This can be useful for limiting series cardinality. See https://docs.victoriametrics.com/victoriametrics/vmagent/#cardinality-limiter")
fmt.Sprintf("Setting this flag to '-1' sets limit to maximum possible value (%d) which is useful in order to enable series tracking without enforcing limits. ", math.MaxInt32)+ maxDailySeries = flag.Int("remoteWrite.maxDailySeries", 0, "The maximum number of unique series vmagent can send to remote storage systems during the last 24 hours. "+
"See https://docs.victoriametrics.com/victoriametrics/vmagent/#cardinality-limiter") "Excess series are logged and dropped. This can be useful for limiting series churn rate. See https://docs.victoriametrics.com/victoriametrics/vmagent/#cardinality-limiter")
maxDailySeries = flag.Int64("remoteWrite.maxDailySeries", 0, "The maximum number of unique series vmagent can send to remote storage systems during the last 24 hours. "+
"Excess series are logged and dropped. This can be useful for limiting series churn rate. "+
fmt.Sprintf("Setting this flag to '-1' sets limit to maximum possible value (%d) which is useful in order to enable series tracking without enforcing limits. ", math.MaxInt32)+
"See https://docs.victoriametrics.com/victoriametrics/vmagent/#cardinality-limiter")
maxIngestionRate = flag.Int("maxIngestionRate", 0, "The maximum number of samples vmagent can receive per second. Data ingestion is paused when the limit is exceeded. "+ maxIngestionRate = flag.Int("maxIngestionRate", 0, "The maximum number of samples vmagent can receive per second. Data ingestion is paused when the limit is exceeded. "+
"By default there are no limits on samples ingestion rate. See also -remoteWrite.rateLimit") "By default there are no limits on samples ingestion rate. See also -remoteWrite.rateLimit")
@@ -101,8 +91,6 @@ var (
"See https://docs.victoriametrics.com/victoriametrics/vmagent/#disabling-on-disk-persistence . See also -remoteWrite.dropSamplesOnOverload") "See https://docs.victoriametrics.com/victoriametrics/vmagent/#disabling-on-disk-persistence . See also -remoteWrite.dropSamplesOnOverload")
dropSamplesOnOverload = flag.Bool("remoteWrite.dropSamplesOnOverload", false, "Whether to drop samples when -remoteWrite.disableOnDiskQueue is set and if the samples "+ dropSamplesOnOverload = flag.Bool("remoteWrite.dropSamplesOnOverload", false, "Whether to drop samples when -remoteWrite.disableOnDiskQueue is set and if the samples "+
"cannot be pushed into the configured -remoteWrite.url systems in a timely manner. See https://docs.victoriametrics.com/victoriametrics/vmagent/#disabling-on-disk-persistence") "cannot be pushed into the configured -remoteWrite.url systems in a timely manner. See https://docs.victoriametrics.com/victoriametrics/vmagent/#disabling-on-disk-persistence")
disableMetadataPerURL = flagutil.NewArrayBool("remoteWrite.disableMetadata", "Whether to disable sending metadata to the corresponding -remoteWrite.url. "+
"By default, metadata sending is controlled by the global -enableMetadata flag")
) )
var ( var (
@@ -152,10 +140,6 @@ func InitSecretFlags() {
// remoteWrite.url can contain authentication codes, so hide it at `/metrics` output. // remoteWrite.url can contain authentication codes, so hide it at `/metrics` output.
flagutil.RegisterSecretFlag("remoteWrite.url") flagutil.RegisterSecretFlag("remoteWrite.url")
} }
// remoteWrite.proxyURL can contain authentication codes.
flagutil.RegisterSecretFlag("remoteWrite.proxyURL")
// remoteWrite.headers can contain auth headers such as Authorization and API keys.
flagutil.RegisterSecretFlag("remoteWrite.headers")
} }
var ( var (
@@ -172,20 +156,8 @@ func Init() {
if len(*remoteWriteURLs) == 0 { if len(*remoteWriteURLs) == 0 {
logger.Fatalf("at least one `-remoteWrite.url` command-line flag must be set") logger.Fatalf("at least one `-remoteWrite.url` command-line flag must be set")
} }
if *shardByURL && len(*disableOnDiskQueue) > 1 { if *maxHourlySeries > 0 {
disableOnDiskQueues := *disableOnDiskQueue hourlySeriesLimiter = bloomfilter.NewLimiter(*maxHourlySeries, time.Hour)
firstValue := disableOnDiskQueues[0]
for _, v := range disableOnDiskQueues[1:] {
if firstValue != v {
logger.Fatalf("all -remoteWrite.url targets must have the same -remoteWrite.disableOnDiskQueue setting when -remoteWrite.shardByURL is enabled; " +
"either enable or disable -remoteWrite.disableOnDiskQueue for all targets")
}
}
}
if limit := getMaxHourlySeries(); limit > 0 {
hourlySeriesLimiter = bloomfilter.NewLimiter(limit, time.Hour)
_ = metrics.NewGauge(`vmagent_hourly_series_limit_max_series`, func() float64 { _ = metrics.NewGauge(`vmagent_hourly_series_limit_max_series`, func() float64 {
return float64(hourlySeriesLimiter.MaxItems()) return float64(hourlySeriesLimiter.MaxItems())
}) })
@@ -193,8 +165,8 @@ func Init() {
return float64(hourlySeriesLimiter.CurrentItems()) return float64(hourlySeriesLimiter.CurrentItems())
}) })
} }
if limit := getMaxDailySeries(); limit > 0 { if *maxDailySeries > 0 {
dailySeriesLimiter = bloomfilter.NewLimiter(limit, 24*time.Hour) dailySeriesLimiter = bloomfilter.NewLimiter(*maxDailySeries, 24*time.Hour)
_ = metrics.NewGauge(`vmagent_daily_series_limit_max_series`, func() float64 { _ = metrics.NewGauge(`vmagent_daily_series_limit_max_series`, func() float64 {
return float64(dailySeriesLimiter.MaxItems()) return float64(dailySeriesLimiter.MaxItems())
}) })
@@ -203,6 +175,13 @@ func Init() {
}) })
} }
if *queues > maxQueues {
*queues = maxQueues
}
if *queues <= 0 {
*queues = 1
}
if len(*shardByURLLabels) > 0 && len(*shardByURLIgnoreLabels) > 0 { if len(*shardByURLLabels) > 0 && len(*shardByURLIgnoreLabels) > 0 {
logger.Fatalf("-remoteWrite.shardByURL.labels and -remoteWrite.shardByURL.ignoreLabels cannot be set simultaneously; " + logger.Fatalf("-remoteWrite.shardByURL.labels and -remoteWrite.shardByURL.ignoreLabels cannot be set simultaneously; " +
"see https://docs.victoriametrics.com/victoriametrics/vmagent/#sharding-among-remote-storages") "see https://docs.victoriametrics.com/victoriametrics/vmagent/#sharding-among-remote-storages")
@@ -235,7 +214,9 @@ func Init() {
dropDanglingQueues() dropDanglingQueues()
// Start config reloader. // Start config reloader.
configReloaderWG.Go(func() { configReloaderWG.Add(1)
go func() {
defer configReloaderWG.Done()
for { for {
select { select {
case <-configReloaderStopCh: case <-configReloaderStopCh:
@@ -245,7 +226,7 @@ func Init() {
reloadRelabelConfigs() reloadRelabelConfigs()
reloadStreamAggrConfigs() reloadStreamAggrConfigs()
} }
}) }()
} }
func dropDanglingQueues() { func dropDanglingQueues() {
@@ -285,6 +266,17 @@ func initRemoteWriteCtxs(urls []string) {
if len(urls) == 0 { if len(urls) == 0 {
logger.Panicf("BUG: urls must be non-empty") logger.Panicf("BUG: urls must be non-empty")
} }
maxInmemoryBlocks := memory.Allowed() / len(urls) / *maxRowsPerBlock / 100
if maxInmemoryBlocks / *queues > 100 {
// There is no much sense in keeping higher number of blocks in memory,
// since this means that the producer outperforms consumer and the queue
// will continue growing. It is better storing the queue to file.
maxInmemoryBlocks = 100 * *queues
}
if maxInmemoryBlocks < 2 {
maxInmemoryBlocks = 2
}
rwctxs := make([]*remoteWriteCtx, len(urls)) rwctxs := make([]*remoteWriteCtx, len(urls))
rwctxIdx := make([]int, len(urls)) rwctxIdx := make([]int, len(urls))
if retryMaxTime.String() != "" { if retryMaxTime.String() != "" {
@@ -299,10 +291,9 @@ func initRemoteWriteCtxs(urls []string) {
if *showRemoteWriteURL { if *showRemoteWriteURL {
sanitizedURL = fmt.Sprintf("%d:%s", i+1, remoteWriteURL) sanitizedURL = fmt.Sprintf("%d:%s", i+1, remoteWriteURL)
} }
rwctxs[i] = newRemoteWriteCtx(i, remoteWriteURL, sanitizedURL) rwctxs[i] = newRemoteWriteCtx(i, remoteWriteURL, maxInmemoryBlocks, sanitizedURL)
rwctxIdx[i] = i rwctxIdx[i] = i
} }
fs.RegisterPathFsMetrics(*tmpDataPath)
if *shardByURL { if *shardByURL {
consistentHashNodes := make([]string, 0, len(urls)) consistentHashNodes := make([]string, 0, len(urls))
@@ -416,7 +407,7 @@ func tryPush(at *auth.Token, wr *prompb.WriteRequest, forceDropSamplesOnFailure
// Push metadata separately from time series, since it doesn't need sharding, // Push metadata separately from time series, since it doesn't need sharding,
// relabeling, stream aggregation, deduplication, etc. // relabeling, stream aggregation, deduplication, etc.
if !tryPushMetadataToRemoteStorages(at, rwctxs, mms, forceDropSamplesOnFailure) { if !tryPushMetadataToRemoteStorages(rwctxs, mms, forceDropSamplesOnFailure) {
return false return false
} }
@@ -494,9 +485,6 @@ func tryPush(at *auth.Token, wr *prompb.WriteRequest, forceDropSamplesOnFailure
matchIdxs.B = sas.Push(tssBlock, matchIdxs.B) matchIdxs.B = sas.Push(tssBlock, matchIdxs.B)
if !*streamAggrGlobalKeepInput { if !*streamAggrGlobalKeepInput {
tssBlock = dropAggregatedSeries(tssBlock, matchIdxs.B, *streamAggrGlobalDropInput) tssBlock = dropAggregatedSeries(tssBlock, matchIdxs.B, *streamAggrGlobalDropInput)
} else if *streamAggrGlobalDropInput {
// if both keep_input and drop_input are true, we keep only the aggregated series
tssBlock = dropUnaggregatedSeries(tssBlock, matchIdxs.B)
} }
matchIdxsPool.Put(matchIdxs) matchIdxsPool.Put(matchIdxs)
} }
@@ -516,9 +504,7 @@ func tryPush(at *auth.Token, wr *prompb.WriteRequest, forceDropSamplesOnFailure
// //
// calculateHealthyRwctxIdx will rely on the order of rwctx to be in ascending order. // calculateHealthyRwctxIdx will rely on the order of rwctx to be in ascending order.
func getEligibleRemoteWriteCtxs(tss []prompb.TimeSeries, forceDropSamplesOnFailure bool) ([]*remoteWriteCtx, bool) { func getEligibleRemoteWriteCtxs(tss []prompb.TimeSeries, forceDropSamplesOnFailure bool) ([]*remoteWriteCtx, bool) {
// When -remoteWrite.shardByURL=true always use all configured remote writes to preserve stable metrics distribution across shards. if !disableOnDiskQueueAny {
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10507
if !disableOnDiskQueueAny || *shardByURL {
return rwctxsGlobal, true return rwctxsGlobal, true
} }
@@ -533,6 +519,12 @@ func getEligibleRemoteWriteCtxs(tss []prompb.TimeSeries, forceDropSamplesOnFailu
return nil, false return nil, false
} }
rowsCount := getRowsCount(tss) rowsCount := getRowsCount(tss)
if *shardByURL {
// Todo: When shardByURL is enabled, the following metrics won't be 100% accurate. Because vmagent don't know
// which rwctx should data be pushed to yet. Let's consider the hashing algorithm fair and will distribute
// data to all rwctxs evenly.
rowsCount = rowsCount / len(rwctxsGlobal)
}
rwctx.rowsDroppedOnPushFailure.Add(rowsCount) rwctx.rowsDroppedOnPushFailure.Add(rowsCount)
} }
} }
@@ -550,18 +542,11 @@ func pushTimeSeriesToRemoteStoragesTrackDropped(tss []prompb.TimeSeries) {
} }
} }
func tryPushMetadataToRemoteStorages(at *auth.Token, rwctxs []*remoteWriteCtx, mms []prompb.MetricMetadata, forceDropSamplesOnFailure bool) bool { func tryPushMetadataToRemoteStorages(rwctxs []*remoteWriteCtx, mms []prompb.MetricMetadata, forceDropSamplesOnFailure bool) bool {
if len(mms) == 0 { if len(mms) == 0 {
// Nothing to push // Nothing to push
return true return true
} }
if at != nil {
for idx := range mms {
mm := &mms[idx]
mm.AccountID = at.AccountID
mm.ProjectID = at.ProjectID
}
}
// Do not shard metadata even if -remoteWrite.shardByURL is set, just replicate it among rwctxs. // Do not shard metadata even if -remoteWrite.shardByURL is set, just replicate it among rwctxs.
// Since metadata is usually small and there is no guarantee that metadata can be sent to // Since metadata is usually small and there is no guarantee that metadata can be sent to
// the same remote storage with the corresponding metrics. // the same remote storage with the corresponding metrics.
@@ -569,13 +554,11 @@ func tryPushMetadataToRemoteStorages(at *auth.Token, rwctxs []*remoteWriteCtx, m
// Push metadata to remote storage systems in parallel to reduce // Push metadata to remote storage systems in parallel to reduce
// the time needed for sending the data to multiple remote storage systems. // the time needed for sending the data to multiple remote storage systems.
var wg sync.WaitGroup var wg sync.WaitGroup
wg.Add(len(rwctxs))
var anyPushFailed atomic.Bool var anyPushFailed atomic.Bool
for _, rwctx := range rwctxs { for _, rwctx := range rwctxs {
if !rwctx.enableMetadata { go func(rwctx *remoteWriteCtx) {
// Skip remote storage with disabled metadata defer wg.Done()
continue
}
wg.Go(func() {
if !rwctx.tryPushMetadataInternal(mms) { if !rwctx.tryPushMetadataInternal(mms) {
rwctx.pushFailures.Inc() rwctx.pushFailures.Inc()
if forceDropSamplesOnFailure { if forceDropSamplesOnFailure {
@@ -584,7 +567,7 @@ func tryPushMetadataToRemoteStorages(at *auth.Token, rwctxs []*remoteWriteCtx, m
} }
anyPushFailed.Store(true) anyPushFailed.Store(true)
} }
}) }(rwctx)
} }
wg.Wait() wg.Wait()
return !anyPushFailed.Load() return !anyPushFailed.Load()
@@ -616,13 +599,15 @@ func tryPushTimeSeriesToRemoteStorages(rwctxs []*remoteWriteCtx, tssBlock []prom
// Push tssBlock to remote storage systems in parallel to reduce // Push tssBlock to remote storage systems in parallel to reduce
// the time needed for sending the data to multiple remote storage systems. // the time needed for sending the data to multiple remote storage systems.
var wg sync.WaitGroup var wg sync.WaitGroup
wg.Add(len(rwctxs))
var anyPushFailed atomic.Bool var anyPushFailed atomic.Bool
for _, rwctx := range rwctxs { for _, rwctx := range rwctxs {
wg.Go(func() { go func(rwctx *remoteWriteCtx) {
defer wg.Done()
if !rwctx.TryPushTimeSeries(tssBlock, forceDropSamplesOnFailure) { if !rwctx.TryPushTimeSeries(tssBlock, forceDropSamplesOnFailure) {
anyPushFailed.Store(true) anyPushFailed.Store(true)
} }
}) }(rwctx)
} }
wg.Wait() wg.Wait()
return !anyPushFailed.Load() return !anyPushFailed.Load()
@@ -644,11 +629,13 @@ func tryShardingTimeSeriesAmongRemoteStorages(rwctxs []*remoteWriteCtx, tssBlock
if len(shard) == 0 { if len(shard) == 0 {
continue continue
} }
wg.Go(func() { wg.Add(1)
if !rwctx.TryPushTimeSeries(shard, forceDropSamplesOnFailure) { go func(rwctx *remoteWriteCtx, tss []prompb.TimeSeries) {
defer wg.Done()
if !rwctx.TryPushTimeSeries(tss, forceDropSamplesOnFailure) {
anyPushFailed.Store(true) anyPushFailed.Store(true)
} }
}) }(rwctx, shard)
} }
wg.Wait() wg.Wait()
return !anyPushFailed.Load() return !anyPushFailed.Load()
@@ -712,7 +699,7 @@ func shardAmountRemoteWriteCtx(tssBlock []prompb.TimeSeries, shards [][]prompb.T
} }
tmpLabels.Labels = hashLabels tmpLabels.Labels = hashLabels
} }
h := getLabelsHashForShard(hashLabels) h := getLabelsHash(hashLabels)
// Get the rwctxIdx through consistent hashing and then map it to the index in shards. // Get the rwctxIdx through consistent hashing and then map it to the index in shards.
// The rwctxIdx is not always equal to the shardIdx, for example, when some rwctx are not available. // The rwctxIdx is not always equal to the shardIdx, for example, when some rwctx are not available.
@@ -803,28 +790,11 @@ var (
dailySeriesLimitRowsDropped = metrics.NewCounter(`vmagent_daily_series_limit_rows_dropped_total`) dailySeriesLimitRowsDropped = metrics.NewCounter(`vmagent_daily_series_limit_rows_dropped_total`)
) )
// getLabelsHashForShard is a separate function from getLabelsHash because
// it omits the '=' separator between label name and value for backward compatibility.
// Changing it would re-shard all series across remoteWrite targets.
func getLabelsHashForShard(labels []prompb.Label) uint64 {
bb := labelsHashBufPool.Get()
b := bb.B[:0]
for _, label := range labels {
b = append(b, label.Name...)
b = append(b, label.Value...)
}
h := xxhash.Sum64(b)
bb.B = b
labelsHashBufPool.Put(bb)
return h
}
func getLabelsHash(labels []prompb.Label) uint64 { func getLabelsHash(labels []prompb.Label) uint64 {
bb := labelsHashBufPool.Get() bb := labelsHashBufPool.Get()
b := bb.B[:0] b := bb.B[:0]
for _, label := range labels { for _, label := range labels {
b = append(b, label.Name...) b = append(b, label.Name...)
b = append(b, '=')
b = append(b, label.Value...) b = append(b, label.Value...)
} }
h := xxhash.Sum64(b) h := xxhash.Sum64(b)
@@ -863,11 +833,6 @@ type remoteWriteCtx struct {
streamAggrKeepInput bool streamAggrKeepInput bool
streamAggrDropInput bool streamAggrDropInput bool
// enableMetadata indicates whether metadata should be sent to this remote storage.
// It is determined by -remoteWrite.enableMetadata per-URL flag if set,
// otherwise by the global -enableMetadata flag.
enableMetadata bool
pss []*pendingSeries pss []*pendingSeries
pssNextIdx atomic.Uint64 pssNextIdx atomic.Uint64
@@ -879,19 +844,7 @@ type remoteWriteCtx struct {
rowsDroppedOnPushFailure *metrics.Counter rowsDroppedOnPushFailure *metrics.Counter
} }
// isMetadataEnabledForURL returns true if metadata should be sent to the remote storage at argIdx. func newRemoteWriteCtx(argIdx int, remoteWriteURL *url.URL, maxInmemoryBlocks int, sanitizedURL string) *remoteWriteCtx {
// It checks the per-URL -remoteWrite.disableMetadata flag first.
// If not set, it falls back to the global -enableMetadata flag.
func isMetadataEnabledForURL(argIdx int) bool {
if disableMetadataPerURL.GetOptionalArg(argIdx) {
// Metadata is explicitly disabled for this URL
return false
}
// Use global -enableMetadata value
return prommetadata.IsEnabled()
}
func newRemoteWriteCtx(argIdx int, remoteWriteURL *url.URL, sanitizedURL string) *remoteWriteCtx {
// strip query params, otherwise changing params resets pq // strip query params, otherwise changing params resets pq
pqURL := *remoteWriteURL pqURL := *remoteWriteURL
pqURL.RawQuery = "" pqURL.RawQuery = ""
@@ -906,23 +859,6 @@ func newRemoteWriteCtx(argIdx int, remoteWriteURL *url.URL, sanitizedURL string)
} }
isPQDisabled := disableOnDiskQueue.GetOptionalArg(argIdx) isPQDisabled := disableOnDiskQueue.GetOptionalArg(argIdx)
queuesSize := queues.GetOptionalArg(argIdx)
if queuesSize > maxQueues {
queuesSize = maxQueues
} else if queuesSize <= 0 {
queuesSize = 1
}
maxInmemoryBlocks := memory.Allowed() / len(*remoteWriteURLs) / *maxRowsPerBlock / 100
if maxInmemoryBlocks/queuesSize > 100 {
// There is no much sense in keeping higher number of blocks in memory,
// since this means that the producer outperforms consumer and the queue
// will continue growing. It is better storing the queue to file.
maxInmemoryBlocks = 100 * queuesSize
}
if maxInmemoryBlocks < 2 {
maxInmemoryBlocks = 2
}
fq := persistentqueue.MustOpenFastQueue(queuePath, sanitizedURL, maxInmemoryBlocks, maxPendingBytes, isPQDisabled) fq := persistentqueue.MustOpenFastQueue(queuePath, sanitizedURL, maxInmemoryBlocks, maxPendingBytes, isPQDisabled)
_ = metrics.GetOrCreateGauge(fmt.Sprintf(`vmagent_remotewrite_pending_data_bytes{path=%q, url=%q}`, queuePath, sanitizedURL), func() float64 { _ = metrics.GetOrCreateGauge(fmt.Sprintf(`vmagent_remotewrite_pending_data_bytes{path=%q, url=%q}`, queuePath, sanitizedURL), func() float64 {
return float64(fq.GetPendingBytes()) return float64(fq.GetPendingBytes())
@@ -940,16 +876,16 @@ func newRemoteWriteCtx(argIdx int, remoteWriteURL *url.URL, sanitizedURL string)
var c *client var c *client
switch remoteWriteURL.Scheme { switch remoteWriteURL.Scheme {
case "http", "https": case "http", "https":
c = newHTTPClient(argIdx, remoteWriteURL.String(), sanitizedURL, fq, queuesSize) c = newHTTPClient(argIdx, remoteWriteURL.String(), sanitizedURL, fq, *queues)
default: default:
logger.Fatalf("unsupported scheme: %s for remoteWriteURL: %s, want `http`, `https`", remoteWriteURL.Scheme, sanitizedURL) logger.Fatalf("unsupported scheme: %s for remoteWriteURL: %s, want `http`, `https`", remoteWriteURL.Scheme, sanitizedURL)
} }
c.init(argIdx, queuesSize, sanitizedURL) c.init(argIdx, *queues, sanitizedURL)
// Initialize pss // Initialize pss
sf := significantFigures.GetOptionalArg(argIdx) sf := significantFigures.GetOptionalArg(argIdx)
rd := roundDigits.GetOptionalArg(argIdx) rd := roundDigits.GetOptionalArg(argIdx)
pssLen := queuesSize pssLen := *queues
if n := cgroup.AvailableCPUs(); pssLen > n { if n := cgroup.AvailableCPUs(); pssLen > n {
// There is no sense in running more than availableCPUs concurrent pendingSeries, // There is no sense in running more than availableCPUs concurrent pendingSeries,
// since every pendingSeries can saturate up to a single CPU. // since every pendingSeries can saturate up to a single CPU.
@@ -961,11 +897,10 @@ func newRemoteWriteCtx(argIdx int, remoteWriteURL *url.URL, sanitizedURL string)
} }
rwctx := &remoteWriteCtx{ rwctx := &remoteWriteCtx{
idx: argIdx, idx: argIdx,
fq: fq, fq: fq,
c: c, c: c,
pss: pss, pss: pss,
enableMetadata: isMetadataEnabledForURL(argIdx),
rowsPushedAfterRelabel: metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_rows_pushed_after_relabel_total{path=%q,url=%q}`, queuePath, sanitizedURL)), rowsPushedAfterRelabel: metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_rows_pushed_after_relabel_total{path=%q,url=%q}`, queuePath, sanitizedURL)),
rowsDroppedByRelabel: metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_relabel_metrics_dropped_total{path=%q,url=%q}`, queuePath, sanitizedURL)), rowsDroppedByRelabel: metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_relabel_metrics_dropped_total{path=%q,url=%q}`, queuePath, sanitizedURL)),
@@ -1053,17 +988,7 @@ func (rwctx *remoteWriteCtx) TryPushTimeSeries(tss []prompb.TimeSeries, forceDro
tss = append(*v, tss...) tss = append(*v, tss...)
} }
tss = dropAggregatedSeries(tss, matchIdxs.B, rwctx.streamAggrDropInput) tss = dropAggregatedSeries(tss, matchIdxs.B, rwctx.streamAggrDropInput)
} else if rwctx.streamAggrDropInput {
// if both keep_input and drop_input are true, we keep only the aggregated series
if rctx == nil {
rctx = getRelabelCtx()
// Make a copy of tss before dropping aggregated series
v = tssPool.Get().(*[]prompb.TimeSeries)
tss = append(*v, tss...)
}
tss = dropUnaggregatedSeries(tss, matchIdxs.B)
} }
matchIdxsPool.Put(matchIdxs) matchIdxsPool.Put(matchIdxs)
} }
if rwctx.deduplicator != nil { if rwctx.deduplicator != nil {
@@ -1086,10 +1011,9 @@ func (rwctx *remoteWriteCtx) TryPushTimeSeries(tss []prompb.TimeSeries, forceDro
return false return false
} }
var matchIdxsPool slicesutil.BufferPool[uint32] var matchIdxsPool bytesutil.ByteBufferPool
// dropAggregatedSeries drops matched series, also the unmatched if dropInput is true. func dropAggregatedSeries(src []prompb.TimeSeries, matchIdxs []byte, dropInput bool) []prompb.TimeSeries {
func dropAggregatedSeries(src []prompb.TimeSeries, matchIdxs []uint32, dropInput bool) []prompb.TimeSeries {
dst := src[:0] dst := src[:0]
if !dropInput { if !dropInput {
for i, match := range matchIdxs { for i, match := range matchIdxs {
@@ -1104,20 +1028,6 @@ func dropAggregatedSeries(src []prompb.TimeSeries, matchIdxs []uint32, dropInput
return dst return dst
} }
// dropUnaggregatedSeries keeps only the series of src whose entry in
// matchIdxs is non-zero and drops the rest.
//
// The filtering is done in place: kept series are compacted to the front
// of src, the vacated tail is zeroed with clear, and the shortened prefix
// of src is returned.
func dropUnaggregatedSeries(src []prompb.TimeSeries, matchIdxs []uint32) []prompb.TimeSeries {
	kept := 0
	for idx, matched := range matchIdxs {
		if matched != 0 {
			// kept never exceeds idx, so this write cannot clobber
			// an entry that has not been inspected yet.
			src[kept] = src[idx]
			kept++
		}
	}
	// Zero the entries that are no longer part of the result.
	clear(src[kept:])
	return src[:kept]
}
func (rwctx *remoteWriteCtx) pushInternalTrackDropped(tss []prompb.TimeSeries) { func (rwctx *remoteWriteCtx) pushInternalTrackDropped(tss []prompb.TimeSeries) {
if rwctx.tryPushTimeSeriesInternal(tss) { if rwctx.tryPushTimeSeriesInternal(tss) {
return return
@@ -1150,7 +1060,7 @@ func (rwctx *remoteWriteCtx) tryPushTimeSeriesInternal(tss []prompb.TimeSeries)
}() }()
if len(labelsGlobal) > 0 { if len(labelsGlobal) > 0 {
// Make a copy of tss before adding extra labels to prevent // Make a copy of tss before adding extra labels in order to prevent
// from affecting time series for other remoteWrite.url configs. // from affecting time series for other remoteWrite.url configs.
rctx = getRelabelCtx() rctx = getRelabelCtx()
v = tssPool.Get().(*[]prompb.TimeSeries) v = tssPool.Get().(*[]prompb.TimeSeries)
@@ -1186,21 +1096,3 @@ func newMapFromStrings(a []string) map[string]struct{} {
} }
return m return m
} }
// getMaxHourlySeries returns the value of the maxHourlySeries flag as an int.
//
// A value of -1, or any value above math.MaxInt32, is reported as
// math.MaxInt32.
func getMaxHourlySeries() int {
	if n := *maxHourlySeries; n != -1 && n <= math.MaxInt32 {
		return int(n)
	}
	return math.MaxInt32
}
// getMaxDailySeries returns the value of the maxDailySeries flag as an int.
//
// A value of -1, or any value above math.MaxInt32, is reported as
// math.MaxInt32.
func getMaxDailySeries() int {
	if n := *maxDailySeries; n != -1 && n <= math.MaxInt32 {
		return int(n)
	}
	return math.MaxInt32
}

View File

@@ -10,8 +10,6 @@ import (
"time" "time"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/consistenthash" "github.com/VictoriaMetrics/VictoriaMetrics/lib/consistenthash"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/persistentqueue"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb" "github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel" "github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/prometheus" "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/prometheus"
@@ -25,15 +23,15 @@ func TestGetLabelsHash_Distribution(t *testing.T) {
t.Helper() t.Helper()
// Distribute itemsCount hashes returned by getLabelsHash() across bucketsCount buckets. // Distribute itemsCount hashes returned by getLabelsHash() across bucketsCount buckets.
itemsCount := 10_000 * bucketsCount itemsCount := 1_000 * bucketsCount
m := make([]int, bucketsCount) m := make([]int, bucketsCount)
var labels []prompb.Label var labels []prompb.Label
for i := range itemsCount { for i := 0; i < itemsCount; i++ {
labels = append(labels[:0], prompb.Label{ labels = append(labels[:0], prompb.Label{
Name: "__name__", Name: "__name__",
Value: fmt.Sprintf("some_name_%d", i), Value: fmt.Sprintf("some_name_%d", i),
}) })
for j := range 10 { for j := 0; j < 10; j++ {
labels = append(labels, prompb.Label{ labels = append(labels, prompb.Label{
Name: fmt.Sprintf("label_%d", j), Name: fmt.Sprintf("label_%d", j),
Value: fmt.Sprintf("value_%d_%d", i, j), Value: fmt.Sprintf("value_%d_%d", i, j),
@@ -44,12 +42,10 @@ func TestGetLabelsHash_Distribution(t *testing.T) {
} }
// Verify that the distribution is even // Verify that the distribution is even
expectedItemsPerBucket := float64(itemsCount / bucketsCount) expectedItemsPerBucket := itemsCount / bucketsCount
allowedDeviation := math.Round(float64(expectedItemsPerBucket) * 0.04)
for _, n := range m { for _, n := range m {
if math.Abs(expectedItemsPerBucket-float64(n)) > allowedDeviation { if math.Abs(1-float64(n)/float64(expectedItemsPerBucket)) > 0.04 {
t.Fatalf("unexpected items in the bucket for %d buckets; got %d; want in range [%.0f, %.0f]", t.Fatalf("unexpected items in the bucket for %d buckets; got %d; want around %d", bucketsCount, n, expectedItemsPerBucket)
bucketsCount, n, expectedItemsPerBucket-allowedDeviation, expectedItemsPerBucket+allowedDeviation)
} }
} }
} }
@@ -61,8 +57,8 @@ func TestGetLabelsHash_Distribution(t *testing.T) {
f(10) f(10)
} }
func TestRemoteWriteContext_TryPushTimeSeries(t *testing.T) { func TestRemoteWriteContext_TryPush_ImmutableTimeseries(t *testing.T) {
f := func(streamAggrConfig, relabelConfig string, enableWindows bool, dedupInterval time.Duration, keepInput, dropInput bool, input string, expectedRowsPushedAfterRelabel, expectedPushedSample int) { f := func(streamAggrConfig, relabelConfig string, enableWindows bool, dedupInterval time.Duration, keepInput, dropInput bool, input string) {
t.Helper() t.Helper()
perURLRelabel, err := promrelabel.ParseRelabelConfigsData([]byte(relabelConfig)) perURLRelabel, err := promrelabel.ParseRelabelConfigsData([]byte(relabelConfig))
if err != nil { if err != nil {
@@ -75,16 +71,10 @@ func TestRemoteWriteContext_TryPushTimeSeries(t *testing.T) {
} }
allRelabelConfigs.Store(rcs) allRelabelConfigs.Store(rcs)
path := "fast-queue-write-test"
fs.MustRemoveDir(path)
fq := persistentqueue.MustOpenFastQueue(path, "test", 100, 0, false)
defer fs.MustRemoveDir(path)
defer fq.MustClose()
pss := make([]*pendingSeries, 1) pss := make([]*pendingSeries, 1)
isVMProto := &atomic.Bool{} isVMProto := &atomic.Bool{}
isVMProto.Store(true) isVMProto.Store(true)
pss[0] = newPendingSeries(fq, isVMProto, 0, 100) pss[0] = newPendingSeries(nil, isVMProto, 0, 100)
rwctx := &remoteWriteCtx{ rwctx := &remoteWriteCtx{
idx: 0, idx: 0,
streamAggrKeepInput: keepInput, streamAggrKeepInput: keepInput,
@@ -93,8 +83,6 @@ func TestRemoteWriteContext_TryPushTimeSeries(t *testing.T) {
rowsPushedAfterRelabel: metrics.GetOrCreateCounter(`foo`), rowsPushedAfterRelabel: metrics.GetOrCreateCounter(`foo`),
rowsDroppedByRelabel: metrics.GetOrCreateCounter(`bar`), rowsDroppedByRelabel: metrics.GetOrCreateCounter(`bar`),
} }
defer metrics.UnregisterAllMetrics()
if dedupInterval > 0 { if dedupInterval > 0 {
rwctx.deduplicator = streamaggr.NewDeduplicator(nil, enableWindows, dedupInterval, nil, "dedup-global") rwctx.deduplicator = streamaggr.NewDeduplicator(nil, enableWindows, dedupInterval, nil, "dedup-global")
} }
@@ -116,27 +104,23 @@ func TestRemoteWriteContext_TryPushTimeSeries(t *testing.T) {
inputTss := prometheus.MustParsePromMetrics(input, offsetMsecs) inputTss := prometheus.MustParsePromMetrics(input, offsetMsecs)
expectedTss := make([]prompb.TimeSeries, len(inputTss)) expectedTss := make([]prompb.TimeSeries, len(inputTss))
// check inputTss is not modified after TryPushTimeSeries // copy inputTss to make sure it is not mutated during TryPush call
copy(expectedTss, inputTss) copy(expectedTss, inputTss)
if !rwctx.TryPushTimeSeries(inputTss, false) { if !rwctx.TryPushTimeSeries(inputTss, false) {
t.Fatalf("cannot push samples to rwctx") t.Fatalf("cannot push samples to rwctx")
} }
if int(rwctx.rowsPushedAfterRelabel.Get()) != expectedRowsPushedAfterRelabel {
t.Fatalf("unexpected number of rows after relabel; got %d; want %d", rwctx.rowsPushedAfterRelabel.Get(), expectedRowsPushedAfterRelabel)
}
if len(pss[0].wr.tss) != expectedPushedSample {
t.Fatalf("unexpected number of pushed samples; got %d; want %d", len(pss[0].wr.tss), expectedPushedSample)
}
if !reflect.DeepEqual(expectedTss, inputTss) { if !reflect.DeepEqual(expectedTss, inputTss) {
t.Fatalf("unexpected samples;\ngot\n%v\nwant\n%v", inputTss, expectedTss) t.Fatalf("unexpected samples;\ngot\n%v\nwant\n%v", inputTss, expectedTss)
} }
} }
// relabeling f(`
f(``, ` - interval: 1m
outputs: [sum_samples]
- interval: 2m
outputs: [count_series]
`, `
- action: keep - action: keep
source_labels: [env] source_labels: [env]
regex: "dev" regex: "dev"
@@ -145,66 +129,53 @@ metric{env="dev"} 10
metric{env="bar"} 20 metric{env="bar"} 20
metric{env="dev"} 15 metric{env="dev"} 15
metric{env="bar"} 25 metric{env="bar"} 25
`, 2, 2) `)
// relabeling + aggregation
f(`
- match: '{env="dev"}'
interval: 1m
outputs: [sum_samples]
`, `
- action: keep
source_labels: [env]
regex: ".*"
`, false, 0, false, false, `
metric{env="dev"} 10
metric{env="bar"} 20
metric{env="dev"} 15
metric{env="bar"} 25
`, 4, 2)
// aggregation + keepInput
f(`
- match: '{env="dev"}'
interval: 1m
outputs: [sum_samples]
`, ``, false, 0, true, false, `
metric{env="dev"} 10
metric{env="bar"} 20
metric{env="dev"} 15
metric{env="bar"} 25
`, 4, 4)
// aggregation + dropInput
f(`
- match: '{env="dev"}'
interval: 1m
outputs: [sum_samples]
`, ``, false, 0, false, true, `
metric{env="dev"} 10
metric{env="bar"} 20
metric{env="dev"} 15
metric{env="bar"} 25
`, 4, 0)
// aggregation + keepInput + dropInput
f(`
- match: '{env="dev"}'
interval: 1m
outputs: [sum_samples]
`, ``, false, 0, true, true, `
metric{env="dev"} 10
metric{env="bar"} 20
metric{env="bar"} 25
`, 3, 1)
// aggregation + deduplication
f(``, ``, true, time.Hour, false, false, ` f(``, ``, true, time.Hour, false, false, `
metric{env="dev"} 10 metric{env="dev"} 10
metric{env="foo"} 20 metric{env="foo"} 20
metric{env="dev"} 15 metric{env="dev"} 15
metric{env="foo"} 25 metric{env="foo"} 25
`, 4, 0) `)
f(``, `
- action: keep
source_labels: [env]
regex: "dev"
`, true, time.Hour, false, false, `
metric{env="dev"} 10
metric{env="bar"} 20
metric{env="dev"} 15
metric{env="bar"} 25
`)
f(``, `
- action: keep
source_labels: [env]
regex: "dev"
`, true, time.Hour, true, false, `
metric{env="test"} 10
metric{env="dev"} 20
metric{env="foo"} 15
metric{env="dev"} 25
`)
f(``, `
- action: keep
source_labels: [env]
regex: "dev"
`, true, time.Hour, false, true, `
metric{env="foo"} 10
metric{env="dev"} 20
metric{env="foo"} 15
metric{env="dev"} 25
`)
f(``, `
- action: keep
source_labels: [env]
regex: "dev"
`, true, time.Hour, true, true, `
metric{env="dev"} 10
metric{env="test"} 20
metric{env="dev"} 15
metric{env="bar"} 25
`)
} }
func TestShardAmountRemoteWriteCtx(t *testing.T) { func TestShardAmountRemoteWriteCtx(t *testing.T) {
@@ -250,7 +221,7 @@ func TestShardAmountRemoteWriteCtx(t *testing.T) {
seriesCount := 100000 seriesCount := 100000
// build 100000 series // build 100000 series
tssBlock := make([]prompb.TimeSeries, 0, seriesCount) tssBlock := make([]prompb.TimeSeries, 0, seriesCount)
for i := range seriesCount { for i := 0; i < seriesCount; i++ {
tssBlock = append(tssBlock, prompb.TimeSeries{ tssBlock = append(tssBlock, prompb.TimeSeries{
Labels: []prompb.Label{ Labels: []prompb.Label{
{ {
@@ -271,7 +242,7 @@ func TestShardAmountRemoteWriteCtx(t *testing.T) {
// build active time series set // build active time series set
nodes := make([]string, 0, remoteWriteCount) nodes := make([]string, 0, remoteWriteCount)
activeTimeSeriesByNodes := make([]map[string]struct{}, remoteWriteCount) activeTimeSeriesByNodes := make([]map[string]struct{}, remoteWriteCount)
for i := range remoteWriteCount { for i := 0; i < remoteWriteCount; i++ {
nodes = append(nodes, fmt.Sprintf("node%d", i)) nodes = append(nodes, fmt.Sprintf("node%d", i))
activeTimeSeriesByNodes[i] = make(map[string]struct{}) activeTimeSeriesByNodes[i] = make(map[string]struct{})
} }

View File

@@ -18,12 +18,12 @@ var (
streamAggrGlobalConfig = flag.String("streamAggr.config", "", "Optional path to file with stream aggregation config. "+ streamAggrGlobalConfig = flag.String("streamAggr.config", "", "Optional path to file with stream aggregation config. "+
"See https://docs.victoriametrics.com/victoriametrics/stream-aggregation/ . "+ "See https://docs.victoriametrics.com/victoriametrics/stream-aggregation/ . "+
"See also -streamAggr.keepInput, -streamAggr.dropInput and -streamAggr.dedupInterval") "See also -streamAggr.keepInput, -streamAggr.dropInput and -streamAggr.dedupInterval")
streamAggrGlobalKeepInput = flag.Bool("streamAggr.keepInput", false, "Whether to keep input samples that match any rule in "+ streamAggrGlobalKeepInput = flag.Bool("streamAggr.keepInput", false, "Whether to keep all the input samples after the aggregation "+
"-streamAggr.config. By default, matched raw samples are aggregated and dropped, while unmatched samples "+ "with -streamAggr.config. By default, only aggregates samples are dropped, while the remaining samples "+
"are written to the remote storage. See also -streamAggr.dropInput and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/") "are written to remote storages write. See also -streamAggr.dropInput and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/")
streamAggrGlobalDropInput = flag.Bool("streamAggr.dropInput", false, "Whether to drop input samples that not matching any rule in "+ streamAggrGlobalDropInput = flag.Bool("streamAggr.dropInput", false, "Whether to drop all the input samples after the aggregation "+
"-streamAggr.config. By default, only matched raw samples are dropped, while unmatched samples "+ "with -remoteWrite.streamAggr.config. By default, only aggregates samples are dropped, while the remaining samples "+
"are written to the remote storage. See also -streamAggr.keepInput and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/") "are written to remote storages write. See also -streamAggr.keepInput and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/")
streamAggrGlobalDedupInterval = flag.Duration("streamAggr.dedupInterval", 0, "Input samples are de-duplicated with this interval on "+ streamAggrGlobalDedupInterval = flag.Duration("streamAggr.dedupInterval", 0, "Input samples are de-duplicated with this interval on "+
"aggregator before optional aggregation with -streamAggr.config . "+ "aggregator before optional aggregation with -streamAggr.config . "+
"See also -dedup.minScrapeInterval and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/#deduplication") "See also -dedup.minScrapeInterval and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/#deduplication")
@@ -43,11 +43,11 @@ var (
streamAggrConfig = flagutil.NewArrayString("remoteWrite.streamAggr.config", "Optional path to file with stream aggregation config for the corresponding -remoteWrite.url. "+ streamAggrConfig = flagutil.NewArrayString("remoteWrite.streamAggr.config", "Optional path to file with stream aggregation config for the corresponding -remoteWrite.url. "+
"See https://docs.victoriametrics.com/victoriametrics/stream-aggregation/ . "+ "See https://docs.victoriametrics.com/victoriametrics/stream-aggregation/ . "+
"See also -remoteWrite.streamAggr.keepInput, -remoteWrite.streamAggr.dropInput and -remoteWrite.streamAggr.dedupInterval") "See also -remoteWrite.streamAggr.keepInput, -remoteWrite.streamAggr.dropInput and -remoteWrite.streamAggr.dedupInterval")
streamAggrDropInput = flagutil.NewArrayBool("remoteWrite.streamAggr.dropInput", "Whether to drop input samples that not matching any rule in "+ streamAggrDropInput = flagutil.NewArrayBool("remoteWrite.streamAggr.dropInput", "Whether to drop all the input samples after the aggregation "+
"the corresponding -remoteWrite.streamAggr.config. By default, only matched raw samples are dropped, while unmatched samples "+ "with -remoteWrite.streamAggr.config at the corresponding -remoteWrite.url. By default, only aggregates samples are dropped, while the remaining samples "+
"are written to the corresponding -remoteWrite.url . See also -remoteWrite.streamAggr.keepInput and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/") "are written to the corresponding -remoteWrite.url . See also -remoteWrite.streamAggr.keepInput and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/")
streamAggrKeepInput = flagutil.NewArrayBool("remoteWrite.streamAggr.keepInput", "Whether to keep input samples that match any rule in "+ streamAggrKeepInput = flagutil.NewArrayBool("remoteWrite.streamAggr.keepInput", "Whether to keep all the input samples after the aggregation "+
"the corresponding -remoteWrite.streamAggr.config. By default, matched raw samples are aggregated and dropped, while unmatched samples "+ "with -remoteWrite.streamAggr.config at the corresponding -remoteWrite.url. By default, only aggregates samples are dropped, while the remaining samples "+
"are written to the corresponding -remoteWrite.url . See also -remoteWrite.streamAggr.dropInput and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/") "are written to the corresponding -remoteWrite.url . See also -remoteWrite.streamAggr.dropInput and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/")
streamAggrDedupInterval = flagutil.NewArrayDuration("remoteWrite.streamAggr.dedupInterval", 0, "Input samples are de-duplicated with this interval before optional aggregation "+ streamAggrDedupInterval = flagutil.NewArrayDuration("remoteWrite.streamAggr.dedupInterval", 0, "Input samples are de-duplicated with this interval before optional aggregation "+
"with -remoteWrite.streamAggr.config at the corresponding -remoteWrite.url. See also -dedup.minScrapeInterval and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/#deduplication") "with -remoteWrite.streamAggr.config at the corresponding -remoteWrite.url. See also -dedup.minScrapeInterval and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/#deduplication")

View File

@@ -1,80 +0,0 @@
package zabbixconnector
import (
"net/http"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/protoparserutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/zabbixconnector"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/zabbixconnector/stream"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
"github.com/VictoriaMetrics/metrics"
)
// Metrics updated by insertRows after a successful push to remote write.
var (
// rowsInserted is increased by the number of rows in every pushed batch.
rowsInserted = metrics.NewCounter(`vmagent_rows_inserted_total{type="zabbixconnector"}`)
// rowsTenantInserted is increased by the same amount for the auth token of the request, when one is set.
rowsTenantInserted = tenantmetrics.NewCounterMap(`vmagent_tenant_inserted_rows_total{type="zabbixconnector"}`)
// rowsPerInsert records the number of rows in every pushed batch.
rowsPerInsert = metrics.NewHistogram(`vmagent_rows_per_insert{type="zabbixconnector"}`)
)
// InsertHandlerForHTTP handles a ZabbixConnector POST /zabbixconnector/v1/history
// request and forwards the parsed rows to remote write.
//
// Extra labels are obtained from req via protoparserutil.GetExtraLabels and
// attached to every row. The request body is parsed by stream.Parse, which
// is given the value of the Content-Encoding header. Any error from
// extra-label parsing, body parsing or row insertion is returned to the caller.
func InsertHandlerForHTTP(at *auth.Token, req *http.Request) error {
	extraLabels, err := protoparserutil.GetExtraLabels(req)
	if err != nil {
		return err
	}
	// pushRows is called by the stream parser with parsed rows.
	pushRows := func(rows []zabbixconnector.Row) error {
		return insertRows(at, rows, extraLabels)
	}
	return stream.Parse(req.Body, req.Header.Get("Content-Encoding"), pushRows)
}
// insertRows converts rows into remote-write time series and pushes them
// via remotewrite.TryPush.
//
// Every row becomes one time series with a single sample. Its labels are
// the row tags followed by extraLabels. The at token is passed through to
// remotewrite.TryPush and is used for the per-tenant row counter when it
// is non-nil.
//
// It returns remotewrite.ErrQueueFullHTTPRetry when TryPush reports that
// the data was not accepted; the insert metrics are updated only after a
// successful push.
func insertRows(at *auth.Token, rows []zabbixconnector.Row, extraLabels []prompb.Label) error {
	pushCtx := common.GetPushCtx()
	defer common.PutPushCtx(pushCtx)

	// Reuse the buffers kept in the push context, starting from zero length.
	series := pushCtx.WriteRequest.Timeseries[:0]
	labelBuf := pushCtx.Labels[:0]
	sampleBuf := pushCtx.Samples[:0]

	for rowIdx := range rows {
		row := &rows[rowIdx]

		// Labels of this row occupy labelBuf[labelsStart:].
		labelsStart := len(labelBuf)
		for tagIdx := range row.Tags {
			labelBuf = append(labelBuf, prompb.Label{
				Name:  bytesutil.ToUnsafeString(row.Tags[tagIdx].Key),
				Value: bytesutil.ToUnsafeString(row.Tags[tagIdx].Value),
			})
		}
		labelBuf = append(labelBuf, extraLabels...)

		// The single sample of this row occupies sampleBuf[samplesStart:].
		samplesStart := len(sampleBuf)
		sampleBuf = append(sampleBuf, prompb.Sample{
			Value:     row.Value,
			Timestamp: row.Timestamp,
		})

		series = append(series, prompb.TimeSeries{
			Labels:  labelBuf[labelsStart:],
			Samples: sampleBuf[samplesStart:],
		})
	}

	// Hand the (possibly grown) buffers back to the push context.
	pushCtx.WriteRequest.Timeseries = series
	pushCtx.Labels = labelBuf
	pushCtx.Samples = sampleBuf

	if ok := remotewrite.TryPush(at, &pushCtx.WriteRequest); !ok {
		return remotewrite.ErrQueueFullHTTPRetry
	}

	n := len(rows)
	rowsInserted.Add(n)
	if at != nil {
		rowsTenantInserted.Get(at).Add(n)
	}
	rowsPerInsert.Update(float64(n))
	return nil
}

View File

@@ -27,9 +27,6 @@ vmalert-tool-linux-ppc64le-prod:
vmalert-tool-linux-386-prod: vmalert-tool-linux-386-prod:
APP_NAME=vmalert-tool $(MAKE) app-via-docker-linux-386 APP_NAME=vmalert-tool $(MAKE) app-via-docker-linux-386
vmalert-tool-linux-s390x-prod:
APP_NAME=vmalert-tool $(MAKE) app-via-docker-linux-s390x
vmalert-tool-darwin-amd64-prod: vmalert-tool-darwin-amd64-prod:
APP_NAME=vmalert-tool $(MAKE) app-via-docker-darwin-amd64 APP_NAME=vmalert-tool $(MAKE) app-via-docker-darwin-amd64

View File

@@ -52,7 +52,7 @@ func writeInputSeries(input []series, interval *promutil.Duration, startStamp ti
data := testutil.Compress(r) data := testutil.Compress(r)
// write input series to vm // write input series to vm
httpWrite(dst, bytes.NewBuffer(data)) httpWrite(dst, bytes.NewBuffer(data))
vmstorage.DebugFlush() vmstorage.Storage.DebugFlush()
return nil return nil
} }

View File

@@ -41,7 +41,7 @@ func TestParseInputValue_Success(t *testing.T) {
if len(outputExpected) != len(output) { if len(outputExpected) != len(output) {
t.Fatalf("unexpected output length; got %d; want %d", len(outputExpected), len(output)) t.Fatalf("unexpected output length; got %d; want %d", len(outputExpected), len(output))
} }
for i := range outputExpected { for i := 0; i < len(outputExpected); i++ {
if outputExpected[i].Omitted != output[i].Omitted { if outputExpected[i].Omitted != output[i].Omitted {
t.Fatalf("unexpected Omitted field in the output\ngot\n%v\nwant\n%v", output, outputExpected) t.Fatalf("unexpected Omitted field in the output\ngot\n%v\nwant\n%v", output, outputExpected)
} }

View File

@@ -4,7 +4,6 @@ import (
"context" "context"
"flag" "flag"
"fmt" "fmt"
"maps"
"net" "net"
"net/http" "net/http"
"net/http/httptest" "net/http/httptest"
@@ -13,7 +12,6 @@ import (
"os/signal" "os/signal"
"path/filepath" "path/filepath"
"reflect" "reflect"
"slices"
"sort" "sort"
"strings" "strings"
"syscall" "syscall"
@@ -36,7 +34,6 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs" "github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver" "github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger" "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutil" "github.com/VictoriaMetrics/VictoriaMetrics/lib/promutil"
"github.com/VictoriaMetrics/metrics" "github.com/VictoriaMetrics/metrics"
) )
@@ -61,7 +58,7 @@ func UnitTest(files []string, disableGroupLabel bool, externalLabels []string, e
} }
eu, err := url.Parse(externalURL) eu, err := url.Parse(externalURL)
if err != nil { if err != nil {
logger.Fatalf("failed to parse external URL: %s", err) logger.Fatalf("failed to parse external URL: %w", err)
} }
if err := templates.Load([]string{}, *eu); err != nil { if err := templates.Load([]string{}, *eu); err != nil {
logger.Fatalf("failed to load template: %v", err) logger.Fatalf("failed to load template: %v", err)
@@ -87,8 +84,7 @@ func UnitTest(files []string, disableGroupLabel bool, externalLabels []string, e
defer server.Close() defer server.Close()
} else { } else {
httpListenAddr = httpListenPort httpListenAddr = httpListenPort
ln, err := net.Listen("tcp", fmt.Sprintf(":%s", httpListenPort))
ln, err := net.Listen(netutil.GetTCPNetwork(), fmt.Sprintf(":%s", httpListenPort))
if err != nil { if err != nil {
logger.Fatalf("cannot listen on port %s: %v", httpListenPort, err) logger.Fatalf("cannot listen on port %s: %v", httpListenPort, err)
} }
@@ -108,9 +104,7 @@ func UnitTest(files []string, disableGroupLabel bool, externalLabels []string, e
storagePath = tmpFolder storagePath = tmpFolder
processFlags() processFlags()
vminsert.Init() vminsert.Init()
const maxConcurrentRequests = 4 vmselect.Init()
maxQueueDuration := 5 * time.Second
vmselect.Init(maxConcurrentRequests, maxQueueDuration)
// storagePath will be created again when closing vmselect, so remove it again. // storagePath will be created again when closing vmselect, so remove it again.
defer fs.MustRemoveDir(storagePath) defer fs.MustRemoveDir(storagePath)
defer vminsert.Stop() defer vminsert.Stop()
@@ -136,7 +130,7 @@ func UnitTest(files []string, disableGroupLabel bool, externalLabels []string, e
} }
labels[s[:n]] = s[n+1:] labels[s[:n]] = s[n+1:]
} }
err = notifier.Init(labels, externalURL) _, err = notifier.Init(nil, labels, externalURL)
if err != nil { if err != nil {
logger.Fatalf("failed to init notifier: %v", err) logger.Fatalf("failed to init notifier: %v", err)
} }
@@ -281,8 +275,7 @@ func processFlags() {
} }
func setUp() { func setUp() {
const maxConcurrentRequests = 4 vmstorage.Init(promql.ResetRollupResultCacheIfNeeded)
vmstorage.Init(maxConcurrentRequests, promql.ResetRollupResultCacheIfNeeded)
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
defer cancel() defer cancel()
readyCheckFunc := func() bool { readyCheckFunc := func() bool {
@@ -353,7 +346,9 @@ func (tg *testGroup) test(evalInterval time.Duration, groupOrderMap map[string]i
for k := range alertEvalTimesMap { for k := range alertEvalTimesMap {
alertEvalTimes = append(alertEvalTimes, k) alertEvalTimes = append(alertEvalTimes, k)
} }
slices.Sort(alertEvalTimes) sort.Slice(alertEvalTimes, func(i, j int) bool {
return alertEvalTimes[i] < alertEvalTimes[j]
})
// sort group eval order according to the given "group_eval_order". // sort group eval order according to the given "group_eval_order".
sort.Slice(testGroups, func(i, j int) bool { sort.Slice(testGroups, func(i, j int) bool {
@@ -364,8 +359,12 @@ func (tg *testGroup) test(evalInterval time.Duration, groupOrderMap map[string]i
var groups []*rule.Group var groups []*rule.Group
for _, group := range testGroups { for _, group := range testGroups {
mergedExternalLabels := make(map[string]string) mergedExternalLabels := make(map[string]string)
maps.Copy(mergedExternalLabels, tg.ExternalLabels) for k, v := range tg.ExternalLabels {
maps.Copy(mergedExternalLabels, externalLabels) mergedExternalLabels[k] = v
}
for k, v := range externalLabels {
mergedExternalLabels[k] = v
}
ng := rule.NewGroup(group, q, time.Minute, mergedExternalLabels) ng := rule.NewGroup(group, q, time.Minute, mergedExternalLabels)
ng.Init() ng.Init()
groups = append(groups, ng) groups = append(groups, ng)
@@ -378,7 +377,7 @@ func (tg *testGroup) test(evalInterval time.Duration, groupOrderMap map[string]i
if len(g.Rules) == 0 { if len(g.Rules) == 0 {
continue continue
} }
errs := g.ExecOnce(context.Background(), rw, ts) errs := g.ExecOnce(context.Background(), func() []notifier.Notifier { return nil }, rw, ts)
for err := range errs { for err := range errs {
if err != nil { if err != nil {
checkErrs = append(checkErrs, fmt.Errorf("\nfailed to exec group: %q, time: %s, err: %w", g.Name, checkErrs = append(checkErrs, fmt.Errorf("\nfailed to exec group: %q, time: %s, err: %w", g.Name,
@@ -387,7 +386,7 @@ func (tg *testGroup) test(evalInterval time.Duration, groupOrderMap map[string]i
} }
} }
// flush series after each group evaluation // flush series after each group evaluation
vmstorage.DebugFlush() vmstorage.Storage.DebugFlush()
} }
// check alert_rule_test case at every eval time // check alert_rule_test case at every eval time

View File

@@ -27,9 +27,6 @@ vmalert-linux-ppc64le-prod:
vmalert-linux-386-prod: vmalert-linux-386-prod:
APP_NAME=vmalert $(MAKE) app-via-docker-linux-386 APP_NAME=vmalert $(MAKE) app-via-docker-linux-386
vmalert-linux-s390x-prod:
APP_NAME=vmalert $(MAKE) app-via-docker-linux-s390x
vmalert-darwin-amd64-prod: vmalert-darwin-amd64-prod:
APP_NAME=vmalert $(MAKE) app-via-docker-darwin-amd64 APP_NAME=vmalert $(MAKE) app-via-docker-darwin-amd64

View File

@@ -31,7 +31,7 @@ type Group struct {
// EvalDelay will adjust the `time` parameter of rule evaluation requests to compensate intentional query delay from datasource. // EvalDelay will adjust the `time` parameter of rule evaluation requests to compensate intentional query delay from datasource.
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5155 // see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5155
EvalDelay *promutil.Duration `yaml:"eval_delay,omitempty"` EvalDelay *promutil.Duration `yaml:"eval_delay,omitempty"`
Limit *int `yaml:"limit,omitempty"` Limit int `yaml:"limit,omitempty"`
Rules []Rule `yaml:"rules"` Rules []Rule `yaml:"rules"`
Concurrency int `yaml:"concurrency"` Concurrency int `yaml:"concurrency"`
// Labels is a set of label value pairs, that will be added to every rule. // Labels is a set of label value pairs, that will be added to every rule.
@@ -81,15 +81,18 @@ func (g *Group) Validate(validateTplFn ValidateTplFn, validateExpressions bool)
if g.Interval.Duration() < 0 { if g.Interval.Duration() < 0 {
return fmt.Errorf("interval shouldn't be lower than 0") return fmt.Errorf("interval shouldn't be lower than 0")
} }
// if `eval_offset` is set, the group interval must be specified explicitly(instead of inherited from global evaluationInterval flag) and must bigger than offset. if g.EvalOffset.Duration() < 0 {
if g.EvalOffset.Duration().Abs() > g.Interval.Duration() { return fmt.Errorf("eval_offset shouldn't be lower than 0")
return fmt.Errorf("the abs value of eval_offset should be smaller than interval; now eval_offset: %v, interval: %v", g.EvalOffset.Duration(), g.Interval.Duration()) }
// if `eval_offset` is set, interval won't use global evaluationInterval flag and must bigger than offset.
if g.EvalOffset.Duration() > g.Interval.Duration() {
return fmt.Errorf("eval_offset should be smaller than interval; now eval_offset: %v, interval: %v", g.EvalOffset.Duration(), g.Interval.Duration())
} }
if g.EvalOffset != nil && g.EvalDelay != nil { if g.EvalOffset != nil && g.EvalDelay != nil {
return fmt.Errorf("eval_offset cannot be used with eval_delay") return fmt.Errorf("eval_offset cannot be used with eval_delay")
} }
if g.Limit != nil && *g.Limit < 0 { if g.Limit < 0 {
return fmt.Errorf("invalid limit %d, shouldn't be less than 0", *g.Limit) return fmt.Errorf("invalid limit %d, shouldn't be less than 0", g.Limit)
} }
if g.Concurrency < 0 { if g.Concurrency < 0 {
return fmt.Errorf("invalid concurrency %d, shouldn't be less than 0", g.Concurrency) return fmt.Errorf("invalid concurrency %d, shouldn't be less than 0", g.Concurrency)
@@ -113,15 +116,15 @@ func (g *Group) Validate(validateTplFn ValidateTplFn, validateExpressions bool)
// because correct types must be inherited after unmarshalling. // because correct types must be inherited after unmarshalling.
exprValidator := g.Type.ValidateExpr exprValidator := g.Type.ValidateExpr
if err := exprValidator(r.Expr); err != nil { if err := exprValidator(r.Expr); err != nil {
return fmt.Errorf("invalid expression for rule %q: %w", ruleName, err) return fmt.Errorf("invalid expression for rule %q: %w", ruleName, err)
} }
} }
if validateTplFn != nil { if validateTplFn != nil {
if err := validateTplFn(r.Annotations); err != nil { if err := validateTplFn(r.Annotations); err != nil {
return fmt.Errorf("invalid annotations for rule %q: %w", ruleName, err) return fmt.Errorf("invalid annotations for rule %q: %w", ruleName, err)
} }
if err := validateTplFn(r.Labels); err != nil { if err := validateTplFn(r.Labels); err != nil {
return fmt.Errorf("invalid labels for rule %q: %w", ruleName, err) return fmt.Errorf("invalid labels for rule %q: %w", ruleName, err)
} }
} }
} }
@@ -222,9 +225,6 @@ func (r *Rule) Validate() error {
if r.Expr == "" { if r.Expr == "" {
return fmt.Errorf("expression can't be empty") return fmt.Errorf("expression can't be empty")
} }
if _, ok := r.Labels["__name__"]; ok {
return fmt.Errorf("invalid rule label __name__")
}
return checkOverflow(r.XXX, "rule") return checkOverflow(r.XXX, "rule")
} }

View File

@@ -116,12 +116,12 @@ func TestParse_Failure(t *testing.T) {
f([]string{"testdata/rules/rules_interval_bad.rules"}, "eval_offset should be smaller than interval") f([]string{"testdata/rules/rules_interval_bad.rules"}, "eval_offset should be smaller than interval")
f([]string{"testdata/rules/rules0-bad.rules"}, "unexpected token") f([]string{"testdata/rules/rules0-bad.rules"}, "unexpected token")
f([]string{"testdata/dir/rules0-bad.rules"}, "invalid annotations") f([]string{"testdata/dir/rules0-bad.rules"}, "error parsing annotation")
f([]string{"testdata/dir/rules1-bad.rules"}, "duplicate in file") f([]string{"testdata/dir/rules1-bad.rules"}, "duplicate in file")
f([]string{"testdata/dir/rules2-bad.rules"}, "function \"unknown\" not defined") f([]string{"testdata/dir/rules2-bad.rules"}, "function \"unknown\" not defined")
f([]string{"testdata/dir/rules3-bad.rules"}, "either `record` or `alert` must be set") f([]string{"testdata/dir/rules3-bad.rules"}, "either `record` or `alert` must be set")
f([]string{"testdata/dir/rules4-bad.rules"}, "either `record` or `alert` must be set") f([]string{"testdata/dir/rules4-bad.rules"}, "either `record` or `alert` must be set")
f([]string{"testdata/rules/rules1-bad.rules"}, "bad GraphiteQL expr") f([]string{"testdata/rules/rules1-bad.rules"}, "bad graphite expr")
f([]string{"testdata/rules/vlog-rules0-bad.rules"}, "bad LogsQL expr") f([]string{"testdata/rules/vlog-rules0-bad.rules"}, "bad LogsQL expr")
f([]string{"testdata/dir/rules6-bad.rules"}, "missing ':' in header") f([]string{"testdata/dir/rules6-bad.rules"}, "missing ':' in header")
f([]string{"testdata/rules/rules-multi-doc-bad.rules"}, "unknown fields") f([]string{"testdata/rules/rules-multi-doc-bad.rules"}, "unknown fields")
@@ -136,9 +136,6 @@ func TestRuleValidate(t *testing.T) {
if err := (&Rule{Alert: "alert"}).Validate(); err == nil { if err := (&Rule{Alert: "alert"}).Validate(); err == nil {
t.Fatalf("expected empty expr error") t.Fatalf("expected empty expr error")
} }
if err := (&Rule{Record: "record", Expr: "sum(test)", Labels: map[string]string{"__name__": "test"}}).Validate(); err == nil {
t.Fatalf("invalid rule label; got %s", err)
}
if err := (&Rule{Alert: "alert", Expr: "test>0"}).Validate(); err != nil { if err := (&Rule{Alert: "alert", Expr: "test>0"}).Validate(); err != nil {
t.Fatalf("expected valid rule; got %s", err) t.Fatalf("expected valid rule; got %s", err)
} }
@@ -179,21 +176,14 @@ func TestGroupValidate_Failure(t *testing.T) {
}, false, "interval shouldn't be lower than 0") }, false, "interval shouldn't be lower than 0")
f(&Group{ f(&Group{
Name: "too big eval_offset", Name: "wrong eval_offset",
Interval: promutil.NewDuration(time.Minute), Interval: promutil.NewDuration(time.Minute),
EvalOffset: promutil.NewDuration(2 * time.Minute), EvalOffset: promutil.NewDuration(2 * time.Minute),
}, false, "eval_offset should be smaller than interval") }, false, "eval_offset should be smaller than interval")
f(&Group{
Name: "too big negative eval_offset",
Interval: promutil.NewDuration(time.Minute),
EvalOffset: promutil.NewDuration(-2 * time.Minute),
}, false, "eval_offset should be smaller than interval")
limit := -1
f(&Group{ f(&Group{
Name: "wrong limit", Name: "wrong limit",
Limit: &limit, Limit: -1,
}, false, "invalid limit") }, false, "invalid limit")
f(&Group{ f(&Group{
@@ -283,7 +273,7 @@ func TestGroupValidate_Failure(t *testing.T) {
Expr: "up | 0", Expr: "up | 0",
}, },
}, },
}, true, "bad MetricsQL expr") }, true, "bad prometheus expr")
f(&Group{ f(&Group{
Name: "test graphite expr", Name: "test graphite expr",
@@ -293,7 +283,7 @@ func TestGroupValidate_Failure(t *testing.T) {
"description": "some-description", "description": "some-description",
}}, }},
}, },
}, true, "bad GraphiteQL expr") }, true, "bad graphite expr")
f(&Group{ f(&Group{
Name: "test vlogs expr", Name: "test vlogs expr",
@@ -327,7 +317,7 @@ func TestGroupValidate_Failure(t *testing.T) {
Expr: "sum(up == 0 ) by (host)", Expr: "sum(up == 0 ) by (host)",
}, },
}, },
}, true, "bad GraphiteQL expr") }, true, "bad graphite expr")
f(&Group{ f(&Group{
Name: "test vlogs with prometheus exp", Name: "test vlogs with prometheus exp",
@@ -351,7 +341,8 @@ func TestGroupValidate_Failure(t *testing.T) {
For: promutil.NewDuration(10 * time.Millisecond), For: promutil.NewDuration(10 * time.Millisecond),
}, },
}, },
}, true, "bad MetricsQL expr") }, true, "bad prometheus expr")
} }
func TestGroupValidate_Success(t *testing.T) { func TestGroupValidate_Success(t *testing.T) {

View File

@@ -2,7 +2,6 @@ package config
import ( import (
"fmt" "fmt"
"slices"
"strings" "strings"
"github.com/VictoriaMetrics/VictoriaLogs/lib/logstorage" "github.com/VictoriaMetrics/VictoriaLogs/lib/logstorage"
@@ -66,23 +65,24 @@ func (t *Type) ValidateExpr(expr string) error {
switch t.String() { switch t.String() {
case "graphite": case "graphite":
if _, err := graphiteql.Parse(expr); err != nil { if _, err := graphiteql.Parse(expr); err != nil {
return fmt.Errorf("bad GraphiteQL expr: %q, err: %w", expr, err) return fmt.Errorf("bad graphite expr: %q, err: %w", expr, err)
} }
case "prometheus": case "prometheus":
if _, err := metricsql.Parse(expr); err != nil { if _, err := metricsql.Parse(expr); err != nil {
return fmt.Errorf("bad MetricsQL expr: %q, err: %w", expr, err) return fmt.Errorf("bad prometheus expr: %q, err: %w", expr, err)
} }
case "vlogs": case "vlogs":
q, err := logstorage.ParseStatsQuery(expr, 0) q, err := logstorage.ParseStatsQuery(expr, 0)
if err != nil { if err != nil {
return fmt.Errorf("bad LogsQL expr: %q, err: %w", expr, err) return fmt.Errorf("bad LogsQL expr: %q, err: %w", expr, err)
} }
labels, err := q.GetStatsLabels() fields, _ := q.GetStatsByFields()
if err != nil { for i := range fields {
return fmt.Errorf("cannot obtain labels from LogsQL expr: %q, err: %w", expr, err) // VictoriaLogs inserts `_time` field as a label in result when query with `stats by (_time:step)`,
} // making the result meaningless and may lead to cardinality issues.
if slices.Contains(labels, "_time") { if fields[i] == "_time" {
return fmt.Errorf("bad LogsQL expr: %q, err: cannot contain time buckets stats pipe `stats by (_time:step)`", expr) return fmt.Errorf("bad LogsQL expr: %q, err: cannot contain time buckets stats pipe `stats by (_time:step)`", expr)
}
} }
default: default:
return fmt.Errorf("unknown datasource type=%q", t.Name) return fmt.Errorf("unknown datasource type=%q", t.Name)

View File

@@ -5,7 +5,6 @@ import (
"errors" "errors"
"fmt" "fmt"
"io" "io"
"maps"
"net/http" "net/http"
"net/url" "net/url"
"strings" "strings"
@@ -92,7 +91,9 @@ func (c *Client) Clone() *Client {
ns.extraHeaders = make([]keyValue, len(c.extraHeaders)) ns.extraHeaders = make([]keyValue, len(c.extraHeaders))
copy(ns.extraHeaders, c.extraHeaders) copy(ns.extraHeaders, c.extraHeaders)
} }
maps.Copy(ns.extraParams, c.extraParams) for k, v := range c.extraParams {
ns.extraParams[k] = v
}
return ns return ns
} }
@@ -172,26 +173,22 @@ func (c *Client) Query(ctx context.Context, query string, ts time.Time) (Result,
return Result{}, nil, fmt.Errorf("second attempt: %w", err) return Result{}, nil, fmt.Errorf("second attempt: %w", err)
} }
} }
defer func() { _ = resp.Body.Close() }()
// Process the received response. // Process the received response.
var parseFn func(resp *http.Response) (Result, error) var parseFn func(req *http.Request, resp *http.Response) (Result, error)
switch c.dataSourceType { switch c.dataSourceType {
case datasourcePrometheus: case datasourcePrometheus:
parseFn = parsePrometheusInstantResponse parseFn = parsePrometheusResponse
case datasourceGraphite: case datasourceGraphite:
parseFn = parseGraphiteResponse parseFn = parseGraphiteResponse
case datasourceVLogs: case datasourceVLogs:
parseFn = parseVLogsInstantResponse parseFn = parseVLogsResponse
default: default:
logger.Panicf("BUG: unsupported datasource type %q to parse query response", c.dataSourceType) logger.Panicf("BUG: unsupported datasource type %q to parse query response", c.dataSourceType)
} }
result, err := parseFn(req, resp)
result, err := parseFn(resp) _ = resp.Body.Close()
if err != nil { return result, req, err
return Result{}, nil, fmt.Errorf("error parsing response from %q: %w", req.URL.Redacted(), err)
}
return result, req, nil
} }
// QueryRange executes the given query on the given time range. // QueryRange executes the given query on the given time range.
@@ -232,23 +229,19 @@ func (c *Client) QueryRange(ctx context.Context, query string, start, end time.T
return res, fmt.Errorf("second attempt: %w", err) return res, fmt.Errorf("second attempt: %w", err)
} }
} }
defer func() { _ = resp.Body.Close() }()
// Process the received response. // Process the received response.
var parseFn func(resp *http.Response) (Result, error) var parseFn func(req *http.Request, resp *http.Response) (Result, error)
switch c.dataSourceType { switch c.dataSourceType {
case datasourcePrometheus: case datasourcePrometheus:
parseFn = parsePrometheusRangeResponse parseFn = parsePrometheusResponse
case datasourceVLogs: case datasourceVLogs:
parseFn = parseVLogsRangeResponse parseFn = parseVLogsResponse
default: default:
logger.Panicf("BUG: unsupported datasource type %q to parse query range response", c.dataSourceType) logger.Panicf("BUG: unsupported datasource type %q to parse query range response", c.dataSourceType)
} }
res, err = parseFn(req, resp)
res, err = parseFn(resp) _ = resp.Body.Close()
if err != nil {
return Result{}, fmt.Errorf("error parsing response from %q: %w", req.URL.Redacted(), err)
}
return res, err return res, err
} }

View File

@@ -33,10 +33,10 @@ func (r graphiteResponse) metrics() []Metric {
return ms return ms
} }
func parseGraphiteResponse(resp *http.Response) (Result, error) { func parseGraphiteResponse(req *http.Request, resp *http.Response) (Result, error) {
r := &graphiteResponse{} r := &graphiteResponse{}
if err := json.NewDecoder(resp.Body).Decode(r); err != nil { if err := json.NewDecoder(resp.Body).Decode(r); err != nil {
return Result{}, fmt.Errorf("error parsing graphite metrics: %w", err) return Result{}, fmt.Errorf("error parsing graphite metrics for %s: %w", req.URL.Redacted(), err)
} }
return Result{Data: r.metrics()}, nil return Result{Data: r.metrics()}, nil
} }

View File

@@ -34,7 +34,7 @@ type promResponse struct {
// Stats supported by VictoriaMetrics since v1.90 // Stats supported by VictoriaMetrics since v1.90
Stats struct { Stats struct {
SeriesFetched *string `json:"seriesFetched,omitempty"` SeriesFetched *string `json:"seriesFetched,omitempty"`
} `json:"stats"` } `json:"stats,omitempty"`
// IsPartial supported by VictoriaMetrics // IsPartial supported by VictoriaMetrics
IsPartial *bool `json:"isPartial,omitempty"` IsPartial *bool `json:"isPartial,omitempty"`
} }
@@ -172,26 +172,17 @@ const (
rtVector, rtMatrix, rScalar = "vector", "matrix", "scalar" rtVector, rtMatrix, rScalar = "vector", "matrix", "scalar"
) )
func parsePromResponse(resp *http.Response) (*promResponse, error) { func parsePrometheusResponse(req *http.Request, resp *http.Response) (res Result, err error) {
r := &promResponse{} r := &promResponse{}
if err := json.NewDecoder(resp.Body).Decode(r); err != nil { if err = json.NewDecoder(resp.Body).Decode(r); err != nil {
return nil, fmt.Errorf("failed to decode response: %w", err) return res, fmt.Errorf("error parsing response from %s: %w", req.URL.Redacted(), err)
} }
if r.Status == statusError { if r.Status == statusError {
return nil, fmt.Errorf("response error %q: %s", r.ErrorType, r.Error) return res, fmt.Errorf("response error, query: %s, errorType: %s, error: %s", req.URL.Redacted(), r.ErrorType, r.Error)
} }
if r.Status != statusSuccess { if r.Status != statusSuccess {
return nil, fmt.Errorf("unknown response status %q", r.Status) return res, fmt.Errorf("unknown status: %s, Expected success or error", r.Status)
} }
return r, nil
}
func parsePrometheusInstantResponse(resp *http.Response) (res Result, err error) {
r, err := parsePromResponse(resp)
if err != nil {
return res, fmt.Errorf("failed to parse response: %w", err)
}
var parseFn func() ([]Metric, error) var parseFn func() ([]Metric, error)
switch r.Data.ResultType { switch r.Data.ResultType {
case rtVector: case rtVector:
@@ -200,6 +191,12 @@ func parsePrometheusInstantResponse(resp *http.Response) (res Result, err error)
return res, fmt.Errorf("unmarshal err %w; \n %#v", err, string(r.Data.Result)) return res, fmt.Errorf("unmarshal err %w; \n %#v", err, string(r.Data.Result))
} }
parseFn = pi.metrics parseFn = pi.metrics
case rtMatrix:
var pr promRange
if err := json.Unmarshal(r.Data.Result, &pr.Result); err != nil {
return res, err
}
parseFn = pr.metrics
case rScalar: case rScalar:
var ps promScalar var ps promScalar
if err := json.Unmarshal(r.Data.Result, &ps); err != nil { if err := json.Unmarshal(r.Data.Result, &ps); err != nil {
@@ -209,6 +206,7 @@ func parsePrometheusInstantResponse(resp *http.Response) (res Result, err error)
default: default:
return res, fmt.Errorf("unknown result type %q", r.Data.ResultType) return res, fmt.Errorf("unknown result type %q", r.Data.ResultType)
} }
ms, err := parseFn() ms, err := parseFn()
if err != nil { if err != nil {
return res, err return res, err
@@ -224,34 +222,6 @@ func parsePrometheusInstantResponse(resp *http.Response) (res Result, err error)
return res, nil return res, nil
} }
func parsePrometheusRangeResponse(resp *http.Response) (res Result, err error) {
r, err := parsePromResponse(resp)
if err != nil {
return res, fmt.Errorf("failed to parse response: %w", err)
}
if r.Data.ResultType != rtMatrix {
return res, fmt.Errorf("unexpected result type %q; expected result type %q", r.Data.ResultType, rtMatrix)
}
var pr promRange
if err := json.Unmarshal(r.Data.Result, &pr.Result); err != nil {
return res, err
}
ms, err := pr.metrics()
if err != nil {
return res, err
}
res = Result{Data: ms, IsPartial: r.IsPartial}
if r.Stats.SeriesFetched != nil {
intV, err := strconv.Atoi(*r.Stats.SeriesFetched)
if err != nil {
return res, fmt.Errorf("failed to convert stats.seriesFetched to int: %w", err)
}
res.SeriesFetched = &intV
}
return res, nil
}
func (c *Client) setPrometheusInstantReqParams(r *http.Request, query string, timestamp time.Time) { func (c *Client) setPrometheusInstantReqParams(r *http.Request, query string, timestamp time.Time) {
if c.appendTypePrefix { if c.appendTypePrefix {
r.URL.Path += "/prometheus" r.URL.Path += "/prometheus"

View File

@@ -65,23 +65,21 @@ func TestVMInstantQuery(t *testing.T) {
case 3: case 3:
w.Write([]byte(`{"status":"unknown"}`)) w.Write([]byte(`{"status":"unknown"}`))
case 4: case 4:
w.Write([]byte(`{"status":"success","data":{"resultType":"vector"}}`)) w.Write([]byte(`{"status":"success","data":{"resultType":"matrix"}}`))
case 5: case 5:
w.Write([]byte(`{"status":"success","data":{"resultType":"matrix","result":[{"metric":{"__name__":"vm_rows"},"values":[[1583786142,"13763"]]}]}}`))
case 6:
w.Write([]byte(`{"status":"success","data":{"resultType":"vector","result":[{"metric":{"__name__":"vm_rows","foo":"bar"},"value":[1583786142,"13763"]},{"metric":{"__name__":"vm_requests","foo":"baz"},"value":[1583786140,"2000"]}]}}`)) w.Write([]byte(`{"status":"success","data":{"resultType":"vector","result":[{"metric":{"__name__":"vm_rows","foo":"bar"},"value":[1583786142,"13763"]},{"metric":{"__name__":"vm_requests","foo":"baz"},"value":[1583786140,"2000"]}]}}`))
case 7: case 6:
w.Write([]byte(`{"status":"success","data":{"resultType":"scalar","result":[1583786142, "1"]}}`)) w.Write([]byte(`{"status":"success","data":{"resultType":"scalar","result":[1583786142, "1"]}}`))
case 8: case 7:
w.Write([]byte(`{"status":"success","data":{"resultType":"scalar","result":[1583786142, "1"]},"stats":{"seriesFetched": "42"}}`)) w.Write([]byte(`{"status":"success","data":{"resultType":"scalar","result":[1583786142, "1"]},"stats":{"seriesFetched": "42"}}`))
case 9: case 8:
w.Write([]byte(`{"status":"success", "isPartial":true, "data":{"resultType":"scalar","result":[1583786142, "1"]}}`)) w.Write([]byte(`{"status":"success", "isPartial":true, "data":{"resultType":"scalar","result":[1583786142, "1"]}}`))
} }
}) })
mux.HandleFunc("/render", func(w http.ResponseWriter, _ *http.Request) { mux.HandleFunc("/render", func(w http.ResponseWriter, _ *http.Request) {
c++ c++
switch c { switch c {
case 10: case 9:
w.Write([]byte(`[{"target":"constantLine(10)","tags":{"name":"constantLine(10)"},"datapoints":[[10,1611758343],[10,1611758373],[10,1611758403]]}]`)) w.Write([]byte(`[{"target":"constantLine(10)","tags":{"name":"constantLine(10)"},"datapoints":[[10,1611758343],[10,1611758373],[10,1611758403]]}]`))
} }
}) })
@@ -104,9 +102,9 @@ func TestVMInstantQuery(t *testing.T) {
t.Fatalf("failed to parse 'time' query param %q: %s", timeParam, err) t.Fatalf("failed to parse 'time' query param %q: %s", timeParam, err)
} }
switch c { switch c {
case 11: case 10:
w.Write([]byte("[]")) w.Write([]byte("[]"))
case 12: case 11:
w.Write([]byte(`{"status":"success","data":{"resultType":"vector","result":[{"metric":{"__name__":"total","foo":"bar"},"value":[1583786142,"13763"]},{"metric":{"__name__":"total","foo":"baz"},"value":[1583786140,"2000"]}]}}`)) w.Write([]byte(`{"status":"success","data":{"resultType":"vector","result":[{"metric":{"__name__":"total","foo":"bar"},"value":[1583786142,"13763"]},{"metric":{"__name__":"total","foo":"baz"},"value":[1583786140,"2000"]}]}}`))
} }
}) })
@@ -125,7 +123,6 @@ func TestVMInstantQuery(t *testing.T) {
ts := time.Now() ts := time.Now()
expErr := func(query, err string) { expErr := func(query, err string) {
t.Helper()
_, _, gotErr := pq.Query(ctx, query, ts) _, _, gotErr := pq.Query(ctx, query, ts)
if gotErr == nil { if gotErr == nil {
t.Fatalf("expected %q got nil", err) t.Fatalf("expected %q got nil", err)
@@ -138,11 +135,10 @@ func TestVMInstantQuery(t *testing.T) {
expErr(vmQuery, "500") // 0 expErr(vmQuery, "500") // 0
expErr(vmQuery, "error parsing response") // 1 expErr(vmQuery, "error parsing response") // 1
expErr(vmQuery, "response error") // 2 expErr(vmQuery, "response error") // 2
expErr(vmQuery, "unknown response status") // 3 expErr(vmQuery, "unknown status") // 3
expErr(vmQuery, "unexpected end of JSON input") // 4 expErr(vmQuery, "unexpected end of JSON input") // 4
expErr(vmQuery, "unknown result type") // 5
res, _, err := pq.Query(ctx, vmQuery, ts) // 6 - vector res, _, err := pq.Query(ctx, vmQuery, ts) // 5 - vector
if err != nil { if err != nil {
t.Fatalf("unexpected %s", err) t.Fatalf("unexpected %s", err)
} }
@@ -163,7 +159,7 @@ func TestVMInstantQuery(t *testing.T) {
} }
metricsEqual(t, res.Data, expected) metricsEqual(t, res.Data, expected)
res, req, err := pq.Query(ctx, vmQuery, ts) // 7 - scalar res, req, err := pq.Query(ctx, vmQuery, ts) // 6 - scalar
if err != nil { if err != nil {
t.Fatalf("unexpected %s", err) t.Fatalf("unexpected %s", err)
} }
@@ -188,7 +184,7 @@ func TestVMInstantQuery(t *testing.T) {
res.SeriesFetched) res.SeriesFetched)
} }
res, _, err = pq.Query(ctx, vmQuery, ts) // 8 - scalar with stats res, _, err = pq.Query(ctx, vmQuery, ts) // 7 - scalar with stats
if err != nil { if err != nil {
t.Fatalf("unexpected %s", err) t.Fatalf("unexpected %s", err)
} }
@@ -209,7 +205,7 @@ func TestVMInstantQuery(t *testing.T) {
*res.SeriesFetched) *res.SeriesFetched)
} }
res, _, err = pq.Query(ctx, vmQuery, ts) // 9 res, _, err = pq.Query(ctx, vmQuery, ts) // 8
if err != nil { if err != nil {
t.Fatalf("unexpected %s", err) t.Fatalf("unexpected %s", err)
} }
@@ -220,7 +216,7 @@ func TestVMInstantQuery(t *testing.T) {
// test graphite // test graphite
gq := s.BuildWithParams(QuerierParams{DataSourceType: string(datasourceGraphite)}) gq := s.BuildWithParams(QuerierParams{DataSourceType: string(datasourceGraphite)})
res, _, err = gq.Query(ctx, queryRender, ts) // 10 - graphite res, _, err = gq.Query(ctx, queryRender, ts) // 9 - graphite
if err != nil { if err != nil {
t.Fatalf("unexpected %s", err) t.Fatalf("unexpected %s", err)
} }
@@ -240,9 +236,9 @@ func TestVMInstantQuery(t *testing.T) {
vlogs := datasourceVLogs vlogs := datasourceVLogs
pq = s.BuildWithParams(QuerierParams{DataSourceType: string(vlogs), EvaluationInterval: 15 * time.Second}) pq = s.BuildWithParams(QuerierParams{DataSourceType: string(vlogs), EvaluationInterval: 15 * time.Second})
expErr(vlogsQuery, "error parsing response") // 11 expErr(vlogsQuery, "error parsing response") // 10
res, _, err = pq.Query(ctx, vlogsQuery, ts) // 12 res, _, err = pq.Query(ctx, vlogsQuery, ts) // 11
if err != nil { if err != nil {
t.Fatalf("unexpected %s", err) t.Fatalf("unexpected %s", err)
} }
@@ -394,8 +390,6 @@ func TestVMRangeQuery(t *testing.T) {
switch c { switch c {
case 0: case 0:
w.Write([]byte(`{"status":"success","data":{"resultType":"matrix","result":[{"metric":{"__name__":"vm_rows"},"values":[[1583786142,"13763"]]}]}}`)) w.Write([]byte(`{"status":"success","data":{"resultType":"matrix","result":[{"metric":{"__name__":"vm_rows"},"values":[[1583786142,"13763"]]}]}}`))
case 1:
w.Write([]byte(`{"status":"success","data":{"resultType":"vector","result":[1583786142, "1"]}}`))
} }
}) })
mux.HandleFunc("/select/logsql/stats_query_range", func(w http.ResponseWriter, r *http.Request) { mux.HandleFunc("/select/logsql/stats_query_range", func(w http.ResponseWriter, r *http.Request) {
@@ -428,7 +422,7 @@ func TestVMRangeQuery(t *testing.T) {
t.Fatalf("expected 'step' query param to be 60s; got %q instead", step) t.Fatalf("expected 'step' query param to be 60s; got %q instead", step)
} }
switch c { switch c {
case 2: case 1:
w.Write([]byte(`{"status":"success","data":{"resultType":"matrix","result":[{"metric":{"__name__":"total"},"values":[[1583786142,"10"]]}]}}`)) w.Write([]byte(`{"status":"success","data":{"resultType":"matrix","result":[{"metric":{"__name__":"total"},"values":[[1583786142,"10"]]}]}}`))
} }
}) })
@@ -452,13 +446,13 @@ func TestVMRangeQuery(t *testing.T) {
start, end := time.Now().Add(-time.Minute), time.Now() start, end := time.Now().Add(-time.Minute), time.Now()
res, err := pq.QueryRange(ctx, vmQuery, start, end) // case 0 res, err := pq.QueryRange(ctx, vmQuery, start, end)
if err != nil { if err != nil {
t.Fatalf("unexpected %s", err) t.Fatalf("unexpected %s", err)
} }
m := res.Data m := res.Data
if len(m) != 1 { if len(m) != 1 {
t.Fatalf("expected 1 metric got %d in %+v", len(m), m) t.Fatalf("expected 1 metric got %d in %+v", len(m), m)
} }
expected := Metric{ expected := Metric{
Labels: []prompb.Label{{Value: "vm_rows", Name: "__name__"}}, Labels: []prompb.Label{{Value: "vm_rows", Name: "__name__"}},
@@ -469,9 +463,6 @@ func TestVMRangeQuery(t *testing.T) {
t.Fatalf("unexpected metric %+v want %+v", m[0], expected) t.Fatalf("unexpected metric %+v want %+v", m[0], expected)
} }
_, err = pq.QueryRange(ctx, vmQuery, start, end) // case 1
expectError(t, err, "unexpected result type")
// test unsupported graphite // test unsupported graphite
gq := s.BuildWithParams(QuerierParams{DataSourceType: string(datasourceGraphite)}) gq := s.BuildWithParams(QuerierParams{DataSourceType: string(datasourceGraphite)})
@@ -772,7 +763,7 @@ func TestHeaders(t *testing.T) {
// basic auth // basic auth
f(func() *Client { f(func() *Client {
cfg, err := vmalertutil.AuthConfig(vmalertutil.WithBasicAuth("foo", "", "bar", "")) cfg, err := vmalertutil.AuthConfig(vmalertutil.WithBasicAuth("foo", "bar", ""))
if err != nil { if err != nil {
t.Fatalf("Error get auth config: %s", err) t.Fatalf("Error get auth config: %s", err)
} }
@@ -817,7 +808,7 @@ func TestHeaders(t *testing.T) {
// custom header overrides basic auth // custom header overrides basic auth
f(func() *Client { f(func() *Client {
cfg, err := vmalertutil.AuthConfig(vmalertutil.WithBasicAuth("foo", "", "bar", "")) cfg, err := vmalertutil.AuthConfig(vmalertutil.WithBasicAuth("foo", "bar", ""))
if err != nil { if err != nil {
t.Fatalf("Error get auth config: %s", err) t.Fatalf("Error get auth config: %s", err)
} }

View File

@@ -40,28 +40,8 @@ func (c *Client) setVLogsRangeReqParams(r *http.Request, query string, start, en
c.setReqParams(r, query) c.setReqParams(r, query)
} }
func parseVLogsInstantResponse(resp *http.Response) (res Result, err error) { func parseVLogsResponse(req *http.Request, resp *http.Response) (res Result, err error) {
res, err = parsePrometheusInstantResponse(resp) res, err = parsePrometheusResponse(req, resp)
if err != nil {
return Result{}, err
}
for i := range res.Data {
m := &res.Data[i]
for j := range m.Labels {
// reserve the stats func result name with a new label `stats_result` instead of dropping it,
// since there could be multiple stats results in a single query, for instance:
// _time:5m | stats quantile(0.5, request_duration_seconds) p50, quantile(0.9, request_duration_seconds) p90
if m.Labels[j].Name == "__name__" {
m.Labels[j].Name = "stats_result"
break
}
}
}
return
}
func parseVLogsRangeResponse(resp *http.Response) (res Result, err error) {
res, err = parsePrometheusRangeResponse(resp)
if err != nil { if err != nil {
return Result{}, err return Result{}, err
} }

View File

@@ -87,7 +87,6 @@ func (m *Metric) DelLabel(key string) {
for i, l := range m.Labels { for i, l := range m.Labels {
if l.Name == key { if l.Name == key {
m.Labels = append(m.Labels[:i], m.Labels[i+1:]...) m.Labels = append(m.Labels[:i], m.Labels[i+1:]...)
break
} }
} }
} }
@@ -133,9 +132,12 @@ func (ls Labels) String() string {
// a=[]Label{{Name: "a", Value: "2"}},b=[]Label{{Name: "a", Value: "1"}}, return 1 // a=[]Label{{Name: "a", Value: "2"}},b=[]Label{{Name: "a", Value: "1"}}, return 1
// a=[]Label{{Name: "a", Value: "1"}},b=[]Label{{Name: "a", Value: "1"}}, return 0 // a=[]Label{{Name: "a", Value: "1"}},b=[]Label{{Name: "a", Value: "1"}}, return 0
func LabelCompare(a, b Labels) int { func LabelCompare(a, b Labels) int {
l := min(len(b), len(a)) l := len(a)
if len(b) < l {
l = len(b)
}
for i := range l { for i := 0; i < l; i++ {
if a[i].Name != b[i].Name { if a[i].Name != b[i].Name {
if a[i].Name < b[i].Name { if a[i].Name < b[i].Name {
return -1 return -1

View File

@@ -27,7 +27,6 @@ var (
"Multiple headers must be delimited by '^^': -datasource.headers='header1:value1^^header2:value2'") "Multiple headers must be delimited by '^^': -datasource.headers='header1:value1^^header2:value2'")
basicAuthUsername = flag.String("datasource.basicAuth.username", "", "Optional basic auth username for -datasource.url") basicAuthUsername = flag.String("datasource.basicAuth.username", "", "Optional basic auth username for -datasource.url")
basicAuthUsernameFile = flag.String("datasource.basicAuth.usernameFile", "", "Optional path to basic auth username to use for -datasource.url")
basicAuthPassword = flag.String("datasource.basicAuth.password", "", "Optional basic auth password for -datasource.url") basicAuthPassword = flag.String("datasource.basicAuth.password", "", "Optional basic auth password for -datasource.url")
basicAuthPasswordFile = flag.String("datasource.basicAuth.passwordFile", "", "Optional path to basic auth password to use for -datasource.url") basicAuthPasswordFile = flag.String("datasource.basicAuth.passwordFile", "", "Optional path to basic auth password to use for -datasource.url")
@@ -64,7 +63,6 @@ func InitSecretFlags() {
if !*showDatasourceURL { if !*showDatasourceURL {
flagutil.RegisterSecretFlag("datasource.url") flagutil.RegisterSecretFlag("datasource.url")
} }
flagutil.RegisterSecretFlag("datasource.headers")
} }
// ShowDatasourceURL whether to show -datasource.url with sensitive information // ShowDatasourceURL whether to show -datasource.url with sensitive information
@@ -107,7 +105,7 @@ func Init(extraParams url.Values) (QuerierBuilder, error) {
return nil, fmt.Errorf("cannot parse JSON for -datasource.oauth2.endpointParams=%s: %w", *oauth2EndpointParams, err) return nil, fmt.Errorf("cannot parse JSON for -datasource.oauth2.endpointParams=%s: %w", *oauth2EndpointParams, err)
} }
authCfg, err := vmalertutil.AuthConfig( authCfg, err := vmalertutil.AuthConfig(
vmalertutil.WithBasicAuth(*basicAuthUsername, *basicAuthUsernameFile, *basicAuthPassword, *basicAuthPasswordFile), vmalertutil.WithBasicAuth(*basicAuthUsername, *basicAuthPassword, *basicAuthPasswordFile),
vmalertutil.WithBearer(*bearerToken, *bearerTokenFile), vmalertutil.WithBearer(*bearerToken, *bearerTokenFile),
vmalertutil.WithOAuth(*oauth2ClientID, *oauth2ClientSecret, *oauth2ClientSecretFile, *oauth2TokenURL, *oauth2Scopes, endpointParams), vmalertutil.WithOAuth(*oauth2ClientID, *oauth2ClientSecret, *oauth2ClientSecretFile, *oauth2TokenURL, *oauth2Scopes, endpointParams),
vmalertutil.WithHeaders(*headers)) vmalertutil.WithHeaders(*headers))

View File

@@ -13,7 +13,7 @@ func BenchmarkPromInstantUnmarshal(b *testing.B) {
// BenchmarkParsePrometheusResponse/Instant_std+fastjson-10 1760 668959 ns/op 280147 B/op 5781 allocs/op // BenchmarkParsePrometheusResponse/Instant_std+fastjson-10 1760 668959 ns/op 280147 B/op 5781 allocs/op
b.Run("Instant std+fastjson", func(b *testing.B) { b.Run("Instant std+fastjson", func(b *testing.B) {
for range b.N { for i := 0; i < b.N; i++ {
var pi promInstant var pi promInstant
err = pi.Unmarshal(data) err = pi.Unmarshal(data)
if err != nil { if err != nil {

View File

@@ -7,6 +7,7 @@ import (
"net/url" "net/url"
"os" "os"
"sort" "sort"
"strconv"
"strings" "strings"
"sync" "sync"
"time" "time"
@@ -56,7 +57,7 @@ absolute path to all .tpl files in root.
-rule.templates="dir/**/*.tpl". Includes all the .tpl files in "dir" subfolders recursively. -rule.templates="dir/**/*.tpl". Includes all the .tpl files in "dir" subfolders recursively.
`) `)
configCheckInterval = flag.Duration("configCheckInterval", 0, "Interval for checking for changes in '-rule', '-rule.templates' and '-notifier.config' files. "+ configCheckInterval = flag.Duration("configCheckInterval", 0, "Interval for checking for changes in '-rule' or '-notifier.config' files. "+
"By default, the checking is disabled. Send SIGHUP signal in order to force config check for changes.") "By default, the checking is disabled. Send SIGHUP signal in order to force config check for changes.")
httpListenAddrs = flagutil.NewArrayString("httpListenAddr", "Address to listen for incoming http requests. See also -tls and -httpListenAddr.useProxyProtocol") httpListenAddrs = flagutil.NewArrayString("httpListenAddr", "Address to listen for incoming http requests. See also -tls and -httpListenAddr.useProxyProtocol")
@@ -76,12 +77,15 @@ absolute path to all .tpl files in root.
`Link to VMUI: -external.alert.source='vmui/#/?g0.expr={{.Expr|queryEscape}}'. `+ `Link to VMUI: -external.alert.source='vmui/#/?g0.expr={{.Expr|queryEscape}}'. `+
`If empty 'vmalert/alert?group_id={{.GroupID}}&alert_id={{.AlertID}}' is used.`) `If empty 'vmalert/alert?group_id={{.GroupID}}&alert_id={{.AlertID}}' is used.`)
externalLabels = flagutil.NewArrayString("external.label", "Optional label in the form 'Name=value' to add to all generated recording rules and alerts. "+ externalLabels = flagutil.NewArrayString("external.label", "Optional label in the form 'Name=value' to add to all generated recording rules and alerts. "+
"In case of conflicts, original labels are kept with prefix 'exported_'.") "In case of conflicts, original labels are kept with prefix `exported_`.")
dryRun = flag.Bool("dryRun", false, "Whether to check only config files without running vmalert. The rules file are validated. The -rule flag must be specified.") dryRun = flag.Bool("dryRun", false, "Whether to check only config files without running vmalert. The rules file are validated. The -rule flag must be specified.")
) )
var extURL *url.URL var (
alertURLGeneratorFn notifier.AlertURLGenerator
extURL *url.URL
)
func main() { func main() {
// Write flags and help message to stdout, since it is easier to grep or pipe. // Write flags and help message to stdout, since it is easier to grep or pipe.
@@ -117,7 +121,7 @@ func main() {
return return
} }
err = notifier.InitAlertURLGeneratorFn(extURL, *externalAlertSource, *validateTemplates) alertURLGeneratorFn, err = getAlertURLGenerator(extURL, *externalAlertSource, *validateTemplates)
if err != nil { if err != nil {
logger.Fatalf("failed to init `external.alert.source`: %s", err) logger.Fatalf("failed to init `external.alert.source`: %s", err)
} }
@@ -159,7 +163,7 @@ func main() {
ctx, cancel := context.WithCancel(context.Background()) ctx, cancel := context.WithCancel(context.Background())
manager, err := newManager(ctx) manager, err := newManager(ctx)
if err != nil { if err != nil {
logger.Fatalf("failed to create manager: %s", err) logger.Fatalf("failed to init: %s", err)
} }
logger.Infof("reading rules configuration file from %q", strings.Join(*rulePath, ";")) logger.Infof("reading rules configuration file from %q", strings.Join(*rulePath, ";"))
groupsCfg, err := config.Parse(*rulePath, validateTplFn, *validateExpressions) groupsCfg, err := config.Parse(*rulePath, validateTplFn, *validateExpressions)
@@ -224,13 +228,14 @@ func newManager(ctx context.Context) (*manager, error) {
labels[s[:n]] = s[n+1:] labels[s[:n]] = s[n+1:]
} }
err = notifier.Init(labels, *externalURL) nts, err := notifier.Init(alertURLGeneratorFn, labels, *externalURL)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to init notifier: %w", err) return nil, fmt.Errorf("failed to init notifier: %w", err)
} }
manager := &manager{ manager := &manager{
groups: make(map[uint64]*rule.Group), groups: make(map[uint64]*rule.Group),
querierBuilder: q, querierBuilder: q,
notifiers: nts,
labels: labels, labels: labels,
} }
rw, err := remotewrite.Init(ctx) rw, err := remotewrite.Init(ctx)
@@ -287,6 +292,35 @@ func getHostnameAsExternalURL(addr string, isSecure bool) (*url.URL, error) {
return url.Parse(fmt.Sprintf("%s%s%s", schema, hname, port)) return url.Parse(fmt.Sprintf("%s%s%s", schema, hname, port))
} }
func getAlertURLGenerator(externalURL *url.URL, externalAlertSource string, validateTemplate bool) (notifier.AlertURLGenerator, error) {
if externalAlertSource == "" {
return func(a notifier.Alert) string {
gID, aID := strconv.FormatUint(a.GroupID, 10), strconv.FormatUint(a.ID, 10)
return fmt.Sprintf("%s/vmalert/alert?%s=%s&%s=%s", externalURL, paramGroupID, gID, paramAlertID, aID)
}, nil
}
if validateTemplate {
if err := notifier.ValidateTemplates(map[string]string{
"tpl": externalAlertSource,
}); err != nil {
return nil, fmt.Errorf("error validating source template %s: %w", externalAlertSource, err)
}
}
m := map[string]string{
"tpl": externalAlertSource,
}
return func(alert notifier.Alert) string {
qFn := func(_ string) ([]datasource.Metric, error) {
return nil, fmt.Errorf("`query` template isn't supported for alert source template")
}
templated, err := alert.ExecTemplate(qFn, alert.Labels, m)
if err != nil {
logger.Errorf("cannot template alert source: %s", err)
}
return fmt.Sprintf("%s/%s", externalURL, templated["tpl"])
}, nil
}
func usage() { func usage() {
const s = ` const s = `
vmalert processes alerts and recording rules. vmalert processes alerts and recording rules.

View File

@@ -49,6 +49,30 @@ func TestGetExternalURL(t *testing.T) {
} }
} }
func TestGetAlertURLGenerator(t *testing.T) {
testAlert := notifier.Alert{GroupID: 42, ID: 2, Value: 4, Labels: map[string]string{"tenant": "baz"}}
u, _ := url.Parse("https://victoriametrics.com/path")
fn, err := getAlertURLGenerator(u, "", false)
if err != nil {
t.Fatalf("unexpected error %s", err)
}
exp := fmt.Sprintf("https://victoriametrics.com/path/vmalert/alert?%s=42&%s=2", paramGroupID, paramAlertID)
if exp != fn(testAlert) {
t.Fatalf("unexpected url want %s, got %s", exp, fn(testAlert))
}
_, err = getAlertURLGenerator(nil, "foo?{{invalid}}", true)
if err == nil {
t.Fatalf("expected template validation error got nil")
}
fn, err = getAlertURLGenerator(u, "foo?query={{$value}}&ds={{ $labels.tenant }}", true)
if err != nil {
t.Fatalf("unexpected error %s", err)
}
if exp := "https://victoriametrics.com/path/foo?query=4&ds=baz"; exp != fn(testAlert) {
t.Fatalf("unexpected url want %s, got %s", exp, fn(testAlert))
}
}
func TestConfigReload(t *testing.T) { func TestConfigReload(t *testing.T) {
originalRulePath := *rulePath originalRulePath := *rulePath
originalExternalURL := extURL originalExternalURL := extURL
@@ -96,10 +120,9 @@ groups:
querierBuilder: &datasource.FakeQuerier{}, querierBuilder: &datasource.FakeQuerier{},
groups: make(map[uint64]*rule.Group), groups: make(map[uint64]*rule.Group),
labels: map[string]string{}, labels: map[string]string{},
notifiers: func() []notifier.Notifier { return []notifier.Notifier{&notifier.FakeNotifier{}} },
rw: &remotewrite.Client{}, rw: &remotewrite.Client{},
} }
_, cleanup := notifier.InitFakeNotifier()
defer cleanup()
syncCh := make(chan struct{}) syncCh := make(chan struct{})
sighupCh := procutil.NewSighupChan() sighupCh := procutil.NewSighupChan()

View File

@@ -3,7 +3,6 @@ package main
import ( import (
"context" "context"
"fmt" "fmt"
"strconv"
"sync" "sync"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config" "github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config"
@@ -17,6 +16,7 @@ import (
// manager controls group states // manager controls group states
type manager struct { type manager struct {
querierBuilder datasource.QuerierBuilder querierBuilder datasource.QuerierBuilder
notifiers func() []notifier.Notifier
rw remotewrite.RWClient rw remotewrite.RWClient
// remote read builder. // remote read builder.
@@ -29,8 +29,25 @@ type manager struct {
groups map[uint64]*rule.Group groups map[uint64]*rule.Group
} }
// groupAPI generates apiGroup object from group by its ID(hash) // ruleAPI generates apiRule object from alert by its ID(hash)
func (m *manager) groupAPI(gID uint64) (*rule.ApiGroup, error) { func (m *manager) ruleAPI(gID, rID uint64) (apiRule, error) {
m.groupsMu.RLock()
defer m.groupsMu.RUnlock()
g, ok := m.groups[gID]
if !ok {
return apiRule{}, fmt.Errorf("can't find group with id %d", gID)
}
for _, rule := range g.Rules {
if rule.ID() == rID {
return ruleToAPI(rule), nil
}
}
return apiRule{}, fmt.Errorf("can't find rule with id %d in group %q", rID, g.Name)
}
// alertAPI generates apiAlert object from alert by its ID(hash)
func (m *manager) alertAPI(gID, aID uint64) (*apiAlert, error) {
m.groupsMu.RLock() m.groupsMu.RLock()
defer m.groupsMu.RUnlock() defer m.groupsMu.RUnlock()
@@ -38,47 +55,13 @@ func (m *manager) groupAPI(gID uint64) (*rule.ApiGroup, error) {
if !ok { if !ok {
return nil, fmt.Errorf("can't find group with id %d", gID) return nil, fmt.Errorf("can't find group with id %d", gID)
} }
return g.ToAPI(), nil
}
// ruleAPI generates apiRule object from alert by its ID(hash)
func (m *manager) ruleAPI(gID, rID uint64) (rule.ApiRule, error) {
m.groupsMu.RLock()
defer m.groupsMu.RUnlock()
group, ok := m.groups[gID]
if !ok {
return rule.ApiRule{}, fmt.Errorf("can't find group with id %d", gID)
}
g := group.ToAPI()
ruleID := strconv.FormatUint(rID, 10)
for _, r := range g.Rules { for _, r := range g.Rules {
if r.ID == ruleID { ar, ok := r.(*rule.AlertingRule)
return r, nil if !ok {
}
}
return rule.ApiRule{}, fmt.Errorf("can't find rule with id %d in group %q", rID, g.Name)
}
// alertAPI generates apiAlert object from alert by its ID(hash)
func (m *manager) alertAPI(gID, aID uint64) (*rule.ApiAlert, error) {
m.groupsMu.RLock()
defer m.groupsMu.RUnlock()
group, ok := m.groups[gID]
if !ok {
return nil, fmt.Errorf("can't find group with id %d", gID)
}
g := group.ToAPI()
for _, r := range g.Rules {
if r.Type != rule.TypeAlerting {
continue continue
} }
alertID := strconv.FormatUint(aID, 10) if apiAlert := alertToAPI(ar, aID); apiAlert != nil {
for _, a := range r.Alerts { return apiAlert, nil
if a.ID == alertID {
return a, nil
}
} }
} }
return nil, fmt.Errorf("can't find alert with id %d in group %q", aID, g.Name) return nil, fmt.Errorf("can't find alert with id %d in group %q", aID, g.Name)
@@ -98,18 +81,20 @@ func (m *manager) close() {
m.wg.Wait() m.wg.Wait()
} }
func (m *manager) startGroup(ctx context.Context, g *rule.Group, restore bool) { func (m *manager) startGroup(ctx context.Context, g *rule.Group, restore bool) error {
m.wg.Add(1)
id := g.GetID() id := g.GetID()
g.Init() g.Init()
m.wg.Go(func() { go func() {
defer m.wg.Done()
if restore { if restore {
g.Start(ctx, m.rw, m.rr) g.Start(ctx, m.notifiers, m.rw, m.rr)
} else { } else {
g.Start(ctx, m.rw, nil) g.Start(ctx, m.notifiers, m.rw, nil)
} }
}) }()
m.groups[id] = g m.groups[id] = g
return nil
} }
func (m *manager) update(ctx context.Context, groupsCfg []config.Group, restore bool) error { func (m *manager) update(ctx context.Context, groupsCfg []config.Group, restore bool) error {
@@ -118,7 +103,7 @@ func (m *manager) update(ctx context.Context, groupsCfg []config.Group, restore
for _, cfg := range groupsCfg { for _, cfg := range groupsCfg {
for _, r := range cfg.Rules { for _, r := range cfg.Rules {
if rrPresent && arPresent { if rrPresent && arPresent {
break continue
} }
if r.Record != "" { if r.Record != "" {
rrPresent = true rrPresent = true
@@ -134,7 +119,7 @@ func (m *manager) update(ctx context.Context, groupsCfg []config.Group, restore
if rrPresent && m.rw == nil { if rrPresent && m.rw == nil {
return fmt.Errorf("config contains recording rules but `-remoteWrite.url` isn't set") return fmt.Errorf("config contains recording rules but `-remoteWrite.url` isn't set")
} }
if arPresent && notifier.GetTargets() == nil { if arPresent && m.notifiers == nil {
return fmt.Errorf("config contains alerting rules but neither `-notifier.url` nor `-notifier.config` nor `-notifier.blackhole` aren't set") return fmt.Errorf("config contains alerting rules but neither `-notifier.url` nor `-notifier.config` nor `-notifier.blackhole` aren't set")
} }
@@ -161,22 +146,25 @@ func (m *manager) update(ctx context.Context, groupsCfg []config.Group, restore
} }
} }
for _, ng := range groupsRegistry { for _, ng := range groupsRegistry {
m.startGroup(ctx, ng, restore) if err := m.startGroup(ctx, ng, restore); err != nil {
m.groupsMu.Unlock()
return err
}
} }
m.groupsMu.Unlock() m.groupsMu.Unlock()
if len(toUpdate) > 0 { if len(toUpdate) > 0 {
var wg sync.WaitGroup var wg sync.WaitGroup
for _, item := range toUpdate { for _, item := range toUpdate {
oldG := item.old wg.Add(1)
newG := item.new // cancel evaluation so the Update will be applied as fast as possible.
wg.Go(func() { // it is important to call InterruptEval before the update, because cancel fn
// cancel evaluation so the Update will be applied as fast as possible. // can be re-assigned during the update.
// it is important to call InterruptEval before the update, because cancel fn item.old.InterruptEval()
// can be re-assigned during the update. go func(oldGroup *rule.Group, newGroup *rule.Group) {
oldG.InterruptEval() oldGroup.UpdateWith(newGroup)
oldG.UpdateWith(newG) wg.Done()
}) }(item.old, item.new)
} }
wg.Wait() wg.Wait()
} }

View File

@@ -40,11 +40,10 @@ func TestManagerEmptyRulesDir(t *testing.T) {
// execution of configuration update. // execution of configuration update.
// Should be executed with -race flag // Should be executed with -race flag
func TestManagerUpdateConcurrent(t *testing.T) { func TestManagerUpdateConcurrent(t *testing.T) {
_, cleanup := notifier.InitFakeNotifier()
defer cleanup()
m := &manager{ m := &manager{
groups: make(map[uint64]*rule.Group), groups: make(map[uint64]*rule.Group),
querierBuilder: &datasource.FakeQuerier{}, querierBuilder: &datasource.FakeQuerier{},
notifiers: func() []notifier.Notifier { return []notifier.Notifier{&notifier.FakeNotifier{}} },
} }
paths := []string{ paths := []string{
"config/testdata/dir/rules0-good.rules", "config/testdata/dir/rules0-good.rules",
@@ -65,11 +64,13 @@ func TestManagerUpdateConcurrent(t *testing.T) {
const workers = 500 const workers = 500
const iterations = 10 const iterations = 10
var wg sync.WaitGroup wg := sync.WaitGroup{}
for n := range workers { wg.Add(workers)
wg.Go(func() { for i := 0; i < workers; i++ {
go func(n int) {
defer wg.Done()
r := rand.New(rand.NewSource(int64(n))) r := rand.New(rand.NewSource(int64(n)))
for range iterations { for i := 0; i < iterations; i++ {
rnd := r.Intn(len(paths)) rnd := r.Intn(len(paths))
cfg, err := config.Parse([]string{paths[rnd]}, notifier.ValidateTemplates, true) cfg, err := config.Parse([]string{paths[rnd]}, notifier.ValidateTemplates, true)
if err != nil { // update can fail and this is expected if err != nil { // update can fail and this is expected
@@ -77,7 +78,7 @@ func TestManagerUpdateConcurrent(t *testing.T) {
} }
_ = m.update(context.Background(), cfg, false) _ = m.update(context.Background(), cfg, false)
} }
}) }(i)
} }
wg.Wait() wg.Wait()
} }
@@ -126,9 +127,8 @@ func TestManagerUpdate_Success(t *testing.T) {
m := &manager{ m := &manager{
groups: make(map[uint64]*rule.Group), groups: make(map[uint64]*rule.Group),
querierBuilder: &datasource.FakeQuerier{}, querierBuilder: &datasource.FakeQuerier{},
notifiers: func() []notifier.Notifier { return []notifier.Notifier{&notifier.FakeNotifier{}} },
} }
_, cleanup := notifier.InitFakeNotifier()
defer cleanup()
cfgInit := loadCfg(t, []string{initPath}, true, true) cfgInit := loadCfg(t, []string{initPath}, true, true)
if err := m.update(ctx, cfgInit, false); err != nil { if err := m.update(ctx, cfgInit, false); err != nil {
@@ -259,7 +259,7 @@ func compareGroups(t *testing.T, a, b *rule.Group) {
for i, r := range a.Rules { for i, r := range a.Rules {
got, want := r, b.Rules[i] got, want := r, b.Rules[i]
if a.CreateID() != b.CreateID() { if a.CreateID() != b.CreateID() {
t.Fatalf("expected to have rule %d; got %d", want.ID(), got.ID()) t.Fatalf("expected to have rule %q; got %q", want.ID(), got.ID())
} }
if err := rule.CompareRules(t, want, got); err != nil { if err := rule.CompareRules(t, want, got); err != nil {
t.Fatalf("comparison error: %s", err) t.Fatalf("comparison error: %s", err)
@@ -277,8 +277,7 @@ func TestManagerUpdate_Failure(t *testing.T) {
rw: rw, rw: rw,
} }
if notifiers != nil { if notifiers != nil {
_, cleanup := notifier.InitFakeNotifier() m.notifiers = func() []notifier.Notifier { return notifiers }
defer cleanup()
} }
err := m.update(context.Background(), []config.Group{cfg}, false) err := m.update(context.Background(), []config.Group{cfg}, false)
if err == nil { if err == nil {

View File

@@ -80,15 +80,14 @@ func (as AlertState) String() string {
// AlertTplData is used to execute templating // AlertTplData is used to execute templating
type AlertTplData struct { type AlertTplData struct {
Type string Type string
Labels map[string]string Labels map[string]string
Value float64 Value float64
Expr string Expr string
AlertID uint64 AlertID uint64
GroupID uint64 GroupID uint64
ActiveAt time.Time ActiveAt time.Time
For time.Duration For time.Duration
IsPartial bool
} }
var tplHeaders = []string{ var tplHeaders = []string{
@@ -102,7 +101,6 @@ var tplHeaders = []string{
"{{ $groupID := .GroupID }}", "{{ $groupID := .GroupID }}",
"{{ $activeAt := .ActiveAt }}", "{{ $activeAt := .ActiveAt }}",
"{{ $for := .For }}", "{{ $for := .For }}",
"{{ $isPartial := .IsPartial }}",
} }
// ExecTemplate executes the Alert template for given // ExecTemplate executes the Alert template for given
@@ -168,8 +166,8 @@ func templateAnnotations(annotations map[string]string, data AlertTplData, tmpl
ctmpl, _ := tmpl.Clone() ctmpl, _ := tmpl.Clone()
ctmpl = ctmpl.Option("missingkey=zero") ctmpl = ctmpl.Option("missingkey=zero")
if err := templateAnnotation(&buf, builder.String(), tData, ctmpl, execute); err != nil { if err := templateAnnotation(&buf, builder.String(), tData, ctmpl, execute); err != nil {
r[key] = err.Error() r[key] = text
eg.Add(fmt.Errorf("(key: %q, value: %q): %w", key, text, err)) eg.Add(fmt.Errorf("key %q, template %q: %w", key, text, err))
continue continue
} }
r[key] = buf.String() r[key] = buf.String()
@@ -186,13 +184,13 @@ type tplData struct {
func templateAnnotation(dst io.Writer, text string, data tplData, tpl *textTpl.Template, execute bool) error { func templateAnnotation(dst io.Writer, text string, data tplData, tpl *textTpl.Template, execute bool) error {
tpl, err := tpl.Parse(text) tpl, err := tpl.Parse(text)
if err != nil { if err != nil {
return fmt.Errorf("error parsing template: %w", err) return fmt.Errorf("error parsing annotation template: %w", err)
} }
if !execute { if !execute {
return nil return nil
} }
if err = tpl.Execute(dst, data); err != nil { if err = tpl.Execute(dst, data); err != nil {
return fmt.Errorf("error evaluating template: %w", err) return fmt.Errorf("error evaluating annotation template: %w", err)
} }
return nil return nil
} }

View File

@@ -20,7 +20,7 @@ func TestAlertExecTemplate(t *testing.T) {
) )
extLabels["cluster"] = extCluster extLabels["cluster"] = extCluster
extLabels["dc"] = extDC extLabels["dc"] = extDC
err := Init(extLabels, extURL) _, err := Init(nil, extLabels, extURL)
checkErr(t, err) checkErr(t, err)
f := func(alert *Alert, annotations map[string]string, tplExpected map[string]string) { f := func(alert *Alert, annotations map[string]string, tplExpected map[string]string) {

View File

@@ -3,7 +3,6 @@ package notifier
import ( import (
"bytes" "bytes"
"context" "context"
"errors"
"fmt" "fmt"
"io" "io"
"net/http" "net/http"
@@ -14,6 +13,7 @@ import (
"github.com/VictoriaMetrics/metrics" "github.com/VictoriaMetrics/metrics"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/vmalertutil" "github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/vmalertutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httputil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth" "github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb" "github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel" "github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
@@ -22,11 +22,10 @@ import (
// AlertManager represents integration provider with Prometheus alert manager // AlertManager represents integration provider with Prometheus alert manager
// https://github.com/prometheus/alertmanager // https://github.com/prometheus/alertmanager
type AlertManager struct { type AlertManager struct {
addr *url.URL addr *url.URL
argFunc AlertURLGenerator argFunc AlertURLGenerator
client *http.Client client *http.Client
timeout time.Duration timeout time.Duration
lastError string
authCfg *promauth.Config authCfg *promauth.Config
// stores already parsed RelabelConfigs object // stores already parsed RelabelConfigs object
@@ -72,42 +71,24 @@ func (am AlertManager) Addr() string {
return am.addr.Redacted() return am.addr.Redacted()
} }
func (am *AlertManager) LastError() string {
return am.lastError
}
// Send an alert or resolve message // Send an alert or resolve message
func (am *AlertManager) Send(ctx context.Context, alerts []Alert, alertLabels [][]prompb.Label, headers map[string]string) error { func (am *AlertManager) Send(ctx context.Context, alerts []Alert, headers map[string]string) error {
if len(alerts) != len(alertLabels) {
return fmt.Errorf("mismatched number of alerts and label sets after global alert relabeling")
}
am.metrics.alertsSent.Add(len(alerts)) am.metrics.alertsSent.Add(len(alerts))
startTime := time.Now() startTime := time.Now()
err := am.send(ctx, alerts, alertLabels, headers) err := am.send(ctx, alerts, headers)
am.metrics.alertsSendDuration.UpdateDuration(startTime) am.metrics.alertsSendDuration.UpdateDuration(startTime)
if err != nil { if err != nil {
// the context can be cancelled on graceful shutdown
// or on group update. So no need to handle the error as usual.
if errors.Is(err, context.Canceled) {
return nil
}
am.metrics.alertsSendErrors.Add(len(alerts)) am.metrics.alertsSendErrors.Add(len(alerts))
am.lastError = err.Error()
} else {
am.lastError = ""
} }
return err return err
} }
func (am *AlertManager) send(ctx context.Context, alerts []Alert, alertLabels [][]prompb.Label, headers map[string]string) error { func (am *AlertManager) send(ctx context.Context, alerts []Alert, headers map[string]string) error {
b := &bytes.Buffer{} b := &bytes.Buffer{}
alertsToSend := make([]Alert, 0, len(alerts)) alertsToSend := make([]Alert, 0, len(alerts))
lblss := make([][]prompb.Label, 0, len(alerts)) lblss := make([][]prompb.Label, 0, len(alerts))
for i, a := range alerts { for _, a := range alerts {
lbls := alertLabels[i] lbls := a.applyRelabelingIfNeeded(am.relabelConfigs)
if am.relabelConfigs != nil {
lbls = am.relabelConfigs.Apply(lbls, 0)
}
if len(lbls) == 0 { if len(lbls) == 0 {
continue continue
} }
@@ -171,6 +152,11 @@ const alertManagerPath = "/api/v2/alerts"
func NewAlertManager(alertManagerURL string, fn AlertURLGenerator, authCfg promauth.HTTPClientConfig, func NewAlertManager(alertManagerURL string, fn AlertURLGenerator, authCfg promauth.HTTPClientConfig,
relabelCfg *promrelabel.ParsedConfigs, timeout time.Duration, relabelCfg *promrelabel.ParsedConfigs, timeout time.Duration,
) (*AlertManager, error) { ) (*AlertManager, error) {
if err := httputil.CheckURL(alertManagerURL); err != nil {
return nil, fmt.Errorf("invalid alertmanager URL: %w", err)
}
tls := &promauth.TLSConfig{} tls := &promauth.TLSConfig{}
if authCfg.TLSConfig != nil { if authCfg.TLSConfig != nil {
tls = authCfg.TLSConfig tls = authCfg.TLSConfig
@@ -191,7 +177,7 @@ func NewAlertManager(alertManagerURL string, fn AlertURLGenerator, authCfg proma
} }
aCfg, err := vmalertutil.AuthConfig( aCfg, err := vmalertutil.AuthConfig(
vmalertutil.WithBasicAuth(ba.Username, ba.UsernameFile, ba.Password.String(), ba.PasswordFile), vmalertutil.WithBasicAuth(ba.Username, ba.Password.String(), ba.PasswordFile),
vmalertutil.WithBearer(authCfg.BearerToken.String(), authCfg.BearerTokenFile), vmalertutil.WithBearer(authCfg.BearerToken.String(), authCfg.BearerTokenFile),
vmalertutil.WithOAuth(oauth.ClientID, oauth.ClientSecret.String(), oauth.ClientSecretFile, oauth.TokenURL, strings.Join(oauth.Scopes, ";"), oauth.EndpointParams), vmalertutil.WithOAuth(oauth.ClientID, oauth.ClientSecret.String(), oauth.ClientSecretFile, oauth.TokenURL, strings.Join(oauth.Scopes, ";"), oauth.EndpointParams),
vmalertutil.WithHeaders(strings.Join(authCfg.Headers, "^^")), vmalertutil.WithHeaders(strings.Join(authCfg.Headers, "^^")),

View File

@@ -11,7 +11,6 @@ import (
"time" "time"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth" "github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel" "github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
) )
@@ -146,11 +145,11 @@ func TestAlertManager_Send(t *testing.T) {
t.Fatalf("unexpected error: %s", err) t.Fatalf("unexpected error: %s", err)
} }
if err := am.Send(context.Background(), []Alert{{Labels: map[string]string{"a": "b"}}}, [][]prompb.Label{{{Name: "a", Value: "b"}}}, nil); err == nil { if err := am.Send(context.Background(), []Alert{{Labels: map[string]string{"a": "b"}}}, nil); err == nil {
t.Fatalf("expected connection error got nil") t.Fatalf("expected connection error got nil")
} }
if err := am.Send(context.Background(), []Alert{{Labels: map[string]string{"a": "b"}}}, [][]prompb.Label{{{Name: "a", Value: "b"}}}, nil); err == nil { if err := am.Send(context.Background(), []Alert{{Labels: map[string]string{"a": "b"}}}, nil); err == nil {
t.Fatalf("expected wrong http code error got nil") t.Fatalf("expected wrong http code error got nil")
} }
@@ -161,7 +160,7 @@ func TestAlertManager_Send(t *testing.T) {
End: time.Now().UTC(), End: time.Now().UTC(),
Labels: map[string]string{"alertname": "alert0"}, Labels: map[string]string{"alertname": "alert0"},
Annotations: map[string]string{"a": "b", "c": "d"}, Annotations: map[string]string{"a": "b", "c": "d"},
}}, [][]prompb.Label{{{Name: "alertname", Value: "alert0"}}}, map[string]string{headerKey: "bar"}); err != nil { }}, map[string]string{headerKey: "bar"}); err != nil {
t.Fatalf("unexpected error %s", err) t.Fatalf("unexpected error %s", err)
} }
@@ -175,7 +174,7 @@ func TestAlertManager_Send(t *testing.T) {
Name: "alert2", Name: "alert2",
Labels: map[string]string{"rule": "test", "tenant": "1"}, Labels: map[string]string{"rule": "test", "tenant": "1"},
}, },
}, [][]prompb.Label{{{Name: "rule", Value: "test"}, {Name: "tenant", Value: "0"}}, {{Name: "rule", Value: "test"}, {Name: "tenant", Value: "1"}}}, map[string]string{headerKey: "bar"}); err != nil { }, map[string]string{headerKey: "bar"}); err != nil {
t.Fatalf("unexpected error %s", err) t.Fatalf("unexpected error %s", err)
} }
@@ -188,7 +187,7 @@ func TestAlertManager_Send(t *testing.T) {
Name: "alert2", Name: "alert2",
Labels: map[string]string{}, Labels: map[string]string{},
}, },
}, [][]prompb.Label{{{Name: "rule", Value: "test"}}, {{}}}, map[string]string{}); err != nil { }, map[string]string{}); err != nil {
t.Fatalf("unexpected error %s", err) t.Fatalf("unexpected error %s", err)
} }

View File

@@ -27,9 +27,15 @@ type Config struct {
// PathPrefix is added to URL path before adding alertManagerPath value // PathPrefix is added to URL path before adding alertManagerPath value
PathPrefix string `yaml:"path_prefix,omitempty"` PathPrefix string `yaml:"path_prefix,omitempty"`
ConsulSDConfigs []ConsulSDConfigs `yaml:"consul_sd_configs,omitempty"` // ConsulSDConfigs contains list of settings for service discovery via Consul
DNSSDConfigs []DNSSDConfigs `yaml:"dns_sd_configs,omitempty"` // see https://prometheus.io/docs/prometheus/latest/configuration/configuration/#consul_sd_config
StaticConfigs []StaticConfig `yaml:"static_configs,omitempty"` ConsulSDConfigs []consul.SDConfig `yaml:"consul_sd_configs,omitempty"`
// DNSSDConfigs contains list of settings for service discovery via DNS.
// See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#dns_sd_config
DNSSDConfigs []dns.SDConfig `yaml:"dns_sd_configs,omitempty"`
// StaticConfigs contains list of static targets
StaticConfigs []StaticConfig `yaml:"static_configs,omitempty"`
// HTTPClientConfig contains HTTP configuration for Notifier clients // HTTPClientConfig contains HTTP configuration for Notifier clients
HTTPClientConfig promauth.HTTPClientConfig `yaml:",inline"` HTTPClientConfig promauth.HTTPClientConfig `yaml:",inline"`
@@ -56,29 +62,14 @@ type Config struct {
parsedAlertRelabelConfigs *promrelabel.ParsedConfigs parsedAlertRelabelConfigs *promrelabel.ParsedConfigs
} }
// staticConfig contains list of static targets in the following form: // StaticConfig contains list of static targets in the following form:
// //
// targets: // targets:
// [ - '<host>' ] // [ - '<host>' ]
type StaticConfig struct { type StaticConfig struct {
Targets []string `yaml:"targets"` Targets []string `yaml:"targets"`
// HTTPClientConfig contains HTTP configuration for the Targets // HTTPClientConfig contains HTTP configuration for the Targets
HTTPClientConfig promauth.HTTPClientConfig `yaml:",inline"` HTTPClientConfig promauth.HTTPClientConfig `yaml:",inline"`
AlertRelabelConfigs []promrelabel.RelabelConfig `yaml:"alert_relabel_configs,omitempty"`
}
// ConsulSDConfigs contains list of settings for service discovery via Consul,
// see https://prometheus.io/docs/prometheus/latest/configuration/configuration/#consul_sd_config
type ConsulSDConfigs struct {
consul.SDConfig `yaml:",inline"`
AlertRelabelConfigs []promrelabel.RelabelConfig `yaml:"alert_relabel_configs,omitempty"`
}
// DNSSDConfigs contains list of settings for service discovery via DNS,
// See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#dns_sd_config
type DNSSDConfigs struct {
dns.SDConfig `yaml:",inline"`
AlertRelabelConfigs []promrelabel.RelabelConfig `yaml:"alert_relabel_configs,omitempty"`
} }
// UnmarshalYAML implements the yaml.Unmarshaler interface. // UnmarshalYAML implements the yaml.Unmarshaler interface.
@@ -104,31 +95,6 @@ func (cfg *Config) UnmarshalYAML(unmarshal func(any) error) error {
} }
cfg.parsedAlertRelabelConfigs = arCfg cfg.parsedAlertRelabelConfigs = arCfg
for _, s := range cfg.StaticConfigs {
if len(s.AlertRelabelConfigs) > 0 {
_, err := promrelabel.ParseRelabelConfigs(s.AlertRelabelConfigs)
if err != nil {
return fmt.Errorf("failed to parse alert_relabel_configs in static_config: %w", err)
}
}
}
for _, s := range cfg.ConsulSDConfigs {
if len(s.AlertRelabelConfigs) > 0 {
_, err := promrelabel.ParseRelabelConfigs(s.AlertRelabelConfigs)
if err != nil {
return fmt.Errorf("failed to parse alert_relabel_configs in consul_sd_config: %w", err)
}
}
}
for _, s := range cfg.DNSSDConfigs {
if len(s.AlertRelabelConfigs) > 0 {
_, err := promrelabel.ParseRelabelConfigs(s.AlertRelabelConfigs)
if err != nil {
return fmt.Errorf("failed to parse alert_relabel_configs in dns_sd_config: %w", err)
}
}
}
b, err := yaml.Marshal(cfg) b, err := yaml.Marshal(cfg)
if err != nil { if err != nil {
return fmt.Errorf("failed to marshal configuration for checksum: %w", err) return fmt.Errorf("failed to marshal configuration for checksum: %w", err)

View File

@@ -35,6 +35,4 @@ func TestParseConfig_Failure(t *testing.T) {
f("testdata/unknownFields.bad.yaml", "unknown field") f("testdata/unknownFields.bad.yaml", "unknown field")
f("non-existing-file", "error reading") f("non-existing-file", "error reading")
f("testdata/consul.bad.yaml", "failed to parse alert_relabel_configs in consul_sd_config")
f("testdata/dns.bad.yaml", "failed to parse alert relabeling config")
} }

View File

@@ -8,7 +8,6 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger" "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth" "github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discovery/consul" "github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discovery/consul"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discovery/dns" "github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discovery/dns"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutil" "github.com/VictoriaMetrics/VictoriaMetrics/lib/promutil"
@@ -29,7 +28,11 @@ type configWatcher struct {
targets map[TargetType][]Target targets map[TargetType][]Target
} }
func newWatcher(cfg *Config, gen AlertURLGenerator) (*configWatcher, error) { func newWatcher(path string, gen AlertURLGenerator) (*configWatcher, error) {
cfg, err := parseConfig(path)
if err != nil {
return nil, err
}
cw := &configWatcher{ cw := &configWatcher{
cfg: cfg, cfg: cfg,
wg: sync.WaitGroup{}, wg: sync.WaitGroup{},
@@ -85,15 +88,18 @@ func (cw *configWatcher) reload(path string) error {
return cw.start() return cw.start()
} }
func (cw *configWatcher) add(typeK TargetType, interval time.Duration, targetsFn getTargets) error { func (cw *configWatcher) add(typeK TargetType, interval time.Duration, labelsFn getLabels) error {
targetMetadata, errors := getTargetMetadata(targetsFn, cw.cfg) targetMetadata, errors := getTargetMetadata(labelsFn, cw.cfg)
for _, err := range errors { for _, err := range errors {
return fmt.Errorf("failed to init notifier for %q: %w", typeK, err) return fmt.Errorf("failed to init notifier for %q: %w", typeK, err)
} }
cw.updateTargets(typeK, targetMetadata, cw.cfg, cw.genFn) cw.updateTargets(typeK, targetMetadata, cw.cfg, cw.genFn)
cw.wg.Go(func() { cw.wg.Add(1)
go func() {
defer cw.wg.Done()
ticker := time.NewTicker(interval) ticker := time.NewTicker(interval)
defer ticker.Stop() defer ticker.Stop()
@@ -103,77 +109,62 @@ func (cw *configWatcher) add(typeK TargetType, interval time.Duration, targetsFn
return return
case <-ticker.C: case <-ticker.C:
} }
targetMetadata, errors := getTargetMetadata(targetsFn, cw.cfg) targetMetadata, errors := getTargetMetadata(labelsFn, cw.cfg)
for _, err := range errors { for _, err := range errors {
logger.Errorf("failed to init notifier for %q: %s", typeK, err) logger.Errorf("failed to init notifier for %q: %w", typeK, err)
} }
cw.updateTargets(typeK, targetMetadata, cw.cfg, cw.genFn) cw.updateTargets(typeK, targetMetadata, cw.cfg, cw.genFn)
} }
}) }()
return nil return nil
} }
type targetMetadata struct { func getTargetMetadata(labelsFn getLabels, cfg *Config) (map[string]*promutil.Labels, []error) {
*promutil.Labels metaLabels, err := labelsFn()
alertRelabelConfigs *promrelabel.ParsedConfigs
}
func getTargetMetadata(targetsFn getTargets, cfg *Config) (map[string]targetMetadata, []error) {
metaLabelsList, alertRelabelCfgs, err := targetsFn()
if err != nil { if err != nil {
return nil, []error{fmt.Errorf("failed to get labels: %w", err)} return nil, []error{fmt.Errorf("failed to get labels: %w", err)}
} }
targetMts := make(map[string]targetMetadata, len(metaLabelsList)) targetMetadata := make(map[string]*promutil.Labels, len(metaLabels))
var errors []error var errors []error
duplicates := make(map[string]struct{}) duplicates := make(map[string]struct{})
for i := range metaLabelsList { for _, labels := range metaLabels {
metaLabels := metaLabelsList[i] target := labels.Get("__address__")
alertRelabelCfg := alertRelabelCfgs[i] u, processedLabels, err := parseLabels(target, labels, cfg)
for _, labels := range metaLabels { if err != nil {
target := labels.Get("__address__") errors = append(errors, err)
u, processedLabels, err := parseLabels(target, labels, cfg) continue
if err != nil {
errors = append(errors, err)
continue
}
if len(u) == 0 {
continue
}
// check for duplicated targets
// targets with same address but different alert_relabel_configs are still considered duplicates since it's mostly due to misconfiguration and could cause duplicated notifications.
if _, ok := duplicates[u]; ok {
if !*suppressDuplicateTargetErrors {
logger.Errorf("skipping duplicate target with identical address %q; "+
"make sure service discovery and relabeling is set up properly; "+
"original labels: %s; resulting labels: %s",
u, labels, processedLabels)
}
continue
}
duplicates[u] = struct{}{}
targetMts[u] = targetMetadata{
Labels: processedLabels,
alertRelabelConfigs: alertRelabelCfg,
}
} }
if len(u) == 0 {
continue
}
if _, ok := duplicates[u]; ok { // check for duplicates
if !*suppressDuplicateTargetErrors {
logger.Errorf("skipping duplicate target with identical address %q; "+
"make sure service discovery and relabeling is set up properly; "+
"original labels: %s; resulting labels: %s",
u, labels, processedLabels)
}
continue
}
duplicates[u] = struct{}{}
targetMetadata[u] = processedLabels
} }
return targetMts, errors return targetMetadata, errors
} }
type getTargets func() ([][]*promutil.Labels, []*promrelabel.ParsedConfigs, error) type getLabels func() ([]*promutil.Labels, error)
func (cw *configWatcher) start() error { func (cw *configWatcher) start() error {
if len(cw.cfg.StaticConfigs) > 0 { if len(cw.cfg.StaticConfigs) > 0 {
var targets []Target var targets []Target
for i, cfg := range cw.cfg.StaticConfigs { for _, cfg := range cw.cfg.StaticConfigs {
alertRelabelConfig, _ := promrelabel.ParseRelabelConfigs(cw.cfg.StaticConfigs[i].AlertRelabelConfigs)
httpCfg := mergeHTTPClientConfigs(cw.cfg.HTTPClientConfig, cfg.HTTPClientConfig) httpCfg := mergeHTTPClientConfigs(cw.cfg.HTTPClientConfig, cfg.HTTPClientConfig)
for _, target := range cfg.Targets { for _, target := range cfg.Targets {
address, labels, err := parseLabels(target, nil, cw.cfg) address, labels, err := parseLabels(target, nil, cw.cfg)
if err != nil { if err != nil {
return fmt.Errorf("failed to parse labels for target %q: %w", target, err) return fmt.Errorf("failed to parse labels for target %q: %w", target, err)
} }
notifier, err := NewAlertManager(address, cw.genFn, httpCfg, alertRelabelConfig, cw.cfg.Timeout.Duration()) notifier, err := NewAlertManager(address, cw.genFn, httpCfg, cw.cfg.parsedAlertRelabelConfigs, cw.cfg.Timeout.Duration())
if err != nil { if err != nil {
return fmt.Errorf("failed to init alertmanager for addr %q: %w", address, err) return fmt.Errorf("failed to init alertmanager for addr %q: %w", address, err)
} }
@@ -187,20 +178,17 @@ func (cw *configWatcher) start() error {
} }
if len(cw.cfg.ConsulSDConfigs) > 0 { if len(cw.cfg.ConsulSDConfigs) > 0 {
err := cw.add(TargetConsul, *consul.SDCheckInterval, func() ([][]*promutil.Labels, []*promrelabel.ParsedConfigs, error) { err := cw.add(TargetConsul, *consul.SDCheckInterval, func() ([]*promutil.Labels, error) {
var labels [][]*promutil.Labels var labels []*promutil.Labels
var alertRelabelConfigs []*promrelabel.ParsedConfigs
for i := range cw.cfg.ConsulSDConfigs { for i := range cw.cfg.ConsulSDConfigs {
alertRelabelConfig, _ := promrelabel.ParseRelabelConfigs(cw.cfg.ConsulSDConfigs[i].AlertRelabelConfigs)
sdc := &cw.cfg.ConsulSDConfigs[i] sdc := &cw.cfg.ConsulSDConfigs[i]
targetLabels, err := sdc.GetLabels(cw.cfg.baseDir) targetLabels, err := sdc.GetLabels(cw.cfg.baseDir)
if err != nil { if err != nil {
return nil, nil, fmt.Errorf("got labels err: %w", err) return nil, fmt.Errorf("got labels err: %w", err)
} }
labels = append(labels, targetLabels) labels = append(labels, targetLabels...)
alertRelabelConfigs = append(alertRelabelConfigs, alertRelabelConfig)
} }
return labels, alertRelabelConfigs, nil return labels, nil
}) })
if err != nil { if err != nil {
return fmt.Errorf("failed to start consulSD discovery: %w", err) return fmt.Errorf("failed to start consulSD discovery: %w", err)
@@ -208,21 +196,17 @@ func (cw *configWatcher) start() error {
} }
if len(cw.cfg.DNSSDConfigs) > 0 { if len(cw.cfg.DNSSDConfigs) > 0 {
err := cw.add(TargetDNS, *dns.SDCheckInterval, func() ([][]*promutil.Labels, []*promrelabel.ParsedConfigs, error) { err := cw.add(TargetDNS, *dns.SDCheckInterval, func() ([]*promutil.Labels, error) {
var labels [][]*promutil.Labels var labels []*promutil.Labels
var alertRelabelConfigs []*promrelabel.ParsedConfigs
for i := range cw.cfg.DNSSDConfigs { for i := range cw.cfg.DNSSDConfigs {
alertRelabelConfig, _ := promrelabel.ParseRelabelConfigs(cw.cfg.DNSSDConfigs[i].AlertRelabelConfigs)
sdc := &cw.cfg.DNSSDConfigs[i] sdc := &cw.cfg.DNSSDConfigs[i]
targetLabels, err := sdc.GetLabels(cw.cfg.baseDir) targetLabels, err := sdc.GetLabels(cw.cfg.baseDir)
if err != nil { if err != nil {
return nil, nil, fmt.Errorf("got labels err: %w", err) return nil, fmt.Errorf("got labels err: %w", err)
} }
labels = append(labels, targetLabels) labels = append(labels, targetLabels...)
alertRelabelConfigs = append(alertRelabelConfigs, alertRelabelConfig)
} }
return labels, alertRelabelConfigs, nil return labels, nil
}) })
if err != nil { if err != nil {
return fmt.Errorf("failed to start DNSSD discovery: %w", err) return fmt.Errorf("failed to start DNSSD discovery: %w", err)
@@ -256,30 +240,30 @@ func (cw *configWatcher) setTargets(key TargetType, targets []Target) {
cw.targetsMu.Unlock() cw.targetsMu.Unlock()
} }
func (cw *configWatcher) updateTargets(key TargetType, targetMts map[string]targetMetadata, cfg *Config, genFn AlertURLGenerator) { func (cw *configWatcher) updateTargets(key TargetType, targetMetadata map[string]*promutil.Labels, cfg *Config, genFn AlertURLGenerator) {
cw.targetsMu.Lock() cw.targetsMu.Lock()
defer cw.targetsMu.Unlock() defer cw.targetsMu.Unlock()
oldTargets := cw.targets[key] oldTargets := cw.targets[key]
var updatedTargets []Target var updatedTargets []Target
for _, ot := range oldTargets { for _, ot := range oldTargets {
if _, ok := targetMts[ot.Addr()]; !ok { if _, ok := targetMetadata[ot.Addr()]; !ok {
// if target not exists in currentTargets, close it // if target not exists in currentTargets, close it
ot.Close() ot.Close()
} else { } else {
updatedTargets = append(updatedTargets, ot) updatedTargets = append(updatedTargets, ot)
delete(targetMts, ot.Addr()) delete(targetMetadata, ot.Addr())
} }
} }
// create new resources for the new targets // create new resources for the new targets
for addr, metadata := range targetMts { for addr, labels := range targetMetadata {
am, err := NewAlertManager(addr, genFn, cfg.HTTPClientConfig, metadata.alertRelabelConfigs, cfg.Timeout.Duration()) am, err := NewAlertManager(addr, genFn, cfg.HTTPClientConfig, cfg.parsedAlertRelabelConfigs, cfg.Timeout.Duration())
if err != nil { if err != nil {
logger.Errorf("failed to init %s notifier with addr %q: %s", key, addr, err) logger.Errorf("failed to init %s notifier with addr %q: %w", key, addr, err)
continue continue
} }
updatedTargets = append(updatedTargets, Target{ updatedTargets = append(updatedTargets, Target{
Notifier: am, Notifier: am,
Labels: metadata.Labels, Labels: labels,
}) })
} }

View File

@@ -7,7 +7,6 @@ import (
"net/http/httptest" "net/http/httptest"
"os" "os"
"sync" "sync"
"sync/atomic"
"testing" "testing"
"time" "time"
@@ -29,11 +28,7 @@ static_configs:
- localhost:9093 - localhost:9093
- localhost:9094 - localhost:9094
`) `)
cfg, err := parseConfig(f.Name()) cw, err := newWatcher(f.Name(), nil)
if err != nil {
t.Fatalf("failed to parse config: %s", err)
}
cw, err := newWatcher(cfg, nil)
if err != nil { if err != nil {
t.Fatalf("failed to start config watcher: %s", err) t.Fatalf("failed to start config watcher: %s", err)
} }
@@ -88,64 +83,33 @@ consul_sd_configs:
- server: %s - server: %s
services: services:
- alertmanager - alertmanager
- server: %s `, consulSDServer.URL))
services:
- alertmanager
alert_relabel_configs:
- target_label: "foo"
replacement: "tar"
`, consulSDServer.URL, consulSDServer.URL))
cfg, err := parseConfig(consulSDFile.Name()) cw, err := newWatcher(consulSDFile.Name(), nil)
if err != nil {
t.Fatalf("failed to parse config: %s", err)
}
cw, err := newWatcher(cfg, nil)
if err != nil { if err != nil {
t.Fatalf("failed to start config watcher: %s", err) t.Fatalf("failed to start config watcher: %s", err)
} }
defer cw.mustStop() defer cw.mustStop()
if len(cw.notifiers()) != 3 { if len(cw.notifiers()) != 2 {
t.Fatalf("expected to get 3 notifiers; got %d", len(cw.notifiers())) t.Fatalf("expected to get 2 notifiers; got %d", len(cw.notifiers()))
} }
expAddr1 := fmt.Sprintf("https://%s/proxy/api/v2/alerts", fakeConsulService1) expAddr1 := fmt.Sprintf("https://%s/proxy/api/v2/alerts", fakeConsulService1)
expAddr2 := fmt.Sprintf("https://%s/proxy/api/v2/alerts", fakeConsulService2) expAddr2 := fmt.Sprintf("https://%s/proxy/api/v2/alerts", fakeConsulService2)
expAddr3 := fmt.Sprintf("https://%s/proxy/api/v2/alerts", fakeConsulService3)
n1, n2, n3 := cw.notifiers()[0], cw.notifiers()[1], cw.notifiers()[2] n1, n2 := cw.notifiers()[0], cw.notifiers()[1]
if n1.Addr() != expAddr1 { if n1.Addr() != expAddr1 {
t.Fatalf("exp address %q; got %q", expAddr1, n1.Addr()) t.Fatalf("exp address %q; got %q", expAddr1, n1.Addr())
} }
if n2.Addr() != expAddr2 { if n2.Addr() != expAddr2 {
t.Fatalf("exp address %q; got %q", expAddr2, n2.Addr()) t.Fatalf("exp address %q; got %q", expAddr2, n2.Addr())
} }
if n3.Addr() != expAddr3 {
t.Fatalf("exp address %q; got %q", expAddr3, n3.Addr())
}
if n1.(*AlertManager).relabelConfigs.String() != "" {
t.Fatalf("unexpected relabel configs: %q", n1.(*AlertManager).relabelConfigs.String())
}
if n2.(*AlertManager).relabelConfigs.String() != "" {
t.Fatalf("unexpected relabel configs: %q", n2.(*AlertManager).relabelConfigs.String())
}
if n3.(*AlertManager).relabelConfigs.String() != "- target_label: foo\n replacement: tar\n" {
t.Fatalf("unexpected relabel configs: %q", n3.(*AlertManager).relabelConfigs.String())
}
f := func() bool { return len(cw.notifiers()) == 1 } f := func() bool { return len(cw.notifiers()) == 1 }
if !waitFor(f, time.Second) { if !waitFor(f, time.Second) {
t.Fatalf("expected to get 1 notifiers; got %d", len(cw.notifiers())) t.Fatalf("expected to get 1 notifiers; got %d", len(cw.notifiers()))
} }
n3 = cw.notifiers()[0]
if n3.Addr() != expAddr3 {
t.Fatalf("exp address %q; got %q", expAddr3, n3.Addr())
}
if n3.(*AlertManager).relabelConfigs.String() != "- target_label: foo\n replacement: tar\n" {
t.Fatalf("unexpected relabel configs: %q", n3.(*AlertManager).relabelConfigs.String())
}
} }
// TestConfigWatcherReloadConcurrent supposed to test concurrent // TestConfigWatcherReloadConcurrent supposed to test concurrent
@@ -200,11 +164,7 @@ consul_sd_configs:
"unknownFields.bad.yaml", "unknownFields.bad.yaml",
} }
cfg, err := parseConfig(paths[0]) cw, err := newWatcher(paths[0], nil)
if err != nil {
t.Fatalf("failed to parse config: %s", err)
}
cw, err := newWatcher(cfg, nil)
if err != nil { if err != nil {
t.Fatalf("failed to start config watcher: %s", err) t.Fatalf("failed to start config watcher: %s", err)
} }
@@ -212,16 +172,18 @@ consul_sd_configs:
const workers = 500 const workers = 500
const iterations = 10 const iterations = 10
var wg sync.WaitGroup wg := sync.WaitGroup{}
for n := range workers { wg.Add(workers)
wg.Go(func() { for i := 0; i < workers; i++ {
go func(n int) {
defer wg.Done()
r := rand.New(rand.NewSource(int64(n))) r := rand.New(rand.NewSource(int64(n)))
for range iterations { for i := 0; i < iterations; i++ {
rnd := r.Intn(len(paths)) rnd := r.Intn(len(paths))
_ = cw.reload(paths[rnd]) // update can fail and this is expected _ = cw.reload(paths[rnd]) // update can fail and this is expected
_ = cw.notifiers() _ = cw.notifiers()
} }
}) }(i)
} }
wg.Wait() wg.Wait()
} }
@@ -240,11 +202,10 @@ func checkErr(t *testing.T, err error) {
const ( const (
fakeConsulService1 = "127.0.0.1:9093" fakeConsulService1 = "127.0.0.1:9093"
fakeConsulService2 = "127.0.0.1:9095" fakeConsulService2 = "127.0.0.1:9095"
fakeConsulService3 = "127.0.0.1:9097"
) )
func newFakeConsulServer() *httptest.Server { func newFakeConsulServer() *httptest.Server {
var requestCount atomic.Int32 requestCount := 0
mux := http.NewServeMux() mux := http.NewServeMux()
mux.HandleFunc("/v1/agent/self", func(rw http.ResponseWriter, _ *http.Request) { mux.HandleFunc("/v1/agent/self", func(rw http.ResponseWriter, _ *http.Request) {
rw.Write([]byte(`{"Config": {"Datacenter": "dc1"}}`)) rw.Write([]byte(`{"Config": {"Datacenter": "dc1"}}`))
@@ -259,7 +220,7 @@ func newFakeConsulServer() *httptest.Server {
}`)) }`))
}) })
mux.HandleFunc("/v1/health/service/alertmanager", func(rw http.ResponseWriter, _ *http.Request) { mux.HandleFunc("/v1/health/service/alertmanager", func(rw http.ResponseWriter, _ *http.Request) {
if requestCount.Load() == 0 { if requestCount == 0 {
rw.Header().Set("X-Consul-Index", "1") rw.Header().Set("X-Consul-Index", "1")
rw.Write([]byte(` rw.Write([]byte(`
[ [
@@ -399,7 +360,7 @@ func newFakeConsulServer() *httptest.Server {
} }
]`)) ]`))
} }
requestCount.Add(1) requestCount++
}) })
return httptest.NewServer(mux) return httptest.NewServer(mux)

View File

@@ -5,8 +5,6 @@ import (
"fmt" "fmt"
"sync" "sync"
"time" "time"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
) )
// FakeNotifier is a mock notifier // FakeNotifier is a mock notifier
@@ -17,32 +15,14 @@ type FakeNotifier struct {
counter int counter int
} }
// InitFakeNotifier initializes global notifier to FakeNotifier,
// and returns a cleanup function to restore the original getActiveNotifiers.
func InitFakeNotifier() (*FakeNotifier, func()) {
originalGetActiveNotifiers := getActiveNotifiers
fn := &FakeNotifier{}
getActiveNotifiers = func() []Notifier {
return []Notifier{fn}
}
return fn, func() {
getActiveNotifiers = originalGetActiveNotifiers
}
}
// Close does nothing // Close does nothing
func (*FakeNotifier) Close() {} func (*FakeNotifier) Close() {}
// LastError returns last error message
func (*FakeNotifier) LastError() string {
return ""
}
// Addr returns "" // Addr returns ""
func (*FakeNotifier) Addr() string { return "" } func (*FakeNotifier) Addr() string { return "" }
// Send sets alerts and increases counter // Send sets alerts and increases counter
func (fn *FakeNotifier) Send(_ context.Context, alerts []Alert, _ [][]prompb.Label, _ map[string]string) error { func (fn *FakeNotifier) Send(_ context.Context, alerts []Alert, _ map[string]string) error {
fn.Lock() fn.Lock()
defer fn.Unlock() defer fn.Unlock()
fn.counter += len(alerts) fn.counter += len(alerts)

View File

@@ -1,22 +1,14 @@
package notifier package notifier
import ( import (
"context"
"flag" "flag"
"fmt" "fmt"
"net/url" "net/url"
"strconv"
"strings" "strings"
"sync"
"time" "time"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil" "github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httputil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth" "github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutil" "github.com/VictoriaMetrics/VictoriaMetrics/lib/promutil"
) )
@@ -36,7 +28,6 @@ var (
"For example, -remoteWrite.headers='My-Auth:foobar' would send 'My-Auth: foobar' HTTP header with every request to the corresponding -notifier.url. "+ "For example, -remoteWrite.headers='My-Auth:foobar' would send 'My-Auth: foobar' HTTP header with every request to the corresponding -notifier.url. "+
"Multiple headers must be delimited by '^^': -notifier.headers='header1:value1^^header2:value2,header3:value3'") "Multiple headers must be delimited by '^^': -notifier.headers='header1:value1^^header2:value2,header3:value3'")
basicAuthUsername = flagutil.NewArrayString("notifier.basicAuth.username", "Optional basic auth username for -notifier.url") basicAuthUsername = flagutil.NewArrayString("notifier.basicAuth.username", "Optional basic auth username for -notifier.url")
basicAuthUsernameFile = flagutil.NewArrayString("notifier.basicAuth.usernameFile", "Optional path to basic auth username file for -notifier.url")
basicAuthPassword = flagutil.NewArrayString("notifier.basicAuth.password", "Optional basic auth password for -notifier.url") basicAuthPassword = flagutil.NewArrayString("notifier.basicAuth.password", "Optional basic auth password for -notifier.url")
basicAuthPasswordFile = flagutil.NewArrayString("notifier.basicAuth.passwordFile", "Optional path to basic auth password file for -notifier.url") basicAuthPasswordFile = flagutil.NewArrayString("notifier.basicAuth.passwordFile", "Optional path to basic auth password file for -notifier.url")
@@ -66,61 +57,11 @@ var (
sendTimeout = flagutil.NewArrayDuration("notifier.sendTimeout", 10*time.Second, "Timeout when sending alerts to the corresponding -notifier.url") sendTimeout = flagutil.NewArrayDuration("notifier.sendTimeout", 10*time.Second, "Timeout when sending alerts to the corresponding -notifier.url")
) )
// AlertURLGeneratorFn returns a URL to the passed alert object. // cw holds a configWatcher for configPath configuration file
// Call InitAlertURLGeneratorFn before using this function. // configWatcher provides a list of Notifier objects discovered
var AlertURLGeneratorFn AlertURLGenerator // from static config or via service discovery.
// cw is not nil only if configPath is provided.
// InitAlertURLGeneratorFn populates AlertURLGeneratorFn var cw *configWatcher
func InitAlertURLGeneratorFn(externalURL *url.URL, externalAlertSource string, validateTemplate bool) error {
if externalAlertSource == "" {
AlertURLGeneratorFn = func(a Alert) string {
gID, aID := strconv.FormatUint(a.GroupID, 10), strconv.FormatUint(a.ID, 10)
return fmt.Sprintf("%s/vmalert/alert?%s=%s&%s=%s", externalURL, "group_id", gID, "alert_id", aID)
}
return nil
}
if validateTemplate {
if err := ValidateTemplates(map[string]string{
"tpl": externalAlertSource,
}); err != nil {
return fmt.Errorf("error validating source template %s: %w", externalAlertSource, err)
}
}
m := map[string]string{
"tpl": externalAlertSource,
}
AlertURLGeneratorFn = func(alert Alert) string {
qFn := func(_ string) ([]datasource.Metric, error) {
return nil, fmt.Errorf("`query` template isn't supported for alert source template")
}
templated, err := alert.ExecTemplate(qFn, alert.Labels, m)
if err != nil {
logger.Errorf("cannot template alert source: %s", err)
}
return fmt.Sprintf("%s/%s", externalURL, templated["tpl"])
}
return nil
}
var (
// getActiveNotifiers returns the current list of Notifier objects.
getActiveNotifiers func() []Notifier
// globalRelabelCfg stores the parsed alert relabeling config from the config file if there is
globalRelabelCfg *promrelabel.ParsedConfigs
// cw holds a configWatcher for configPath configuration file
// configWatcher provides a list of Notifier objects discovered
// from static config or via service discovery.
// cw is not nil only if configPath is provided.
cw *configWatcher
// externalLabels is a global variable for holding external labels configured via flags
// It is supposed to be inited via Init function only.
externalLabels map[string]string
// externalURL is a global variable for holding external URL value configured via flag
// It is supposed to be inited via Init function only.
externalURL string
)
// Reload checks the changes in configPath configuration file // Reload checks the changes in configPath configuration file
// and applies changes if any. // and applies changes if any.
@@ -131,62 +72,66 @@ func Reload() error {
return cw.reload(*configPath) return cw.reload(*configPath)
} }
var staticNotifiersFn func() []Notifier
var (
// externalLabels is a global variable for holding external labels configured via flags
// It is supposed to be inited via Init function only.
externalLabels map[string]string
// externalURL is a global variable for holding external URL value configured via flag
// It is supposed to be inited via Init function only.
externalURL string
)
// Init returns a function for retrieving actual list of Notifier objects.
// Init works in two mods: // Init works in two mods:
// - configuration via flags (for backward compatibility). Is always static // - configuration via flags (for backward compatibility). Is always static
// and don't support live reloads. // and don't support live reloads.
// - configuration via file. Supports live reloads and service discovery. // - configuration via file. Supports live reloads and service discovery.
// //
// Init returns an error if both mods are used. // Init returns an error if both mods are used.
func Init(extLabels map[string]string, extURL string) error { func Init(gen AlertURLGenerator, extLabels map[string]string, extURL string) (func() []Notifier, error) {
externalURL = extURL externalURL = extURL
externalLabels = extLabels externalLabels = extLabels
_, err := url.Parse(externalURL) _, err := url.Parse(externalURL)
if err != nil { if err != nil {
return fmt.Errorf("failed to parse external URL: %w", err) return nil, fmt.Errorf("failed to parse external URL: %w", err)
} }
if *blackHole { if *blackHole {
if len(*addrs) > 0 || *configPath != "" { if len(*addrs) > 0 || *configPath != "" {
return fmt.Errorf("only one of -notifier.blackhole, -notifier.url and -notifier.config flags must be specified") return nil, fmt.Errorf("only one of -notifier.blackhole, -notifier.url and -notifier.config flags must be specified")
} }
notifier := newBlackHoleNotifier() notifier := newBlackHoleNotifier()
getActiveNotifiers = func() []Notifier { staticNotifiersFn = func() []Notifier {
return []Notifier{notifier} return []Notifier{notifier}
} }
return nil return staticNotifiersFn, nil
} }
if *configPath == "" && len(*addrs) == 0 { if *configPath == "" && len(*addrs) == 0 {
return nil return nil, nil
} }
if *configPath != "" && len(*addrs) > 0 { if *configPath != "" && len(*addrs) > 0 {
return fmt.Errorf("only one of -notifier.config or -notifier.url flags must be specified") return nil, fmt.Errorf("only one of -notifier.config or -notifier.url flags must be specified")
} }
if len(*addrs) > 0 { if len(*addrs) > 0 {
notifiers, err := notifiersFromFlags(AlertURLGeneratorFn) notifiers, err := notifiersFromFlags(gen)
if err != nil { if err != nil {
return fmt.Errorf("failed to create notifier from flag values: %w", err) return nil, fmt.Errorf("failed to create notifier from flag values: %w", err)
} }
getActiveNotifiers = func() []Notifier { staticNotifiersFn = func() []Notifier {
return notifiers return notifiers
} }
return nil return staticNotifiersFn, nil
} }
cfg, err := parseConfig(*configPath) cw, err = newWatcher(*configPath, gen)
if err != nil { if err != nil {
return err return nil, fmt.Errorf("failed to init config watcher: %w", err)
} }
if cfg.AlertRelabelConfigs != nil { return cw.notifiers, nil
globalRelabelCfg = cfg.parsedAlertRelabelConfigs
}
cw, err = newWatcher(cfg, AlertURLGeneratorFn)
if err != nil {
return fmt.Errorf("failed to init config watcher: %w", err)
}
getActiveNotifiers = cw.notifiers
return nil
} }
// InitSecretFlags must be called after flag.Parse and before any logging // InitSecretFlags must be called after flag.Parse and before any logging
@@ -194,7 +139,6 @@ func InitSecretFlags() {
if !*showNotifierURL { if !*showNotifierURL {
flagutil.RegisterSecretFlag("notifier.url") flagutil.RegisterSecretFlag("notifier.url")
} }
flagutil.RegisterSecretFlag("notifier.headers")
} }
func notifiersFromFlags(gen AlertURLGenerator) ([]Notifier, error) { func notifiersFromFlags(gen AlertURLGenerator) ([]Notifier, error) {
@@ -215,7 +159,6 @@ func notifiersFromFlags(gen AlertURLGenerator) ([]Notifier, error) {
}, },
BasicAuth: &promauth.BasicAuthConfig{ BasicAuth: &promauth.BasicAuthConfig{
Username: basicAuthUsername.GetOptionalArg(i), Username: basicAuthUsername.GetOptionalArg(i),
UsernameFile: basicAuthUsernameFile.GetOptionalArg(i),
Password: promauth.NewSecret(basicAuthPassword.GetOptionalArg(i)), Password: promauth.NewSecret(basicAuthPassword.GetOptionalArg(i)),
PasswordFile: basicAuthPasswordFile.GetOptionalArg(i), PasswordFile: basicAuthPasswordFile.GetOptionalArg(i),
}, },
@@ -232,9 +175,6 @@ func notifiersFromFlags(gen AlertURLGenerator) ([]Notifier, error) {
Headers: []string{headers.GetOptionalArg(i)}, Headers: []string{headers.GetOptionalArg(i)},
} }
if err := httputil.CheckURL(addr); err != nil {
return nil, fmt.Errorf("invalid notifier.url %q: %w", addr, err)
}
addr = strings.TrimSuffix(addr, "/") addr = strings.TrimSuffix(addr, "/")
am, err := NewAlertManager(addr+alertManagerPath, gen, authCfg, nil, sendTimeout.GetOptionalArg(i)) am, err := NewAlertManager(addr+alertManagerPath, gen, authCfg, nil, sendTimeout.GetOptionalArg(i))
if err != nil { if err != nil {
@@ -266,58 +206,23 @@ const (
// GetTargets returns list of static or discovered targets // GetTargets returns list of static or discovered targets
// via notifier configuration. // via notifier configuration.
//
// Must be called after Init.
func GetTargets() map[TargetType][]Target { func GetTargets() map[TargetType][]Target {
if getActiveNotifiers == nil { var targets = make(map[TargetType][]Target)
return nil
if staticNotifiersFn != nil {
for _, ns := range staticNotifiersFn() {
targets[TargetStatic] = append(targets[TargetStatic], Target{
Notifier: ns,
})
}
} }
targets := make(map[TargetType][]Target)
// use cached targets from configWatcher instead of getActiveNotifiers for the extra target labels
if cw != nil { if cw != nil {
cw.targetsMu.RLock() cw.targetsMu.RLock()
for key, ns := range cw.targets { for key, ns := range cw.targets {
targets[key] = append(targets[key], ns...) targets[key] = append(targets[key], ns...)
} }
cw.targetsMu.RUnlock() cw.targetsMu.RUnlock()
return targets
}
// static notifiers don't have labels
for _, ns := range getActiveNotifiers() {
targets[TargetStatic] = append(targets[TargetStatic], Target{
Notifier: ns,
})
} }
return targets return targets
} }
// Send sends alerts to all active notifiers
func Send(ctx context.Context, alerts []Alert, notifierHeaders map[string]string) chan error {
alertsToSend := make([]Alert, 0, len(alerts))
lblss := make([][]prompb.Label, 0, len(alerts))
// apply global relabel config first without modifying original alerts in alerts
for _, a := range alerts {
lbls := a.applyRelabelingIfNeeded(globalRelabelCfg)
if len(lbls) == 0 {
continue
}
alertsToSend = append(alertsToSend, a)
lblss = append(lblss, lbls)
}
wg := sync.WaitGroup{}
activeNotifiers := getActiveNotifiers()
errCh := make(chan error, len(activeNotifiers))
defer close(errCh)
for i := range activeNotifiers {
nt := activeNotifiers[i]
wg.Go(func() {
if err := nt.Send(ctx, alertsToSend, lblss, notifierHeaders); err != nil {
errCh <- fmt.Errorf("failed to send alerts to addr %q: %w", nt.Addr(), err)
}
})
}
wg.Wait()
return errCh
}

View File

@@ -1,17 +1,9 @@
package notifier package notifier
import ( import (
"context"
"encoding/json"
"fmt"
"net/http"
"net/http/httptest"
"net/url"
"os"
"testing" "testing"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil" "github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
) )
func TestInit(t *testing.T) { func TestInit(t *testing.T) {
@@ -20,13 +12,14 @@ func TestInit(t *testing.T) {
*addrs = flagutil.ArrayString{"127.0.0.1", "127.0.0.2"} *addrs = flagutil.ArrayString{"127.0.0.1", "127.0.0.2"}
err := Init(nil, "") fn, err := Init(nil, nil, "")
if err != nil { if err != nil {
t.Fatalf("%s", err) t.Fatalf("%s", err)
} }
if len(getActiveNotifiers()) != 2 { nfs := fn()
t.Fatalf("expected to get 2 notifiers; got %d", len(getActiveNotifiers())) if len(nfs) != 2 {
t.Fatalf("expected to get 2 notifiers; got %d", len(nfs))
} }
targets := GetTargets() targets := GetTargets()
@@ -55,22 +48,19 @@ func TestInitNegative(t *testing.T) {
*blackHole = oldBlackHole *blackHole = oldBlackHole
}() }()
f := func(path string, addr []string, bh bool) { f := func(path, addr string, bh bool) {
*configPath = path *configPath = path
*addrs = flagutil.ArrayString(addr) *addrs = flagutil.ArrayString{addr}
*blackHole = bh *blackHole = bh
if err := Init(nil, ""); err == nil { if _, err := Init(nil, nil, ""); err == nil {
t.Fatalf("expected to get error; got nil instead") t.Fatalf("expected to get error; got nil instead")
} }
} }
// *configPath, *addrs and *blackhole are mutually exclusive // *configPath, *addrs and *blackhole are mutually exclusive
f("/dummy/path", []string{"127.0.0.1"}, false) f("/dummy/path", "127.0.0.1", false)
f("/dummy/path", []string{}, true) f("/dummy/path", "", true)
f("", []string{"127.0.0.1"}, true) f("", "127.0.0.1", true)
// addr cannot be ""
f("", []string{""}, false)
f("", []string{"127.0.0.1", ""}, false)
} }
func TestBlackHole(t *testing.T) { func TestBlackHole(t *testing.T) {
@@ -79,13 +69,14 @@ func TestBlackHole(t *testing.T) {
*blackHole = true *blackHole = true
err := Init(nil, "") fn, err := Init(nil, nil, "")
if err != nil { if err != nil {
t.Fatalf("%s", err) t.Fatalf("%s", err)
} }
if len(getActiveNotifiers()) != 1 { nfs := fn()
t.Fatalf("expected to get 1 notifier; got %d", len(getActiveNotifiers())) if len(nfs) != 1 {
t.Fatalf("expected to get 1 notifier; got %d", len(nfs))
} }
targets := GetTargets() targets := GetTargets()
@@ -100,114 +91,3 @@ func TestBlackHole(t *testing.T) {
t.Fatalf("expected to get \"blackhole\"; got %q instead", nf1.Addr()) t.Fatalf("expected to get \"blackhole\"; got %q instead", nf1.Addr())
} }
} }
// TestGetAlertURLGenerator checks the URLs produced by AlertURLGeneratorFn after
// InitAlertURLGeneratorFn is called with an empty template, an invalid template
// and a valid template.
func TestGetAlertURLGenerator(t *testing.T) {
	savedFn := AlertURLGeneratorFn
	defer func() { AlertURLGeneratorFn = savedFn }()

	alert := Alert{GroupID: 42, ID: 2, Value: 4, Labels: map[string]string{"tenant": "baz"}}
	externalURL, _ := url.Parse("https://victoriametrics.com/path")

	// mustGenerate fails the test unless the current generator renders want for alert.
	mustGenerate := func(want string) {
		t.Helper()
		if got := AlertURLGeneratorFn(alert); want != got {
			t.Fatalf("unexpected url want %s, got %s", want, got)
		}
	}

	// An empty template yields the built-in vmalert alert link.
	if err := InitAlertURLGeneratorFn(externalURL, "", false); err != nil {
		t.Fatalf("unexpected error %s", err)
	}
	mustGenerate(fmt.Sprintf("https://victoriametrics.com/path/vmalert/alert?%s=42&%s=2", "group_id", "alert_id"))

	// A template that cannot be validated must be rejected.
	if err := InitAlertURLGeneratorFn(nil, "foo?{{invalid}}", true); err == nil {
		t.Fatalf("expected template validation error got nil")
	}

	// A valid template is rendered with the alert's value and labels.
	if err := InitAlertURLGeneratorFn(externalURL, "foo?query={{$value}}&ds={{ $labels.tenant }}", true); err != nil {
		t.Fatalf("unexpected error %s", err)
	}
	mustGenerate("https://victoriametrics.com/path/foo?query=4&ds=baz")
}
// TestSendAlerts verifies that Send applies both the global and the
// per-static-config alert relabeling rules before delivering alerts to the
// notifier configured via -notifier.config style file.
//
// A local HTTP server plays the role of Alertmanager and inspects the labels
// of the received alerts.
func TestSendAlerts(t *testing.T) {
	oldAlertURLGeneratorFn := AlertURLGeneratorFn
	defer func() { AlertURLGeneratorFn = oldAlertURLGeneratorFn }()
	AlertURLGeneratorFn = func(alert Alert) string {
		return ""
	}

	// Handlers run on the HTTP server's goroutines. t.Fatalf (FailNow) must only
	// be called from the goroutine running the test, so failures are reported
	// with t.Errorf followed by an early return.
	mux := http.NewServeMux()
	mux.HandleFunc("/", func(_ http.ResponseWriter, _ *http.Request) {
		t.Errorf("should not be called")
	})
	mux.HandleFunc(alertManagerPath, func(_ http.ResponseWriter, r *http.Request) {
		var a []struct {
			Labels map[string]string `json:"labels"`
		}
		if err := json.NewDecoder(r.Body).Decode(&a); err != nil {
			t.Errorf("can not unmarshal data into alert %s", err)
			return
		}
		if len(a) != 2 {
			t.Errorf("expected 2 alert in array got %d", len(a))
			return
		}
		if len(a[0].Labels) != 4 {
			t.Errorf("expected 4 labels got %d", len(a[0].Labels))
			return
		}
		if a[0].Labels["env"] != "prod" {
			t.Errorf("expected env label to be prod during relabeling, got %s", a[0].Labels["env"])
			return
		}
		if a[0].Labels["c"] != "baz" {
			t.Errorf("expected c label to be baz during relabeling, got %s", a[0].Labels["c"])
			return
		}
		if len(a[1].Labels) != 1 {
			t.Errorf("expected 1 labels got %d", len(a[1].Labels))
			return
		}
	})
	srv := httptest.NewServer(mux)
	defer srv.Close()

	f, err := os.CreateTemp("", "")
	if err != nil {
		t.Fatal(err)
	}
	cfgPath := f.Name()
	defer fs.MustRemovePath(cfgPath)
	// Only the file name is needed below; release the handle right away.
	if err := f.Close(); err != nil {
		t.Fatalf("cannot close temporary config file: %s", err)
	}

	// The static config copies label "b" into "c"; the global rules copy "a"
	// into "b" and add env="prod". The handler above expects the global rules
	// to be applied before the per-target ones.
	rawConfig := `
static_configs:
  - targets:
      - %s
    alert_relabel_configs:
      - source_labels: [b]
        target_label: "c"
alert_relabel_configs:
  - source_labels: [a]
    target_label: "b"
  - target_label: "env"
    replacement: "prod"
`
	config := fmt.Sprintf(rawConfig, srv.URL+alertManagerPath)
	writeToFile(cfgPath, config)

	// configPath is a pointer to the flag value: save and restore the value
	// itself, otherwise the temporary path leaks into the tests that run later.
	oldConfigPath := *configPath
	defer func() { *configPath = oldConfigPath }()
	*configPath = cfgPath

	err = Init(nil, "")
	if err != nil {
		t.Fatalf("unexpected error when parse notifier config: %s", err)
	}

	firingAlerts := []Alert{
		{
			Name:   "alert1",
			Labels: map[string]string{"a": "baz"},
		},
		{
			Name:   "alert2",
			Labels: map[string]string{},
		},
	}
	errG := Send(context.Background(), firingAlerts, nil)
	for err := range errG {
		if err != nil {
			t.Errorf("unexpected error when sending alerts: %s", err)
		}
	}
}

View File

@@ -1,21 +1,15 @@
package notifier package notifier
import ( import "context"
"context"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
)
// Notifier is a common interface for alert manager provider // Notifier is a common interface for alert manager provider
type Notifier interface { type Notifier interface {
// Send sends the given list of alerts. // Send sends the given list of alerts.
// Returns an error if fails to send the alerts. // Returns an error if fails to send the alerts.
// Must unblock if the given ctx is cancelled. // Must unblock if the given ctx is cancelled.
Send(ctx context.Context, alerts []Alert, alertLabels [][]prompb.Label, notifierHeaders map[string]string) error Send(ctx context.Context, alerts []Alert, notifierHeaders map[string]string) error
// Addr returns address where alerts are sent. // Addr returns address where alerts are sent.
Addr() string Addr() string
// LastError returns error, that occurred during last attempt to send data
LastError() string
// Close is a destructor for the Notifier // Close is a destructor for the Notifier
Close() Close()
} }

View File

@@ -1,10 +1,6 @@
package notifier package notifier
import ( import "context"
"context"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
)
// blackHoleNotifier is a Notifier stub, used when no notifications need // blackHoleNotifier is a Notifier stub, used when no notifications need
// to be sent. // to be sent.
@@ -14,7 +10,7 @@ type blackHoleNotifier struct {
} }
// Send will send no notifications, but increase the metric. // Send will send no notifications, but increase the metric.
func (bh *blackHoleNotifier) Send(_ context.Context, alerts []Alert, _ [][]prompb.Label, _ map[string]string) error { //nolint:revive func (bh *blackHoleNotifier) Send(_ context.Context, alerts []Alert, _ map[string]string) error { //nolint:revive
bh.metrics.alertsSent.Add(len(alerts)) bh.metrics.alertsSent.Add(len(alerts))
return nil return nil
} }
@@ -29,11 +25,6 @@ func (bh *blackHoleNotifier) Close() {
bh.metrics.close() bh.metrics.close()
} }
// LastError returns the error that occurred during the last attempt to send data.
// blackHoleNotifier has no error to report, so the result is always an empty string.
func (bh *blackHoleNotifier) LastError() string {
return ""
}
// newBlackHoleNotifier creates a new blackHoleNotifier // newBlackHoleNotifier creates a new blackHoleNotifier
func newBlackHoleNotifier() *blackHoleNotifier { func newBlackHoleNotifier() *blackHoleNotifier {
address := "blackhole" address := "blackhole"

View File

@@ -5,7 +5,6 @@ import (
"testing" "testing"
"time" "time"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
metricset "github.com/VictoriaMetrics/metrics" metricset "github.com/VictoriaMetrics/metrics"
) )
@@ -17,7 +16,7 @@ func TestBlackHoleNotifier_Send(t *testing.T) {
Start: time.Now().UTC(), Start: time.Now().UTC(),
End: time.Now().UTC(), End: time.Now().UTC(),
Annotations: map[string]string{"a": "b", "c": "d", "e": "f"}, Annotations: map[string]string{"a": "b", "c": "d", "e": "f"},
}}, [][]prompb.Label{{}}, nil); err != nil { }}, nil); err != nil {
t.Fatalf("unexpected error %s", err) t.Fatalf("unexpected error %s", err)
} }
@@ -35,7 +34,7 @@ func TestBlackHoleNotifier_Close(t *testing.T) {
Start: time.Now().UTC(), Start: time.Now().UTC(),
End: time.Now().UTC(), End: time.Now().UTC(),
Annotations: map[string]string{"a": "b", "c": "d", "e": "f"}, Annotations: map[string]string{"a": "b", "c": "d", "e": "f"},
}}, [][]prompb.Label{{}}, nil); err != nil { }}, nil); err != nil {
t.Fatalf("unexpected error %s", err) t.Fatalf("unexpected error %s", err)
} }

View File

@@ -1,19 +0,0 @@
consul_sd_configs:
- server: localhost:8500
scheme: http
services:
- alertmanager
alert_relabel_configs:
- action: keep
source_labels: [env]
regex: "prod"
- server: localhost:8500
services:
- consul
alert_relabel_configs:
- action: keep
source_labels: [env]
regex: "(abc"
alert_relabel_configs:
- target_label: "foo"
replacement: "aaa"

View File

@@ -1,13 +0,0 @@
dns_sd_configs:
- names:
- cloudflare.com
type: 'A'
port: 9093
relabel_configs:
- source_labels: [__meta_dns_name]
replacement: '${1}'
target_label: dns_name
alert_relabel_configs:
- action: keep
source_labels: [env]
regex: "(abc"

View File

@@ -2,19 +2,12 @@ static_configs:
- targets: - targets:
- localhost:9093 - localhost:9093
- localhost:9095 - localhost:9095
alert_relabel_configs:
- action: keep
source_labels: [env]
regex: "static"
consul_sd_configs: consul_sd_configs:
- server: localhost:8500 - server: localhost:8500
scheme: http scheme: http
services: services:
- alertmanager - alertmanager
alert_relabel_configs:
- action: keep
source_labels: [env]
regex: "consul"
- server: localhost:8500 - server: localhost:8500
services: services:
- consul - consul
@@ -24,10 +17,6 @@ dns_sd_configs:
- cloudflare.com - cloudflare.com
type: 'A' type: 'A'
port: 9093 port: 9093
alert_relabel_configs:
- action: keep
source_labels: [env]
regex: "dns"
relabel_configs: relabel_configs:
- source_labels: [__meta_consul_tags] - source_labels: [__meta_consul_tags]
@@ -36,4 +25,4 @@ relabel_configs:
target_label: __scheme__ target_label: __scheme__
- source_labels: [__meta_dns_name] - source_labels: [__meta_dns_name]
replacement: '${1}' replacement: '${1}'
target_label: dns_name target_label: dns_name

View File

@@ -1,14 +1,22 @@
headers:
- 'CustomHeader: foo'
static_configs: static_configs:
- targets: - targets:
- http://192.168.0.101:9093 - localhost:9093
alert_relabel_configs: - localhost:9095
- target_label: "foo" - https://localhost:9093/test/api/v2/alerts
replacement: "aaa" basic_auth:
username: foo
password: bar
- targets: - targets:
- http://192.168.0.101:9093 - localhost:9096
alert_relabel_configs: - localhost:9097
- target_label: "foo" basic_auth:
replacement: "ccc" username: foo
password: baz
alert_relabel_configs:
- target_label: "foo"
replacement: "aaa"

View File

@@ -1,19 +0,0 @@
package notifier
// ApiNotifier represents a Notifier configuration for WEB view.
// It groups the targets that share the same Kind and is serialized to JSON
// using the field tags below.
type ApiNotifier struct {
// Kind is a Notifier type
Kind TargetType `json:"kind"`
// Targets is a list of Notifier targets of the given Kind
Targets []*ApiTarget `json:"targets"`
}
// ApiTarget represents a specific Notifier target for WEB view.
// It is serialized to JSON using the field tags below.
type ApiTarget struct {
// Address is a URL for sending notifications
Address string `json:"address"`
// Labels is a set of label name to value pairs to add to each sent notification
Labels map[string]string `json:"labels"`
// LastError contains the error faced while sending to notifier.
LastError string `json:"lastError"`
}

View File

@@ -14,9 +14,9 @@ import (
) )
var ( var (
addr = flag.String("remoteRead.url", "", "Optional URL to datasource compatible with MetricsQL. It can be single node VictoriaMetrics or vmselect. "+ addr = flag.String("remoteRead.url", "", "Optional URL to datasource compatible with MetricsQL. It can be single node VictoriaMetrics or vmselect."+
"Remote read is used to restore alerts state. "+ "Remote read is used to restore alerts state."+
"This configuration makes sense only if vmalert was configured with '-remoteWrite.url' before and has been successfully persisted its state. "+ "This configuration makes sense only if `vmalert` was configured with `remoteWrite.url` before and has been successfully persisted its state. "+
"Supports address in the form of IP address with a port (e.g., http://127.0.0.1:8428) or DNS SRV record. "+ "Supports address in the form of IP address with a port (e.g., http://127.0.0.1:8428) or DNS SRV record. "+
"See also '-remoteRead.disablePathAppend', '-remoteRead.showURL'.") "See also '-remoteRead.disablePathAppend', '-remoteRead.showURL'.")
@@ -28,7 +28,6 @@ var (
"Multiple headers must be delimited by '^^': -remoteRead.headers='header1:value1^^header2:value2'") "Multiple headers must be delimited by '^^': -remoteRead.headers='header1:value1^^header2:value2'")
basicAuthUsername = flag.String("remoteRead.basicAuth.username", "", "Optional basic auth username for -remoteRead.url") basicAuthUsername = flag.String("remoteRead.basicAuth.username", "", "Optional basic auth username for -remoteRead.url")
basicAuthUsernameFile = flag.String("remoteRead.basicAuth.usernameFile", "", "Optional path to basic auth username to use for -remoteRead.url")
basicAuthPassword = flag.String("remoteRead.basicAuth.password", "", "Optional basic auth password for -remoteRead.url") basicAuthPassword = flag.String("remoteRead.basicAuth.password", "", "Optional basic auth password for -remoteRead.url")
basicAuthPasswordFile = flag.String("remoteRead.basicAuth.passwordFile", "", "Optional path to basic auth password to use for -remoteRead.url") basicAuthPasswordFile = flag.String("remoteRead.basicAuth.passwordFile", "", "Optional path to basic auth password to use for -remoteRead.url")
@@ -59,7 +58,6 @@ func InitSecretFlags() {
if !*showRemoteReadURL { if !*showRemoteReadURL {
flagutil.RegisterSecretFlag("remoteRead.url") flagutil.RegisterSecretFlag("remoteRead.url")
} }
flagutil.RegisterSecretFlag("remoteRead.headers")
} }
// Init creates a Querier from provided flag values. // Init creates a Querier from provided flag values.
@@ -82,7 +80,7 @@ func Init() (datasource.QuerierBuilder, error) {
return nil, fmt.Errorf("cannot parse JSON for -remoteRead.oauth2.endpointParams=%s: %w", *oauth2EndpointParams, err) return nil, fmt.Errorf("cannot parse JSON for -remoteRead.oauth2.endpointParams=%s: %w", *oauth2EndpointParams, err)
} }
authCfg, err := vmalertutil.AuthConfig( authCfg, err := vmalertutil.AuthConfig(
vmalertutil.WithBasicAuth(*basicAuthUsername, *basicAuthUsernameFile, *basicAuthPassword, *basicAuthPasswordFile), vmalertutil.WithBasicAuth(*basicAuthUsername, *basicAuthPassword, *basicAuthPasswordFile),
vmalertutil.WithBearer(*bearerToken, *bearerTokenFile), vmalertutil.WithBearer(*bearerToken, *bearerTokenFile),
vmalertutil.WithOAuth(*oauth2ClientID, *oauth2ClientSecret, *oauth2ClientSecretFile, *oauth2TokenURL, *oauth2Scopes, endpointParams), vmalertutil.WithOAuth(*oauth2ClientID, *oauth2ClientSecret, *oauth2ClientSecretFile, *oauth2TokenURL, *oauth2Scopes, endpointParams),
vmalertutil.WithHeaders(*headers)) vmalertutil.WithHeaders(*headers))

View File

@@ -11,23 +11,16 @@ import (
"path" "path"
"strings" "strings"
"sync" "sync"
"sync/atomic"
"time" "time"
"github.com/cespare/xxhash/v2"
"github.com/golang/snappy" "github.com/golang/snappy"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup" "github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding/zstd"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httputil" "github.com/VictoriaMetrics/VictoriaMetrics/lib/httputil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger" "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil" "github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth" "github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb" "github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timeutil"
"github.com/VictoriaMetrics/metrics" "github.com/VictoriaMetrics/metrics"
) )
@@ -60,11 +53,6 @@ type Client struct {
wg sync.WaitGroup wg sync.WaitGroup
doneCh chan struct{} doneCh chan struct{}
// Whether to encode the write request with VictoriaMetrics remote write protocol.
// It is set to true by default, and will be switched to false if the client
// receives specific errors indicating that the remote storage doesn't support VictoriaMetrics remote write protocol.
isVMRemoteWrite atomic.Bool
} }
// Config is config for remote write client. // Config is config for remote write client.
@@ -124,12 +112,9 @@ func NewClient(ctx context.Context, cfg Config) (*Client, error) {
doneCh: make(chan struct{}), doneCh: make(chan struct{}),
input: make(chan prompb.TimeSeries, cfg.MaxQueueSize), input: make(chan prompb.TimeSeries, cfg.MaxQueueSize),
} }
c.isVMRemoteWrite.Store(true)
for i := 0; i < cc; i++ { for i := 0; i < cc; i++ {
c.wg.Go(func() { c.run(ctx)
c.run(ctx, i)
})
} }
return c, nil return c, nil
} }
@@ -171,7 +156,8 @@ func (c *Client) Close() error {
return nil return nil
} }
func (c *Client) run(ctx context.Context, id int) { func (c *Client) run(ctx context.Context) {
ticker := time.NewTicker(c.flushInterval)
wr := &prompb.WriteRequest{} wr := &prompb.WriteRequest{}
shutdown := func() { shutdown := func() {
lastCtx, cancel := context.WithTimeout(context.Background(), defaultWriteTimeout) lastCtx, cancel := context.WithTimeout(context.Background(), defaultWriteTimeout)
@@ -187,73 +173,42 @@ func (c *Client) run(ctx context.Context, id int) {
cancel() cancel()
} }
c.wg.Add(1)
// add jitter to spread remote write flushes over the flush interval to avoid congestion at the remote write destination go func() {
h := xxhash.Sum64(bytesutil.ToUnsafeBytes(fmt.Sprintf("%d", id))) defer c.wg.Done()
randJitter := uint64(float64(c.flushInterval) * (float64(h) / (1 << 64))) defer ticker.Stop()
timer := time.NewTimer(time.Duration(randJitter)) for {
addJitter:
for {
select {
case <-c.doneCh:
timer.Stop()
shutdown()
return
case <-ctx.Done():
timer.Stop()
shutdown()
return
case <-timer.C:
break addJitter
}
}
ticker := time.NewTicker(c.flushInterval)
defer ticker.Stop()
for {
select {
case <-c.doneCh:
shutdown()
return
case <-ctx.Done():
shutdown()
return
case <-ticker.C:
c.flush(ctx, wr)
// drain the potential stale tick to avoid small or empty flushes after a slow flush.
select { select {
case <-c.doneCh:
shutdown()
return
case <-ctx.Done():
shutdown()
return
case <-ticker.C: case <-ticker.C:
default:
}
case ts, ok := <-c.input:
if !ok {
continue
}
wr.Timeseries = append(wr.Timeseries, ts)
if len(wr.Timeseries) >= c.maxBatchSize {
c.flush(ctx, wr) c.flush(ctx, wr)
case ts, ok := <-c.input:
if !ok {
continue
}
wr.Timeseries = append(wr.Timeseries, ts)
if len(wr.Timeseries) >= c.maxBatchSize {
c.flush(ctx, wr)
}
} }
} }
} }()
} }
var ( var (
rwErrors = metrics.NewCounter(`vmalert_remotewrite_errors_total`) rwErrors = metrics.NewCounter(`vmalert_remotewrite_errors_total`)
rwTotal = metrics.NewCounter(`vmalert_remotewrite_total`) rwTotal = metrics.NewCounter(`vmalert_remotewrite_total`)
// sentRows and sentBytes are historical counters that can now be replaced by flushedRows and flushedBytes histograms. They may be deprecated in the future after the new histograms have been adopted for some time. sentRows = metrics.NewCounter(`vmalert_remotewrite_sent_rows_total`)
sentRows = metrics.NewCounter(`vmalert_remotewrite_sent_rows_total`) sentBytes = metrics.NewCounter(`vmalert_remotewrite_sent_bytes_total`)
sentBytes = metrics.NewCounter(`vmalert_remotewrite_sent_bytes_total`) droppedRows = metrics.NewCounter(`vmalert_remotewrite_dropped_rows_total`)
flushedRows = metrics.NewHistogram(`vmalert_remotewrite_sent_rows`) sendDuration = metrics.NewFloatCounter(`vmalert_remotewrite_send_duration_seconds_total`)
flushedBytes = metrics.NewHistogram(`vmalert_remotewrite_sent_bytes`) bufferFlushDuration = metrics.NewHistogram(`vmalert_remotewrite_flush_duration_seconds`)
droppedRows = metrics.NewCounter(`vmalert_remotewrite_dropped_rows_total`)
sendDuration = metrics.NewFloatCounter(`vmalert_remotewrite_send_duration_seconds_total`)
bufferFlushDuration = metrics.NewHistogram(`vmalert_remotewrite_flush_duration_seconds`)
remoteWriteQueueSize = metrics.NewHistogram(`vmalert_remotewrite_queue_size`)
_ = metrics.NewGauge(`vmalert_remotewrite_queue_capacity`, func() float64 {
return float64(*maxQueueSize)
})
_ = metrics.NewGauge(`vmalert_remotewrite_concurrency`, func() float64 { _ = metrics.NewGauge(`vmalert_remotewrite_concurrency`, func() float64 {
return float64(*concurrency) return float64(*concurrency)
@@ -267,45 +222,34 @@ func GetDroppedRows() int { return int(droppedRows.Get()) }
// it to remote-write endpoint. Flush performs limited amount of retries // it to remote-write endpoint. Flush performs limited amount of retries
// if request fails. // if request fails.
func (c *Client) flush(ctx context.Context, wr *prompb.WriteRequest) { func (c *Client) flush(ctx context.Context, wr *prompb.WriteRequest) {
remoteWriteQueueSize.Update(float64(len(c.input)))
if len(wr.Timeseries) < 1 { if len(wr.Timeseries) < 1 {
return return
} }
defer wr.Reset() defer wr.Reset()
defer bufferFlushDuration.UpdateDuration(time.Now()) defer bufferFlushDuration.UpdateDuration(time.Now())
bb := writeRequestBufPool.Get() data := wr.MarshalProtobuf(nil)
bb.B = wr.MarshalProtobuf(bb.B[:0]) b := snappy.Encode(nil, data)
zb := compressBufPool.Get()
defer compressBufPool.Put(zb)
if c.isVMRemoteWrite.Load() {
zb.B = zstd.CompressLevel(zb.B[:0], bb.B, 0)
} else {
zb.B = snappy.Encode(zb.B[:cap(zb.B)], bb.B)
}
writeRequestBufPool.Put(bb)
maxRetryInterval := *retryMaxTime retryInterval, maxRetryInterval := *retryMinInterval, *retryMaxTime
bt := timeutil.NewBackoffTimer(*retryMinInterval, maxRetryInterval) if retryInterval > maxRetryInterval {
retryInterval = maxRetryInterval
}
timeStart := time.Now() timeStart := time.Now()
defer func() { defer func() {
sendDuration.Add(time.Since(timeStart).Seconds()) sendDuration.Add(time.Since(timeStart).Seconds())
}() }()
attempts := 0
L: L:
for { for attempts := 0; ; attempts++ {
err := c.send(ctx, zb.B) err := c.send(ctx, b)
if err != nil && (errors.Is(err, io.EOF) || netutil.IsTrivialNetworkError(err)) { if err != nil && (errors.Is(err, io.EOF) || netutil.IsTrivialNetworkError(err)) {
// Something in the middle between client and destination might be closing // Something in the middle between client and destination might be closing
// the connection. So we do a one more attempt in hope request will succeed. // the connection. So we do a one more attempt in hope request will succeed.
err = c.send(ctx, zb.B) err = c.send(ctx, b)
} }
if err == nil { if err == nil {
sentRows.Add(len(wr.Timeseries)) sentRows.Add(len(wr.Timeseries))
sentBytes.Add(len(zb.B)) sentBytes.Add(len(b))
flushedRows.Update(float64(len(wr.Timeseries)))
flushedBytes.Update(float64(len(zb.B)))
return return
} }
@@ -331,13 +275,13 @@ L:
break break
} }
if bt.CurrentDelay() > timeLeftForRetries { if retryInterval > timeLeftForRetries {
bt.SetDelay(timeLeftForRetries) retryInterval = timeLeftForRetries
} }
// sleeping to prevent remote db hammering // sleeping to prevent remote db hammering
bt.Wait(ctx.Done()) time.Sleep(retryInterval)
retryInterval *= 2
attempts++
} }
rwErrors.Inc() rwErrors.Inc()
@@ -357,16 +301,12 @@ func (c *Client) send(ctx context.Context, data []byte) error {
return fmt.Errorf("failed to create new HTTP request: %w", err) return fmt.Errorf("failed to create new HTTP request: %w", err)
} }
req.Header.Set("User-Agent", "vmalert") // RFC standard compliant headers
req.Header.Set("Content-Encoding", "snappy")
req.Header.Set("Content-Type", "application/x-protobuf") req.Header.Set("Content-Type", "application/x-protobuf")
if encoding.IsZstd(data) { // Prometheus compliant headers
req.Header.Set("Content-Encoding", "zstd") req.Header.Set("X-Prometheus-Remote-Write-Version", "0.1.0")
req.Header.Set("X-VictoriaMetrics-Remote-Write-Version", "1")
} else {
req.Header.Set("Content-Encoding", "snappy")
req.Header.Set("X-Prometheus-Remote-Write-Version", "0.1.0")
}
if c.authCfg != nil { if c.authCfg != nil {
err = c.authCfg.SetHeaders(req, true) err = c.authCfg.SetHeaders(req, true)
@@ -395,29 +335,6 @@ func (c *Client) send(ctx context.Context, data []byte) error {
// respond with HTTP 2xx status code when write is successful. // respond with HTTP 2xx status code when write is successful.
return nil return nil
case 4: case 4:
// - Remote Write v1 specification implicitly expects a `400 Bad Request` when the encoding is not supported.
// - Remote Write v2 specification explicitly specifies a `415 Unsupported Media Type` for unsupported encodings.
// - Real-world implementations of v1 use both 400 and 415 status codes.
// See more in research: https://github.com/VictoriaMetrics/VictoriaMetrics/pull/8462#issuecomment-2786918054
if resp.StatusCode == http.StatusUnsupportedMediaType || resp.StatusCode == http.StatusBadRequest {
if encoding.IsZstd(data) {
logger.Infof("received unsupported media type or bad request from remote storage at %q. Re-packing the block to Prometheus remote write and retrying."+
"See https://docs.victoriametrics.com/victoriametrics/vmagent/#victoriametrics-remote-write-protocol", req.URL.Redacted())
zstdBlockLen := len(data)
data, err = repackBlockFromZstdToSnappy(data)
if err == nil {
logger.Infof("received unsupported media type or bad request from remote storage at %q. Downgrading protocol from VictoriaMetrics to Prometheus remote write for all future requests. "+
"See https://docs.victoriametrics.com/victoriametrics/vmagent/#victoriametrics-remote-write-protocol", req.URL.Redacted())
c.isVMRemoteWrite.Store(false)
return c.send(ctx, data)
}
logger.Warnf("failed to repack zstd block (%d bytes) to snappy: %s; The block will be rejected. "+
"Possible cause: ungraceful shutdown leading to persisted queue corruption.",
zstdBlockLen, err)
}
}
if resp.StatusCode != http.StatusTooManyRequests { if resp.StatusCode != http.StatusTooManyRequests {
// MUST NOT retry write requests on HTTP 4xx responses other than 429 // MUST NOT retry write requests on HTTP 4xx responses other than 429
return &nonRetriableError{ return &nonRetriableError{
@@ -438,19 +355,3 @@ type nonRetriableError struct {
func (e *nonRetriableError) Error() string { func (e *nonRetriableError) Error() string {
return e.err.Error() return e.err.Error()
} }
// Pools of reusable byte buffers shared by remote write clients.
var (
// writeRequestBufPool provides buffers for protobuf-marshaled write requests.
writeRequestBufPool bytesutil.ByteBufferPool
// compressBufPool provides buffers for the compressed request body.
compressBufPool bytesutil.ByteBufferPool
)
// repackBlockFromZstdToSnappy converts a zstd-compressed block into a
// snappy-compressed block holding the same payload.
//
// It returns the decompression error unchanged if zstdBlock cannot be decoded.
func repackBlockFromZstdToSnappy(zstdBlock []byte) ([]byte, error) {
	// pre-allocate twice the compressed size as a starting capacity for the plain data
	scratch := make([]byte, 0, 2*len(zstdBlock))
	raw, err := encoding.DecompressZSTD(scratch, zstdBlock)
	if err != nil {
		return nil, err
	}
	snappyBlock := snappy.Encode(nil, raw)
	return snappyBlock, nil
}

View File

@@ -12,7 +12,8 @@ import (
"testing" "testing"
"time" "time"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding/zstd" "github.com/golang/snappy"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb" "github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
) )
@@ -43,7 +44,7 @@ func TestClient_Push(t *testing.T) {
r := rand.New(rand.NewSource(1)) r := rand.New(rand.NewSource(1))
const rowsN = int(1e4) const rowsN = int(1e4)
for range rowsN { for i := 0; i < rowsN; i++ {
s := prompb.TimeSeries{ s := prompb.TimeSeries{
Samples: []prompb.Sample{{ Samples: []prompb.Sample{{
Value: r.Float64(), Value: r.Float64(),
@@ -101,11 +102,8 @@ func TestClient_run_maxBatchSizeDuringShutdown(t *testing.T) {
} }
// push time series to the client. // push time series to the client.
for range pushCnt { for i := 0; i < pushCnt; i++ {
if err = rwClient.Push(prompb.TimeSeries{ if err = rwClient.Push(prompb.TimeSeries{}); err != nil {
Labels: []prompb.Label{{Name: "__name__", Value: "m"}},
Samples: []prompb.Sample{{Value: 1, Timestamp: 1000}},
}); err != nil {
t.Fatalf("cannot time series to the client: %s", err) t.Fatalf("cannot time series to the client: %s", err)
} }
} }
@@ -158,8 +156,8 @@ func (rw *rwServer) handler(w http.ResponseWriter, r *http.Request) {
} }
h := r.Header.Get("Content-Encoding") h := r.Header.Get("Content-Encoding")
if h != "zstd" { if h != "snappy" {
rw.err(w, fmt.Errorf("header read error: Content-Encoding is not zstd (%q)", h)) rw.err(w, fmt.Errorf("header read error: Content-Encoding is not snappy (%q)", h))
} }
h = r.Header.Get("Content-Type") h = r.Header.Get("Content-Type")
@@ -167,9 +165,9 @@ func (rw *rwServer) handler(w http.ResponseWriter, r *http.Request) {
rw.err(w, fmt.Errorf("header read error: Content-Type is not x-protobuf (%q)", h)) rw.err(w, fmt.Errorf("header read error: Content-Type is not x-protobuf (%q)", h))
} }
h = r.Header.Get("X-VictoriaMetrics-Remote-Write-Version") h = r.Header.Get("X-Prometheus-Remote-Write-Version")
if h != "1" { if h != "0.1.0" {
rw.err(w, fmt.Errorf("header read error: X-VictoriaMetrics-Remote-Write-Version is not 1 (%q)", h)) rw.err(w, fmt.Errorf("header read error: X-Prometheus-Remote-Write-Version is not 0.1.0 (%q)", h))
} }
data, err := io.ReadAll(r.Body) data, err := io.ReadAll(r.Body)
@@ -179,7 +177,7 @@ func (rw *rwServer) handler(w http.ResponseWriter, r *http.Request) {
} }
defer func() { _ = r.Body.Close() }() defer func() { _ = r.Body.Close() }()
b, err := zstd.Decompress(nil, data) b, err := snappy.Decode(nil, data)
if err != nil { if err != nil {
rw.err(w, fmt.Errorf("decode err: %w", err)) rw.err(w, fmt.Errorf("decode err: %w", err))
return return

View File

@@ -9,7 +9,8 @@ import (
"strings" "strings"
"sync" "sync"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding/zstd" "github.com/golang/snappy"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httputil" "github.com/VictoriaMetrics/VictoriaMetrics/lib/httputil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth" "github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb" "github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
@@ -63,17 +64,19 @@ func (c *DebugClient) Close() error {
} }
func (c *DebugClient) send(data []byte) error { func (c *DebugClient) send(data []byte) error {
b := zstd.CompressLevel(nil, data, 0) b := snappy.Encode(nil, data)
r := bytes.NewReader(b) r := bytes.NewReader(b)
req, err := http.NewRequest(http.MethodPost, c.addr, r) req, err := http.NewRequest(http.MethodPost, c.addr, r)
if err != nil { if err != nil {
return fmt.Errorf("failed to create new HTTP request: %w", err) return fmt.Errorf("failed to create new HTTP request: %w", err)
} }
req.Header.Set("Content-Encoding", "zstd") // RFC standard compliant headers
req.Header.Set("Content-Encoding", "snappy")
req.Header.Set("Content-Type", "application/x-protobuf") req.Header.Set("Content-Type", "application/x-protobuf")
req.Header.Set("X-VictoriaMetrics-Remote-Write-Version", "1") // Prometheus compliant headers
req.Header.Set("X-Prometheus-Remote-Write-Version", "0.1.0")
if !*disablePathAppend { if !*disablePathAppend {
req.URL.Path = path.Join(req.URL.Path, "/api/v1/write") req.URL.Path = path.Join(req.URL.Path, "/api/v1/write")

View File

@@ -22,7 +22,7 @@ func TestDebugClient_Push(t *testing.T) {
const rowsN = 100 const rowsN = 100
var sent int var sent int
for i := range rowsN { for i := 0; i < rowsN; i++ {
s := prompb.TimeSeries{ s := prompb.TimeSeries{
Samples: []prompb.Sample{{ Samples: []prompb.Sample{{
Value: float64(i), Value: float64(i),

View File

@@ -13,8 +13,8 @@ import (
) )
var ( var (
addr = flag.String("remoteWrite.url", "", "Optional URL to persist alerts state and recording rules results in form of timeseries. "+ addr = flag.String("remoteWrite.url", "", "Optional URL to VictoriaMetrics or vminsert where to persist alerts state "+
"It must support either VictoriaMetrics remote write protocol or Prometheus remote_write protocol. "+ "and recording rules results in form of timeseries. "+
"Supports address in the form of IP address with a port (e.g., http://127.0.0.1:8428) or DNS SRV record. "+ "Supports address in the form of IP address with a port (e.g., http://127.0.0.1:8428) or DNS SRV record. "+
"For example, if -remoteWrite.url=http://127.0.0.1:8428 is specified, "+ "For example, if -remoteWrite.url=http://127.0.0.1:8428 is specified, "+
"then the alerts state will be written to http://127.0.0.1:8428/api/v1/write . See also -remoteWrite.disablePathAppend, '-remoteWrite.showURL'.") "then the alerts state will be written to http://127.0.0.1:8428/api/v1/write . See also -remoteWrite.disablePathAppend, '-remoteWrite.showURL'.")
@@ -26,7 +26,6 @@ var (
"Multiple headers must be delimited by '^^': -remoteWrite.headers='header1:value1^^header2:value2'") "Multiple headers must be delimited by '^^': -remoteWrite.headers='header1:value1^^header2:value2'")
basicAuthUsername = flag.String("remoteWrite.basicAuth.username", "", "Optional basic auth username for -remoteWrite.url") basicAuthUsername = flag.String("remoteWrite.basicAuth.username", "", "Optional basic auth username for -remoteWrite.url")
basicAuthUsernameFile = flag.String("remoteWrite.basicAuth.usernameFile", "", "Optional path to basic auth username to use for -remoteWrite.url")
basicAuthPassword = flag.String("remoteWrite.basicAuth.password", "", "Optional basic auth password for -remoteWrite.url") basicAuthPassword = flag.String("remoteWrite.basicAuth.password", "", "Optional basic auth password for -remoteWrite.url")
basicAuthPasswordFile = flag.String("remoteWrite.basicAuth.passwordFile", "", "Optional path to basic auth password to use for -remoteWrite.url") basicAuthPasswordFile = flag.String("remoteWrite.basicAuth.passwordFile", "", "Optional path to basic auth password to use for -remoteWrite.url")
@@ -62,7 +61,6 @@ func InitSecretFlags() {
if !*showRemoteWriteURL { if !*showRemoteWriteURL {
flagutil.RegisterSecretFlag("remoteWrite.url") flagutil.RegisterSecretFlag("remoteWrite.url")
} }
flagutil.RegisterSecretFlag("remoteWrite.headers")
} }
// Init creates Client object from given flags. // Init creates Client object from given flags.
@@ -85,7 +83,7 @@ func Init(ctx context.Context) (*Client, error) {
return nil, fmt.Errorf("cannot parse JSON for -remoteWrite.oauth2.endpointParams=%s: %w", *oauth2EndpointParams, err) return nil, fmt.Errorf("cannot parse JSON for -remoteWrite.oauth2.endpointParams=%s: %w", *oauth2EndpointParams, err)
} }
authCfg, err := vmalertutil.AuthConfig( authCfg, err := vmalertutil.AuthConfig(
vmalertutil.WithBasicAuth(*basicAuthUsername, *basicAuthUsernameFile, *basicAuthPassword, *basicAuthPasswordFile), vmalertutil.WithBasicAuth(*basicAuthUsername, *basicAuthPassword, *basicAuthPasswordFile),
vmalertutil.WithBearer(*bearerToken, *bearerTokenFile), vmalertutil.WithBearer(*bearerToken, *bearerTokenFile),
vmalertutil.WithOAuth(*oauth2ClientID, *oauth2ClientSecret, *oauth2ClientSecretFile, *oauth2TokenURL, *oauth2Scopes, endpointParams), vmalertutil.WithOAuth(*oauth2ClientID, *oauth2ClientSecret, *oauth2ClientSecretFile, *oauth2TokenURL, *oauth2Scopes, endpointParams),
vmalertutil.WithHeaders(*headers)) vmalertutil.WithHeaders(*headers))

View File

@@ -2,7 +2,6 @@ package rule
import ( import (
"context" "context"
"errors"
"fmt" "fmt"
"hash/fnv" "hash/fnv"
"math" "math"
@@ -188,54 +187,6 @@ func (ar *AlertingRule) ID() uint64 {
return ar.RuleID return ar.RuleID
} }
// ToAPI builds the ApiRule view of ar from the rule configuration, the most
// recent entry of its state and the alerts it currently reports.
//
// Health is "ok" unless the last state entry carries an error; in that case
// Health becomes "err" and LastError holds the error text.
// State is "inactive" while the rule has no alerts. With at least one alert it
// is the pending state, or the firing state if any alert is firing.
func (ar *AlertingRule) ToAPI() ApiRule {
	st := ar.state
	last := st.getLast()

	apiRule := ApiRule{
		Type:              TypeAlerting,
		DatasourceType:    ar.Type.String(),
		Name:              ar.Name,
		Query:             ar.Expr,
		Duration:          ar.For.Seconds(),
		KeepFiringFor:     ar.KeepFiringFor.Seconds(),
		Labels:            ar.Labels,
		Annotations:       ar.Annotations,
		LastEvaluation:    last.Time,
		EvaluationTime:    last.Duration.Seconds(),
		Health:            "ok",
		State:             "inactive",
		Alerts:            ar.AlertsToAPI(),
		LastSamples:       last.Samples,
		LastSeriesFetched: last.SeriesFetched,
		MaxUpdates:        st.size(),
		Updates:           st.getAll(),
		Debug:             ar.Debug,

		// numeric identifiers are rendered as strings so that JSON consumers
		// do not round them
		ID:        fmt.Sprintf("%d", ar.ID()),
		GroupID:   fmt.Sprintf("%d", ar.GroupID),
		GroupName: ar.GroupName,
		File:      ar.File,
	}

	if evalErr := last.Err; evalErr != nil {
		apiRule.LastError = evalErr.Error()
		apiRule.Health = "err"
	}

	// without alerts the rule stays "inactive"
	if len(apiRule.Alerts) == 0 {
		return apiRule
	}

	// any alert makes the rule at least pending; one firing alert is enough
	// to report the whole rule as firing
	firing := notifier.StateFiring.String()
	apiRule.State = notifier.StatePending.String()
	for _, alert := range apiRule.Alerts {
		if alert.State == firing {
			apiRule.State = firing
			break
		}
	}
	return apiRule
}
// GetAlerts returns active alerts of rule // GetAlerts returns active alerts of rule
func (ar *AlertingRule) GetAlerts() []*notifier.Alert { func (ar *AlertingRule) GetAlerts() []*notifier.Alert {
ar.alertsMu.RLock() ar.alertsMu.RLock()
@@ -247,6 +198,16 @@ func (ar *AlertingRule) GetAlerts() []*notifier.Alert {
return alerts return alerts
} }
// GetAlert looks up the alert stored under id while holding the read lock on
// the rule's alerts. It returns nil when no alert is stored under id or when
// the alerts map has not been initialized.
func (ar *AlertingRule) GetAlert(id uint64) *notifier.Alert {
	ar.alertsMu.RLock()
	// indexing a nil map is safe in Go and yields the zero value (nil here),
	// so an uninitialized map needs no separate branch
	found := ar.alerts[id]
	ar.alertsMu.RUnlock()
	return found
}
func (ar *AlertingRule) logDebugf(at time.Time, a *notifier.Alert, format string, args ...any) { func (ar *AlertingRule) logDebugf(at time.Time, a *notifier.Alert, format string, args ...any) {
if !ar.Debug { if !ar.Debug {
return return
@@ -312,13 +273,6 @@ type labelSet struct {
// On k conflicts in origin set, the original value is preferred and copied // On k conflicts in origin set, the original value is preferred and copied
// to processed with `exported_%k` key. The copy happens only if passed v isn't equal to origin[k] value. // to processed with `exported_%k` key. The copy happens only if passed v isn't equal to origin[k] value.
func (ls *labelSet) add(k, v string) { func (ls *labelSet) add(k, v string) {
// do not add label with empty value to the result, as it has no meaning:
// if the label already exists in the original query result, remove it to preserve compatibility with relabeling, see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10766.
// otherwise, ignore the label, see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9984.
if v == "" {
delete(ls.processed, k)
return
}
ls.processed[k] = v ls.processed[k] = v
ov, ok := ls.origin[k] ov, ok := ls.origin[k]
if !ok { if !ok {
@@ -348,13 +302,14 @@ func (ar *AlertingRule) toLabels(m datasource.Metric, qFn templates.QueryFn) (*l
ls.processed[l.Name] = l.Value ls.processed[l.Name] = l.Value
} }
// labels only support limited templating variables,
// including `labels`, `value` and `expr`, to avoid breaking alert states or causing cardinality issue with results
extraLabels, err := notifier.ExecTemplate(qFn, ar.Labels, notifier.AlertTplData{ extraLabels, err := notifier.ExecTemplate(qFn, ar.Labels, notifier.AlertTplData{
Labels: ls.origin, Labels: ls.origin,
Value: m.Values[0], Value: m.Values[0],
Expr: ar.Expr, Expr: ar.Expr,
}) })
if err != nil {
return nil, fmt.Errorf("failed to expand labels: %w", err)
}
for k, v := range extraLabels { for k, v := range extraLabels {
ls.add(k, v) ls.add(k, v)
} }
@@ -365,7 +320,7 @@ func (ar *AlertingRule) toLabels(m datasource.Metric, qFn templates.QueryFn) (*l
if !*disableAlertGroupLabel && ar.GroupName != "" { if !*disableAlertGroupLabel && ar.GroupName != "" {
ls.add(alertGroupNameLabel, ar.GroupName) ls.add(alertGroupNameLabel, ar.GroupName)
} }
return ls, err return ls, nil
} }
// execRange executes alerting rule on the given time range similarly to exec. // execRange executes alerting rule on the given time range similarly to exec.
@@ -386,12 +341,16 @@ func (ar *AlertingRule) execRange(ctx context.Context, start, end time.Time) ([]
return []datasource.Metric{{Timestamps: []int64{0}, Values: []float64{math.NaN()}}}, nil return []datasource.Metric{{Timestamps: []int64{0}, Values: []float64{math.NaN()}}}, nil
} }
for _, s := range res.Data { for _, s := range res.Data {
ls, err := ar.expandLabelTemplates(s, qFn) ls, err := ar.expandLabelTemplates(s)
if err != nil { if err != nil {
return nil, err return nil, err
} }
alertID := hash(ls.processed) alertID := hash(ls.processed)
a := ar.newAlert(s, time.Time{}, ls.processed, nil) // initial alert as, err := ar.expandAnnotationTemplates(s, qFn, time.Time{}, ls)
if err != nil {
return nil, err
}
a := ar.newAlert(s, time.Time{}, ls.processed, as) // initial alert
prevT := time.Time{} prevT := time.Time{}
for i := range s.Values { for i := range s.Values {
@@ -407,6 +366,8 @@ func (ar *AlertingRule) execRange(ctx context.Context, start, end time.Time) ([]
// reset to Pending if there are gaps > EvalInterval between DPs // reset to Pending if there are gaps > EvalInterval between DPs
a.State = notifier.StatePending a.State = notifier.StatePending
a.ActiveAt = at a.ActiveAt = at
// re-template the annotations as active timestamp is changed
a.Annotations, _ = ar.expandAnnotationTemplates(s, qFn, at, ls)
a.Start = time.Time{} a.Start = time.Time{}
} else if at.Sub(a.ActiveAt) >= ar.For && a.State != notifier.StateFiring { } else if at.Sub(a.ActiveAt) >= ar.For && a.State != notifier.StateFiring {
a.State = notifier.StateFiring a.State = notifier.StateFiring
@@ -452,7 +413,7 @@ func (ar *AlertingRule) exec(ctx context.Context, ts time.Time, limit int) ([]pr
defer func() { defer func() {
ar.state.add(curState) ar.state.add(curState)
if curState.Err != nil && !errors.Is(curState.Err, context.Canceled) { if curState.Err != nil {
ar.metrics.errors.Inc() ar.metrics.errors.Inc()
} }
}() }()
@@ -461,8 +422,7 @@ func (ar *AlertingRule) exec(ctx context.Context, ts time.Time, limit int) ([]pr
return nil, fmt.Errorf("failed to execute query %q: %w", ar.Expr, err) return nil, fmt.Errorf("failed to execute query %q: %w", ar.Expr, err)
} }
isPartial := isPartialResponse(res) ar.logDebugf(ts, nil, "query returned %d series (elapsed: %s, isPartial: %t)", curState.Samples, curState.Duration, isPartialResponse(res))
ar.logDebugf(ts, nil, "query returned %d series (elapsed: %s, isPartial: %t)", curState.Samples, curState.Duration, isPartial)
qFn := func(query string) ([]datasource.Metric, error) { qFn := func(query string) ([]datasource.Metric, error) {
res, _, err := ar.q.Query(ctx, query, ts) res, _, err := ar.q.Query(ctx, query, ts)
return res.Data, err return res.Data, err
@@ -474,11 +434,10 @@ func (ar *AlertingRule) exec(ctx context.Context, ts time.Time, limit int) ([]pr
expandedLabels := make([]*labelSet, len(res.Data)) expandedLabels := make([]*labelSet, len(res.Data))
expandedAnnotations := make([]map[string]string, len(res.Data)) expandedAnnotations := make([]map[string]string, len(res.Data))
for i, m := range res.Data { for i, m := range res.Data {
ls, err := ar.expandLabelTemplates(m, qFn) ls, err := ar.expandLabelTemplates(m)
if err != nil { if err != nil {
// only set error in current state, but do not break alert processing
curState.Err = err curState.Err = err
logger.Errorf("got templating error in rule %s: %q", ar.Name, err) return nil, curState.Err
} }
at := ts at := ts
alertID := hash(ls.processed) alertID := hash(ls.processed)
@@ -488,11 +447,10 @@ func (ar *AlertingRule) exec(ctx context.Context, ts time.Time, limit int) ([]pr
at = a.ActiveAt at = a.ActiveAt
} }
} }
as, err := ar.expandAnnotationTemplates(m, qFn, at, ls, isPartial) as, err := ar.expandAnnotationTemplates(m, qFn, at, ls)
if err != nil { if err != nil {
// only set error in current state, but do not break alert processing
curState.Err = err curState.Err = err
logger.Errorf("got templating error in rule %s: %q", ar.Name, err) return nil, curState.Err
} }
expandedLabels[i] = ls expandedLabels[i] = ls
expandedAnnotations[i] = as expandedAnnotations[i] = as
@@ -598,29 +556,31 @@ func (ar *AlertingRule) exec(ctx context.Context, ts time.Time, limit int) ([]pr
return append(tss, ar.toTimeSeries(ts.Unix())...), nil return append(tss, ar.toTimeSeries(ts.Unix())...), nil
} }
func (ar *AlertingRule) expandLabelTemplates(m datasource.Metric, qFn templates.QueryFn) (*labelSet, error) { func (ar *AlertingRule) expandLabelTemplates(m datasource.Metric) (*labelSet, error) {
qFn := func(_ string) ([]datasource.Metric, error) {
return nil, fmt.Errorf("`query` template isn't supported in rule label")
}
ls, err := ar.toLabels(m, qFn) ls, err := ar.toLabels(m, qFn)
if err != nil { if err != nil {
return ls, fmt.Errorf("failed to expand label templates: %s", err) return nil, fmt.Errorf("failed to expand label templates: %s", err)
} }
return ls, nil return ls, nil
} }
func (ar *AlertingRule) expandAnnotationTemplates(m datasource.Metric, qFn templates.QueryFn, activeAt time.Time, ls *labelSet, isPartial bool) (map[string]string, error) { func (ar *AlertingRule) expandAnnotationTemplates(m datasource.Metric, qFn templates.QueryFn, activeAt time.Time, ls *labelSet) (map[string]string, error) {
tplData := notifier.AlertTplData{ tplData := notifier.AlertTplData{
Value: m.Values[0], Value: m.Values[0],
Type: ar.Type.String(), Type: ar.Type.String(),
Labels: ls.origin, Labels: ls.origin,
Expr: ar.Expr, Expr: ar.Expr,
AlertID: hash(ls.processed), AlertID: hash(ls.processed),
GroupID: ar.GroupID, GroupID: ar.GroupID,
ActiveAt: activeAt, ActiveAt: activeAt,
For: ar.For, For: ar.For,
IsPartial: isPartial,
} }
as, err := notifier.ExecTemplate(qFn, ar.Annotations, tplData) as, err := notifier.ExecTemplate(qFn, ar.Annotations, tplData)
if err != nil { if err != nil {
return as, fmt.Errorf("failed to expand annotation templates: %s", err) return nil, fmt.Errorf("failed to expand annotation templates: %s", err)
} }
return as, nil return as, nil
} }
@@ -820,9 +780,7 @@ func (ar *AlertingRule) restore(ctx context.Context, q datasource.Querier, ts ti
expr := fmt.Sprintf("default_rollup(%s{%s%s}[%ds])", expr := fmt.Sprintf("default_rollup(%s{%s%s}[%ds])",
alertForStateMetricName, nameStr, labelsFilter, int(lookback.Seconds())) alertForStateMetricName, nameStr, labelsFilter, int(lookback.Seconds()))
// query ALERTS_FOR_STATE at `ts-1s` instead `ts` to avoid retrieving data written in the current run, res, _, err := q.Query(ctx, expr, ts)
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10335
res, _, err := q.Query(ctx, expr, ts.Add(-1*time.Second))
if err != nil { if err != nil {
return fmt.Errorf("failed to execute restore query %q: %w ", expr, err) return fmt.Errorf("failed to execute restore query %q: %w ", expr, err)
} }

View File

@@ -1,106 +0,0 @@
//go:build synctest
package rule
import (
"context"
"strings"
"testing"
"testing/synctest"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
)
// TestAlertingRule_ActiveAtPreservedInAnnotations ensures that the fix for
// https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9543 is preserved
// while allowing query templates in labels (https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9783)
//
// The rule is executed twice, two seconds apart, against the same series.
// After each execution the test checks that the alert's ActiveAt and the
// `description` annotation rendered from `$activeAt` both carry the timestamp
// of the first execution.
func TestAlertingRule_ActiveAtPreservedInAnnotations(t *testing.T) {
// wrap into synctest because of time manipulations
synctest.Test(t, func(t *testing.T) {
fq := &datasource.FakeQuerier{}
ar := &AlertingRule{
Name: "TestActiveAtPreservation",
Labels: map[string]string{
// static template: it is expanded by the template engine but does not call `query`
"test_query_in_label": `{{ "static_value" }}`,
},
Annotations: map[string]string{
"description": "Alert active since {{ $activeAt }}",
},
alerts: make(map[uint64]*notifier.Alert),
q: fq,
state: &ruleState{
entries: make([]StateEntry, 10),
},
}
// No additional query results are registered on fq:
// none of the templates above issue a `query` call.
// Add a metric that should trigger the alert
fq.Add(metricWithValueAndLabels(t, 1, "instance", "server1"))
// First execution - creates new alert
ts1 := time.Now()
_, err := ar.exec(context.TODO(), ts1, 0)
if err != nil {
t.Fatalf("unexpected error on first exec: %s", err)
}
if len(ar.alerts) != 1 {
t.Fatalf("expected 1 alert, got %d", len(ar.alerts))
}
firstAlert := ar.GetAlerts()[0]
// Verify first execution: activeAt should be ts1 and annotation should reflect it
if !firstAlert.ActiveAt.Equal(ts1) {
t.Fatalf("expected activeAt to be %v, got %v", ts1, firstAlert.ActiveAt)
}
// Extract time from annotation (format will be like "Alert active since 2025-09-30 08:55:13.638551611 -0400 EDT m=+0.002928464")
// Only the second-resolution prefix is compared, so sub-second digits and zone suffix do not matter.
expectedTimeStr := ts1.Format("2006-01-02 15:04:05")
if !strings.Contains(firstAlert.Annotations["description"], expectedTimeStr) {
t.Fatalf("first exec annotation should contain time %s, got: %s", expectedTimeStr, firstAlert.Annotations["description"])
}
// Second execution - should preserve activeAt in annotation
// Ensure different timestamp with different seconds
// sleep is non-blocking thanks to synctest
time.Sleep(2 * time.Second)
ts2 := time.Now()
_, err = ar.exec(context.TODO(), ts2, 0)
if err != nil {
t.Fatalf("unexpected error on second exec: %s", err)
}
// Get the alert again (should be the same alert)
if len(ar.alerts) != 1 {
t.Fatalf("expected 1 alert, got %d", len(ar.alerts))
}
secondAlert := ar.GetAlerts()[0]
// Critical test: activeAt should still be ts1, not ts2
if !secondAlert.ActiveAt.Equal(ts1) {
t.Fatalf("activeAt should be preserved as %v, but got %v", ts1, secondAlert.ActiveAt)
}
// Critical test: annotation should still contain ts1 time, not ts2
if !strings.Contains(secondAlert.Annotations["description"], expectedTimeStr) {
t.Fatalf("second exec annotation should still contain original time %s, got: %s", expectedTimeStr, secondAlert.Annotations["description"])
}
// Additional verification: annotation should NOT contain ts2 time
ts2TimeStr := ts2.Format("2006-01-02 15:04:05")
if strings.Contains(secondAlert.Annotations["description"], ts2TimeStr) {
t.Fatalf("annotation should NOT contain new eval time %s, got: %s", ts2TimeStr, secondAlert.Annotations["description"])
}
// Verify the templated label was expanded to its static value.
// NOTE(review): the label template does not invoke `query`, so this covers label
// templating in general rather than query templates specifically.
if firstAlert.Labels["test_query_in_label"] != "static_value" {
t.Fatalf("expected test_query_in_label=static_value, got %s", firstAlert.Labels["test_query_in_label"])
}
})
}

View File

@@ -663,7 +663,7 @@ func TestAlertingRuleExecRange(t *testing.T) {
Name: "for-pending", Name: "for-pending",
Type: config.NewPrometheusType().String(), Type: config.NewPrometheusType().String(),
Labels: map[string]string{"alertname": "for-pending"}, Labels: map[string]string{"alertname": "for-pending"},
Annotations: map[string]string{}, Annotations: map[string]string{"activeAt": "5000"},
State: notifier.StatePending, State: notifier.StatePending,
ActiveAt: time.Unix(5, 0), ActiveAt: time.Unix(5, 0),
Value: 1, Value: 1,
@@ -683,7 +683,7 @@ func TestAlertingRuleExecRange(t *testing.T) {
Name: "for-firing", Name: "for-firing",
Type: config.NewPrometheusType().String(), Type: config.NewPrometheusType().String(),
Labels: map[string]string{"alertname": "for-firing"}, Labels: map[string]string{"alertname": "for-firing"},
Annotations: map[string]string{}, Annotations: map[string]string{"activeAt": "1000"},
State: notifier.StateFiring, State: notifier.StateFiring,
ActiveAt: time.Unix(1, 0), ActiveAt: time.Unix(1, 0),
Start: time.Unix(5, 0), Start: time.Unix(5, 0),
@@ -704,7 +704,7 @@ func TestAlertingRuleExecRange(t *testing.T) {
Name: "for-hold-pending", Name: "for-hold-pending",
Type: config.NewPrometheusType().String(), Type: config.NewPrometheusType().String(),
Labels: map[string]string{"alertname": "for-hold-pending"}, Labels: map[string]string{"alertname": "for-hold-pending"},
Annotations: map[string]string{}, Annotations: map[string]string{"activeAt": "5000"},
State: notifier.StatePending, State: notifier.StatePending,
ActiveAt: time.Unix(5, 0), ActiveAt: time.Unix(5, 0),
Value: 1, Value: 1,
@@ -826,9 +826,12 @@ func TestGroup_Restore(t *testing.T) {
fg := NewGroup(config.Group{Name: "TestRestore", Rules: rules}, fqr, time.Second, nil) fg := NewGroup(config.Group{Name: "TestRestore", Rules: rules}, fqr, time.Second, nil)
fg.Init() fg.Init()
wg := sync.WaitGroup{} wg := sync.WaitGroup{}
wg.Go(func() { wg.Add(1)
fg.Start(context.Background(), nil, fqr) go func() {
}) nts := func() []notifier.Notifier { return []notifier.Notifier{&notifier.FakeNotifier{}} }
fg.Start(context.Background(), nts, nil, fqr)
wg.Done()
}()
fg.Close() fg.Close()
wg.Wait() wg.Wait()
@@ -1119,7 +1122,7 @@ func TestAlertingRuleLimit_Success(t *testing.T) {
} }
func TestAlertingRule_Template(t *testing.T) { func TestAlertingRule_Template(t *testing.T) {
f := func(rule *AlertingRule, metrics []datasource.Metric, isResponsePartial bool, alertsExpected map[uint64]*notifier.Alert) { f := func(rule *AlertingRule, metrics []datasource.Metric, alertsExpected map[uint64]*notifier.Alert) {
t.Helper() t.Helper()
fakeGroup := Group{ fakeGroup := Group{
@@ -1132,7 +1135,6 @@ func TestAlertingRule_Template(t *testing.T) {
entries: make([]StateEntry, 10), entries: make([]StateEntry, 10),
} }
fq.Add(metrics...) fq.Add(metrics...)
fq.SetPartialResponse(isResponsePartial)
if _, err := rule.exec(context.TODO(), time.Now(), 0); err != nil { if _, err := rule.exec(context.TODO(), time.Now(), 0); err != nil {
t.Fatalf("unexpected error: %s", err) t.Fatalf("unexpected error: %s", err)
@@ -1163,7 +1165,7 @@ func TestAlertingRule_Template(t *testing.T) {
}, []datasource.Metric{ }, []datasource.Metric{
metricWithValueAndLabels(t, 1, "instance", "foo"), metricWithValueAndLabels(t, 1, "instance", "foo"),
metricWithValueAndLabels(t, 1, "instance", "bar"), metricWithValueAndLabels(t, 1, "instance", "bar"),
}, false, map[uint64]*notifier.Alert{ }, map[uint64]*notifier.Alert{
hash(map[string]string{alertNameLabel: "common", "region": "east", "instance": "foo"}): { hash(map[string]string{alertNameLabel: "common", "region": "east", "instance": "foo"}): {
Annotations: map[string]string{ Annotations: map[string]string{
"summary": `common: Too high connection number for "foo"`, "summary": `common: Too high connection number for "foo"`,
@@ -1192,14 +1194,14 @@ func TestAlertingRule_Template(t *testing.T) {
"instance": "{{ $labels.instance }}", "instance": "{{ $labels.instance }}",
}, },
Annotations: map[string]string{ Annotations: map[string]string{
"summary": `{{ $labels.__name__ }}: Too high connection number for "{{ $labels.instance }}".{{ if $isPartial }} WARNING: Partial response detected - this alert may be incomplete. Please verify the results manually.{{ end }}`, "summary": `{{ $labels.__name__ }}: Too high connection number for "{{ $labels.instance }}"`,
"description": `{{ $labels.alertname}}: It is {{ $value }} connections for "{{ $labels.instance }}"`, "description": `{{ $labels.alertname}}: It is {{ $value }} connections for "{{ $labels.instance }}"`,
}, },
alerts: make(map[uint64]*notifier.Alert), alerts: make(map[uint64]*notifier.Alert),
}, []datasource.Metric{ }, []datasource.Metric{
metricWithValueAndLabels(t, 2, "__name__", "first", "instance", "foo", alertNameLabel, "override"), metricWithValueAndLabels(t, 2, "__name__", "first", "instance", "foo", alertNameLabel, "override"),
metricWithValueAndLabels(t, 10, "__name__", "second", "instance", "bar", alertNameLabel, "override"), metricWithValueAndLabels(t, 10, "__name__", "second", "instance", "bar", alertNameLabel, "override"),
}, false, map[uint64]*notifier.Alert{ }, map[uint64]*notifier.Alert{
hash(map[string]string{alertNameLabel: "override label", "exported_alertname": "override", "instance": "foo"}): { hash(map[string]string{alertNameLabel: "override label", "exported_alertname": "override", "instance": "foo"}): {
Labels: map[string]string{ Labels: map[string]string{
alertNameLabel: "override label", alertNameLabel: "override label",
@@ -1207,7 +1209,7 @@ func TestAlertingRule_Template(t *testing.T) {
"instance": "foo", "instance": "foo",
}, },
Annotations: map[string]string{ Annotations: map[string]string{
"summary": `first: Too high connection number for "foo".`, "summary": `first: Too high connection number for "foo"`,
"description": `override: It is 2 connections for "foo"`, "description": `override: It is 2 connections for "foo"`,
}, },
}, },
@@ -1218,7 +1220,7 @@ func TestAlertingRule_Template(t *testing.T) {
"instance": "bar", "instance": "bar",
}, },
Annotations: map[string]string{ Annotations: map[string]string{
"summary": `second: Too high connection number for "bar".`, "summary": `second: Too high connection number for "bar"`,
"description": `override: It is 10 connections for "bar"`, "description": `override: It is 10 connections for "bar"`,
}, },
}, },
@@ -1231,7 +1233,7 @@ func TestAlertingRule_Template(t *testing.T) {
"instance": "{{ $labels.instance }}", "instance": "{{ $labels.instance }}",
}, },
Annotations: map[string]string{ Annotations: map[string]string{
"summary": `Alert "{{ $labels.alertname }}({{ $labels.alertgroup }})" for instance {{ $labels.instance }}.{{ if $isPartial }} WARNING: Partial response detected - this alert may be incomplete. Please verify the results manually.{{ end }}`, "summary": `Alert "{{ $labels.alertname }}({{ $labels.alertgroup }})" for instance {{ $labels.instance }}`,
}, },
alerts: make(map[uint64]*notifier.Alert), alerts: make(map[uint64]*notifier.Alert),
}, []datasource.Metric{ }, []datasource.Metric{
@@ -1239,7 +1241,7 @@ func TestAlertingRule_Template(t *testing.T) {
alertNameLabel, "originAlertname", alertNameLabel, "originAlertname",
alertGroupNameLabel, "originGroupname", alertGroupNameLabel, "originGroupname",
"instance", "foo"), "instance", "foo"),
}, true, map[uint64]*notifier.Alert{ }, map[uint64]*notifier.Alert{
hash(map[string]string{ hash(map[string]string{
alertNameLabel: "OriginLabels", alertNameLabel: "OriginLabels",
"exported_alertname": "originAlertname", "exported_alertname": "originAlertname",
@@ -1255,7 +1257,7 @@ func TestAlertingRule_Template(t *testing.T) {
"instance": "foo", "instance": "foo",
}, },
Annotations: map[string]string{ Annotations: map[string]string{
"summary": `Alert "originAlertname(originGroupname)" for instance foo. WARNING: Partial response detected - this alert may be incomplete. Please verify the results manually.`, "summary": `Alert "originAlertname(originGroupname)" for instance foo`,
}, },
}, },
}) })
@@ -1363,7 +1365,6 @@ func TestAlertingRule_ToLabels(t *testing.T) {
{Name: "instance", Value: "0.0.0.0:8800"}, {Name: "instance", Value: "0.0.0.0:8800"},
{Name: "group", Value: "vmalert"}, {Name: "group", Value: "vmalert"},
{Name: "alertname", Value: "ConfigurationReloadFailure"}, {Name: "alertname", Value: "ConfigurationReloadFailure"},
{Name: "pod", Value: "vmalert-0"},
}, },
Values: []float64{1}, Values: []float64{1},
Timestamps: []int64{time.Now().UnixNano()}, Timestamps: []int64{time.Now().UnixNano()},
@@ -1371,11 +1372,8 @@ func TestAlertingRule_ToLabels(t *testing.T) {
ar := &AlertingRule{ ar := &AlertingRule{
Labels: map[string]string{ Labels: map[string]string{
"instance": "override", // this should override instance with new value "instance": "override", // this should override instance with new value
"group": "vmalert", // this shouldn't have effect since value in metric is equal "group": "vmalert", // this shouldn't have effect since value in metric is equal
"invalid_label": "{{ .Values.mustRuntimeFail }}",
"empty_label": "", // this should be dropped
"pod": "", // this should remove the pod label from query result
}, },
Expr: "sum(vmalert_alerting_rules_error) by(instance, group, alertname) > 0", Expr: "sum(vmalert_alerting_rules_error) by(instance, group, alertname) > 0",
Name: "AlertingRulesError", Name: "AlertingRulesError",
@@ -1383,12 +1381,10 @@ func TestAlertingRule_ToLabels(t *testing.T) {
} }
expectedOriginLabels := map[string]string{ expectedOriginLabels := map[string]string{
"instance": "0.0.0.0:8800", "instance": "0.0.0.0:8800",
"group": "vmalert", "group": "vmalert",
"alertname": "ConfigurationReloadFailure", "alertname": "ConfigurationReloadFailure",
"alertgroup": "vmalert", "alertgroup": "vmalert",
"pod": "vmalert-0",
"invalid_label": `error evaluating template: template: :1:298: executing "" at <.Values.mustRuntimeFail>: can't evaluate field Values in type notifier.tplData`,
} }
expectedProcessedLabels := map[string]string{ expectedProcessedLabels := map[string]string{
@@ -1398,12 +1394,11 @@ func TestAlertingRule_ToLabels(t *testing.T) {
"exported_alertname": "ConfigurationReloadFailure", "exported_alertname": "ConfigurationReloadFailure",
"group": "vmalert", "group": "vmalert",
"alertgroup": "vmalert", "alertgroup": "vmalert",
"invalid_label": `error evaluating template: template: :1:298: executing "" at <.Values.mustRuntimeFail>: can't evaluate field Values in type notifier.tplData`,
} }
ls, err := ar.toLabels(metric, nil) ls, err := ar.toLabels(metric, nil)
if err == nil || !strings.Contains(err.Error(), "error evaluating template") { if err != nil {
t.Fatalf("unexpected error %q", err.Error()) t.Fatalf("unexpected error: %s", err)
} }
if !reflect.DeepEqual(ls.origin, expectedOriginLabels) { if !reflect.DeepEqual(ls.origin, expectedOriginLabels) {
@@ -1434,50 +1429,3 @@ func TestAlertingRuleExec_Partial(t *testing.T) {
t.Fatalf("unexpected error: %s", err) t.Fatalf("unexpected error: %s", err)
} }
} }
// TestAlertingRule_QueryTemplateInLabels executes an alerting rule whose label
// template pipes an expression into `query`. It expects exec to succeed, to
// produce exactly one alert, and that alert to carry the templated label with
// the value "true" or "false".
func TestAlertingRule_QueryTemplateInLabels(t *testing.T) {
	querier := &datasource.FakeQuerier{}
	group := Group{
		Name: "TestQueryTemplateInLabels",
	}

	rule := &AlertingRule{
		Name: "test_alert",
		Labels: map[string]string{
			"suppress_for_mass_alert": `{{ if (printf "ALERTS{alertname='SomeAlert', alertstate='firing', device='%s'} == 1" $labels.device | query) }}true{{ else }}false{{ end }}`,
		},
		Annotations: map[string]string{
			"summary": "Test alert with query template in labels",
		},
		alerts: make(map[uint64]*notifier.Alert),
		q:      querier,
		state: &ruleState{
			entries: make([]StateEntry, 10),
		},
	}
	rule.GroupID = group.GetID()

	// one matching series is enough for the rule to raise an alert
	querier.Add(metricWithValueAndLabels(t, 1, "device", "sda1"))

	if _, err := rule.exec(context.TODO(), time.Now(), 0); err != nil {
		t.Fatalf("unexpected error with query template in labels: %s", err)
	}

	// the rule must have created exactly one alert
	if n := len(rule.alerts); n != 1 {
		t.Fatalf("expected 1 alert, got %d", n)
	}

	labelValue, found := rule.GetAlerts()[0].Labels["suppress_for_mass_alert"]
	if !found {
		t.Fatalf("expected 'suppress_for_mass_alert' label to exist")
	}

	// either branch of the template is acceptable: which one is taken depends
	// on what the fake querier returns for the nested query
	switch labelValue {
	case "true", "false":
	default:
		t.Fatalf("expected 'suppress_for_mass_alert' label to be 'true' or 'false', got '%s'", labelValue)
	}
}

View File

@@ -2,13 +2,12 @@ package rule
import ( import (
"context" "context"
"encoding/json"
"errors" "errors"
"flag" "flag"
"fmt" "fmt"
"hash/fnv" "hash/fnv"
"maps"
"net/url" "net/url"
"path"
"sync" "sync"
"time" "time"
@@ -26,14 +25,10 @@ import (
) )
var ( var (
ruleResultsLimit = flag.Int("rule.resultsLimit", 0, "Limits the number of alerts or recording results a single rule can produce. "+
"Can be overridden by the limit option under group if specified. "+
"If exceeded, the rule will be marked with an error and all its results will be discarded. "+
"0 means no limit.")
ruleUpdateEntriesLimit = flag.Int("rule.updateEntriesLimit", 20, "Defines the max number of rule's state updates stored in-memory. "+ ruleUpdateEntriesLimit = flag.Int("rule.updateEntriesLimit", 20, "Defines the max number of rule's state updates stored in-memory. "+
"Rule's updates are available on rule's Details page and are used for debugging purposes. The number of stored updates can be overridden per rule via update_entries_limit param.") "Rule's updates are available on rule's Details page and are used for debugging purposes. The number of stored updates can be overridden per rule via update_entries_limit param.")
resendDelay = flag.Duration("rule.resendDelay", 0, "Minimum amount of time to wait before resending an alert to notifier.") resendDelay = flag.Duration("rule.resendDelay", 0, "MiniMum amount of time to wait before resending an alert to notifier.")
maxResolveDuration = flag.Duration("rule.maxResolveDuration", 0, "Limits the maximum duration for automatic alert expiration, "+ maxResolveDuration = flag.Duration("rule.maxResolveDuration", 0, "Limits the maxiMum duration for automatic alert expiration, "+
"which by default is 4 times evaluationInterval of the parent group") "which by default is 4 times evaluationInterval of the parent group")
evalDelay = flag.Duration("rule.evalDelay", 30*time.Second, "Adjustment of the 'time' parameter for rule evaluation requests to compensate intentional data delay from the datasource. "+ evalDelay = flag.Duration("rule.evalDelay", 30*time.Second, "Adjustment of the 'time' parameter for rule evaluation requests to compensate intentional data delay from the datasource. "+
"Normally, should be equal to '-search.latencyOffset' (cmd-line flag configured for VictoriaMetrics single-node or vmselect). "+ "Normally, should be equal to '-search.latencyOffset' (cmd-line flag configured for VictoriaMetrics single-node or vmselect). "+
@@ -41,11 +36,6 @@ var (
disableAlertGroupLabel = flag.Bool("disableAlertgroupLabel", false, "Whether to disable adding group's Name as label to generated alerts and time series.") disableAlertGroupLabel = flag.Bool("disableAlertgroupLabel", false, "Whether to disable adding group's Name as label to generated alerts and time series.")
remoteReadLookBack = flag.Duration("remoteRead.lookback", time.Hour, "Lookback defines how far to look into past for alerts timeseries. "+ remoteReadLookBack = flag.Duration("remoteRead.lookback", time.Hour, "Lookback defines how far to look into past for alerts timeseries. "+
"For example, if lookback=1h then range from now() to now()-1h will be scanned.") "For example, if lookback=1h then range from now() to now()-1h will be scanned.")
maxStartDelay = flag.Duration("group.maxStartDelay", 5*time.Minute, "Defines the max delay before starting the group evaluation. Group's start is artificially delayed for random duration on interval"+
" [0..min(--group.maxStartDelay, group.interval)]. This helps smoothing out the load on the configured datasource, so evaluations aren't executed too close to each other.")
ruleStripFilePath = flag.Bool("rule.stripFilePath", false, "Whether to strip rule file paths in logs and all API responses, including /metrics. "+
"For example, file path '/path/to/tenant_id/rules.yml' will be stripped to 'groupHashID/rules.yml'. "+
"This flag may be useful for hiding sensitive information in file paths, such as S3 bucket details.")
) )
// Group is an entity for grouping rules // Group is an entity for grouping rules
@@ -95,7 +85,6 @@ type groupMetrics struct {
iterationTotal *metrics.Counter iterationTotal *metrics.Counter
iterationDuration *metrics.Summary iterationDuration *metrics.Summary
iterationMissed *metrics.Counter iterationMissed *metrics.Counter
iterationReset *metrics.Counter
iterationInterval *metrics.Gauge iterationInterval *metrics.Gauge
} }
@@ -103,7 +92,9 @@ type groupMetrics struct {
// set2 has priority over set1. // set2 has priority over set1.
func mergeLabels(groupName, ruleName string, set1, set2 map[string]string) map[string]string { func mergeLabels(groupName, ruleName string, set1, set2 map[string]string) map[string]string {
r := map[string]string{} r := map[string]string{}
maps.Copy(r, set1) for k, v := range set1 {
r[k] = v
}
for k, v := range set2 { for k, v := range set2 {
if prevV, ok := r[k]; ok { if prevV, ok := r[k]; ok {
logger.Infof("label %q=%q for rule %q.%q overwritten with external label %q=%q", logger.Infof("label %q=%q for rule %q.%q overwritten with external label %q=%q",
@@ -121,6 +112,7 @@ func NewGroup(cfg config.Group, qb datasource.QuerierBuilder, defaultInterval ti
Name: cfg.Name, Name: cfg.Name,
File: cfg.File, File: cfg.File,
Interval: cfg.Interval.Duration(), Interval: cfg.Interval.Duration(),
Limit: cfg.Limit,
Concurrency: cfg.Concurrency, Concurrency: cfg.Concurrency,
checksum: cfg.Checksum, checksum: cfg.Checksum,
Params: cfg.Params, Params: cfg.Params,
@@ -137,11 +129,6 @@ func NewGroup(cfg config.Group, qb datasource.QuerierBuilder, defaultInterval ti
if g.Interval == 0 { if g.Interval == 0 {
g.Interval = defaultInterval g.Interval = defaultInterval
} }
if cfg.Limit != nil {
g.Limit = *cfg.Limit
} else {
g.Limit = *ruleResultsLimit
}
if g.Concurrency < 1 { if g.Concurrency < 1 {
g.Concurrency = 1 g.Concurrency = 1
} }
@@ -152,12 +139,6 @@ func NewGroup(cfg config.Group, qb datasource.QuerierBuilder, defaultInterval ti
g.EvalDelay = &cfg.EvalDelay.D g.EvalDelay = &cfg.EvalDelay.D
} }
g.id = g.CreateID() g.id = g.CreateID()
// strip file path from group.File after generated group ID when ruleStripFilePath is set,
// so it won't be exposed in logs and api responses
if *ruleStripFilePath {
_, filename := path.Split(g.File)
g.File = fmt.Sprintf("%d/%s", g.id, filename)
}
for _, h := range cfg.Headers { for _, h := range cfg.Headers {
g.Headers[h.Key] = h.Value g.Headers[h.Key] = h.Value
} }
@@ -308,7 +289,7 @@ func (g *Group) InterruptEval() {
} }
} }
// Close stops the group and its rules, unregisters group metrics // Close stops the group and it's rules, unregisters group metrics
func (g *Group) Close() { func (g *Group) Close() {
if g.doneCh == nil { if g.doneCh == nil {
return return
@@ -317,6 +298,10 @@ func (g *Group) Close() {
g.InterruptEval() g.InterruptEval()
<-g.finishedCh <-g.finishedCh
g.closeGroupMetrics()
}
func (g *Group) closeGroupMetrics() {
metrics.UnregisterSet(g.metrics.set, true) metrics.UnregisterSet(g.metrics.set, true)
} }
@@ -331,7 +316,6 @@ func (g *Group) Init() {
g.metrics.iterationTotal = g.metrics.set.NewCounter(fmt.Sprintf(`vmalert_iteration_total{%s}`, labels)) g.metrics.iterationTotal = g.metrics.set.NewCounter(fmt.Sprintf(`vmalert_iteration_total{%s}`, labels))
g.metrics.iterationDuration = g.metrics.set.NewSummary(fmt.Sprintf(`vmalert_iteration_duration_seconds{%s}`, labels)) g.metrics.iterationDuration = g.metrics.set.NewSummary(fmt.Sprintf(`vmalert_iteration_duration_seconds{%s}`, labels))
g.metrics.iterationMissed = g.metrics.set.NewCounter(fmt.Sprintf(`vmalert_iteration_missed_total{%s}`, labels)) g.metrics.iterationMissed = g.metrics.set.NewCounter(fmt.Sprintf(`vmalert_iteration_missed_total{%s}`, labels))
g.metrics.iterationReset = g.metrics.set.NewCounter(fmt.Sprintf(`vmalert_iteration_reset_total{%s}`, labels))
g.metrics.iterationInterval = g.metrics.set.NewGauge(fmt.Sprintf(`vmalert_iteration_interval_seconds{%s}`, labels), func() float64 { g.metrics.iterationInterval = g.metrics.set.NewGauge(fmt.Sprintf(`vmalert_iteration_interval_seconds{%s}`, labels), func() float64 {
i := g.Interval.Seconds() i := g.Interval.Seconds()
return i return i
@@ -343,13 +327,13 @@ func (g *Group) Init() {
} }
// Start starts group's evaluation // Start starts group's evaluation
func (g *Group) Start(ctx context.Context, rw remotewrite.RWClient, rr datasource.QuerierBuilder) { func (g *Group) Start(ctx context.Context, nts func() []notifier.Notifier, rw remotewrite.RWClient, rr datasource.QuerierBuilder) {
defer func() { close(g.finishedCh) }() defer func() { close(g.finishedCh) }()
evalTS := time.Now() evalTS := time.Now()
// sleep random duration to spread group rules evaluation // sleep random duration to spread group rules evaluation
// over maxStartDelay to reduce the load on datasource. // over time in order to reduce load on datasource.
if !SkipRandSleepOnGroupStart { if !SkipRandSleepOnGroupStart {
sleepBeforeStart := g.delayBeforeStart(evalTS, *maxStartDelay) sleepBeforeStart := delayBeforeStart(evalTS, g.GetID(), g.Interval, g.EvalOffset)
g.infof("will start in %v", sleepBeforeStart) g.infof("will start in %v", sleepBeforeStart)
sleepTimer := time.NewTimer(sleepBeforeStart) sleepTimer := time.NewTimer(sleepBeforeStart)
@@ -381,22 +365,21 @@ func (g *Group) Start(ctx context.Context, rw remotewrite.RWClient, rr datasourc
e := &executor{ e := &executor{
Rw: rw, Rw: rw,
Notifiers: nts,
notifierHeaders: g.NotifierHeaders, notifierHeaders: g.NotifierHeaders,
} }
g.infof("started") g.infof("started")
eval := func(ctx context.Context, ts time.Time) time.Time { eval := func(ctx context.Context, ts time.Time) {
g.metrics.iterationTotal.Inc() g.metrics.iterationTotal.Inc()
start := time.Now() start := time.Now()
if len(g.Rules) < 1 { if len(g.Rules) < 1 {
g.metrics.iterationDuration.UpdateDuration(start) g.metrics.iterationDuration.UpdateDuration(start)
g.mu.Lock()
g.LastEvaluation = start g.LastEvaluation = start
g.mu.Unlock() return
return ts
} }
resolveDuration := getResolveDuration(g.Interval, *resendDelay, *maxResolveDuration) resolveDuration := getResolveDuration(g.Interval, *resendDelay, *maxResolveDuration)
@@ -409,10 +392,7 @@ func (g *Group) Start(ctx context.Context, rw remotewrite.RWClient, rr datasourc
} }
} }
g.metrics.iterationDuration.UpdateDuration(start) g.metrics.iterationDuration.UpdateDuration(start)
g.mu.Lock()
g.LastEvaluation = start g.LastEvaluation = start
g.mu.Unlock()
return ts
} }
evalCtx, cancel := context.WithCancel(ctx) evalCtx, cancel := context.WithCancel(ctx)
@@ -421,18 +401,15 @@ func (g *Group) Start(ctx context.Context, rw remotewrite.RWClient, rr datasourc
g.mu.Unlock() g.mu.Unlock()
defer g.evalCancel() defer g.evalCancel()
// start the interval ticker before the first evaluation, eval(evalCtx, evalTS)
// so that the evaluation timestamps of groups with the `eval_offset` option are also aligned,
// see https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10773
t := time.NewTicker(g.Interval) t := time.NewTicker(g.Interval)
defer t.Stop() defer t.Stop()
realEvalTS := eval(evalCtx, evalTS)
// restore the rules state after the first evaluation // restore the rules state after the first evaluation
// so only active alerts can be restored. // so only active alerts can be restored.
if rr != nil { if rr != nil {
err := g.restore(ctx, rr, realEvalTS, *remoteReadLookBack) err := g.restore(ctx, rr, evalTS, *remoteReadLookBack)
if err != nil { if err != nil {
logger.Errorf("error while restoring ruleState for group %q: %s", g.Name, err) logger.Errorf("error while restoring ruleState for group %q: %s", g.Name, err)
} }
@@ -476,16 +453,14 @@ func (g *Group) Start(ctx context.Context, rw remotewrite.RWClient, rr datasourc
if missed < 0 { if missed < 0 {
// missed can become < 0 due to irregular delays during evaluation // missed can become < 0 due to irregular delays during evaluation
// which can result in time.Since(evalTS) < g.Interval; // which can result in time.Since(evalTS) < g.Interval;
// or the system wall clock was changed backward, // or the system wall clock was changed backward
// Reset the evalTS to the current time. missed = 0
evalTS = time.Now() evalTS = time.Now()
g.metrics.iterationReset.Inc()
} else {
evalTS = evalTS.Add((missed + 1) * g.Interval)
} }
if missed > 0 { if missed > 0 {
g.metrics.iterationMissed.Inc() g.metrics.iterationMissed.Inc()
} }
evalTS = evalTS.Add((missed + 1) * g.Interval)
eval(evalCtx, evalTS) eval(evalCtx, evalTS)
} }
@@ -497,35 +472,32 @@ func (g *Group) UpdateWith(newGroup *Group) {
g.updateCh <- newGroup g.updateCh <- newGroup
} }
// delayBeforeStart returns duration for delaying the evaluation start // DeepCopy returns a deep copy of group
// based on given ts and Group settings. The delay can't exceed maxDelay. func (g *Group) DeepCopy() *Group {
// maxDelay is ignored if g.EvalOffset != nil. g.mu.RLock()
// data, _ := json.Marshal(g)
// Delaying is important to smooth out the load on the datasource when all groups start at the same time. g.mu.RUnlock()
// delayBeforeStart calculates delay based on Group ID, so all groups will start at different moments of time. newG := Group{}
func (g *Group) delayBeforeStart(ts time.Time, maxDelay time.Duration) time.Duration { _ = json.Unmarshal(data, &newG)
if g.EvalOffset != nil { newG.Rules = g.Rules
offset := *g.EvalOffset newG.id = g.id
// adjust the offset for negative evalOffset, the rule is: return &newG
// `eval_offset: -x` is equivalent to `eval_offset: y` for `interval: x+y`. }
// For example, `eval_offset: -6m` is equivalent to `eval_offset: 4m` for `interval: 10m`.
if offset < 0 { // if offset is specified, delayBeforeStart returns a duration to help aligning timestamp with offset;
offset += g.Interval // otherwise, it returns a random duration between [0..interval] based on group key.
} func delayBeforeStart(ts time.Time, key uint64, interval time.Duration, offset *time.Duration) time.Duration {
// if offset is specified, ignore the maxDelay and return a duration aligned with offset if offset != nil {
currentOffsetPoint := ts.Truncate(g.Interval).Add(offset) currentOffsetPoint := ts.Truncate(interval).Add(*offset)
if currentOffsetPoint.Before(ts) { if currentOffsetPoint.Before(ts) {
// wait until the next offset point // wait until the next offset point
return currentOffsetPoint.Add(g.Interval).Sub(ts) return currentOffsetPoint.Add(interval).Sub(ts)
} }
return currentOffsetPoint.Sub(ts) return currentOffsetPoint.Sub(ts)
} }
// otherwise, return a random duration between [0..min(interval, maxDelay)] based on group ID
// artificially limit interval, so groups with big intervals could start sooner.
interval := min(g.Interval, maxDelay)
var randSleep time.Duration var randSleep time.Duration
randSleep = time.Duration(float64(interval) * (float64(g.GetID()) / (1 << 64))) randSleep = time.Duration(float64(interval) * (float64(key) / (1 << 64)))
sleepOffset := time.Duration(ts.UnixNano() % interval.Nanoseconds()) sleepOffset := time.Duration(ts.UnixNano() % interval.Nanoseconds())
if randSleep < sleepOffset { if randSleep < sleepOffset {
randSleep += interval randSleep += interval
@@ -587,13 +559,15 @@ func (g *Group) Replay(start, end time.Time, rw remotewrite.RWClient, maxDataPoi
if !disableProgressBar { if !disableProgressBar {
bar = pb.StartNew(iterations * len(g.Rules)) bar = pb.StartNew(iterations * len(g.Rules))
} }
for i := range g.Rules { for _, r := range g.Rules {
rule := g.Rules[i]
sem <- struct{}{} sem <- struct{}{}
wg.Go(func() { wg.Add(1)
res <- replayRuleRange(rule, ri, bar, rw, replayRuleRetryAttempts, ruleEvaluationConcurrency) go func(r Rule, ri rangeIterator) {
// pass ri as a copy, so it can be modified within the replayRuleRange
res <- replayRuleRange(r, ri, bar, rw, replayRuleRetryAttempts, ruleEvaluationConcurrency)
<-sem <-sem
}) wg.Done()
}(r, ri)
} }
wg.Wait() wg.Wait()
@@ -623,10 +597,10 @@ func replayRuleRange(r Rule, ri rangeIterator, bar *pb.ProgressBar, rw remotewri
res := make(chan int, int(ri.end.Sub(ri.start)/ri.step)+1) res := make(chan int, int(ri.end.Sub(ri.start)/ri.step)+1)
for ri.next() { for ri.next() {
sem <- struct{}{} sem <- struct{}{}
start := ri.s wg.Add(1)
end := ri.e
wg.Go(func() { go func(s, e time.Time) {
n, err := replayRule(r, start, end, rw, replayRuleRetryAttempts) n, err := replayRule(r, s, e, rw, replayRuleRetryAttempts)
if err != nil { if err != nil {
logger.Fatalf("rule %q: %s", r, err) logger.Fatalf("rule %q: %s", r, err)
} }
@@ -635,7 +609,8 @@ func replayRuleRange(r Rule, ri rangeIterator, bar *pb.ProgressBar, rw remotewri
} }
res <- n res <- n
<-sem <-sem
}) wg.Done()
}(ri.s, ri.e)
} }
wg.Wait() wg.Wait()
close(res) close(res)
@@ -649,9 +624,10 @@ func replayRuleRange(r Rule, ri rangeIterator, bar *pb.ProgressBar, rw remotewri
} }
// ExecOnce evaluates all the rules under group for once with given timestamp. // ExecOnce evaluates all the rules under group for once with given timestamp.
func (g *Group) ExecOnce(ctx context.Context, rw remotewrite.RWClient, evalTS time.Time) chan error { func (g *Group) ExecOnce(ctx context.Context, nts func() []notifier.Notifier, rw remotewrite.RWClient, evalTS time.Time) chan error {
e := &executor{ e := &executor{
Rw: rw, Rw: rw,
Notifiers: nts,
notifierHeaders: g.NotifierHeaders, notifierHeaders: g.NotifierHeaders,
} }
if len(g.Rules) < 1 { if len(g.Rules) < 1 {
@@ -726,6 +702,7 @@ func (g *Group) getEvalDelay() time.Duration {
// executor contains group's notify and rw configs // executor contains group's notify and rw configs
type executor struct { type executor struct {
Notifiers func() []notifier.Notifier
notifierHeaders map[string]string notifierHeaders map[string]string
Rw remotewrite.RWClient Rw remotewrite.RWClient
@@ -746,13 +723,14 @@ func (e *executor) execConcurrently(ctx context.Context, rules []Rule, ts time.T
sem := make(chan struct{}, concurrency) sem := make(chan struct{}, concurrency)
go func() { go func() {
wg := sync.WaitGroup{} wg := sync.WaitGroup{}
for i := range rules { for _, r := range rules {
rule := rules[i]
sem <- struct{}{} sem <- struct{}{}
wg.Go(func() { wg.Add(1)
res <- e.exec(ctx, rule, ts, resolveDuration, limit) go func(r Rule) {
res <- e.exec(ctx, r, ts, resolveDuration, limit)
<-sem <-sem
}) wg.Done()
}(r)
} }
wg.Wait() wg.Wait()
close(res) close(res)
@@ -781,7 +759,6 @@ func (e *executor) exec(ctx context.Context, r Rule, ts time.Time, resolveDurati
return fmt.Errorf("rule %q: failed to execute: %w", r, err) return fmt.Errorf("rule %q: failed to execute: %w", r, err)
} }
var errG vmalertutil.ErrGroup
if e.Rw != nil { if e.Rw != nil {
pushToRW := func(tss []prompb.TimeSeries) error { pushToRW := func(tss []prompb.TimeSeries) error {
var lastErr error var lastErr error
@@ -793,26 +770,31 @@ func (e *executor) exec(ctx context.Context, r Rule, ts time.Time, resolveDurati
return lastErr return lastErr
} }
if err := pushToRW(tss); err != nil { if err := pushToRW(tss); err != nil {
errG.Add(err) return err
} }
} }
ar, ok := r.(*AlertingRule) ar, ok := r.(*AlertingRule)
if !ok { if !ok {
return errG.Err() return nil
} }
alerts := ar.alertsToSend(resolveDuration, *resendDelay) alerts := ar.alertsToSend(resolveDuration, *resendDelay)
if len(alerts) < 1 { if len(alerts) < 1 {
return errG.Err() return nil
} }
notifierErr := notifier.Send(ctx, alerts, e.notifierHeaders) wg := sync.WaitGroup{}
for err := range notifierErr { errGr := new(vmalertutil.ErrGroup)
if err != nil { for _, nt := range e.Notifiers() {
errG.Add(fmt.Errorf("rule %q: notifier failure: %w", r, err)) wg.Add(1)
} go func(nt notifier.Notifier) {
if err := nt.Send(ctx, alerts, e.notifierHeaders); err != nil {
errGr.Add(fmt.Errorf("rule %q: failed to send alerts to addr %q: %w", r, nt.Addr(), err))
}
wg.Done()
}(nt)
} }
wg.Wait()
return errG.Err() return errGr.Err()
} }

View File

@@ -262,7 +262,7 @@ func TestUpdateDuringRandSleep(t *testing.T) {
updateCh: make(chan *Group), updateCh: make(chan *Group),
} }
g.Init() g.Init()
go g.Start(context.Background(), nil, nil) go g.Start(context.Background(), nil, nil, nil)
rule1 := AlertingRule{ rule1 := AlertingRule{
Name: "jobDown", Name: "jobDown",
@@ -346,8 +346,7 @@ func TestGroupStart(t *testing.T) {
} }
fs := &datasource.FakeQuerier{} fs := &datasource.FakeQuerier{}
fn, cleanup := notifier.InitFakeNotifier() fn := &notifier.FakeNotifier{}
defer cleanup()
const evalInterval = time.Millisecond const evalInterval = time.Millisecond
g := NewGroup(groups[0], fs, evalInterval, map[string]string{"cluster": "east-1"}) g := NewGroup(groups[0], fs, evalInterval, map[string]string{"cluster": "east-1"})
@@ -396,7 +395,7 @@ func TestGroupStart(t *testing.T) {
fs.Add(m2) fs.Add(m2)
g.Init() g.Init()
go func() { go func() {
g.Start(context.Background(), nil, fs) g.Start(context.Background(), func() []notifier.Notifier { return []notifier.Notifier{fn} }, nil, fs)
close(finished) close(finished)
}() }()
@@ -405,8 +404,7 @@ func TestGroupStart(t *testing.T) {
var cur uint64 var cur uint64
prev := g.metrics.iterationTotal.Get() prev := g.metrics.iterationTotal.Get()
i := 0 for i := 0; ; i++ {
for {
if i > 40 { if i > 40 {
t.Fatalf("group wasn't able to perform %d evaluations during %d eval intervals", n, i) t.Fatalf("group wasn't able to perform %d evaluations during %d eval intervals", n, i)
} }
@@ -415,7 +413,6 @@ func TestGroupStart(t *testing.T) {
return return
} }
time.Sleep(interval) time.Sleep(interval)
i++
} }
} }
@@ -475,10 +472,15 @@ func TestFaultyNotifier(t *testing.T) {
r := newTestAlertingRule("instant", 0) r := newTestAlertingRule("instant", 0)
r.q = fq r.q = fq
fn, cleanup := notifier.InitFakeNotifier() fn := &notifier.FakeNotifier{}
defer cleanup() e := &executor{
Notifiers: func() []notifier.Notifier {
e := &executor{} return []notifier.Notifier{
&notifier.FaultyNotifier{},
fn,
}
},
}
delay := 5 * time.Second delay := 5 * time.Second
ctx, cancel := context.WithTimeout(context.Background(), delay) ctx, cancel := context.WithTimeout(context.Background(), delay)
defer cancel() defer cancel()
@@ -551,7 +553,7 @@ func TestCloseWithEvalInterruption(t *testing.T) {
g := NewGroup(groups[0], fq, evalInterval, nil) g := NewGroup(groups[0], fq, evalInterval, nil)
g.Init() g.Init()
go g.Start(context.Background(), nil, nil) go g.Start(context.Background(), nil, nil, nil)
time.Sleep(evalInterval * 20) time.Sleep(evalInterval * 20)
@@ -569,10 +571,9 @@ func TestCloseWithEvalInterruption(t *testing.T) {
func TestGroupStartDelay(t *testing.T) { func TestGroupStartDelay(t *testing.T) {
g := &Group{} g := &Group{}
g.id = uint64(math.MaxUint64 / 10)
// interval of 5min and key generate a static delay of 30s // interval of 5min and key generate a static delay of 30s
g.Interval = time.Minute * 5 g.Interval = time.Minute * 5
maxDelay := time.Minute * 5 key := uint64(math.MaxUint64 / 10)
f := func(atS, expS string) { f := func(atS, expS string) {
t.Helper() t.Helper()
@@ -584,7 +585,7 @@ func TestGroupStartDelay(t *testing.T) {
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
delay := g.delayBeforeStart(at, maxDelay) delay := delayBeforeStart(at, key, g.Interval, g.EvalOffset)
gotStart := at.Add(delay) gotStart := at.Add(delay)
if expTS != gotStart { if expTS != gotStart {
t.Fatalf("expected to get %v; got %v instead", expTS, gotStart) t.Fatalf("expected to get %v; got %v instead", expTS, gotStart)
@@ -605,24 +606,6 @@ func TestGroupStartDelay(t *testing.T) {
f("2023-01-01T00:01:00.000+00:00", "2023-01-01T00:03:00.000+00:00") f("2023-01-01T00:01:00.000+00:00", "2023-01-01T00:03:00.000+00:00")
f("2023-01-01T00:03:30.000+00:00", "2023-01-01T00:08:00.000+00:00") f("2023-01-01T00:03:30.000+00:00", "2023-01-01T00:08:00.000+00:00")
f("2023-01-01T00:08:00.000+00:00", "2023-01-01T00:08:00.000+00:00") f("2023-01-01T00:08:00.000+00:00", "2023-01-01T00:08:00.000+00:00")
// test group with negative offset -2min, which is equivalent to 3min offset for 5min interval
offset = -2 * time.Minute
g.EvalOffset = &offset
f("2023-01-01T00:00:15.000+00:00", "2023-01-01T00:03:00.000+00:00")
f("2023-01-01T00:01:00.000+00:00", "2023-01-01T00:03:00.000+00:00")
f("2023-01-01T00:03:30.000+00:00", "2023-01-01T00:08:00.000+00:00")
f("2023-01-01T00:08:00.000+00:00", "2023-01-01T00:08:00.000+00:00")
maxDelay = time.Minute * 1
g.EvalOffset = nil
// test group with maxDelay, and offset disabled
f("2023-01-01T00:00:00.000+00:00", "2023-01-01T00:00:06.000+00:00")
f("2023-01-01T00:00:01.000+00:00", "2023-01-01T00:00:06.000+00:00")
f("2023-01-01T00:00:06.100+00:00", "2023-01-01T00:01:06.000+00:00")
f("2023-01-01T00:00:11.000+00:00", "2023-01-01T00:01:06.000+00:00")
} }
func TestGetPrometheusReqTimestamp(t *testing.T) { func TestGetPrometheusReqTimestamp(t *testing.T) {
@@ -742,64 +725,3 @@ func parseTime(t *testing.T, s string) time.Time {
} }
return tt return tt
} }
// TestRuleStripFilePath verifies the effect of the ruleStripFilePath flag on
// the File field of a group and of its rules.
//
// With the flag unset, NewGroup must keep the configured path untouched.
// With the flag set, the path must be replaced with "<groupID>/<filename>",
// where groupID is the ID of the group built from the same config.
func TestRuleStripFilePath(t *testing.T) {
	const origPath = "/var/local/test/rules.yaml"

	configG := config.Group{
		Name:        "group",
		File:        origPath,
		Type:        config.NewRawType("prometheus"),
		Concurrency: 1,
		Rules: []config.Rule{
			{
				ID:    0,
				Alert: "alert",
			},
			{
				ID:     1,
				Record: "record",
			},
		}}
	qb := &datasource.FakeQuerier{}

	// Phase 1: flag is unset, so paths must be preserved as is.
	g := NewGroup(configG, qb, 1*time.Minute, nil)
	gID := g.id
	if g.File != origPath {
		t.Fatalf("expected file path to be unchanged; got %q instead", g.File)
	}
	for _, r := range g.Rules {
		if ar, ok := r.(*AlertingRule); ok {
			if ar.File != origPath {
				t.Fatalf("expected rule file path to be unchanged; got %q instead", ar.File)
			}
		}
		if rr, ok := r.(*RecordingRule); ok {
			if rr.File != origPath {
				t.Fatalf("expected rule file path to be unchanged; got %q instead", rr.File)
			}
		}
	}

	// Phase 2: enable the flag for the rest of the test and restore it on exit,
	// so other tests in the package aren't affected.
	oldRuleStripFilePath := *ruleStripFilePath
	*ruleStripFilePath = true
	defer func() {
		*ruleStripFilePath = oldRuleStripFilePath
	}()

	// The stripped path is expected to be built from the ID of the group
	// created in phase 1.
	strippedPath := fmt.Sprintf("%d/rules.yaml", gID)

	g = NewGroup(configG, qb, 1*time.Minute, nil)
	if g.File != strippedPath {
		t.Fatalf("expected file path to be stripped to %q; got %q instead", strippedPath, g.File)
	}
	for _, r := range g.Rules {
		if ar, ok := r.(*AlertingRule); ok {
			if ar.File != strippedPath {
				t.Fatalf("expected rule file path to be stripped to %q; got %q instead", strippedPath, ar.File)
			}
		}
		if rr, ok := r.(*RecordingRule); ok {
			if rr.File != strippedPath {
				t.Fatalf("expected rule file path to be stripped to %q; got %q instead", strippedPath, rr.File)
			}
		}
	}
}

View File

@@ -2,7 +2,6 @@ package rule
import ( import (
"context" "context"
"errors"
"fmt" "fmt"
"strings" "strings"
"time" "time"
@@ -82,37 +81,6 @@ func (rr *RecordingRule) ID() uint64 {
return rr.RuleID return rr.RuleID
} }
// ToAPI builds the ApiRule view of the recording rule rr.
//
// Evaluation details (time, duration, samples, fetched series, error) are
// taken from the most recent state entry. Health is "ok" unless that entry
// holds an error, in which case it is "err" and LastError carries the message.
func (rr *RecordingRule) ToAPI() ApiRule {
	st := rr.state
	last := st.getLast()

	apiRule := ApiRule{
		Type:              TypeRecording,
		DatasourceType:    rr.Type.String(),
		Name:              rr.Name,
		Query:             rr.Expr,
		Labels:            rr.Labels,
		LastEvaluation:    last.Time,
		EvaluationTime:    last.Duration.Seconds(),
		Health:            "ok",
		LastSamples:       last.Samples,
		LastSeriesFetched: last.SeriesFetched,
		MaxUpdates:        st.size(),
		Updates:           st.getAll(),

		// IDs are rendered as strings to avoid rounding
		ID:        fmt.Sprintf("%d", rr.ID()),
		GroupID:   fmt.Sprintf("%d", rr.GroupID),
		GroupName: rr.GroupName,
		File:      rr.File,
	}

	if lastErr := last.Err; lastErr != nil {
		apiRule.Health = "err"
		apiRule.LastError = lastErr.Error()
	}
	return apiRule
}
// NewRecordingRule creates a new RecordingRule // NewRecordingRule creates a new RecordingRule
func NewRecordingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rule) *RecordingRule { func NewRecordingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rule) *RecordingRule {
debug := group.Debug debug := group.Debug
@@ -198,7 +166,7 @@ func (rr *RecordingRule) exec(ctx context.Context, ts time.Time, limit int) ([]p
defer func() { defer func() {
rr.state.add(curState) rr.state.add(curState)
if curState.Err != nil && !errors.Is(curState.Err, context.Canceled) { if curState.Err != nil {
rr.metrics.errors.Inc() rr.metrics.errors.Inc()
} }
}() }()
@@ -237,8 +205,7 @@ func (rr *RecordingRule) exec(ctx context.Context, ts time.Time, limit int) ([]p
Labels: stringToLabels(k), Labels: stringToLabels(k),
Samples: []prompb.Sample{ Samples: []prompb.Sample{
{Value: decimal.StaleNaN, Timestamp: ts.UnixNano() / 1e6}, {Value: decimal.StaleNaN, Timestamp: ts.UnixNano() / 1e6},
}, }})
})
} }
rr.lastEvaluation = curEvaluation rr.lastEvaluation = curEvaluation
return tss, nil return tss, nil
@@ -293,13 +260,6 @@ func (rr *RecordingRule) toTimeSeries(m datasource.Metric) prompb.TimeSeries {
} }
// add extra labels configured by user // add extra labels configured by user
for k := range rr.Labels { for k := range rr.Labels {
// do not add label with empty value to the result, as it has no meaning:
// if the label already exists in the original query result, remove it to preserve compatibility with relabeling, see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10766.
// otherwise, ignore the label, see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9984.
if rr.Labels[k] == "" {
m.DelLabel(k)
continue
}
existingLabel := promrelabel.GetLabelByName(m.Labels, k) existingLabel := promrelabel.GetLabelByName(m.Labels, k)
if existingLabel != nil { // there is a conflict between extra and existing label if existingLabel != nil { // there is a conflict between extra and existing label
if existingLabel.Value == rr.Labels[k] { if existingLabel.Value == rr.Labels[k] {

View File

@@ -163,13 +163,11 @@ func TestRecordingRule_Exec(t *testing.T) {
f(&RecordingRule{ f(&RecordingRule{
Name: "job:foo", Name: "job:foo",
Labels: map[string]string{ Labels: map[string]string{
"source": "test", "source": "test",
"empty_label": "", // this should be dropped
"pod": "", // this should remove the pod label from query result
}, },
}, [][]datasource.Metric{{ }, [][]datasource.Metric{{
metricWithValueAndLabels(t, 2, "__name__", "foo", "job", "foo", "pod", "vmalert-0"), metricWithValueAndLabels(t, 2, "__name__", "foo", "job", "foo"),
metricWithValueAndLabels(t, 1, "__name__", "bar", "job", "bar", "source", "origin", "pod", "vmalert-1"), metricWithValueAndLabels(t, 1, "__name__", "bar", "job", "bar", "source", "origin"),
metricWithValueAndLabels(t, 1, "__name__", "baz", "job", "baz", "source", "test"), metricWithValueAndLabels(t, 1, "__name__", "baz", "job", "baz", "source", "test"),
}}, [][]prompb.TimeSeries{{ }}, [][]prompb.TimeSeries{{
newTimeSeries([]float64{2}, []int64{ts.UnixNano()}, []prompb.Label{ newTimeSeries([]float64{2}, []int64{ts.UnixNano()}, []prompb.Label{

View File

@@ -21,8 +21,6 @@ type Rule interface {
// ID returns unique ID that may be used for // ID returns unique ID that may be used for
// identifying this Rule among others. // identifying this Rule among others.
ID() uint64 ID() uint64
// ToAPI returns ApiRule representation of Rule
ToAPI() ApiRule
// exec executes the rule with given context at the given timestamp and limit. // exec executes the rule with given context at the given timestamp and limit.
// returns an err if number of resulting time series exceeds the limit. // returns an err if number of resulting time series exceeds the limit.
exec(ctx context.Context, ts time.Time, limit int) ([]prompb.TimeSeries, error) exec(ctx context.Context, ts time.Time, limit int) ([]prompb.TimeSeries, error)
@@ -70,6 +68,39 @@ type StateEntry struct {
Curl string `json:"curl"` Curl string `json:"curl"`
} }
// GetLastEntry returns the most recent StateEntry recorded for r.
//
// Only *AlertingRule and *RecordingRule are supported;
// a zero StateEntry is returned for any other Rule implementation.
func GetLastEntry(r Rule) StateEntry {
	switch typed := r.(type) {
	case *AlertingRule:
		return typed.state.getLast()
	case *RecordingRule:
		return typed.state.getLast()
	default:
		return StateEntry{}
	}
}
// GetRuleStateSize returns the size of the state kept for r.
//
// Only *AlertingRule and *RecordingRule are supported;
// 0 is returned for any other Rule implementation.
func GetRuleStateSize(r Rule) int {
	switch typed := r.(type) {
	case *AlertingRule:
		return typed.state.size()
	case *RecordingRule:
		return typed.state.size()
	default:
		return 0
	}
}
// GetAllRuleState returns all the state entries kept for r.
//
// Only *AlertingRule and *RecordingRule are supported;
// an empty non-nil slice is returned for any other Rule implementation.
func GetAllRuleState(r Rule) []StateEntry {
	switch typed := r.(type) {
	case *AlertingRule:
		return typed.state.getAll()
	case *RecordingRule:
		return typed.state.getAll()
	default:
		return []StateEntry{}
	}
}
func (s *ruleState) size() int { func (s *ruleState) size() int {
s.RLock() s.RLock()
defer s.RUnlock() defer s.RUnlock()
@@ -121,7 +152,7 @@ func (s *ruleState) add(e StateEntry) {
func replayRule(r Rule, start, end time.Time, rw remotewrite.RWClient, replayRuleRetryAttempts int) (int, error) { func replayRule(r Rule, start, end time.Time, rw remotewrite.RWClient, replayRuleRetryAttempts int) (int, error) {
var err error var err error
var tss []prompb.TimeSeries var tss []prompb.TimeSeries
for i := range replayRuleRetryAttempts { for i := 0; i < replayRuleRetryAttempts; i++ {
tss, err = r.execRange(context.Background(), start, end) tss, err = r.execRange(context.Background(), start, end)
if err == nil { if err == nil {
break break

View File

@@ -40,7 +40,7 @@ func TestRule_state(t *testing.T) {
} }
var last time.Time var last time.Time
for range stateEntriesN * 2 { for i := 0; i < stateEntriesN*2; i++ {
last = time.Now() last = time.Now()
r.state.add(StateEntry{At: last}) r.state.add(StateEntry{At: last})
} }
@@ -65,15 +65,17 @@ func TestRule_stateConcurrent(_ *testing.T) {
r := &AlertingRule{state: &ruleState{entries: make([]StateEntry, 20)}} r := &AlertingRule{state: &ruleState{entries: make([]StateEntry, 20)}}
const workers = 50 const workers = 50
const iterations = 100 const iterations = 100
var wg sync.WaitGroup wg := sync.WaitGroup{}
for range workers { wg.Add(workers)
wg.Go(func() { for i := 0; i < workers; i++ {
for range iterations { go func() {
defer wg.Done()
for i := 0; i < iterations; i++ {
r.state.add(StateEntry{At: time.Now()}) r.state.add(StateEntry{At: time.Now()})
r.state.getAll() r.state.getAll()
r.state.getLast() r.state.getLast()
} }
}) }()
} }
wg.Wait() wg.Wait()
} }

View File

@@ -19,13 +19,13 @@ func CompareRules(t *testing.T, a, b Rule) error {
case *AlertingRule: case *AlertingRule:
br, ok := b.(*AlertingRule) br, ok := b.(*AlertingRule)
if !ok { if !ok {
return fmt.Errorf("rule %d supposed to be of type AlertingRule", b.ID()) return fmt.Errorf("rule %q supposed to be of type AlertingRule", b.ID())
} }
return compareAlertingRules(t, v, br) return compareAlertingRules(t, v, br)
case *RecordingRule: case *RecordingRule:
br, ok := b.(*RecordingRule) br, ok := b.(*RecordingRule)
if !ok { if !ok {
return fmt.Errorf("rule %d supposed to be of type RecordingRule", b.ID()) return fmt.Errorf("rule %q supposed to be of type RecordingRule", b.ID())
} }
return compareRecordingRules(t, v, br) return compareRecordingRules(t, v, br)
default: default:

View File

@@ -34,12 +34,11 @@ body {
padding-top: 4.5rem; padding-top: 4.5rem;
} }
.vm-group { .group-items {
cursor: pointer; cursor: pointer;
padding: 5px; padding: 5px;
margin-top: 5px; margin-top: 5px;
position: relative; position: relative;
display: none;
} }
.btn svg, .dropdown-item svg { .btn svg, .dropdown-item svg {
@@ -56,22 +55,14 @@ body {
height: 38px; height: 38px;
} }
.vm-item:not(.vm-found) { .group-items:not(:has(.sub-item:not(.d-none))) {
display: none; display: none !important;
} }
.vm-group:has(.vm-item:is(.vm-found)), .vm-group:is(.vm-found) { .group-items:hover {
display: flex;
}
.vm-group:hover {
background-color: #f8f9fa!important; background-color: #f8f9fa!important;
} }
.vm-group:is(.vm-found) .vm-item {
display: table-row;
}
.table { .table {
table-layout: fixed; table-layout: fixed;
} }
@@ -120,9 +111,3 @@ textarea.curl-area {
.w-60 { .w-60 {
width: 60%; width: 60%;
} }
/* Keeps whitespace and line breaks as written while still wrapping (pre-wrap),
   lets over-long words break instead of overflowing, and renders the text in gray. */
.annotations {
white-space: pre-wrap;
color: gray;
word-wrap: break-word;
}

View File

@@ -11,7 +11,7 @@
<path d="M224.163 175.27a1.9 1.9 0 0 0 2.8 0l6-5.9a2.1 2.1 0 0 0 .2-2.7 1.9 1.9 0 0 0-3-.2l-2.6 2.6v-5.2c0-1.54-1.667-2.502-3-1.732-.619.357-1 1.017-1 1.732v5.2l-2.6-2.6a1.9 1.9 0 0 0-3 .2 2.1 2.1 0 0 0 .2 2.7zm-16.459-23.297h36c1.54 0 2.502-1.667 1.732-3a2 2 0 0 0-1.732-1h-36c-1.54 0-2.502 1.667-1.732 3 .357.619 1.017 1 1.732 1m36 4h-36c-1.54 0-2.502 1.667-1.732 3 .357.619 1.017 1 1.732 1h36c1.54 0 2.502-1.667 1.732-3a2 2 0 0 0-1.732-1m-16.59-23.517a1.9 1.9 0 0 0-2.8 0l-6 5.9a2.1 2.1 0 0 0-.2 2.7 1.9 1.9 0 0 0 3 .2l2.6-2.6v5.2c0 1.54 1.667 2.502 3 1.732.619-.357 1-1.017 1-1.732v-5.2l2.6 2.6a1.9 1.9 0 0 0 3-.2 2.1 2.1 0 0 0-.2-2.7z"/> <path d="M224.163 175.27a1.9 1.9 0 0 0 2.8 0l6-5.9a2.1 2.1 0 0 0 .2-2.7 1.9 1.9 0 0 0-3-.2l-2.6 2.6v-5.2c0-1.54-1.667-2.502-3-1.732-.619.357-1 1.017-1 1.732v5.2l-2.6-2.6a1.9 1.9 0 0 0-3 .2 2.1 2.1 0 0 0 .2 2.7zm-16.459-23.297h36c1.54 0 2.502-1.667 1.732-3a2 2 0 0 0-1.732-1h-36c-1.54 0-2.502 1.667-1.732 3 .357.619 1.017 1 1.732 1m36 4h-36c-1.54 0-2.502 1.667-1.732 3 .357.619 1.017 1 1.732 1h36c1.54 0 2.502-1.667 1.732-3a2 2 0 0 0-1.732-1m-16.59-23.517a1.9 1.9 0 0 0-2.8 0l-6 5.9a2.1 2.1 0 0 0-.2 2.7 1.9 1.9 0 0 0 3 .2l2.6-2.6v5.2c0 1.54 1.667 2.502 3 1.732.619-.357 1-1.017 1-1.732v-5.2l2.6 2.6a1.9 1.9 0 0 0 3-.2 2.1 2.1 0 0 0-.2-2.7z"/>
</symbol> </symbol>
<symbol id="state" viewBox="-10 -10 320 310"> <symbol id="filter" viewBox="-10 -10 320 310">
<path d="M288.953 0h-277c-5.522 0-10 4.478-10 10v49.531c0 5.522 4.478 10 10 10h12.372l91.378 107.397v113.978a10 10 0 0 0 15.547 8.32l49.5-33a10 10 0 0 0 4.453-8.32v-80.978l91.378-107.397h12.372c5.522 0 10-4.478 10-10V10c0-5.522-4.477-10-10-10M167.587 166.77a10 10 0 0 0-2.384 6.48v79.305l-29.5 19.666V173.25a10 10 0 0 0-2.384-6.48L50.585 69.531h199.736zM278.953 49.531h-257V20h257z"/> <path d="M288.953 0h-277c-5.522 0-10 4.478-10 10v49.531c0 5.522 4.478 10 10 10h12.372l91.378 107.397v113.978a10 10 0 0 0 15.547 8.32l49.5-33a10 10 0 0 0 4.453-8.32v-80.978l91.378-107.397h12.372c5.522 0 10-4.478 10-10V10c0-5.522-4.477-10-10-10M167.587 166.77a10 10 0 0 0-2.384 6.48v79.305l-29.5 19.666V173.25a10 10 0 0 0-2.384-6.48L50.585 69.531h199.736zM278.953 49.531h-257V20h257z"/>
</symbol> </symbol>

Before

Width:  |  Height:  |  Size: 4.7 KiB

After

Width:  |  Height:  |  Size: 4.7 KiB

View File

@@ -8,9 +8,9 @@ function actionAll(isCollapse) {
}); });
} }
function groupForState(key) { function groupFilter(key) {
if (key) { if (key) {
location.href = `?state=${key}`; location.href = `?filter=${key}`;
} else { } else {
window.location = window.location.pathname; window.location = window.location.pathname;
} }
@@ -65,34 +65,32 @@ function getParamURL(key) {
return url.searchParams.get(key) return url.searchParams.get(key)
} }
/**
 * Reports whether the lowercased `innerText` of `item` contains `search`.
 * `search` is used as given; it is not lowercased here.
 */
function matchText(search, item) {
  return item.innerText.toLowerCase().indexOf(search) !== -1;
}
function filterRules(searchPhrase) { function filterRules(searchPhrase) {
document.querySelectorAll('.vm-group').forEach((group) => { document.querySelectorAll('.sub-items').forEach((rules) => {
if (!searchPhrase) { let found = false;
group.classList.add('vm-found'); rules.querySelectorAll('.sub-item').forEach((rule) => {
return; if (searchPhrase) {
} const ruleName = rule.innerText.toLowerCase();
for (const item of group.querySelectorAll('.vm-group-search')) { const matches = []
if (matchText(searchPhrase, item)) { const hasValue = ruleName.indexOf(searchPhrase) >= 0;
group.classList.add('vm-found'); rule.querySelectorAll('.label').forEach((label) => {
return; const text = label.innerText.toLowerCase();
if (text.indexOf(searchPhrase) >= 0) {
matches.push(text);
}
});
if (!matches.length && !hasValue) {
rule.classList.add('d-none');
return;
}
} }
} rule.classList.remove('d-none');
group.classList.remove('vm-found'); found = true;
for (const item of group.querySelectorAll('.vm-item')) { });
if (matchText(searchPhrase, item)) { if (found && searchPhrase || !searchPhrase) {
item.classList.add('vm-found'); rules.classList.remove('d-none');
continue; } else {
} rules.classList.add('d-none');
if (Array.from(item.querySelectorAll('.label')).find(l => matchText(searchPhrase, l))) {
item.classList.add('vm-found');
continue;
}
item.classList.remove('vm-found');
} }
}); });
} }

View File

@@ -402,20 +402,6 @@ func templateFuncs() textTpl.FuncMap {
return t, nil return t, nil
}, },
// formatTime formats the given Unix timestamp with the provided layout.
// For example: {{ now | formatTime "2006-01-02T15:04:05Z07:00" }}
"formatTime": func(layout string, i any) (string, error) {
v, err := toFloat64(i)
if err != nil {
return "", fmt.Errorf("formatTime: %w", err)
}
if math.IsNaN(v) || math.IsInf(v, 0) {
return "", fmt.Errorf("formatTime: cannot convert %v to time", v)
}
t := timeFromUnixTimestamp(v).Time().UTC()
return t.Format(layout), nil
},
/* URLs */ /* URLs */
// externalURL returns value of `external.url` flag // externalURL returns value of `external.url` flag
@@ -499,12 +485,6 @@ func templateFuncs() textTpl.FuncMap {
/* Helpers */ /* Helpers */
// now returns the Unix timestamp in seconds at the time of the template evaluation.
// For example: {{ (now | toTime).Sub $activeAt }} will return the duration the alert has been active.
"now": func() float64 {
return float64(time.Now().Unix())
},
// Converts a list of objects to a map with keys arg0, arg1 etc. // Converts a list of objects to a map with keys arg0, arg1 etc.
// This is intended to allow multiple arguments to be passed to templates. // This is intended to allow multiple arguments to be passed to templates.
"args": func(args ...any) map[string]any { "args": func(args ...any) map[string]any {

View File

@@ -6,7 +6,6 @@ import (
"strings" "strings"
"testing" "testing"
textTpl "text/template" textTpl "text/template"
"time"
) )
func TestTemplateFuncs_StringConversion(t *testing.T) { func TestTemplateFuncs_StringConversion(t *testing.T) {
@@ -104,26 +103,6 @@ func TestTemplateFuncs_Formatting(t *testing.T) {
f("humanizeTimestamp", 1679055557, "2023-03-17 12:19:17 +0000 UTC") f("humanizeTimestamp", 1679055557, "2023-03-17 12:19:17 +0000 UTC")
} }
// TestTemplateFuncs_FormatTime exercises the "formatTime" template function
// with several layouts and numeric input types, all for the same Unix
// timestamp, and compares the formatted output with the expected string.
func TestTemplateFuncs_FormatTime(t *testing.T) {
	formatTime := templateFuncs()["formatTime"].(func(layout string, i any) (string, error))

	cases := []struct {
		layout string
		input  any
		want   string
	}{
		{time.RFC3339, float64(1679055557), "2023-03-17T12:19:17Z"},
		{"2006-01-02T15:04:05", int64(1679055557), "2023-03-17T12:19:17"},
		{time.RFC822, int(1679055557), "17 Mar 23 12:19 UTC"},
	}

	for _, tc := range cases {
		got, err := formatTime(tc.layout, tc.input)
		if err != nil {
			t.Fatalf("unexpected error for formatTime(%q, %v): %s", tc.layout, tc.input, err)
		}
		if got != tc.want {
			t.Fatalf("unexpected result for formatTime(%q, %v); got\n%s\nwant\n%s", tc.layout, tc.input, got, tc.want)
		}
	}
}
func mkTemplate(current, replacement any) textTemplate { func mkTemplate(current, replacement any) textTemplate {
tmpl := textTemplate{} tmpl := textTemplate{}
if current != nil { if current != nil {

View File

@@ -20,12 +20,11 @@ func AuthConfig(filterOptions ...AuthConfigOptions) (*promauth.Config, error) {
} }
// WithBasicAuth returns AuthConfigOptions and initialized promauth.BasicAuthConfig based on given params // WithBasicAuth returns AuthConfigOptions and initialized promauth.BasicAuthConfig based on given params
func WithBasicAuth(username, usernameFile, password, passwordFile string) AuthConfigOptions { func WithBasicAuth(username, password, passwordFile string) AuthConfigOptions {
return func(config *promauth.HTTPClientConfig) { return func(config *promauth.HTTPClientConfig) {
if username != "" || usernameFile != "" || password != "" || passwordFile != "" { if username != "" || password != "" || passwordFile != "" {
config.BasicAuth = &promauth.BasicAuthConfig{ config.BasicAuth = &promauth.BasicAuthConfig{
Username: username, Username: username,
UsernameFile: usernameFile,
Password: promauth.NewSecret(password), Password: promauth.NewSecret(password),
PasswordFile: passwordFile, PasswordFile: passwordFile,
} }

View File

@@ -45,7 +45,7 @@ func (eg *ErrGroup) Error() string {
return "" return ""
} }
var b strings.Builder var b strings.Builder
fmt.Fprintf(&b, "errors(%d): \n", len(eg.errs)) fmt.Fprintf(&b, "errors(%d): ", len(eg.errs))
for i, err := range eg.errs { for i, err := range eg.errs {
b.WriteString(err.Error()) b.WriteString(err.Error())
if i != len(eg.errs)-1 { if i != len(eg.errs)-1 {

View File

@@ -30,8 +30,8 @@ func TestErrGroup(t *testing.T) {
} }
f(nil, "") f(nil, "")
f([]error{errors.New("timeout")}, "errors(1): \ntimeout") f([]error{errors.New("timeout")}, "errors(1): timeout")
f([]error{errors.New("timeout"), errors.New("deadline")}, "errors(2): \ntimeout\ndeadline") f([]error{errors.New("timeout"), errors.New("deadline")}, "errors(2): timeout\ndeadline")
} }
// TestErrGroupConcurrent supposed to test concurrent // TestErrGroupConcurrent supposed to test concurrent
@@ -42,7 +42,7 @@ func TestErrGroupConcurrent(_ *testing.T) {
const writersN = 4 const writersN = 4
payload := make(chan error, writersN) payload := make(chan error, writersN)
for range writersN { for i := 0; i < writersN; i++ {
go func() { go func() {
for err := range payload { for err := range payload {
eg.Add(err) eg.Add(err)
@@ -51,7 +51,7 @@ func TestErrGroupConcurrent(_ *testing.T) {
} }
const iterations = 500 const iterations = 500
for i := range iterations { for i := 0; i < iterations; i++ {
payload <- fmt.Errorf("error %d", i) payload <- fmt.Errorf("error %d", i)
if i%10 == 0 { if i%10 == 0 {
_ = eg.Err() _ = eg.Err()

Some files were not shown because too many files have changed in this diff Show More