mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2026-05-31 15:51:52 +03:00
Compare commits
1 Commits
roaring-bi
...
weakpointe
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
e9261be945 |
2
.github/ISSUE_TEMPLATE/question.yml
vendored
2
.github/ISSUE_TEMPLATE/question.yml
vendored
@@ -5,7 +5,7 @@ body:
|
|||||||
- type: textarea
|
- type: textarea
|
||||||
id: describe-the-component
|
id: describe-the-component
|
||||||
attributes:
|
attributes:
|
||||||
label: Is your question related to a specific component?
|
label: Is your question request related to a specific component?
|
||||||
placeholder: |
|
placeholder: |
|
||||||
VictoriaMetrics, vmagent, vmalert, vmui, etc...
|
VictoriaMetrics, vmagent, vmalert, vmui, etc...
|
||||||
validations:
|
validations:
|
||||||
|
|||||||
48
.github/scripts/lint-changelog-tip.sh
vendored
48
.github/scripts/lint-changelog-tip.sh
vendored
@@ -1,48 +0,0 @@
|
|||||||
#!/usr/bin/env sh
|
|
||||||
|
|
||||||
set -e
|
|
||||||
|
|
||||||
CHANGELOG_FILE="docs/victoriametrics/changelog/CHANGELOG.md"
|
|
||||||
|
|
||||||
GITHUB_BASE_REF=${GITHUB_BASE_REF:-"master"}
|
|
||||||
GIT_REMOTE=${GIT_REMOTE:-"origin"}
|
|
||||||
|
|
||||||
git diff "${GIT_REMOTE}/${GITHUB_BASE_REF}"...HEAD -- $CHANGELOG_FILE > diff.txt
|
|
||||||
if ! grep -q "^+" diff.txt; then
|
|
||||||
echo "No additions in CHANGELOG.md"
|
|
||||||
exit 0
|
|
||||||
fi
|
|
||||||
|
|
||||||
ADDED_LINES=$(grep "^+\S" diff.txt | sed 's/^+//')
|
|
||||||
|
|
||||||
START_TIP=$(grep -n "^## tip" "$CHANGELOG_FILE" | head -1 | cut -d: -f1)
|
|
||||||
if [ -z "$START_TIP" ]; then
|
|
||||||
echo "ERROR: ${CHANGELOG_FILE} does not contain a ## tip section"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
END_TIP=$(awk "NR>$START_TIP && /^## / {print NR; exit}" "${CHANGELOG_FILE}")
|
|
||||||
if [ -z "$END_TIP" ]; then
|
|
||||||
END_TIP=$(wc -l < "$CHANGELOG_FILE")
|
|
||||||
fi
|
|
||||||
|
|
||||||
BAD=0
|
|
||||||
while IFS= read -r line; do
|
|
||||||
# Grep exact line inside the file and get line numbers
|
|
||||||
MATCHES=$(grep -n -F "$line" "$CHANGELOG_FILE" | cut -d: -f1)
|
|
||||||
for m in $MATCHES; do
|
|
||||||
if [ "$m" -lt "$START_TIP" ] || [ "$m" -gt "$END_TIP" ]; then
|
|
||||||
echo "'$line' on line ${m} is outside ## tip section (lines ${START_TIP}-${END_TIP})"
|
|
||||||
BAD=1
|
|
||||||
fi
|
|
||||||
done
|
|
||||||
done << EOF
|
|
||||||
$ADDED_LINES
|
|
||||||
EOF
|
|
||||||
|
|
||||||
if [ "$BAD" -ne 0 ]; then
|
|
||||||
echo "CHANGELOG modifications must be placed inside the ## tip section."
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
echo "CHANGELOG modifications are valid."
|
|
||||||
9
.github/workflows/build.yml
vendored
9
.github/workflows/build.yml
vendored
@@ -47,8 +47,6 @@ jobs:
|
|||||||
arch: arm
|
arch: arm
|
||||||
- os: linux
|
- os: linux
|
||||||
arch: ppc64le
|
arch: ppc64le
|
||||||
- os: linux
|
|
||||||
arch: s390x
|
|
||||||
- os: darwin
|
- os: darwin
|
||||||
arch: amd64
|
arch: amd64
|
||||||
- os: darwin
|
- os: darwin
|
||||||
@@ -61,18 +59,17 @@ jobs:
|
|||||||
arch: amd64
|
arch: amd64
|
||||||
steps:
|
steps:
|
||||||
- name: Code checkout
|
- name: Code checkout
|
||||||
uses: actions/checkout@v6
|
uses: actions/checkout@v5
|
||||||
|
|
||||||
- name: Setup Go
|
- name: Setup Go
|
||||||
id: go
|
id: go
|
||||||
uses: actions/setup-go@v6
|
uses: actions/setup-go@v5
|
||||||
with:
|
with:
|
||||||
cache-dependency-path: |
|
cache-dependency-path: |
|
||||||
go.sum
|
go.sum
|
||||||
Makefile
|
Makefile
|
||||||
app/**/Makefile
|
app/**/Makefile
|
||||||
go-version-file: 'go.mod'
|
go-version: stable
|
||||||
- run: go version
|
|
||||||
|
|
||||||
- name: Build victoria-metrics for ${{ matrix.os }}-${{ matrix.arch }}
|
- name: Build victoria-metrics for ${{ matrix.os }}-${{ matrix.arch }}
|
||||||
run: make victoria-metrics-${{ matrix.os }}-${{ matrix.arch }}
|
run: make victoria-metrics-${{ matrix.os }}-${{ matrix.arch }}
|
||||||
|
|||||||
19
.github/workflows/changelog-linter.yml
vendored
19
.github/workflows/changelog-linter.yml
vendored
@@ -1,19 +0,0 @@
|
|||||||
name: 'changelog-linter'
|
|
||||||
|
|
||||||
on:
|
|
||||||
pull_request:
|
|
||||||
paths:
|
|
||||||
- "docs/victoriametrics/changelog/CHANGELOG.md"
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
tip-lint:
|
|
||||||
runs-on: 'ubuntu-latest'
|
|
||||||
steps:
|
|
||||||
- uses: 'actions/checkout@v6'
|
|
||||||
with:
|
|
||||||
# needed for proper diff
|
|
||||||
fetch-depth: 0
|
|
||||||
|
|
||||||
- name: 'Validate that changelog changes are under ## tip'
|
|
||||||
run: |
|
|
||||||
GITHUB_BASE_REF=${{ github.base_ref }} ./.github/scripts/lint-changelog-tip.sh
|
|
||||||
37
.github/workflows/check-commit-signed.yml
vendored
37
.github/workflows/check-commit-signed.yml
vendored
@@ -1,37 +0,0 @@
|
|||||||
name: check-commit-signed
|
|
||||||
|
|
||||||
on:
|
|
||||||
pull_request:
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
check-commit-signed:
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
steps:
|
|
||||||
- name: Checkout code
|
|
||||||
uses: actions/checkout@v6
|
|
||||||
with:
|
|
||||||
fetch-depth: 0 # we need full history for commit verification
|
|
||||||
|
|
||||||
- name: Check commit signatures
|
|
||||||
run: |
|
|
||||||
if [ "${{ github.event_name }}" != "pull_request" ]; then
|
|
||||||
echo "Not a PR event, skipping signature check"
|
|
||||||
exit 0
|
|
||||||
fi
|
|
||||||
|
|
||||||
RANGE="${{ github.event.pull_request.base.sha }}..${{ github.event.pull_request.head.sha }}"
|
|
||||||
echo "Checking commits in PR range: $RANGE"
|
|
||||||
|
|
||||||
if [ -z "$(git rev-list $RANGE)" ]; then
|
|
||||||
echo "No new commits in this PR, skipping signature check"
|
|
||||||
exit 0
|
|
||||||
fi
|
|
||||||
|
|
||||||
unsigned=$(git log --pretty="%H %G?" $RANGE | grep -vE " (G|E)$" || true)
|
|
||||||
if [ -n "$unsigned" ]; then
|
|
||||||
echo "Found unsigned commits:"
|
|
||||||
echo "$unsigned"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
echo "All commits in PR are signed (G or E)"
|
|
||||||
8
.github/workflows/check-licenses.yml
vendored
8
.github/workflows/check-licenses.yml
vendored
@@ -19,13 +19,11 @@ jobs:
|
|||||||
|
|
||||||
- name: Setup Go
|
- name: Setup Go
|
||||||
id: go
|
id: go
|
||||||
uses: actions/setup-go@v6
|
uses: actions/setup-go@v5
|
||||||
with:
|
with:
|
||||||
go-version-file: 'go.mod'
|
go-version: stable
|
||||||
cache: false
|
cache: false
|
||||||
|
|
||||||
- run: go version
|
|
||||||
|
|
||||||
- name: Cache Go artifacts
|
- name: Cache Go artifacts
|
||||||
uses: actions/cache@v4
|
uses: actions/cache@v4
|
||||||
with:
|
with:
|
||||||
@@ -34,7 +32,7 @@ jobs:
|
|||||||
~/go/pkg/mod
|
~/go/pkg/mod
|
||||||
~/go/bin
|
~/go/bin
|
||||||
key: go-artifacts-${{ runner.os }}-check-licenses-${{ steps.go.outputs.go-version }}-${{ hashFiles('go.sum', 'Makefile', 'app/**/Makefile') }}
|
key: go-artifacts-${{ runner.os }}-check-licenses-${{ steps.go.outputs.go-version }}-${{ hashFiles('go.sum', 'Makefile', 'app/**/Makefile') }}
|
||||||
restore-keys: go-artifacts-${{ runner.os }}-check-licenses-${{ steps.go.outputs.go-version }}-
|
restore-keys: go-artifacts-${{ runner.os }}-check-licenses-
|
||||||
|
|
||||||
- name: Check License
|
- name: Check License
|
||||||
run: make check-licenses
|
run: make check-licenses
|
||||||
|
|||||||
15
.github/workflows/codeql-analysis-go.yml
vendored
15
.github/workflows/codeql-analysis-go.yml
vendored
@@ -29,15 +29,14 @@ jobs:
|
|||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout repository
|
- name: Checkout repository
|
||||||
uses: actions/checkout@v6
|
uses: actions/checkout@v5
|
||||||
|
|
||||||
- name: Set up Go
|
- name: Set up Go
|
||||||
id: go
|
id: go
|
||||||
uses: actions/setup-go@v6
|
uses: actions/setup-go@v5
|
||||||
with:
|
with:
|
||||||
cache: false
|
cache: false
|
||||||
go-version-file: 'go.mod'
|
go-version: stable
|
||||||
- run: go version
|
|
||||||
|
|
||||||
- name: Cache Go artifacts
|
- name: Cache Go artifacts
|
||||||
uses: actions/cache@v4
|
uses: actions/cache@v4
|
||||||
@@ -47,17 +46,17 @@ jobs:
|
|||||||
~/go/bin
|
~/go/bin
|
||||||
~/go/pkg/mod
|
~/go/pkg/mod
|
||||||
key: go-artifacts-${{ runner.os }}-codeql-analyze-${{ steps.go.outputs.go-version }}-${{ hashFiles('go.sum', 'Makefile', 'app/**/Makefile') }}
|
key: go-artifacts-${{ runner.os }}-codeql-analyze-${{ steps.go.outputs.go-version }}-${{ hashFiles('go.sum', 'Makefile', 'app/**/Makefile') }}
|
||||||
restore-keys: go-artifacts-${{ runner.os }}-codeql-analyze-${{ steps.go.outputs.go-version }}-
|
restore-keys: go-artifacts-${{ runner.os }}-codeql-analyze-
|
||||||
|
|
||||||
- name: Initialize CodeQL
|
- name: Initialize CodeQL
|
||||||
uses: github/codeql-action/init@v4
|
uses: github/codeql-action/init@v3
|
||||||
with:
|
with:
|
||||||
languages: go
|
languages: go
|
||||||
|
|
||||||
- name: Autobuild
|
- name: Autobuild
|
||||||
uses: github/codeql-action/autobuild@v4
|
uses: github/codeql-action/autobuild@v3
|
||||||
|
|
||||||
- name: Perform CodeQL Analysis
|
- name: Perform CodeQL Analysis
|
||||||
uses: github/codeql-action/analyze@v4
|
uses: github/codeql-action/analyze@v3
|
||||||
with:
|
with:
|
||||||
category: 'language:go'
|
category: 'language:go'
|
||||||
|
|||||||
4
.github/workflows/docs.yaml
vendored
4
.github/workflows/docs.yaml
vendored
@@ -16,12 +16,12 @@ jobs:
|
|||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- name: Code checkout
|
- name: Code checkout
|
||||||
uses: actions/checkout@v6
|
uses: actions/checkout@v5
|
||||||
with:
|
with:
|
||||||
path: __vm
|
path: __vm
|
||||||
|
|
||||||
- name: Checkout private code
|
- name: Checkout private code
|
||||||
uses: actions/checkout@v6
|
uses: actions/checkout@v5
|
||||||
with:
|
with:
|
||||||
repository: VictoriaMetrics/vmdocs
|
repository: VictoriaMetrics/vmdocs
|
||||||
token: ${{ secrets.VM_BOT_GH_TOKEN }}
|
token: ${{ secrets.VM_BOT_GH_TOKEN }}
|
||||||
|
|||||||
35
.github/workflows/test.yml
vendored
35
.github/workflows/test.yml
vendored
@@ -32,19 +32,18 @@ jobs:
|
|||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- name: Code checkout
|
- name: Code checkout
|
||||||
uses: actions/checkout@v6
|
uses: actions/checkout@v5
|
||||||
|
|
||||||
- name: Setup Go
|
- name: Setup Go
|
||||||
id: go
|
id: go
|
||||||
uses: actions/setup-go@v6
|
uses: actions/setup-go@v5
|
||||||
with:
|
with:
|
||||||
cache-dependency-path: |
|
cache-dependency-path: |
|
||||||
go.sum
|
go.sum
|
||||||
Makefile
|
Makefile
|
||||||
app/**/Makefile
|
app/**/Makefile
|
||||||
go-version-file: 'go.mod'
|
go-version: stable
|
||||||
|
|
||||||
- run: go version
|
|
||||||
|
|
||||||
- name: Cache golangci-lint
|
- name: Cache golangci-lint
|
||||||
uses: actions/cache@v4
|
uses: actions/cache@v4
|
||||||
@@ -52,7 +51,7 @@ jobs:
|
|||||||
path: |
|
path: |
|
||||||
~/.cache/golangci-lint
|
~/.cache/golangci-lint
|
||||||
~/go/bin
|
~/go/bin
|
||||||
key: golangci-lint-${{ runner.os }}-${{ steps.go.outputs.go-version }}-${{ hashFiles('.golangci.yml') }}
|
key: golangci-lint-${{ runner.os }}-${{ hashFiles('.golangci.yml') }}
|
||||||
|
|
||||||
- name: Run check-all
|
- name: Run check-all
|
||||||
run: |
|
run: |
|
||||||
@@ -72,45 +71,43 @@ jobs:
|
|||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Code checkout
|
- name: Code checkout
|
||||||
uses: actions/checkout@v6
|
uses: actions/checkout@v5
|
||||||
|
|
||||||
- name: Setup Go
|
- name: Setup Go
|
||||||
id: go
|
id: go
|
||||||
uses: actions/setup-go@v6
|
uses: actions/setup-go@v5
|
||||||
with:
|
with:
|
||||||
cache-dependency-path: |
|
cache-dependency-path: |
|
||||||
go.sum
|
go.sum
|
||||||
Makefile
|
Makefile
|
||||||
app/**/Makefile
|
app/**/Makefile
|
||||||
go-version-file: 'go.mod'
|
go-version: stable
|
||||||
- run: go version
|
|
||||||
|
|
||||||
- name: Run tests
|
- name: Run tests
|
||||||
run: make ${{ matrix.scenario}}
|
run: GOGC=10 make ${{ matrix.scenario}}
|
||||||
|
|
||||||
- name: Publish coverage
|
- name: Publish coverage
|
||||||
uses: codecov/codecov-action@v5
|
uses: codecov/codecov-action@v5
|
||||||
with:
|
with:
|
||||||
files: ./coverage.txt
|
files: ./coverage.txt
|
||||||
|
|
||||||
apptest:
|
integration:
|
||||||
name: apptest
|
name: integration
|
||||||
runs-on: apptest
|
runs-on: ubuntu-latest
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Code checkout
|
- name: Code checkout
|
||||||
uses: actions/checkout@v6
|
uses: actions/checkout@v5
|
||||||
|
|
||||||
- name: Setup Go
|
- name: Setup Go
|
||||||
id: go
|
id: go
|
||||||
uses: actions/setup-go@v6
|
uses: actions/setup-go@v5
|
||||||
with:
|
with:
|
||||||
cache-dependency-path: |
|
cache-dependency-path: |
|
||||||
go.sum
|
go.sum
|
||||||
Makefile
|
Makefile
|
||||||
app/**/Makefile
|
app/**/Makefile
|
||||||
go-version-file: 'go.mod'
|
go-version: stable
|
||||||
- run: go version
|
|
||||||
|
|
||||||
- name: Run app tests
|
- name: Run integration tests
|
||||||
run: make apptest
|
run: make integration-test
|
||||||
|
|||||||
28
.github/workflows/vmui.yml
vendored
28
.github/workflows/vmui.yml
vendored
@@ -32,41 +32,35 @@ jobs:
|
|||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- name: Code checkout
|
- name: Code checkout
|
||||||
uses: actions/checkout@v6
|
uses: actions/checkout@v5
|
||||||
|
|
||||||
- name: Cache node_modules
|
- name: Setup Node
|
||||||
id: cache
|
uses: actions/setup-node@v4
|
||||||
uses: actions/cache@v5
|
|
||||||
with:
|
with:
|
||||||
path: app/vmui/packages/vmui/node_modules
|
node-version: '24.x'
|
||||||
key: vmui-deps-${{ runner.os }}-${{ hashFiles('app/vmui/packages/vmui/package-lock.json', 'app/vmui/Dockerfile-build') }}
|
|
||||||
restore-keys: |
|
|
||||||
vmui-deps-${{ runner.os }}-
|
|
||||||
|
|
||||||
- name: Install dependencies
|
- name: Cache node-modules
|
||||||
if: steps.cache.outputs.cache-hit != 'true'
|
uses: actions/cache@v4
|
||||||
run: make vmui-install
|
with:
|
||||||
|
path: |
|
||||||
|
app/vmui/packages/vmui/node_modules
|
||||||
|
key: vmui-artifacts-${{ runner.os }}-${{ hashFiles('package-lock.json') }}
|
||||||
|
restore-keys: vmui-artifacts-${{ runner.os }}-
|
||||||
|
|
||||||
- name: Run lint
|
- name: Run lint
|
||||||
id: lint
|
id: lint
|
||||||
run: make vmui-lint
|
run: make vmui-lint
|
||||||
continue-on-error: true
|
continue-on-error: true
|
||||||
env:
|
|
||||||
VMUI_SKIP_INSTALL: true
|
|
||||||
|
|
||||||
- name: Run tests
|
- name: Run tests
|
||||||
id: test
|
id: test
|
||||||
run: make vmui-test
|
run: make vmui-test
|
||||||
continue-on-error: true
|
continue-on-error: true
|
||||||
env:
|
|
||||||
VMUI_SKIP_INSTALL: true
|
|
||||||
|
|
||||||
- name: Run typecheck
|
- name: Run typecheck
|
||||||
id: typecheck
|
id: typecheck
|
||||||
run: make vmui-typecheck
|
run: make vmui-typecheck
|
||||||
continue-on-error: true
|
continue-on-error: true
|
||||||
env:
|
|
||||||
VMUI_SKIP_INSTALL: true
|
|
||||||
|
|
||||||
- name: Annotate Code Linting Results
|
- name: Annotate Code Linting Results
|
||||||
uses: ataylorme/eslint-annotate-action@v3
|
uses: ataylorme/eslint-annotate-action@v3
|
||||||
|
|||||||
2
LICENSE
2
LICENSE
@@ -175,7 +175,7 @@
|
|||||||
|
|
||||||
END OF TERMS AND CONDITIONS
|
END OF TERMS AND CONDITIONS
|
||||||
|
|
||||||
Copyright 2019-2026 VictoriaMetrics, Inc.
|
Copyright 2019-2025 VictoriaMetrics, Inc.
|
||||||
|
|
||||||
Licensed under the Apache License, Version 2.0 (the "License");
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
you may not use this file except in compliance with the License.
|
you may not use this file except in compliance with the License.
|
||||||
|
|||||||
69
Makefile
69
Makefile
@@ -17,7 +17,7 @@ EXTRA_GO_BUILD_TAGS ?=
|
|||||||
GO_BUILDINFO = -X '$(PKG_PREFIX)/lib/buildinfo.Version=$(APP_NAME)-$(DATEINFO_TAG)-$(BUILDINFO_TAG)'
|
GO_BUILDINFO = -X '$(PKG_PREFIX)/lib/buildinfo.Version=$(APP_NAME)-$(DATEINFO_TAG)-$(BUILDINFO_TAG)'
|
||||||
TAR_OWNERSHIP ?= --owner=1000 --group=1000
|
TAR_OWNERSHIP ?= --owner=1000 --group=1000
|
||||||
|
|
||||||
GOLANGCI_LINT_VERSION := 2.9.0
|
GOLANGCI_LINT_VERSION := 2.4.0
|
||||||
|
|
||||||
.PHONY: $(MAKECMDGOALS)
|
.PHONY: $(MAKECMDGOALS)
|
||||||
|
|
||||||
@@ -125,15 +125,6 @@ vmutils-linux-ppc64le: \
|
|||||||
vmrestore-linux-ppc64le \
|
vmrestore-linux-ppc64le \
|
||||||
vmctl-linux-ppc64le
|
vmctl-linux-ppc64le
|
||||||
|
|
||||||
vmutils-linux-s390x: \
|
|
||||||
vmagent-linux-s390x \
|
|
||||||
vmalert-linux-s390x \
|
|
||||||
vmalert-tool-linux-s390x \
|
|
||||||
vmauth-linux-s390x \
|
|
||||||
vmbackup-linux-s390x \
|
|
||||||
vmrestore-linux-s390x \
|
|
||||||
vmctl-linux-s390x
|
|
||||||
|
|
||||||
vmutils-darwin-amd64: \
|
vmutils-darwin-amd64: \
|
||||||
vmagent-darwin-amd64 \
|
vmagent-darwin-amd64 \
|
||||||
vmalert-darwin-amd64 \
|
vmalert-darwin-amd64 \
|
||||||
@@ -266,7 +257,6 @@ release-victoria-metrics: \
|
|||||||
release-victoria-metrics-linux-amd64 \
|
release-victoria-metrics-linux-amd64 \
|
||||||
release-victoria-metrics-linux-arm \
|
release-victoria-metrics-linux-arm \
|
||||||
release-victoria-metrics-linux-arm64 \
|
release-victoria-metrics-linux-arm64 \
|
||||||
release-victoria-metrics-linux-s390x \
|
|
||||||
release-victoria-metrics-darwin-amd64 \
|
release-victoria-metrics-darwin-amd64 \
|
||||||
release-victoria-metrics-darwin-arm64 \
|
release-victoria-metrics-darwin-arm64 \
|
||||||
release-victoria-metrics-freebsd-amd64 \
|
release-victoria-metrics-freebsd-amd64 \
|
||||||
@@ -285,9 +275,6 @@ release-victoria-metrics-linux-arm:
|
|||||||
release-victoria-metrics-linux-arm64:
|
release-victoria-metrics-linux-arm64:
|
||||||
GOOS=linux GOARCH=arm64 $(MAKE) release-victoria-metrics-goos-goarch
|
GOOS=linux GOARCH=arm64 $(MAKE) release-victoria-metrics-goos-goarch
|
||||||
|
|
||||||
release-victoria-metrics-linux-s390x:
|
|
||||||
GOOS=linux GOARCH=s390x $(MAKE) release-victoria-metrics-goos-goarch
|
|
||||||
|
|
||||||
release-victoria-metrics-darwin-amd64:
|
release-victoria-metrics-darwin-amd64:
|
||||||
GOOS=darwin GOARCH=amd64 $(MAKE) release-victoria-metrics-goos-goarch
|
GOOS=darwin GOARCH=amd64 $(MAKE) release-victoria-metrics-goos-goarch
|
||||||
|
|
||||||
@@ -327,7 +314,6 @@ release-vmutils: \
|
|||||||
release-vmutils-linux-amd64 \
|
release-vmutils-linux-amd64 \
|
||||||
release-vmutils-linux-arm64 \
|
release-vmutils-linux-arm64 \
|
||||||
release-vmutils-linux-arm \
|
release-vmutils-linux-arm \
|
||||||
release-vmutils-linux-s390x \
|
|
||||||
release-vmutils-darwin-amd64 \
|
release-vmutils-darwin-amd64 \
|
||||||
release-vmutils-darwin-arm64 \
|
release-vmutils-darwin-arm64 \
|
||||||
release-vmutils-freebsd-amd64 \
|
release-vmutils-freebsd-amd64 \
|
||||||
@@ -346,9 +332,6 @@ release-vmutils-linux-arm64:
|
|||||||
release-vmutils-linux-arm:
|
release-vmutils-linux-arm:
|
||||||
GOOS=linux GOARCH=arm $(MAKE) release-vmutils-goos-goarch
|
GOOS=linux GOARCH=arm $(MAKE) release-vmutils-goos-goarch
|
||||||
|
|
||||||
release-vmutils-linux-s390x:
|
|
||||||
GOOS=linux GOARCH=s390x $(MAKE) release-vmutils-goos-goarch
|
|
||||||
|
|
||||||
release-vmutils-darwin-amd64:
|
release-vmutils-darwin-amd64:
|
||||||
GOOS=darwin GOARCH=amd64 $(MAKE) release-vmutils-goos-goarch
|
GOOS=darwin GOARCH=amd64 $(MAKE) release-vmutils-goos-goarch
|
||||||
|
|
||||||
@@ -435,7 +418,7 @@ release-vmutils-windows-goarch: \
|
|||||||
vmctl-windows-$(GOARCH)-prod.exe
|
vmctl-windows-$(GOARCH)-prod.exe
|
||||||
|
|
||||||
pprof-cpu:
|
pprof-cpu:
|
||||||
go tool pprof -trim_path=github.com/VictoriaMetrics/VictoriaMetrics $(PPROF_FILE)
|
go tool pprof -trim_path=github.com/VictoriaMetrics/VictoriaMetrics@ $(PPROF_FILE)
|
||||||
|
|
||||||
fmt:
|
fmt:
|
||||||
gofmt -l -w -s ./lib
|
gofmt -l -w -s ./lib
|
||||||
@@ -443,7 +426,7 @@ fmt:
|
|||||||
gofmt -l -w -s ./apptest
|
gofmt -l -w -s ./apptest
|
||||||
|
|
||||||
vet:
|
vet:
|
||||||
go vet -tags 'synctest' ./lib/...
|
GOEXPERIMENT=synctest go vet ./lib/...
|
||||||
go vet ./app/...
|
go vet ./app/...
|
||||||
go vet ./apptest/...
|
go vet ./apptest/...
|
||||||
|
|
||||||
@@ -452,52 +435,39 @@ check-all: fmt vet golangci-lint govulncheck
|
|||||||
clean-checkers: remove-golangci-lint remove-govulncheck
|
clean-checkers: remove-golangci-lint remove-govulncheck
|
||||||
|
|
||||||
test:
|
test:
|
||||||
go test -tags 'synctest' ./lib/... ./app/...
|
GOEXPERIMENT=synctest go test ./lib/... ./app/...
|
||||||
|
|
||||||
test-race:
|
test-race:
|
||||||
go test -tags 'synctest' -race ./lib/... ./app/...
|
GOEXPERIMENT=synctest go test -race ./lib/... ./app/...
|
||||||
|
|
||||||
test-pure:
|
test-pure:
|
||||||
CGO_ENABLED=0 go test -tags 'synctest' ./lib/... ./app/...
|
GOEXPERIMENT=synctest CGO_ENABLED=0 go test ./lib/... ./app/...
|
||||||
|
|
||||||
test-full:
|
test-full:
|
||||||
go test -tags 'synctest' -coverprofile=coverage.txt -covermode=atomic ./lib/... ./app/...
|
GOEXPERIMENT=synctest go test -coverprofile=coverage.txt -covermode=atomic ./lib/... ./app/...
|
||||||
|
|
||||||
test-full-386:
|
test-full-386:
|
||||||
GOARCH=386 go test -tags 'synctest' -coverprofile=coverage.txt -covermode=atomic ./lib/... ./app/...
|
GOEXPERIMENT=synctest GOARCH=386 go test -coverprofile=coverage.txt -covermode=atomic ./lib/... ./app/...
|
||||||
|
|
||||||
|
integration-test:
|
||||||
|
$(MAKE) apptest
|
||||||
|
|
||||||
apptest:
|
apptest:
|
||||||
$(MAKE) victoria-metrics vmagent vmalert vmauth vmctl vmbackup vmrestore
|
$(MAKE) victoria-metrics vmagent vmalert vmauth vmctl vmbackup vmrestore
|
||||||
go test ./apptest/... -skip="^Test(Cluster|Legacy).*"
|
go test ./apptest/... -skip="^TestCluster.*"
|
||||||
|
|
||||||
apptest-legacy: victoria-metrics vmbackup vmrestore
|
|
||||||
OS=$$(uname | tr '[:upper:]' '[:lower:]'); \
|
|
||||||
ARCH=$$(uname -m | tr '[:upper:]' '[:lower:]' | sed 's/x86_64/amd64/'); \
|
|
||||||
VERSION=v1.132.0; \
|
|
||||||
VMSINGLE=victoria-metrics-$${OS}-$${ARCH}-$${VERSION}.tar.gz; \
|
|
||||||
VMCLUSTER=victoria-metrics-$${OS}-$${ARCH}-$${VERSION}-cluster.tar.gz; \
|
|
||||||
URL=https://github.com/VictoriaMetrics/VictoriaMetrics/releases/download/$${VERSION}; \
|
|
||||||
DIR=/tmp/$${VERSION}; \
|
|
||||||
test -d $${DIR} || (mkdir $${DIR} && \
|
|
||||||
curl --output-dir /tmp -LO $${URL}/$${VMSINGLE} && tar xzf /tmp/$${VMSINGLE} -C $${DIR} && \
|
|
||||||
curl --output-dir /tmp -LO $${URL}/$${VMCLUSTER} && tar xzf /tmp/$${VMCLUSTER} -C $${DIR} \
|
|
||||||
); \
|
|
||||||
VM_LEGACY_VMSINGLE_PATH=$${DIR}/victoria-metrics-prod \
|
|
||||||
VM_LEGACY_VMSTORAGE_PATH=$${DIR}/vmstorage-prod \
|
|
||||||
go test ./apptest/tests -run="^TestLegacySingle.*"
|
|
||||||
|
|
||||||
benchmark:
|
benchmark:
|
||||||
go test -run=NO_TESTS -bench=. ./lib/...
|
GOEXPERIMENT=synctest go test -bench=. ./lib/...
|
||||||
go test -run=NO_TESTS -bench=. ./app/...
|
go test -bench=. ./app/...
|
||||||
|
|
||||||
benchmark-pure:
|
benchmark-pure:
|
||||||
CGO_ENABLED=0 go test -run=NO_TESTS -bench=. ./lib/...
|
GOEXPERIMENT=synctest CGO_ENABLED=0 go test -bench=. ./lib/...
|
||||||
CGO_ENABLED=0 go test -run=NO_TESTS -bench=. ./app/...
|
CGO_ENABLED=0 go test -bench=. ./app/...
|
||||||
|
|
||||||
vendor-update:
|
vendor-update:
|
||||||
go get -u ./lib/...
|
go get -u ./lib/...
|
||||||
go get -u ./app/...
|
go get -u ./app/...
|
||||||
go mod tidy -compat=1.26
|
go mod tidy -compat=1.24
|
||||||
go mod vendor
|
go mod vendor
|
||||||
|
|
||||||
app-local:
|
app-local:
|
||||||
@@ -513,15 +483,14 @@ app-local-windows-goarch:
|
|||||||
CGO_ENABLED=0 GOOS=windows GOARCH=$(GOARCH) go build $(RACE) -ldflags "$(GO_BUILDINFO)" -tags "$(EXTRA_GO_BUILD_TAGS)" -o bin/$(APP_NAME)-windows-$(GOARCH)$(RACE).exe $(PKG_PREFIX)/app/$(APP_NAME)
|
CGO_ENABLED=0 GOOS=windows GOARCH=$(GOARCH) go build $(RACE) -ldflags "$(GO_BUILDINFO)" -tags "$(EXTRA_GO_BUILD_TAGS)" -o bin/$(APP_NAME)-windows-$(GOARCH)$(RACE).exe $(PKG_PREFIX)/app/$(APP_NAME)
|
||||||
|
|
||||||
quicktemplate-gen: install-qtc
|
quicktemplate-gen: install-qtc
|
||||||
qtc -dir=lib
|
qtc
|
||||||
qtc -dir=app
|
|
||||||
|
|
||||||
install-qtc:
|
install-qtc:
|
||||||
which qtc || go install github.com/valyala/quicktemplate/qtc@latest
|
which qtc || go install github.com/valyala/quicktemplate/qtc@latest
|
||||||
|
|
||||||
|
|
||||||
golangci-lint: install-golangci-lint
|
golangci-lint: install-golangci-lint
|
||||||
golangci-lint run --build-tags 'synctest'
|
GOEXPERIMENT=synctest golangci-lint run
|
||||||
|
|
||||||
install-golangci-lint:
|
install-golangci-lint:
|
||||||
which golangci-lint && (golangci-lint --version | grep -q $(GOLANGCI_LINT_VERSION)) || curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(shell go env GOPATH)/bin v$(GOLANGCI_LINT_VERSION)
|
which golangci-lint && (golangci-lint --version | grep -q $(GOLANGCI_LINT_VERSION)) || curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(shell go env GOPATH)/bin v$(GOLANGCI_LINT_VERSION)
|
||||||
|
|||||||
21
README.md
21
README.md
@@ -3,7 +3,7 @@
|
|||||||
[](https://github.com/VictoriaMetrics/VictoriaMetrics/releases)
|
[](https://github.com/VictoriaMetrics/VictoriaMetrics/releases)
|
||||||

|

|
||||||
[](https://goreportcard.com/report/github.com/VictoriaMetrics/VictoriaMetrics)
|
[](https://goreportcard.com/report/github.com/VictoriaMetrics/VictoriaMetrics)
|
||||||
[](https://github.com/VictoriaMetrics/VictoriaMetrics/actions/workflows/build.yml)
|
[](https://github.com/VictoriaMetrics/VictoriaMetrics/actions/workflows/main.yml)
|
||||||
[](https://app.codecov.io/gh/VictoriaMetrics/VictoriaMetrics)
|
[](https://app.codecov.io/gh/VictoriaMetrics/VictoriaMetrics)
|
||||||
[](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/LICENSE)
|
[](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/LICENSE)
|
||||||

|

|
||||||
@@ -16,21 +16,16 @@
|
|||||||
<img src="docs/victoriametrics/logo.webp" width="300" alt="VictoriaMetrics logo">
|
<img src="docs/victoriametrics/logo.webp" width="300" alt="VictoriaMetrics logo">
|
||||||
</picture>
|
</picture>
|
||||||
|
|
||||||
VictoriaMetrics is a fast, cost-effective, and scalable solution for monitoring and managing time series data. It delivers high performance and reliability, making it an ideal choice for businesses of all sizes.
|
VictoriaMetrics is a fast, cost-saving, and scalable solution for monitoring and managing time series data. It delivers high performance and reliability, making it an ideal choice for businesses of all sizes.
|
||||||
|
|
||||||
Here are some resources and information about VictoriaMetrics:
|
Here are some resources and information about VictoriaMetrics:
|
||||||
|
|
||||||
- **Case studies**: [Grammarly, Roblox, Wix, Spotify,...](https://docs.victoriametrics.com/victoriametrics/casestudies/).
|
- Documentation: [docs.victoriametrics.com](https://docs.victoriametrics.com)
|
||||||
- **Available**: [Binary releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest), Docker images on [Docker Hub](https://hub.docker.com/r/victoriametrics/victoria-metrics/) and [Quay](https://quay.io/repository/victoriametrics/victoria-metrics), [Source code](https://github.com/VictoriaMetrics/VictoriaMetrics).
|
- Case studies: [Grammarly, Roblox, Wix,...](https://docs.victoriametrics.com/victoriametrics/casestudies/).
|
||||||
- **Deployment types**: [Single-node version](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) and [Cluster version](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/) under [Apache License 2.0](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/LICENSE).
|
- Available: [Binary releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest), docker images [Docker Hub](https://hub.docker.com/r/victoriametrics/victoria-metrics/) and [Quay](https://quay.io/repository/victoriametrics/victoria-metrics), [Source code](https://github.com/VictoriaMetrics/VictoriaMetrics)
|
||||||
- **Getting started:** Read [key concepts](https://docs.victoriametrics.com/victoriametrics/keyconcepts/) and follow the
|
- Deployment types: [Single-node version](https://docs.victoriametrics.com/), [Cluster version](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/), and [Enterprise version](https://docs.victoriametrics.com/victoriametrics/enterprise/)
|
||||||
[quick start guide](https://docs.victoriametrics.com/victoriametrics/quick-start/).
|
- Changelog: [CHANGELOG](https://docs.victoriametrics.com/victoriametrics/changelog/), and [How to upgrade](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-upgrade-victoriametrics)
|
||||||
- **Community**: [Slack](https://slack.victoriametrics.com/) (join via [Slack Inviter](https://slack.victoriametrics.com/)), [X (Twitter)](https://x.com/VictoriaMetrics), [YouTube](https://www.youtube.com/@VictoriaMetrics). See full list [here](https://docs.victoriametrics.com/victoriametrics/#community-and-contributions).
|
- Community: [Slack](https://slack.victoriametrics.com/), [X (Twitter)](https://x.com/VictoriaMetrics), [LinkedIn](https://www.linkedin.com/company/victoriametrics/), [YouTube](https://www.youtube.com/@VictoriaMetrics)
|
||||||
- **Changelog**: Project evolves fast - check the [CHANGELOG](https://docs.victoriametrics.com/victoriametrics/changelog/), and [How to upgrade](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-upgrade-victoriametrics).
|
|
||||||
- **Enterprise support:** [Contact us](mailto:info@victoriametrics.com) for commercial support with additional [enterprise features](https://docs.victoriametrics.com/victoriametrics/enterprise/).
|
|
||||||
- **Enterprise releases:** Enterprise and [long-term support releases (LTS)](https://docs.victoriametrics.com/victoriametrics/lts-releases/) are publicly available and can be evaluated for free
|
|
||||||
using a [free trial license](https://victoriametrics.com/products/enterprise/trial/).
|
|
||||||
- **Security:** we achieved [security certifications](https://victoriametrics.com/security/) for Database Software Development and Software-Based Monitoring Services.
|
|
||||||
|
|
||||||
Yes, we open-source both the single-node VictoriaMetrics and the cluster version.
|
Yes, we open-source both the single-node VictoriaMetrics and the cluster version.
|
||||||
|
|
||||||
|
|||||||
11
SECURITY.md
11
SECURITY.md
@@ -4,11 +4,12 @@
|
|||||||
|
|
||||||
The following versions of VictoriaMetrics receive regular security fixes:
|
The following versions of VictoriaMetrics receive regular security fixes:
|
||||||
|
|
||||||
| Version | Supported |
|
| Version | Supported |
|
||||||
|--------------------------------------------------------------------------------|--------------------|
|
|---------|--------------------|
|
||||||
| [Latest release](https://docs.victoriametrics.com/victoriametrics/changelog/) | :white_check_mark: |
|
| [latest release](https://docs.victoriametrics.com/victoriametrics/changelog/) | :white_check_mark: |
|
||||||
| [LTS releases](https://docs.victoriametrics.com/victoriametrics/lts-releases/) | :white_check_mark: |
|
| v1.102.x [LTS line](https://docs.victoriametrics.com/victoriametrics/lts-releases/) | :white_check_mark: |
|
||||||
| other releases | :x: |
|
| v1.110.x [LTS line](https://docs.victoriametrics.com/victoriametrics/lts-releases/) | :white_check_mark: |
|
||||||
|
| other releases | :x: |
|
||||||
|
|
||||||
See [this page](https://victoriametrics.com/security/) for more details.
|
See [this page](https://victoriametrics.com/security/) for more details.
|
||||||
|
|
||||||
|
|||||||
@@ -27,9 +27,6 @@ victoria-metrics-linux-ppc64le-prod:
|
|||||||
victoria-metrics-linux-386-prod:
|
victoria-metrics-linux-386-prod:
|
||||||
APP_NAME=victoria-metrics $(MAKE) app-via-docker-linux-386
|
APP_NAME=victoria-metrics $(MAKE) app-via-docker-linux-386
|
||||||
|
|
||||||
victoria-metrics-linux-s390x-prod:
|
|
||||||
APP_NAME=victoria-metrics $(MAKE) app-via-docker-linux-s390x
|
|
||||||
|
|
||||||
victoria-metrics-darwin-amd64-prod:
|
victoria-metrics-darwin-amd64-prod:
|
||||||
APP_NAME=victoria-metrics $(MAKE) app-via-docker-darwin-amd64
|
APP_NAME=victoria-metrics $(MAKE) app-via-docker-darwin-amd64
|
||||||
|
|
||||||
|
|||||||
@@ -134,7 +134,6 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
|||||||
}
|
}
|
||||||
w.Header().Add("Content-Type", "text/html; charset=utf-8")
|
w.Header().Add("Content-Type", "text/html; charset=utf-8")
|
||||||
fmt.Fprintf(w, "<h2>Single-node VictoriaMetrics</h2></br>")
|
fmt.Fprintf(w, "<h2>Single-node VictoriaMetrics</h2></br>")
|
||||||
fmt.Fprintf(w, "Version %s<br>", buildinfo.Version)
|
|
||||||
fmt.Fprintf(w, "See docs at <a href='https://docs.victoriametrics.com/'>https://docs.victoriametrics.com/</a></br>")
|
fmt.Fprintf(w, "See docs at <a href='https://docs.victoriametrics.com/'>https://docs.victoriametrics.com/</a></br>")
|
||||||
fmt.Fprintf(w, "Useful endpoints:</br>")
|
fmt.Fprintf(w, "Useful endpoints:</br>")
|
||||||
httpserver.WriteAPIHelp(w, [][2]string{
|
httpserver.WriteAPIHelp(w, [][2]string{
|
||||||
@@ -170,7 +169,7 @@ func usage() {
|
|||||||
const s = `
|
const s = `
|
||||||
victoria-metrics is a time series database and monitoring solution.
|
victoria-metrics is a time series database and monitoring solution.
|
||||||
|
|
||||||
See the docs at https://docs.victoriametrics.com/victoriametrics/
|
See the docs at https://docs.victoriametrics.com/
|
||||||
`
|
`
|
||||||
flagutil.Usage(s)
|
flagutil.Usage(s)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -10,11 +10,9 @@ import (
|
|||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/decimal"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/decimal"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prommetadata"
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/prometheus"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/prometheus"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage/metricsmetadata"
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timeserieslimits"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timeserieslimits"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -29,9 +27,11 @@ var selfScraperWG sync.WaitGroup
|
|||||||
|
|
||||||
func startSelfScraper() {
|
func startSelfScraper() {
|
||||||
selfScraperStopCh = make(chan struct{})
|
selfScraperStopCh = make(chan struct{})
|
||||||
selfScraperWG.Go(func() {
|
selfScraperWG.Add(1)
|
||||||
|
go func() {
|
||||||
|
defer selfScraperWG.Done()
|
||||||
selfScraper(*selfScrapeInterval)
|
selfScraper(*selfScrapeInterval)
|
||||||
})
|
}()
|
||||||
}
|
}
|
||||||
|
|
||||||
func stopSelfScraper() {
|
func stopSelfScraper() {
|
||||||
@@ -48,7 +48,6 @@ func selfScraper(scrapeInterval time.Duration) {
|
|||||||
|
|
||||||
var bb bytesutil.ByteBuffer
|
var bb bytesutil.ByteBuffer
|
||||||
var rows prometheus.Rows
|
var rows prometheus.Rows
|
||||||
var metadataRows prometheus.MetadataRows
|
|
||||||
var mrs []storage.MetricRow
|
var mrs []storage.MetricRow
|
||||||
var labels []prompb.Label
|
var labels []prompb.Label
|
||||||
t := time.NewTicker(scrapeInterval)
|
t := time.NewTicker(scrapeInterval)
|
||||||
@@ -58,12 +57,8 @@ func selfScraper(scrapeInterval time.Duration) {
|
|||||||
appmetrics.WritePrometheusMetrics(&bb)
|
appmetrics.WritePrometheusMetrics(&bb)
|
||||||
s := bytesutil.ToUnsafeString(bb.B)
|
s := bytesutil.ToUnsafeString(bb.B)
|
||||||
rows.Reset()
|
rows.Reset()
|
||||||
// Parse metrics and optionally metadata when enabled
|
// VictoriaMetrics components don't expose metadata yet, only need to parse samples
|
||||||
if prommetadata.IsEnabled() {
|
rows.UnmarshalWithErrLogger(s, nil)
|
||||||
rows, metadataRows = prometheus.UnmarshalWithMetadata(rows, metadataRows, s, nil)
|
|
||||||
} else {
|
|
||||||
rows.UnmarshalWithErrLogger(s, nil)
|
|
||||||
}
|
|
||||||
mrs = mrs[:0]
|
mrs = mrs[:0]
|
||||||
for i := range rows.Rows {
|
for i := range rows.Rows {
|
||||||
r := &rows.Rows[i]
|
r := &rows.Rows[i]
|
||||||
@@ -96,19 +91,6 @@ func selfScraper(scrapeInterval time.Duration) {
|
|||||||
if err := vmstorage.AddRows(mrs); err != nil {
|
if err := vmstorage.AddRows(mrs); err != nil {
|
||||||
logger.Errorf("cannot store self-scraped metrics: %s", err)
|
logger.Errorf("cannot store self-scraped metrics: %s", err)
|
||||||
}
|
}
|
||||||
if len(metadataRows.Rows) > 0 {
|
|
||||||
mms := make([]metricsmetadata.Row, 0, len(metadataRows.Rows))
|
|
||||||
for _, mm := range metadataRows.Rows {
|
|
||||||
mms = append(mms, metricsmetadata.Row{
|
|
||||||
MetricFamilyName: bytesutil.ToUnsafeBytes(mm.Metric),
|
|
||||||
Help: bytesutil.ToUnsafeBytes(mm.Help),
|
|
||||||
Type: mm.Type,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
if err := vmstorage.AddMetadataRows(mms); err != nil {
|
|
||||||
logger.Errorf("cannot store self-scraped metrics metadata: %s", err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
for {
|
for {
|
||||||
select {
|
select {
|
||||||
|
|||||||
@@ -33,13 +33,13 @@ func PopulateTimeTpl(b []byte, tGlobal time.Time) []byte {
|
|||||||
}
|
}
|
||||||
switch strings.TrimSpace(parts[0]) {
|
switch strings.TrimSpace(parts[0]) {
|
||||||
case `TIME_S`:
|
case `TIME_S`:
|
||||||
return fmt.Appendf(nil, "%d", t.Unix())
|
return []byte(fmt.Sprintf("%d", t.Unix()))
|
||||||
case `TIME_MSZ`:
|
case `TIME_MSZ`:
|
||||||
return fmt.Appendf(nil, "%d", t.Unix()*1e3)
|
return []byte(fmt.Sprintf("%d", t.Unix()*1e3))
|
||||||
case `TIME_MS`:
|
case `TIME_MS`:
|
||||||
return fmt.Appendf(nil, "%d", timeToMillis(t))
|
return []byte(fmt.Sprintf("%d", timeToMillis(t)))
|
||||||
case `TIME_NS`:
|
case `TIME_NS`:
|
||||||
return fmt.Appendf(nil, "%d", t.UnixNano())
|
return []byte(fmt.Sprintf("%d", t.UnixNano()))
|
||||||
default:
|
default:
|
||||||
log.Fatalf("unknown time pattern %s in %s", parts[0], repl)
|
log.Fatalf("unknown time pattern %s in %s", parts[0], repl)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -27,9 +27,6 @@ vmagent-linux-ppc64le-prod:
|
|||||||
vmagent-linux-386-prod:
|
vmagent-linux-386-prod:
|
||||||
APP_NAME=vmagent $(MAKE) app-via-docker-linux-386
|
APP_NAME=vmagent $(MAKE) app-via-docker-linux-386
|
||||||
|
|
||||||
vmagent-linux-s390x-prod:
|
|
||||||
APP_NAME=vmagent $(MAKE) app-via-docker-linux-s390x
|
|
||||||
|
|
||||||
vmagent-darwin-amd64-prod:
|
vmagent-darwin-amd64-prod:
|
||||||
APP_NAME=vmagent $(MAKE) app-via-docker-darwin-amd64
|
APP_NAME=vmagent $(MAKE) app-via-docker-darwin-amd64
|
||||||
|
|
||||||
|
|||||||
@@ -27,7 +27,6 @@ import (
|
|||||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/promremotewrite"
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/promremotewrite"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/vmimport"
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/vmimport"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/zabbixconnector"
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||||
@@ -75,7 +74,7 @@ var (
|
|||||||
"See also -opentsdbHTTPListenAddr.useProxyProtocol")
|
"See also -opentsdbHTTPListenAddr.useProxyProtocol")
|
||||||
opentsdbHTTPUseProxyProtocol = flag.Bool("opentsdbHTTPListenAddr.useProxyProtocol", false, "Whether to use proxy protocol for connections accepted "+
|
opentsdbHTTPUseProxyProtocol = flag.Bool("opentsdbHTTPListenAddr.useProxyProtocol", false, "Whether to use proxy protocol for connections accepted "+
|
||||||
"at -opentsdbHTTPListenAddr . See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt")
|
"at -opentsdbHTTPListenAddr . See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt")
|
||||||
configAuthKey = flagutil.NewPassword("configAuthKey", "Authorization key for accessing /config and /remotewrite-.*-config pages. It must be passed via authKey query arg. It overrides -httpAuth.*")
|
configAuthKey = flagutil.NewPassword("configAuthKey", "Authorization key for accessing /config page. It must be passed via authKey query arg. It overrides -httpAuth.*")
|
||||||
reloadAuthKey = flagutil.NewPassword("reloadAuthKey", "Auth key for /-/reload http endpoint. It must be passed via authKey query arg. It overrides -httpAuth.*")
|
reloadAuthKey = flagutil.NewPassword("reloadAuthKey", "Auth key for /-/reload http endpoint. It must be passed via authKey query arg. It overrides -httpAuth.*")
|
||||||
dryRun = flag.Bool("dryRun", false, "Whether to check config files without running vmagent. The following files are checked: "+
|
dryRun = flag.Bool("dryRun", false, "Whether to check config files without running vmagent. The following files are checked: "+
|
||||||
"-promscrape.config, -remoteWrite.relabelConfig, -remoteWrite.urlRelabelConfig, -remoteWrite.streamAggr.config . "+
|
"-promscrape.config, -remoteWrite.relabelConfig, -remoteWrite.urlRelabelConfig, -remoteWrite.streamAggr.config . "+
|
||||||
@@ -245,7 +244,6 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
|||||||
}
|
}
|
||||||
w.Header().Add("Content-Type", "text/html; charset=utf-8")
|
w.Header().Add("Content-Type", "text/html; charset=utf-8")
|
||||||
fmt.Fprintf(w, "<h2>vmagent</h2>")
|
fmt.Fprintf(w, "<h2>vmagent</h2>")
|
||||||
fmt.Fprintf(w, "Version %s<br>", buildinfo.Version)
|
|
||||||
fmt.Fprintf(w, "See docs at <a href='https://docs.victoriametrics.com/victoriametrics/vmagent/'>https://docs.victoriametrics.com/victoriametrics/vmagent/</a></br>")
|
fmt.Fprintf(w, "See docs at <a href='https://docs.victoriametrics.com/victoriametrics/vmagent/'>https://docs.victoriametrics.com/victoriametrics/vmagent/</a></br>")
|
||||||
fmt.Fprintf(w, "Useful endpoints:</br>")
|
fmt.Fprintf(w, "Useful endpoints:</br>")
|
||||||
httpserver.WriteAPIHelp(w, [][2]string{
|
httpserver.WriteAPIHelp(w, [][2]string{
|
||||||
@@ -254,8 +252,6 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
|||||||
{"metric-relabel-debug", "debug metric relabeling"},
|
{"metric-relabel-debug", "debug metric relabeling"},
|
||||||
{"api/v1/targets", "advanced information about discovered targets in JSON format"},
|
{"api/v1/targets", "advanced information about discovered targets in JSON format"},
|
||||||
{"config", "-promscrape.config contents"},
|
{"config", "-promscrape.config contents"},
|
||||||
{"remotewrite-relabel-config", "-remoteWrite.relabelConfig contents"},
|
|
||||||
{"remotewrite-url-relabel-config", "-remoteWrite.urlRelabelConfig contents"},
|
|
||||||
{"metrics", "available service metrics"},
|
{"metrics", "available service metrics"},
|
||||||
{"flags", "command-line flags"},
|
{"flags", "command-line flags"},
|
||||||
{"-/reload", "reload configuration"},
|
{"-/reload", "reload configuration"},
|
||||||
@@ -352,17 +348,6 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
|||||||
}
|
}
|
||||||
firehose.WriteSuccessResponse(w, r)
|
firehose.WriteSuccessResponse(w, r)
|
||||||
return true
|
return true
|
||||||
case "/zabbixconnector/api/v1/history":
|
|
||||||
zabbixconnectorHistoryRequests.Inc()
|
|
||||||
if err := zabbixconnector.InsertHandlerForHTTP(nil, r); err != nil {
|
|
||||||
zabbixconnectorHistoryErrors.Inc()
|
|
||||||
w.Header().Set("Content-Type", "application/json")
|
|
||||||
w.WriteHeader(http.StatusBadRequest)
|
|
||||||
fmt.Fprintf(w, `{"error":%q}`, err.Error())
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
w.WriteHeader(http.StatusOK)
|
|
||||||
return true
|
|
||||||
case "/newrelic":
|
case "/newrelic":
|
||||||
newrelicCheckRequest.Inc()
|
newrelicCheckRequest.Inc()
|
||||||
w.Header().Set("Content-Type", "application/json")
|
w.Header().Set("Content-Type", "application/json")
|
||||||
@@ -492,42 +477,6 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
|||||||
promscrape.WriteConfigData(&bb)
|
promscrape.WriteConfigData(&bb)
|
||||||
fmt.Fprintf(w, `{"status":"success","data":{"yaml":%s}}`, stringsutil.JSONString(string(bb.B)))
|
fmt.Fprintf(w, `{"status":"success","data":{"yaml":%s}}`, stringsutil.JSONString(string(bb.B)))
|
||||||
return true
|
return true
|
||||||
case "/remotewrite-relabel-config":
|
|
||||||
if !httpserver.CheckAuthFlag(w, r, configAuthKey) {
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
remoteWriteRelabelConfigRequests.Inc()
|
|
||||||
w.Header().Set("Content-Type", "text/plain; charset=utf-8")
|
|
||||||
remotewrite.WriteRelabelConfigData(w)
|
|
||||||
return true
|
|
||||||
case "/api/v1/status/remotewrite-relabel-config":
|
|
||||||
if !httpserver.CheckAuthFlag(w, r, configAuthKey) {
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
remoteWriteStatusRelabelConfigRequests.Inc()
|
|
||||||
w.Header().Set("Content-Type", "application/json")
|
|
||||||
var bb bytesutil.ByteBuffer
|
|
||||||
remotewrite.WriteRelabelConfigData(&bb)
|
|
||||||
fmt.Fprintf(w, `{"status":"success","data":{"yaml":%s}}`, stringsutil.JSONString(string(bb.B)))
|
|
||||||
return true
|
|
||||||
case "/remotewrite-url-relabel-config":
|
|
||||||
if !httpserver.CheckAuthFlag(w, r, configAuthKey) {
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
remoteWriteURLRelabelConfigRequests.Inc()
|
|
||||||
w.Header().Set("Content-Type", "text/plain; charset=utf-8")
|
|
||||||
remotewrite.WriteURLRelabelConfigData(w)
|
|
||||||
return true
|
|
||||||
case "/api/v1/status/remotewrite-url-relabel-config":
|
|
||||||
if !httpserver.CheckAuthFlag(w, r, configAuthKey) {
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
remoteWriteStatusURLRelabelConfigRequests.Inc()
|
|
||||||
w.Header().Set("Content-Type", "application/json")
|
|
||||||
var bb bytesutil.ByteBuffer
|
|
||||||
remotewrite.WriteURLRelabelConfigData(&bb)
|
|
||||||
fmt.Fprintf(w, `{"status":"success","data":{"yaml":%s}}`, stringsutil.JSONString(string(bb.B)))
|
|
||||||
return true
|
|
||||||
case "/prometheus/-/reload", "/-/reload":
|
case "/prometheus/-/reload", "/-/reload":
|
||||||
if !httpserver.CheckAuthFlag(w, r, reloadAuthKey) {
|
if !httpserver.CheckAuthFlag(w, r, reloadAuthKey) {
|
||||||
return true
|
return true
|
||||||
@@ -657,17 +606,6 @@ func processMultitenantRequest(w http.ResponseWriter, r *http.Request, path stri
|
|||||||
}
|
}
|
||||||
firehose.WriteSuccessResponse(w, r)
|
firehose.WriteSuccessResponse(w, r)
|
||||||
return true
|
return true
|
||||||
case "zabbixconnector/api/v1/history":
|
|
||||||
zabbixconnectorHistoryRequests.Inc()
|
|
||||||
if err := zabbixconnector.InsertHandlerForHTTP(at, r); err != nil {
|
|
||||||
zabbixconnectorHistoryErrors.Inc()
|
|
||||||
w.Header().Set("Content-Type", "application/json")
|
|
||||||
w.WriteHeader(http.StatusBadRequest)
|
|
||||||
fmt.Fprintf(w, `{"error":%q}`, err.Error())
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
w.WriteHeader(http.StatusOK)
|
|
||||||
return true
|
|
||||||
case "newrelic":
|
case "newrelic":
|
||||||
newrelicCheckRequest.Inc()
|
newrelicCheckRequest.Inc()
|
||||||
w.Header().Set("Content-Type", "application/json")
|
w.Header().Set("Content-Type", "application/json")
|
||||||
@@ -789,9 +727,6 @@ var (
|
|||||||
opentelemetryPushRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/opentelemetry/v1/metrics", protocol="opentelemetry"}`)
|
opentelemetryPushRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/opentelemetry/v1/metrics", protocol="opentelemetry"}`)
|
||||||
opentelemetryPushErrors = metrics.NewCounter(`vmagent_http_request_errors_total{path="/opentelemetry/v1/metrics", protocol="opentelemetry"}`)
|
opentelemetryPushErrors = metrics.NewCounter(`vmagent_http_request_errors_total{path="/opentelemetry/v1/metrics", protocol="opentelemetry"}`)
|
||||||
|
|
||||||
zabbixconnectorHistoryRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/zabbixconnector/api/v1/history", protocol="zabbixconnector"}`)
|
|
||||||
zabbixconnectorHistoryErrors = metrics.NewCounter(`vmagent_http_request_errors_total{path="/zabbixconnector/api/v1/history", protocol="zabbixconnector"}`)
|
|
||||||
|
|
||||||
newrelicWriteRequests = metrics.NewCounter(`vm_http_requests_total{path="/newrelic/infra/v2/metrics/events/bulk", protocol="newrelic"}`)
|
newrelicWriteRequests = metrics.NewCounter(`vm_http_requests_total{path="/newrelic/infra/v2/metrics/events/bulk", protocol="newrelic"}`)
|
||||||
newrelicWriteErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/newrelic/infra/v2/metrics/events/bulk", protocol="newrelic"}`)
|
newrelicWriteErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/newrelic/infra/v2/metrics/events/bulk", protocol="newrelic"}`)
|
||||||
|
|
||||||
@@ -812,12 +747,6 @@ var (
|
|||||||
promscrapeConfigRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/config"}`)
|
promscrapeConfigRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/config"}`)
|
||||||
promscrapeStatusConfigRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/api/v1/status/config"}`)
|
promscrapeStatusConfigRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/api/v1/status/config"}`)
|
||||||
|
|
||||||
remoteWriteRelabelConfigRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/remotewrite-relabel-config"}`)
|
|
||||||
remoteWriteStatusRelabelConfigRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/api/v1/status/remotewrite-relabel-config"}`)
|
|
||||||
|
|
||||||
remoteWriteURLRelabelConfigRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/remotewrite-url-relabel-config"}`)
|
|
||||||
remoteWriteStatusURLRelabelConfigRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/api/v1/status/remotewrite-url-relabel-config"}`)
|
|
||||||
|
|
||||||
promscrapeConfigReloadRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/-/reload"}`)
|
promscrapeConfigReloadRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/-/reload"}`)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@@ -78,7 +78,7 @@ func insertRows(at *auth.Token, rows []newrelic.Row, extraLabels []prompb.Label)
|
|||||||
if !remotewrite.TryPush(at, &ctx.WriteRequest) {
|
if !remotewrite.TryPush(at, &ctx.WriteRequest) {
|
||||||
return remotewrite.ErrQueueFullHTTPRetry
|
return remotewrite.ErrQueueFullHTTPRetry
|
||||||
}
|
}
|
||||||
rowsInserted.Add(samplesCount)
|
rowsInserted.Add(len(rows))
|
||||||
if at != nil {
|
if at != nil {
|
||||||
rowsTenantInserted.Get(at).Add(samplesCount)
|
rowsTenantInserted.Get(at).Add(samplesCount)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -2,14 +2,13 @@ package opentelemetry
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
|
||||||
"net/http"
|
"net/http"
|
||||||
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prommetadata"
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||||
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/opentelemetry/firehose"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/opentelemetry/firehose"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/opentelemetry/stream"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/opentelemetry/stream"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/protoparserutil"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/protoparserutil"
|
||||||
@@ -25,13 +24,6 @@ var (
|
|||||||
rowsPerInsert = metrics.NewHistogram(`vmagent_rows_per_insert{type="opentelemetry"}`)
|
rowsPerInsert = metrics.NewHistogram(`vmagent_rows_per_insert{type="opentelemetry"}`)
|
||||||
)
|
)
|
||||||
|
|
||||||
// InsertHandlerForReader processes metrics from given reader.
|
|
||||||
func InsertHandlerForReader(at *auth.Token, r io.Reader, encoding string) error {
|
|
||||||
return stream.ParseStream(r, encoding, nil, func(tss []prompb.TimeSeries, mms []prompb.MetricMetadata) error {
|
|
||||||
return insertRows(at, tss, mms, nil)
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
// InsertHandler processes opentelemetry metrics.
|
// InsertHandler processes opentelemetry metrics.
|
||||||
func InsertHandler(at *auth.Token, req *http.Request) error {
|
func InsertHandler(at *auth.Token, req *http.Request) error {
|
||||||
extraLabels, err := protoparserutil.GetExtraLabels(req)
|
extraLabels, err := protoparserutil.GetExtraLabels(req)
|
||||||
@@ -76,7 +68,7 @@ func insertRows(at *auth.Token, tss []prompb.TimeSeries, mms []prompb.MetricMeta
|
|||||||
ctx.WriteRequest.Timeseries = tssDst
|
ctx.WriteRequest.Timeseries = tssDst
|
||||||
|
|
||||||
var metadataTotal int
|
var metadataTotal int
|
||||||
if prommetadata.IsEnabled() {
|
if promscrape.IsMetadataEnabled() {
|
||||||
var accountID, projectID uint32
|
var accountID, projectID uint32
|
||||||
if at != nil {
|
if at != nil {
|
||||||
accountID = at.AccountID
|
accountID = at.AccountID
|
||||||
|
|||||||
@@ -7,8 +7,8 @@ import (
|
|||||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prommetadata"
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||||
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/prometheus"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/prometheus"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/prometheus/stream"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/prometheus/stream"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/protoparserutil"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/protoparserutil"
|
||||||
@@ -36,7 +36,7 @@ func InsertHandler(at *auth.Token, req *http.Request) error {
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
encoding := req.Header.Get("Content-Encoding")
|
encoding := req.Header.Get("Content-Encoding")
|
||||||
return stream.Parse(req.Body, defaultTimestamp, encoding, true, prommetadata.IsEnabled(), func(rows []prometheus.Row, mms []prometheus.Metadata) error {
|
return stream.Parse(req.Body, defaultTimestamp, encoding, true, promscrape.IsMetadataEnabled(), func(rows []prometheus.Row, mms []prometheus.Metadata) error {
|
||||||
return insertRows(at, rows, mms, extraLabels)
|
return insertRows(at, rows, mms, extraLabels)
|
||||||
}, func(s string) {
|
}, func(s string) {
|
||||||
httpserver.LogError(req, s)
|
httpserver.LogError(req, s)
|
||||||
|
|||||||
@@ -6,8 +6,8 @@ import (
|
|||||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prommetadata"
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||||
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/promremotewrite/stream"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/promremotewrite/stream"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/protoparserutil"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/protoparserutil"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
|
||||||
@@ -71,7 +71,7 @@ func insertRows(at *auth.Token, timeseries []prompb.TimeSeries, mms []prompb.Met
|
|||||||
ctx.WriteRequest.Timeseries = tssDst
|
ctx.WriteRequest.Timeseries = tssDst
|
||||||
|
|
||||||
var metadataTotal int
|
var metadataTotal int
|
||||||
if prommetadata.IsEnabled() {
|
if promscrape.IsMetadataEnabled() {
|
||||||
var accountID, projectID uint32
|
var accountID, projectID uint32
|
||||||
if at != nil {
|
if at != nil {
|
||||||
accountID = at.AccountID
|
accountID = at.AccountID
|
||||||
|
|||||||
@@ -15,6 +15,7 @@ import (
|
|||||||
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/awsapi"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/awsapi"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
|
||||||
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding/zstd"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httputil"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httputil"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||||
@@ -202,10 +203,14 @@ func (c *client) init(argIdx, concurrency int, sanitizedURL string) {
|
|||||||
c.retriesCount = metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_retries_count_total{url=%q}`, c.sanitizedURL))
|
c.retriesCount = metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_retries_count_total{url=%q}`, c.sanitizedURL))
|
||||||
c.sendDuration = metrics.GetOrCreateFloatCounter(fmt.Sprintf(`vmagent_remotewrite_send_duration_seconds_total{url=%q}`, c.sanitizedURL))
|
c.sendDuration = metrics.GetOrCreateFloatCounter(fmt.Sprintf(`vmagent_remotewrite_send_duration_seconds_total{url=%q}`, c.sanitizedURL))
|
||||||
metrics.GetOrCreateGauge(fmt.Sprintf(`vmagent_remotewrite_queues{url=%q}`, c.sanitizedURL), func() float64 {
|
metrics.GetOrCreateGauge(fmt.Sprintf(`vmagent_remotewrite_queues{url=%q}`, c.sanitizedURL), func() float64 {
|
||||||
return float64(concurrency)
|
return float64(*queues)
|
||||||
})
|
})
|
||||||
for range concurrency {
|
for i := 0; i < concurrency; i++ {
|
||||||
c.wg.Go(c.runWorker)
|
c.wg.Add(1)
|
||||||
|
go func() {
|
||||||
|
defer c.wg.Done()
|
||||||
|
c.runWorker()
|
||||||
|
}()
|
||||||
}
|
}
|
||||||
logger.Infof("initialized client for -remoteWrite.url=%q", c.sanitizedURL)
|
logger.Infof("initialized client for -remoteWrite.url=%q", c.sanitizedURL)
|
||||||
}
|
}
|
||||||
@@ -458,6 +463,12 @@ again:
|
|||||||
// - Real-world implementations of v1 use both 400 and 415 status codes.
|
// - Real-world implementations of v1 use both 400 and 415 status codes.
|
||||||
// See more in research: https://github.com/VictoriaMetrics/VictoriaMetrics/pull/8462#issuecomment-2786918054
|
// See more in research: https://github.com/VictoriaMetrics/VictoriaMetrics/pull/8462#issuecomment-2786918054
|
||||||
case 415, 400:
|
case 415, 400:
|
||||||
|
if c.canDowngradeVMProto.Swap(false) {
|
||||||
|
logger.Infof("received unsupported media type or bad request from remote storage at %q. Downgrading protocol from VictoriaMetrics to Prometheus remote write for all future requests. "+
|
||||||
|
"See https://docs.victoriametrics.com/victoriametrics/vmagent/#victoriametrics-remote-write-protocol", c.sanitizedURL)
|
||||||
|
c.useVMProto.Store(false)
|
||||||
|
}
|
||||||
|
|
||||||
if encoding.IsZstd(block) {
|
if encoding.IsZstd(block) {
|
||||||
logger.Infof("received unsupported media type or bad request from remote storage at %q. Re-packing the block to Prometheus remote write and retrying."+
|
logger.Infof("received unsupported media type or bad request from remote storage at %q. Re-packing the block to Prometheus remote write and retrying."+
|
||||||
"See https://docs.victoriametrics.com/victoriametrics/vmagent/#victoriametrics-remote-write-protocol", c.sanitizedURL)
|
"See https://docs.victoriametrics.com/victoriametrics/vmagent/#victoriametrics-remote-write-protocol", c.sanitizedURL)
|
||||||
@@ -549,9 +560,9 @@ func getRetryDuration(retryAfterDuration, retryDuration, maxRetryDuration time.D
|
|||||||
// For more details, see: https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9417
|
// For more details, see: https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9417
|
||||||
func repackBlockFromZstdToSnappy(zstdBlock []byte) ([]byte, error) {
|
func repackBlockFromZstdToSnappy(zstdBlock []byte) ([]byte, error) {
|
||||||
plainBlock := make([]byte, 0, len(zstdBlock)*2)
|
plainBlock := make([]byte, 0, len(zstdBlock)*2)
|
||||||
plainBlock, err := encoding.DecompressZSTD(plainBlock, zstdBlock)
|
plainBlock, err := zstd.Decompress(plainBlock, zstdBlock)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, fmt.Errorf("zstd: decompress: %s", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
return snappy.Encode(nil, plainBlock), nil
|
return snappy.Encode(nil, plainBlock), nil
|
||||||
|
|||||||
@@ -18,7 +18,7 @@ func TestCalculateRetryDuration(t *testing.T) {
|
|||||||
f := func(retryAfterDuration, retryDuration time.Duration, n int, expectMinDuration time.Duration) {
|
f := func(retryAfterDuration, retryDuration time.Duration, n int, expectMinDuration time.Duration) {
|
||||||
t.Helper()
|
t.Helper()
|
||||||
|
|
||||||
for range n {
|
for i := 0; i < n; i++ {
|
||||||
retryDuration = getRetryDuration(retryAfterDuration, retryDuration, time.Minute)
|
retryDuration = getRetryDuration(retryAfterDuration, retryDuration, time.Minute)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -93,7 +93,10 @@ func TestParseRetryAfterHeader(t *testing.T) {
|
|||||||
|
|
||||||
// helper calculate the max possible time duration calculated by timeutil.AddJitterToDuration.
|
// helper calculate the max possible time duration calculated by timeutil.AddJitterToDuration.
|
||||||
func helper(d time.Duration) time.Duration {
|
func helper(d time.Duration) time.Duration {
|
||||||
dv := min(d/10, 10*time.Second)
|
dv := d / 10
|
||||||
|
if dv > 10*time.Second {
|
||||||
|
dv = 10 * time.Second
|
||||||
|
}
|
||||||
|
|
||||||
return d + dv
|
return d + dv
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -48,7 +48,11 @@ func newPendingSeries(fq *persistentqueue.FastQueue, isVMRemoteWrite *atomic.Boo
|
|||||||
ps.wr.significantFigures = significantFigures
|
ps.wr.significantFigures = significantFigures
|
||||||
ps.wr.roundDigits = roundDigits
|
ps.wr.roundDigits = roundDigits
|
||||||
ps.stopCh = make(chan struct{})
|
ps.stopCh = make(chan struct{})
|
||||||
ps.periodicFlusherWG.Go(ps.periodicFlusher)
|
ps.periodicFlusherWG.Add(1)
|
||||||
|
go func() {
|
||||||
|
defer ps.periodicFlusherWG.Done()
|
||||||
|
ps.periodicFlusher()
|
||||||
|
}()
|
||||||
return &ps
|
return &ps
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -51,9 +51,9 @@ func testPushWriteRequest(t *testing.T, rowsCount, expectedBlockLenProm, expecte
|
|||||||
|
|
||||||
func newTestWriteRequest(seriesCount, labelsCount int) *prompb.WriteRequest {
|
func newTestWriteRequest(seriesCount, labelsCount int) *prompb.WriteRequest {
|
||||||
var wr prompb.WriteRequest
|
var wr prompb.WriteRequest
|
||||||
for i := range seriesCount {
|
for i := 0; i < seriesCount; i++ {
|
||||||
var labels []prompb.Label
|
var labels []prompb.Label
|
||||||
for j := range labelsCount {
|
for j := 0; j < labelsCount; j++ {
|
||||||
labels = append(labels, prompb.Label{
|
labels = append(labels, prompb.Label{
|
||||||
Name: fmt.Sprintf("label_%d_%d", i, j),
|
Name: fmt.Sprintf("label_%d_%d", i, j),
|
||||||
Value: fmt.Sprintf("value_%d_%d", i, j),
|
Value: fmt.Sprintf("value_%d_%d", i, j),
|
||||||
|
|||||||
@@ -3,24 +3,22 @@ package remotewrite
|
|||||||
import (
|
import (
|
||||||
"flag"
|
"flag"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
"sync/atomic"
|
|
||||||
|
|
||||||
"github.com/VictoriaMetrics/metrics"
|
|
||||||
"gopkg.in/yaml.v2"
|
|
||||||
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
|
||||||
|
|
||||||
|
"github.com/VictoriaMetrics/metrics"
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
unparsedLabelsGlobal = flagutil.NewArrayString("remoteWrite.label", "Optional label in the form 'name=value' to add to all the metrics before sending them to all -remoteWrite.url.")
|
unparsedLabelsGlobal = flagutil.NewArrayString("remoteWrite.label", "Optional label in the form 'name=value' to add to all the metrics before sending them to -remoteWrite.url. "+
|
||||||
|
"Pass multiple -remoteWrite.label flags in order to add multiple labels to metrics before sending them to remote storage")
|
||||||
relabelConfigPathGlobal = flag.String("remoteWrite.relabelConfig", "", "Optional path to file with relabeling configs, which are applied "+
|
relabelConfigPathGlobal = flag.String("remoteWrite.relabelConfig", "", "Optional path to file with relabeling configs, which are applied "+
|
||||||
"to all the metrics before sending them to -remoteWrite.url. See also -remoteWrite.urlRelabelConfig. "+
|
"to all the metrics before sending them to -remoteWrite.url. See also -remoteWrite.urlRelabelConfig. "+
|
||||||
"The path can point either to local file or to http url. "+
|
"The path can point either to local file or to http url. "+
|
||||||
@@ -34,12 +32,9 @@ var (
|
|||||||
"See https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels")
|
"See https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels")
|
||||||
)
|
)
|
||||||
|
|
||||||
|
var labelsGlobal []prompb.Label
|
||||||
|
|
||||||
var (
|
var (
|
||||||
labelsGlobal []prompb.Label
|
|
||||||
|
|
||||||
remoteWriteRelabelConfigData atomic.Pointer[[]byte]
|
|
||||||
remoteWriteURLRelabelConfigData atomic.Pointer[[]any]
|
|
||||||
|
|
||||||
relabelConfigReloads *metrics.Counter
|
relabelConfigReloads *metrics.Counter
|
||||||
relabelConfigReloadErrors *metrics.Counter
|
relabelConfigReloadErrors *metrics.Counter
|
||||||
relabelConfigSuccess *metrics.Gauge
|
relabelConfigSuccess *metrics.Gauge
|
||||||
@@ -72,42 +67,6 @@ func initRelabelConfigs() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// WriteRelabelConfigData writes -remoteWrite.relabelConfig contents to w
|
|
||||||
func WriteRelabelConfigData(w io.Writer) {
|
|
||||||
p := remoteWriteRelabelConfigData.Load()
|
|
||||||
if p == nil {
|
|
||||||
// Nothing to write to w
|
|
||||||
return
|
|
||||||
}
|
|
||||||
_, _ = w.Write(*p)
|
|
||||||
}
|
|
||||||
|
|
||||||
// WriteURLRelabelConfigData writes -remoteWrite.urlRelabelConfig contents to w
|
|
||||||
func WriteURLRelabelConfigData(w io.Writer) {
|
|
||||||
p := remoteWriteURLRelabelConfigData.Load()
|
|
||||||
if p == nil {
|
|
||||||
// Nothing to write to w
|
|
||||||
return
|
|
||||||
}
|
|
||||||
type urlRelabelCfg struct {
|
|
||||||
Url string `yaml:"url"`
|
|
||||||
RelabelConfig any `yaml:"relabel_config"`
|
|
||||||
}
|
|
||||||
var cs []urlRelabelCfg
|
|
||||||
for i, url := range *remoteWriteURLs {
|
|
||||||
cfgData := (*p)[i]
|
|
||||||
if !*showRemoteWriteURL {
|
|
||||||
url = fmt.Sprintf("%d:secret-url", i+1)
|
|
||||||
}
|
|
||||||
cs = append(cs, urlRelabelCfg{
|
|
||||||
Url: url,
|
|
||||||
RelabelConfig: cfgData,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
d, _ := yaml.Marshal(cs)
|
|
||||||
_, _ = w.Write(d)
|
|
||||||
}
|
|
||||||
|
|
||||||
func reloadRelabelConfigs() {
|
func reloadRelabelConfigs() {
|
||||||
rcs := allRelabelConfigs.Load()
|
rcs := allRelabelConfigs.Load()
|
||||||
if !rcs.isSet() {
|
if !rcs.isSet() {
|
||||||
@@ -131,43 +90,28 @@ func reloadRelabelConfigs() {
|
|||||||
func loadRelabelConfigs() (*relabelConfigs, error) {
|
func loadRelabelConfigs() (*relabelConfigs, error) {
|
||||||
var rcs relabelConfigs
|
var rcs relabelConfigs
|
||||||
if *relabelConfigPathGlobal != "" {
|
if *relabelConfigPathGlobal != "" {
|
||||||
global, rawCfg, err := promrelabel.LoadRelabelConfigs(*relabelConfigPathGlobal)
|
global, err := promrelabel.LoadRelabelConfigs(*relabelConfigPathGlobal)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("cannot load -remoteWrite.relabelConfig=%q: %w", *relabelConfigPathGlobal, err)
|
return nil, fmt.Errorf("cannot load -remoteWrite.relabelConfig=%q: %w", *relabelConfigPathGlobal, err)
|
||||||
}
|
}
|
||||||
remoteWriteRelabelConfigData.Store(&rawCfg)
|
|
||||||
rcs.global = global
|
rcs.global = global
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(*relabelConfigPaths) > len(*remoteWriteURLs) {
|
if len(*relabelConfigPaths) > len(*remoteWriteURLs) {
|
||||||
return nil, fmt.Errorf("too many -remoteWrite.urlRelabelConfig args: %d; it mustn't exceed the number of -remoteWrite.url args: %d",
|
return nil, fmt.Errorf("too many -remoteWrite.urlRelabelConfig args: %d; it mustn't exceed the number of -remoteWrite.url args: %d",
|
||||||
len(*relabelConfigPaths), (len(*remoteWriteURLs)))
|
len(*relabelConfigPaths), (len(*remoteWriteURLs)))
|
||||||
}
|
}
|
||||||
|
|
||||||
var urlRelabelCfgs []any
|
|
||||||
rcs.perURL = make([]*promrelabel.ParsedConfigs, len(*remoteWriteURLs))
|
rcs.perURL = make([]*promrelabel.ParsedConfigs, len(*remoteWriteURLs))
|
||||||
for i, path := range *relabelConfigPaths {
|
for i, path := range *relabelConfigPaths {
|
||||||
if len(path) == 0 {
|
if len(path) == 0 {
|
||||||
urlRelabelCfgs = append(urlRelabelCfgs, nil)
|
// Skip empty relabel config.
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
prc, rawCfg, err := promrelabel.LoadRelabelConfigs(path)
|
prc, err := promrelabel.LoadRelabelConfigs(path)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("cannot load relabel configs from -remoteWrite.urlRelabelConfig=%q: %w", path, err)
|
return nil, fmt.Errorf("cannot load relabel configs from -remoteWrite.urlRelabelConfig=%q: %w", path, err)
|
||||||
}
|
}
|
||||||
rcs.perURL[i] = prc
|
rcs.perURL[i] = prc
|
||||||
|
|
||||||
var parsedCfg any
|
|
||||||
_ = yaml.Unmarshal(rawCfg, &parsedCfg)
|
|
||||||
urlRelabelCfgs = append(urlRelabelCfgs, parsedCfg)
|
|
||||||
}
|
}
|
||||||
if len(*remoteWriteURLs) > len(*relabelConfigPaths) {
|
|
||||||
// fill the urlRelabelCfgs with empty relabel configs if not set
|
|
||||||
for i := len(*relabelConfigPaths); i < len(*remoteWriteURLs); i++ {
|
|
||||||
urlRelabelCfgs = append(urlRelabelCfgs, nil)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
remoteWriteURLRelabelConfigData.Store(&urlRelabelCfgs)
|
|
||||||
return &rcs, nil
|
return &rcs, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -176,9 +120,19 @@ type relabelConfigs struct {
|
|||||||
perURL []*promrelabel.ParsedConfigs
|
perURL []*promrelabel.ParsedConfigs
|
||||||
}
|
}
|
||||||
|
|
||||||
// isSet indicates whether (global or per-URL) command-line flags is set
|
|
||||||
func (rcs *relabelConfigs) isSet() bool {
|
func (rcs *relabelConfigs) isSet() bool {
|
||||||
return *relabelConfigPathGlobal != "" || len(*relabelConfigPaths) > 0
|
if rcs == nil {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
if rcs.global.Len() > 0 {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
for _, pc := range rcs.perURL {
|
||||||
|
if pc.Len() > 0 {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
// initLabelsGlobal must be called after parsing command-line flags.
|
// initLabelsGlobal must be called after parsing command-line flags.
|
||||||
|
|||||||
@@ -27,7 +27,6 @@ import (
|
|||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutil"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutil"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/ratelimiter"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/ratelimiter"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/slicesutil"
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/streamaggr"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/streamaggr"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timeserieslimits"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timeserieslimits"
|
||||||
"github.com/VictoriaMetrics/metrics"
|
"github.com/VictoriaMetrics/metrics"
|
||||||
@@ -59,7 +58,7 @@ var (
|
|||||||
"See also -remoteWrite.maxDiskUsagePerURL and -remoteWrite.disableOnDiskQueue")
|
"See also -remoteWrite.maxDiskUsagePerURL and -remoteWrite.disableOnDiskQueue")
|
||||||
keepDanglingQueues = flag.Bool("remoteWrite.keepDanglingQueues", false, "Keep persistent queues contents at -remoteWrite.tmpDataPath in case there are no matching -remoteWrite.url. "+
|
keepDanglingQueues = flag.Bool("remoteWrite.keepDanglingQueues", false, "Keep persistent queues contents at -remoteWrite.tmpDataPath in case there are no matching -remoteWrite.url. "+
|
||||||
"Useful when -remoteWrite.url is changed temporarily and persistent queue files will be needed later on.")
|
"Useful when -remoteWrite.url is changed temporarily and persistent queue files will be needed later on.")
|
||||||
queues = flagutil.NewArrayInt("remoteWrite.queues", cgroup.AvailableCPUs()*2, "The number of concurrent queues to each -remoteWrite.url. Set more queues if default number of queues "+
|
queues = flag.Int("remoteWrite.queues", cgroup.AvailableCPUs()*2, "The number of concurrent queues to each -remoteWrite.url. Set more queues if default number of queues "+
|
||||||
"isn't enough for sending high volume of collected data to remote storage. "+
|
"isn't enough for sending high volume of collected data to remote storage. "+
|
||||||
"Default value depends on the number of available CPU cores. It should work fine in most cases since it minimizes resource usage")
|
"Default value depends on the number of available CPU cores. It should work fine in most cases since it minimizes resource usage")
|
||||||
showRemoteWriteURL = flag.Bool("remoteWrite.showURL", false, "Whether to show -remoteWrite.url in the exported metrics. "+
|
showRemoteWriteURL = flag.Bool("remoteWrite.showURL", false, "Whether to show -remoteWrite.url in the exported metrics. "+
|
||||||
@@ -176,6 +175,13 @@ func Init() {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if *queues > maxQueues {
|
||||||
|
*queues = maxQueues
|
||||||
|
}
|
||||||
|
if *queues <= 0 {
|
||||||
|
*queues = 1
|
||||||
|
}
|
||||||
|
|
||||||
if len(*shardByURLLabels) > 0 && len(*shardByURLIgnoreLabels) > 0 {
|
if len(*shardByURLLabels) > 0 && len(*shardByURLIgnoreLabels) > 0 {
|
||||||
logger.Fatalf("-remoteWrite.shardByURL.labels and -remoteWrite.shardByURL.ignoreLabels cannot be set simultaneously; " +
|
logger.Fatalf("-remoteWrite.shardByURL.labels and -remoteWrite.shardByURL.ignoreLabels cannot be set simultaneously; " +
|
||||||
"see https://docs.victoriametrics.com/victoriametrics/vmagent/#sharding-among-remote-storages")
|
"see https://docs.victoriametrics.com/victoriametrics/vmagent/#sharding-among-remote-storages")
|
||||||
@@ -208,7 +214,9 @@ func Init() {
|
|||||||
dropDanglingQueues()
|
dropDanglingQueues()
|
||||||
|
|
||||||
// Start config reloader.
|
// Start config reloader.
|
||||||
configReloaderWG.Go(func() {
|
configReloaderWG.Add(1)
|
||||||
|
go func() {
|
||||||
|
defer configReloaderWG.Done()
|
||||||
for {
|
for {
|
||||||
select {
|
select {
|
||||||
case <-configReloaderStopCh:
|
case <-configReloaderStopCh:
|
||||||
@@ -218,7 +226,7 @@ func Init() {
|
|||||||
reloadRelabelConfigs()
|
reloadRelabelConfigs()
|
||||||
reloadStreamAggrConfigs()
|
reloadStreamAggrConfigs()
|
||||||
}
|
}
|
||||||
})
|
}()
|
||||||
}
|
}
|
||||||
|
|
||||||
func dropDanglingQueues() {
|
func dropDanglingQueues() {
|
||||||
@@ -258,6 +266,17 @@ func initRemoteWriteCtxs(urls []string) {
|
|||||||
if len(urls) == 0 {
|
if len(urls) == 0 {
|
||||||
logger.Panicf("BUG: urls must be non-empty")
|
logger.Panicf("BUG: urls must be non-empty")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
maxInmemoryBlocks := memory.Allowed() / len(urls) / *maxRowsPerBlock / 100
|
||||||
|
if maxInmemoryBlocks / *queues > 100 {
|
||||||
|
// There is no much sense in keeping higher number of blocks in memory,
|
||||||
|
// since this means that the producer outperforms consumer and the queue
|
||||||
|
// will continue growing. It is better storing the queue to file.
|
||||||
|
maxInmemoryBlocks = 100 * *queues
|
||||||
|
}
|
||||||
|
if maxInmemoryBlocks < 2 {
|
||||||
|
maxInmemoryBlocks = 2
|
||||||
|
}
|
||||||
rwctxs := make([]*remoteWriteCtx, len(urls))
|
rwctxs := make([]*remoteWriteCtx, len(urls))
|
||||||
rwctxIdx := make([]int, len(urls))
|
rwctxIdx := make([]int, len(urls))
|
||||||
if retryMaxTime.String() != "" {
|
if retryMaxTime.String() != "" {
|
||||||
@@ -272,7 +291,7 @@ func initRemoteWriteCtxs(urls []string) {
|
|||||||
if *showRemoteWriteURL {
|
if *showRemoteWriteURL {
|
||||||
sanitizedURL = fmt.Sprintf("%d:%s", i+1, remoteWriteURL)
|
sanitizedURL = fmt.Sprintf("%d:%s", i+1, remoteWriteURL)
|
||||||
}
|
}
|
||||||
rwctxs[i] = newRemoteWriteCtx(i, remoteWriteURL, sanitizedURL)
|
rwctxs[i] = newRemoteWriteCtx(i, remoteWriteURL, maxInmemoryBlocks, sanitizedURL)
|
||||||
rwctxIdx[i] = i
|
rwctxIdx[i] = i
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -466,9 +485,6 @@ func tryPush(at *auth.Token, wr *prompb.WriteRequest, forceDropSamplesOnFailure
|
|||||||
matchIdxs.B = sas.Push(tssBlock, matchIdxs.B)
|
matchIdxs.B = sas.Push(tssBlock, matchIdxs.B)
|
||||||
if !*streamAggrGlobalKeepInput {
|
if !*streamAggrGlobalKeepInput {
|
||||||
tssBlock = dropAggregatedSeries(tssBlock, matchIdxs.B, *streamAggrGlobalDropInput)
|
tssBlock = dropAggregatedSeries(tssBlock, matchIdxs.B, *streamAggrGlobalDropInput)
|
||||||
} else if *streamAggrGlobalDropInput {
|
|
||||||
// if both keep_input and drop_input are true, we keep only the aggregated series
|
|
||||||
tssBlock = dropUnaggregatedSeries(tssBlock, matchIdxs.B)
|
|
||||||
}
|
}
|
||||||
matchIdxsPool.Put(matchIdxs)
|
matchIdxsPool.Put(matchIdxs)
|
||||||
}
|
}
|
||||||
@@ -538,9 +554,11 @@ func tryPushMetadataToRemoteStorages(rwctxs []*remoteWriteCtx, mms []prompb.Metr
|
|||||||
// Push metadata to remote storage systems in parallel to reduce
|
// Push metadata to remote storage systems in parallel to reduce
|
||||||
// the time needed for sending the data to multiple remote storage systems.
|
// the time needed for sending the data to multiple remote storage systems.
|
||||||
var wg sync.WaitGroup
|
var wg sync.WaitGroup
|
||||||
|
wg.Add(len(rwctxs))
|
||||||
var anyPushFailed atomic.Bool
|
var anyPushFailed atomic.Bool
|
||||||
for _, rwctx := range rwctxs {
|
for _, rwctx := range rwctxs {
|
||||||
wg.Go(func() {
|
go func(rwctx *remoteWriteCtx) {
|
||||||
|
defer wg.Done()
|
||||||
if !rwctx.tryPushMetadataInternal(mms) {
|
if !rwctx.tryPushMetadataInternal(mms) {
|
||||||
rwctx.pushFailures.Inc()
|
rwctx.pushFailures.Inc()
|
||||||
if forceDropSamplesOnFailure {
|
if forceDropSamplesOnFailure {
|
||||||
@@ -549,7 +567,7 @@ func tryPushMetadataToRemoteStorages(rwctxs []*remoteWriteCtx, mms []prompb.Metr
|
|||||||
}
|
}
|
||||||
anyPushFailed.Store(true)
|
anyPushFailed.Store(true)
|
||||||
}
|
}
|
||||||
})
|
}(rwctx)
|
||||||
}
|
}
|
||||||
wg.Wait()
|
wg.Wait()
|
||||||
return !anyPushFailed.Load()
|
return !anyPushFailed.Load()
|
||||||
@@ -581,13 +599,15 @@ func tryPushTimeSeriesToRemoteStorages(rwctxs []*remoteWriteCtx, tssBlock []prom
|
|||||||
// Push tssBlock to remote storage systems in parallel to reduce
|
// Push tssBlock to remote storage systems in parallel to reduce
|
||||||
// the time needed for sending the data to multiple remote storage systems.
|
// the time needed for sending the data to multiple remote storage systems.
|
||||||
var wg sync.WaitGroup
|
var wg sync.WaitGroup
|
||||||
|
wg.Add(len(rwctxs))
|
||||||
var anyPushFailed atomic.Bool
|
var anyPushFailed atomic.Bool
|
||||||
for _, rwctx := range rwctxs {
|
for _, rwctx := range rwctxs {
|
||||||
wg.Go(func() {
|
go func(rwctx *remoteWriteCtx) {
|
||||||
|
defer wg.Done()
|
||||||
if !rwctx.TryPushTimeSeries(tssBlock, forceDropSamplesOnFailure) {
|
if !rwctx.TryPushTimeSeries(tssBlock, forceDropSamplesOnFailure) {
|
||||||
anyPushFailed.Store(true)
|
anyPushFailed.Store(true)
|
||||||
}
|
}
|
||||||
})
|
}(rwctx)
|
||||||
}
|
}
|
||||||
wg.Wait()
|
wg.Wait()
|
||||||
return !anyPushFailed.Load()
|
return !anyPushFailed.Load()
|
||||||
@@ -609,11 +629,13 @@ func tryShardingTimeSeriesAmongRemoteStorages(rwctxs []*remoteWriteCtx, tssBlock
|
|||||||
if len(shard) == 0 {
|
if len(shard) == 0 {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
wg.Go(func() {
|
wg.Add(1)
|
||||||
if !rwctx.TryPushTimeSeries(shard, forceDropSamplesOnFailure) {
|
go func(rwctx *remoteWriteCtx, tss []prompb.TimeSeries) {
|
||||||
|
defer wg.Done()
|
||||||
|
if !rwctx.TryPushTimeSeries(tss, forceDropSamplesOnFailure) {
|
||||||
anyPushFailed.Store(true)
|
anyPushFailed.Store(true)
|
||||||
}
|
}
|
||||||
})
|
}(rwctx, shard)
|
||||||
}
|
}
|
||||||
wg.Wait()
|
wg.Wait()
|
||||||
return !anyPushFailed.Load()
|
return !anyPushFailed.Load()
|
||||||
@@ -822,7 +844,7 @@ type remoteWriteCtx struct {
|
|||||||
rowsDroppedOnPushFailure *metrics.Counter
|
rowsDroppedOnPushFailure *metrics.Counter
|
||||||
}
|
}
|
||||||
|
|
||||||
func newRemoteWriteCtx(argIdx int, remoteWriteURL *url.URL, sanitizedURL string) *remoteWriteCtx {
|
func newRemoteWriteCtx(argIdx int, remoteWriteURL *url.URL, maxInmemoryBlocks int, sanitizedURL string) *remoteWriteCtx {
|
||||||
// strip query params, otherwise changing params resets pq
|
// strip query params, otherwise changing params resets pq
|
||||||
pqURL := *remoteWriteURL
|
pqURL := *remoteWriteURL
|
||||||
pqURL.RawQuery = ""
|
pqURL.RawQuery = ""
|
||||||
@@ -837,23 +859,6 @@ func newRemoteWriteCtx(argIdx int, remoteWriteURL *url.URL, sanitizedURL string)
|
|||||||
}
|
}
|
||||||
|
|
||||||
isPQDisabled := disableOnDiskQueue.GetOptionalArg(argIdx)
|
isPQDisabled := disableOnDiskQueue.GetOptionalArg(argIdx)
|
||||||
queuesSize := queues.GetOptionalArg(argIdx)
|
|
||||||
if queuesSize > maxQueues {
|
|
||||||
queuesSize = maxQueues
|
|
||||||
} else if queuesSize <= 0 {
|
|
||||||
queuesSize = 1
|
|
||||||
}
|
|
||||||
|
|
||||||
maxInmemoryBlocks := memory.Allowed() / len(*remoteWriteURLs) / *maxRowsPerBlock / 100
|
|
||||||
if maxInmemoryBlocks/queuesSize > 100 {
|
|
||||||
// There is no much sense in keeping higher number of blocks in memory,
|
|
||||||
// since this means that the producer outperforms consumer and the queue
|
|
||||||
// will continue growing. It is better storing the queue to file.
|
|
||||||
maxInmemoryBlocks = 100 * queuesSize
|
|
||||||
}
|
|
||||||
if maxInmemoryBlocks < 2 {
|
|
||||||
maxInmemoryBlocks = 2
|
|
||||||
}
|
|
||||||
fq := persistentqueue.MustOpenFastQueue(queuePath, sanitizedURL, maxInmemoryBlocks, maxPendingBytes, isPQDisabled)
|
fq := persistentqueue.MustOpenFastQueue(queuePath, sanitizedURL, maxInmemoryBlocks, maxPendingBytes, isPQDisabled)
|
||||||
_ = metrics.GetOrCreateGauge(fmt.Sprintf(`vmagent_remotewrite_pending_data_bytes{path=%q, url=%q}`, queuePath, sanitizedURL), func() float64 {
|
_ = metrics.GetOrCreateGauge(fmt.Sprintf(`vmagent_remotewrite_pending_data_bytes{path=%q, url=%q}`, queuePath, sanitizedURL), func() float64 {
|
||||||
return float64(fq.GetPendingBytes())
|
return float64(fq.GetPendingBytes())
|
||||||
@@ -871,16 +876,16 @@ func newRemoteWriteCtx(argIdx int, remoteWriteURL *url.URL, sanitizedURL string)
|
|||||||
var c *client
|
var c *client
|
||||||
switch remoteWriteURL.Scheme {
|
switch remoteWriteURL.Scheme {
|
||||||
case "http", "https":
|
case "http", "https":
|
||||||
c = newHTTPClient(argIdx, remoteWriteURL.String(), sanitizedURL, fq, queuesSize)
|
c = newHTTPClient(argIdx, remoteWriteURL.String(), sanitizedURL, fq, *queues)
|
||||||
default:
|
default:
|
||||||
logger.Fatalf("unsupported scheme: %s for remoteWriteURL: %s, want `http`, `https`", remoteWriteURL.Scheme, sanitizedURL)
|
logger.Fatalf("unsupported scheme: %s for remoteWriteURL: %s, want `http`, `https`", remoteWriteURL.Scheme, sanitizedURL)
|
||||||
}
|
}
|
||||||
c.init(argIdx, queuesSize, sanitizedURL)
|
c.init(argIdx, *queues, sanitizedURL)
|
||||||
|
|
||||||
// Initialize pss
|
// Initialize pss
|
||||||
sf := significantFigures.GetOptionalArg(argIdx)
|
sf := significantFigures.GetOptionalArg(argIdx)
|
||||||
rd := roundDigits.GetOptionalArg(argIdx)
|
rd := roundDigits.GetOptionalArg(argIdx)
|
||||||
pssLen := queuesSize
|
pssLen := *queues
|
||||||
if n := cgroup.AvailableCPUs(); pssLen > n {
|
if n := cgroup.AvailableCPUs(); pssLen > n {
|
||||||
// There is no sense in running more than availableCPUs concurrent pendingSeries,
|
// There is no sense in running more than availableCPUs concurrent pendingSeries,
|
||||||
// since every pendingSeries can saturate up to a single CPU.
|
// since every pendingSeries can saturate up to a single CPU.
|
||||||
@@ -983,17 +988,7 @@ func (rwctx *remoteWriteCtx) TryPushTimeSeries(tss []prompb.TimeSeries, forceDro
|
|||||||
tss = append(*v, tss...)
|
tss = append(*v, tss...)
|
||||||
}
|
}
|
||||||
tss = dropAggregatedSeries(tss, matchIdxs.B, rwctx.streamAggrDropInput)
|
tss = dropAggregatedSeries(tss, matchIdxs.B, rwctx.streamAggrDropInput)
|
||||||
} else if rwctx.streamAggrDropInput {
|
|
||||||
// if both keep_input and drop_input are true, we keep only the aggregated series
|
|
||||||
if rctx == nil {
|
|
||||||
rctx = getRelabelCtx()
|
|
||||||
// Make a copy of tss before dropping aggregated series
|
|
||||||
v = tssPool.Get().(*[]prompb.TimeSeries)
|
|
||||||
tss = append(*v, tss...)
|
|
||||||
}
|
|
||||||
tss = dropUnaggregatedSeries(tss, matchIdxs.B)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
matchIdxsPool.Put(matchIdxs)
|
matchIdxsPool.Put(matchIdxs)
|
||||||
}
|
}
|
||||||
if rwctx.deduplicator != nil {
|
if rwctx.deduplicator != nil {
|
||||||
@@ -1016,10 +1011,9 @@ func (rwctx *remoteWriteCtx) TryPushTimeSeries(tss []prompb.TimeSeries, forceDro
|
|||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
var matchIdxsPool slicesutil.BufferPool[uint32]
|
var matchIdxsPool bytesutil.ByteBufferPool
|
||||||
|
|
||||||
// dropAggregatedSeries drops matched series, also the unmatched if dropInput is true.
|
func dropAggregatedSeries(src []prompb.TimeSeries, matchIdxs []byte, dropInput bool) []prompb.TimeSeries {
|
||||||
func dropAggregatedSeries(src []prompb.TimeSeries, matchIdxs []uint32, dropInput bool) []prompb.TimeSeries {
|
|
||||||
dst := src[:0]
|
dst := src[:0]
|
||||||
if !dropInput {
|
if !dropInput {
|
||||||
for i, match := range matchIdxs {
|
for i, match := range matchIdxs {
|
||||||
@@ -1034,20 +1028,6 @@ func dropAggregatedSeries(src []prompb.TimeSeries, matchIdxs []uint32, dropInput
|
|||||||
return dst
|
return dst
|
||||||
}
|
}
|
||||||
|
|
||||||
// dropUnaggregatedSeries drops unmatched series.
|
|
||||||
func dropUnaggregatedSeries(src []prompb.TimeSeries, matchIdxs []uint32) []prompb.TimeSeries {
|
|
||||||
dst := src[:0]
|
|
||||||
for i, match := range matchIdxs {
|
|
||||||
if match == 0 {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
dst = append(dst, src[i])
|
|
||||||
}
|
|
||||||
tail := src[len(dst):]
|
|
||||||
clear(tail)
|
|
||||||
return dst
|
|
||||||
}
|
|
||||||
|
|
||||||
func (rwctx *remoteWriteCtx) pushInternalTrackDropped(tss []prompb.TimeSeries) {
|
func (rwctx *remoteWriteCtx) pushInternalTrackDropped(tss []prompb.TimeSeries) {
|
||||||
if rwctx.tryPushTimeSeriesInternal(tss) {
|
if rwctx.tryPushTimeSeriesInternal(tss) {
|
||||||
return
|
return
|
||||||
@@ -1080,7 +1060,7 @@ func (rwctx *remoteWriteCtx) tryPushTimeSeriesInternal(tss []prompb.TimeSeries)
|
|||||||
}()
|
}()
|
||||||
|
|
||||||
if len(labelsGlobal) > 0 {
|
if len(labelsGlobal) > 0 {
|
||||||
// Make a copy of tss before adding extra labels to prevent
|
// Make a copy of tss before adding extra labels in order to prevent
|
||||||
// from affecting time series for other remoteWrite.url configs.
|
// from affecting time series for other remoteWrite.url configs.
|
||||||
rctx = getRelabelCtx()
|
rctx = getRelabelCtx()
|
||||||
v = tssPool.Get().(*[]prompb.TimeSeries)
|
v = tssPool.Get().(*[]prompb.TimeSeries)
|
||||||
|
|||||||
@@ -10,8 +10,6 @@ import (
|
|||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/consistenthash"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/consistenthash"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/persistentqueue"
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/prometheus"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/prometheus"
|
||||||
@@ -28,12 +26,12 @@ func TestGetLabelsHash_Distribution(t *testing.T) {
|
|||||||
itemsCount := 1_000 * bucketsCount
|
itemsCount := 1_000 * bucketsCount
|
||||||
m := make([]int, bucketsCount)
|
m := make([]int, bucketsCount)
|
||||||
var labels []prompb.Label
|
var labels []prompb.Label
|
||||||
for i := range itemsCount {
|
for i := 0; i < itemsCount; i++ {
|
||||||
labels = append(labels[:0], prompb.Label{
|
labels = append(labels[:0], prompb.Label{
|
||||||
Name: "__name__",
|
Name: "__name__",
|
||||||
Value: fmt.Sprintf("some_name_%d", i),
|
Value: fmt.Sprintf("some_name_%d", i),
|
||||||
})
|
})
|
||||||
for j := range 10 {
|
for j := 0; j < 10; j++ {
|
||||||
labels = append(labels, prompb.Label{
|
labels = append(labels, prompb.Label{
|
||||||
Name: fmt.Sprintf("label_%d", j),
|
Name: fmt.Sprintf("label_%d", j),
|
||||||
Value: fmt.Sprintf("value_%d_%d", i, j),
|
Value: fmt.Sprintf("value_%d_%d", i, j),
|
||||||
@@ -59,8 +57,8 @@ func TestGetLabelsHash_Distribution(t *testing.T) {
|
|||||||
f(10)
|
f(10)
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestRemoteWriteContext_TryPushTimeSeries(t *testing.T) {
|
func TestRemoteWriteContext_TryPush_ImmutableTimeseries(t *testing.T) {
|
||||||
f := func(streamAggrConfig, relabelConfig string, enableWindows bool, dedupInterval time.Duration, keepInput, dropInput bool, input string, expectedRowsPushedAfterRelabel, expectedPushedSample int) {
|
f := func(streamAggrConfig, relabelConfig string, enableWindows bool, dedupInterval time.Duration, keepInput, dropInput bool, input string) {
|
||||||
t.Helper()
|
t.Helper()
|
||||||
perURLRelabel, err := promrelabel.ParseRelabelConfigsData([]byte(relabelConfig))
|
perURLRelabel, err := promrelabel.ParseRelabelConfigsData([]byte(relabelConfig))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -73,16 +71,10 @@ func TestRemoteWriteContext_TryPushTimeSeries(t *testing.T) {
|
|||||||
}
|
}
|
||||||
allRelabelConfigs.Store(rcs)
|
allRelabelConfigs.Store(rcs)
|
||||||
|
|
||||||
path := "fast-queue-write-test"
|
|
||||||
fs.MustRemoveDir(path)
|
|
||||||
fq := persistentqueue.MustOpenFastQueue(path, "test", 100, 0, false)
|
|
||||||
defer fs.MustRemoveDir(path)
|
|
||||||
defer fq.MustClose()
|
|
||||||
|
|
||||||
pss := make([]*pendingSeries, 1)
|
pss := make([]*pendingSeries, 1)
|
||||||
isVMProto := &atomic.Bool{}
|
isVMProto := &atomic.Bool{}
|
||||||
isVMProto.Store(true)
|
isVMProto.Store(true)
|
||||||
pss[0] = newPendingSeries(fq, isVMProto, 0, 100)
|
pss[0] = newPendingSeries(nil, isVMProto, 0, 100)
|
||||||
rwctx := &remoteWriteCtx{
|
rwctx := &remoteWriteCtx{
|
||||||
idx: 0,
|
idx: 0,
|
||||||
streamAggrKeepInput: keepInput,
|
streamAggrKeepInput: keepInput,
|
||||||
@@ -91,8 +83,6 @@ func TestRemoteWriteContext_TryPushTimeSeries(t *testing.T) {
|
|||||||
rowsPushedAfterRelabel: metrics.GetOrCreateCounter(`foo`),
|
rowsPushedAfterRelabel: metrics.GetOrCreateCounter(`foo`),
|
||||||
rowsDroppedByRelabel: metrics.GetOrCreateCounter(`bar`),
|
rowsDroppedByRelabel: metrics.GetOrCreateCounter(`bar`),
|
||||||
}
|
}
|
||||||
defer metrics.UnregisterAllMetrics()
|
|
||||||
|
|
||||||
if dedupInterval > 0 {
|
if dedupInterval > 0 {
|
||||||
rwctx.deduplicator = streamaggr.NewDeduplicator(nil, enableWindows, dedupInterval, nil, "dedup-global")
|
rwctx.deduplicator = streamaggr.NewDeduplicator(nil, enableWindows, dedupInterval, nil, "dedup-global")
|
||||||
}
|
}
|
||||||
@@ -114,27 +104,23 @@ func TestRemoteWriteContext_TryPushTimeSeries(t *testing.T) {
|
|||||||
inputTss := prometheus.MustParsePromMetrics(input, offsetMsecs)
|
inputTss := prometheus.MustParsePromMetrics(input, offsetMsecs)
|
||||||
expectedTss := make([]prompb.TimeSeries, len(inputTss))
|
expectedTss := make([]prompb.TimeSeries, len(inputTss))
|
||||||
|
|
||||||
// check inputTss is not modified after TryPushTimeSeries
|
// copy inputTss to make sure it is not mutated during TryPush call
|
||||||
copy(expectedTss, inputTss)
|
copy(expectedTss, inputTss)
|
||||||
if !rwctx.TryPushTimeSeries(inputTss, false) {
|
if !rwctx.TryPushTimeSeries(inputTss, false) {
|
||||||
t.Fatalf("cannot push samples to rwctx")
|
t.Fatalf("cannot push samples to rwctx")
|
||||||
}
|
}
|
||||||
|
|
||||||
if int(rwctx.rowsPushedAfterRelabel.Get()) != expectedRowsPushedAfterRelabel {
|
|
||||||
t.Fatalf("unexpected number of rows after relabel; got %d; want %d", rwctx.rowsPushedAfterRelabel.Get(), expectedRowsPushedAfterRelabel)
|
|
||||||
}
|
|
||||||
|
|
||||||
if len(pss[0].wr.tss) != expectedPushedSample {
|
|
||||||
t.Fatalf("unexpected number of pushed samples; got %d; want %d", len(pss[0].wr.tss), expectedPushedSample)
|
|
||||||
}
|
|
||||||
|
|
||||||
if !reflect.DeepEqual(expectedTss, inputTss) {
|
if !reflect.DeepEqual(expectedTss, inputTss) {
|
||||||
t.Fatalf("unexpected samples;\ngot\n%v\nwant\n%v", inputTss, expectedTss)
|
t.Fatalf("unexpected samples;\ngot\n%v\nwant\n%v", inputTss, expectedTss)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// relabeling
|
f(`
|
||||||
f(``, `
|
- interval: 1m
|
||||||
|
outputs: [sum_samples]
|
||||||
|
- interval: 2m
|
||||||
|
outputs: [count_series]
|
||||||
|
`, `
|
||||||
- action: keep
|
- action: keep
|
||||||
source_labels: [env]
|
source_labels: [env]
|
||||||
regex: "dev"
|
regex: "dev"
|
||||||
@@ -143,66 +129,53 @@ metric{env="dev"} 10
|
|||||||
metric{env="bar"} 20
|
metric{env="bar"} 20
|
||||||
metric{env="dev"} 15
|
metric{env="dev"} 15
|
||||||
metric{env="bar"} 25
|
metric{env="bar"} 25
|
||||||
`, 2, 2)
|
`)
|
||||||
|
|
||||||
// relabeling + aggregation
|
|
||||||
f(`
|
|
||||||
- match: '{env="dev"}'
|
|
||||||
interval: 1m
|
|
||||||
outputs: [sum_samples]
|
|
||||||
`, `
|
|
||||||
- action: keep
|
|
||||||
source_labels: [env]
|
|
||||||
regex: ".*"
|
|
||||||
`, false, 0, false, false, `
|
|
||||||
metric{env="dev"} 10
|
|
||||||
metric{env="bar"} 20
|
|
||||||
metric{env="dev"} 15
|
|
||||||
metric{env="bar"} 25
|
|
||||||
`, 4, 2)
|
|
||||||
|
|
||||||
// aggregation + keepInput
|
|
||||||
f(`
|
|
||||||
- match: '{env="dev"}'
|
|
||||||
interval: 1m
|
|
||||||
outputs: [sum_samples]
|
|
||||||
`, ``, false, 0, true, false, `
|
|
||||||
metric{env="dev"} 10
|
|
||||||
metric{env="bar"} 20
|
|
||||||
metric{env="dev"} 15
|
|
||||||
metric{env="bar"} 25
|
|
||||||
`, 4, 4)
|
|
||||||
|
|
||||||
// aggregation + dropInput
|
|
||||||
f(`
|
|
||||||
- match: '{env="dev"}'
|
|
||||||
interval: 1m
|
|
||||||
outputs: [sum_samples]
|
|
||||||
`, ``, false, 0, false, true, `
|
|
||||||
metric{env="dev"} 10
|
|
||||||
metric{env="bar"} 20
|
|
||||||
metric{env="dev"} 15
|
|
||||||
metric{env="bar"} 25
|
|
||||||
`, 4, 0)
|
|
||||||
|
|
||||||
// aggregation + keepInput + dropInput
|
|
||||||
f(`
|
|
||||||
- match: '{env="dev"}'
|
|
||||||
interval: 1m
|
|
||||||
outputs: [sum_samples]
|
|
||||||
`, ``, false, 0, true, true, `
|
|
||||||
metric{env="dev"} 10
|
|
||||||
metric{env="bar"} 20
|
|
||||||
metric{env="bar"} 25
|
|
||||||
`, 3, 1)
|
|
||||||
|
|
||||||
// aggregation + deduplication
|
|
||||||
f(``, ``, true, time.Hour, false, false, `
|
f(``, ``, true, time.Hour, false, false, `
|
||||||
metric{env="dev"} 10
|
metric{env="dev"} 10
|
||||||
metric{env="foo"} 20
|
metric{env="foo"} 20
|
||||||
metric{env="dev"} 15
|
metric{env="dev"} 15
|
||||||
metric{env="foo"} 25
|
metric{env="foo"} 25
|
||||||
`, 4, 0)
|
`)
|
||||||
|
f(``, `
|
||||||
|
- action: keep
|
||||||
|
source_labels: [env]
|
||||||
|
regex: "dev"
|
||||||
|
`, true, time.Hour, false, false, `
|
||||||
|
metric{env="dev"} 10
|
||||||
|
metric{env="bar"} 20
|
||||||
|
metric{env="dev"} 15
|
||||||
|
metric{env="bar"} 25
|
||||||
|
`)
|
||||||
|
f(``, `
|
||||||
|
- action: keep
|
||||||
|
source_labels: [env]
|
||||||
|
regex: "dev"
|
||||||
|
`, true, time.Hour, true, false, `
|
||||||
|
metric{env="test"} 10
|
||||||
|
metric{env="dev"} 20
|
||||||
|
metric{env="foo"} 15
|
||||||
|
metric{env="dev"} 25
|
||||||
|
`)
|
||||||
|
f(``, `
|
||||||
|
- action: keep
|
||||||
|
source_labels: [env]
|
||||||
|
regex: "dev"
|
||||||
|
`, true, time.Hour, false, true, `
|
||||||
|
metric{env="foo"} 10
|
||||||
|
metric{env="dev"} 20
|
||||||
|
metric{env="foo"} 15
|
||||||
|
metric{env="dev"} 25
|
||||||
|
`)
|
||||||
|
f(``, `
|
||||||
|
- action: keep
|
||||||
|
source_labels: [env]
|
||||||
|
regex: "dev"
|
||||||
|
`, true, time.Hour, true, true, `
|
||||||
|
metric{env="dev"} 10
|
||||||
|
metric{env="test"} 20
|
||||||
|
metric{env="dev"} 15
|
||||||
|
metric{env="bar"} 25
|
||||||
|
`)
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestShardAmountRemoteWriteCtx(t *testing.T) {
|
func TestShardAmountRemoteWriteCtx(t *testing.T) {
|
||||||
@@ -248,7 +221,7 @@ func TestShardAmountRemoteWriteCtx(t *testing.T) {
|
|||||||
seriesCount := 100000
|
seriesCount := 100000
|
||||||
// build 1000000 series
|
// build 1000000 series
|
||||||
tssBlock := make([]prompb.TimeSeries, 0, seriesCount)
|
tssBlock := make([]prompb.TimeSeries, 0, seriesCount)
|
||||||
for i := range seriesCount {
|
for i := 0; i < seriesCount; i++ {
|
||||||
tssBlock = append(tssBlock, prompb.TimeSeries{
|
tssBlock = append(tssBlock, prompb.TimeSeries{
|
||||||
Labels: []prompb.Label{
|
Labels: []prompb.Label{
|
||||||
{
|
{
|
||||||
@@ -269,7 +242,7 @@ func TestShardAmountRemoteWriteCtx(t *testing.T) {
|
|||||||
// build active time series set
|
// build active time series set
|
||||||
nodes := make([]string, 0, remoteWriteCount)
|
nodes := make([]string, 0, remoteWriteCount)
|
||||||
activeTimeSeriesByNodes := make([]map[string]struct{}, remoteWriteCount)
|
activeTimeSeriesByNodes := make([]map[string]struct{}, remoteWriteCount)
|
||||||
for i := range remoteWriteCount {
|
for i := 0; i < remoteWriteCount; i++ {
|
||||||
nodes = append(nodes, fmt.Sprintf("node%d", i))
|
nodes = append(nodes, fmt.Sprintf("node%d", i))
|
||||||
activeTimeSeriesByNodes[i] = make(map[string]struct{})
|
activeTimeSeriesByNodes[i] = make(map[string]struct{})
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -18,12 +18,12 @@ var (
|
|||||||
streamAggrGlobalConfig = flag.String("streamAggr.config", "", "Optional path to file with stream aggregation config. "+
|
streamAggrGlobalConfig = flag.String("streamAggr.config", "", "Optional path to file with stream aggregation config. "+
|
||||||
"See https://docs.victoriametrics.com/victoriametrics/stream-aggregation/ . "+
|
"See https://docs.victoriametrics.com/victoriametrics/stream-aggregation/ . "+
|
||||||
"See also -streamAggr.keepInput, -streamAggr.dropInput and -streamAggr.dedupInterval")
|
"See also -streamAggr.keepInput, -streamAggr.dropInput and -streamAggr.dedupInterval")
|
||||||
streamAggrGlobalKeepInput = flag.Bool("streamAggr.keepInput", false, "Whether to keep input samples that match any rule in "+
|
streamAggrGlobalKeepInput = flag.Bool("streamAggr.keepInput", false, "Whether to keep all the input samples after the aggregation "+
|
||||||
"-streamAggr.config. By default, matched raw samples are aggregated and dropped, while unmatched samples "+
|
"with -streamAggr.config. By default, only aggregates samples are dropped, while the remaining samples "+
|
||||||
"are written to the remote storage. See also -streamAggr.dropInput and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/")
|
"are written to remote storages write. See also -streamAggr.dropInput and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/")
|
||||||
streamAggrGlobalDropInput = flag.Bool("streamAggr.dropInput", false, "Whether to drop input samples that not matching any rule in "+
|
streamAggrGlobalDropInput = flag.Bool("streamAggr.dropInput", false, "Whether to drop all the input samples after the aggregation "+
|
||||||
"-streamAggr.config. By default, only matched raw samples are dropped, while unmatched samples "+
|
"with -remoteWrite.streamAggr.config. By default, only aggregates samples are dropped, while the remaining samples "+
|
||||||
"are written to the remote storage. See also -streamAggr.keepInput and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/")
|
"are written to remote storages write. See also -streamAggr.keepInput and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/")
|
||||||
streamAggrGlobalDedupInterval = flag.Duration("streamAggr.dedupInterval", 0, "Input samples are de-duplicated with this interval on "+
|
streamAggrGlobalDedupInterval = flag.Duration("streamAggr.dedupInterval", 0, "Input samples are de-duplicated with this interval on "+
|
||||||
"aggregator before optional aggregation with -streamAggr.config . "+
|
"aggregator before optional aggregation with -streamAggr.config . "+
|
||||||
"See also -dedup.minScrapeInterval and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/#deduplication")
|
"See also -dedup.minScrapeInterval and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/#deduplication")
|
||||||
@@ -43,11 +43,11 @@ var (
|
|||||||
streamAggrConfig = flagutil.NewArrayString("remoteWrite.streamAggr.config", "Optional path to file with stream aggregation config for the corresponding -remoteWrite.url. "+
|
streamAggrConfig = flagutil.NewArrayString("remoteWrite.streamAggr.config", "Optional path to file with stream aggregation config for the corresponding -remoteWrite.url. "+
|
||||||
"See https://docs.victoriametrics.com/victoriametrics/stream-aggregation/ . "+
|
"See https://docs.victoriametrics.com/victoriametrics/stream-aggregation/ . "+
|
||||||
"See also -remoteWrite.streamAggr.keepInput, -remoteWrite.streamAggr.dropInput and -remoteWrite.streamAggr.dedupInterval")
|
"See also -remoteWrite.streamAggr.keepInput, -remoteWrite.streamAggr.dropInput and -remoteWrite.streamAggr.dedupInterval")
|
||||||
streamAggrDropInput = flagutil.NewArrayBool("remoteWrite.streamAggr.dropInput", "Whether to drop input samples that not matching any rule in "+
|
streamAggrDropInput = flagutil.NewArrayBool("remoteWrite.streamAggr.dropInput", "Whether to drop all the input samples after the aggregation "+
|
||||||
"the corresponding -remoteWrite.streamAggr.config. By default, only matched raw samples are dropped, while unmatched samples "+
|
"with -remoteWrite.streamAggr.config at the corresponding -remoteWrite.url. By default, only aggregates samples are dropped, while the remaining samples "+
|
||||||
"are written to the corresponding -remoteWrite.url . See also -remoteWrite.streamAggr.keepInput and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/")
|
"are written to the corresponding -remoteWrite.url . See also -remoteWrite.streamAggr.keepInput and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/")
|
||||||
streamAggrKeepInput = flagutil.NewArrayBool("remoteWrite.streamAggr.keepInput", "Whether to keep input samples that match any rule in "+
|
streamAggrKeepInput = flagutil.NewArrayBool("remoteWrite.streamAggr.keepInput", "Whether to keep all the input samples after the aggregation "+
|
||||||
"the corresponding -remoteWrite.streamAggr.config. By default, matched raw samples are aggregated and dropped, while unmatched samples "+
|
"with -remoteWrite.streamAggr.config at the corresponding -remoteWrite.url. By default, only aggregates samples are dropped, while the remaining samples "+
|
||||||
"are written to the corresponding -remoteWrite.url . See also -remoteWrite.streamAggr.dropInput and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/")
|
"are written to the corresponding -remoteWrite.url . See also -remoteWrite.streamAggr.dropInput and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/")
|
||||||
streamAggrDedupInterval = flagutil.NewArrayDuration("remoteWrite.streamAggr.dedupInterval", 0, "Input samples are de-duplicated with this interval before optional aggregation "+
|
streamAggrDedupInterval = flagutil.NewArrayDuration("remoteWrite.streamAggr.dedupInterval", 0, "Input samples are de-duplicated with this interval before optional aggregation "+
|
||||||
"with -remoteWrite.streamAggr.config at the corresponding -remoteWrite.url. See also -dedup.minScrapeInterval and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/#deduplication")
|
"with -remoteWrite.streamAggr.config at the corresponding -remoteWrite.url. See also -dedup.minScrapeInterval and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/#deduplication")
|
||||||
|
|||||||
@@ -1,80 +0,0 @@
|
|||||||
package zabbixconnector
|
|
||||||
|
|
||||||
import (
|
|
||||||
"net/http"
|
|
||||||
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/protoparserutil"
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/zabbixconnector"
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/zabbixconnector/stream"
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
|
|
||||||
"github.com/VictoriaMetrics/metrics"
|
|
||||||
)
|
|
||||||
|
|
||||||
var (
|
|
||||||
rowsInserted = metrics.NewCounter(`vmagent_rows_inserted_total{type="zabbixconnector"}`)
|
|
||||||
rowsTenantInserted = tenantmetrics.NewCounterMap(`vmagent_tenant_inserted_rows_total{type="zabbixconnector"}`)
|
|
||||||
rowsPerInsert = metrics.NewHistogram(`vmagent_rows_per_insert{type="zabbixconnector"}`)
|
|
||||||
)
|
|
||||||
|
|
||||||
// InsertHandlerForHTTP processes remote write for ZabbixConnector POST /zabbixconnector/v1/history request.
|
|
||||||
func InsertHandlerForHTTP(at *auth.Token, req *http.Request) error {
|
|
||||||
extraLabels, err := protoparserutil.GetExtraLabels(req)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
encoding := req.Header.Get("Content-Encoding")
|
|
||||||
return stream.Parse(req.Body, encoding, func(rows []zabbixconnector.Row) error {
|
|
||||||
return insertRows(at, rows, extraLabels)
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
func insertRows(at *auth.Token, rows []zabbixconnector.Row, extraLabels []prompb.Label) error {
|
|
||||||
ctx := common.GetPushCtx()
|
|
||||||
defer common.PutPushCtx(ctx)
|
|
||||||
|
|
||||||
rowsTotal := len(rows)
|
|
||||||
tssDst := ctx.WriteRequest.Timeseries[:0]
|
|
||||||
labels := ctx.Labels[:0]
|
|
||||||
samples := ctx.Samples[:0]
|
|
||||||
for i := range rows {
|
|
||||||
r := &rows[i]
|
|
||||||
|
|
||||||
labelsLen := len(labels)
|
|
||||||
for j := range r.Tags {
|
|
||||||
tag := &r.Tags[j]
|
|
||||||
labels = append(labels, prompb.Label{
|
|
||||||
Name: bytesutil.ToUnsafeString(tag.Key),
|
|
||||||
Value: bytesutil.ToUnsafeString(tag.Value),
|
|
||||||
})
|
|
||||||
}
|
|
||||||
labels = append(labels, extraLabels...)
|
|
||||||
|
|
||||||
samplesLen := len(samples)
|
|
||||||
samples = append(samples, prompb.Sample{
|
|
||||||
Value: r.Value,
|
|
||||||
Timestamp: r.Timestamp,
|
|
||||||
})
|
|
||||||
|
|
||||||
tssDst = append(tssDst, prompb.TimeSeries{
|
|
||||||
Labels: labels[labelsLen:],
|
|
||||||
Samples: samples[samplesLen:],
|
|
||||||
})
|
|
||||||
}
|
|
||||||
ctx.WriteRequest.Timeseries = tssDst
|
|
||||||
ctx.Labels = labels
|
|
||||||
ctx.Samples = samples
|
|
||||||
if !remotewrite.TryPush(at, &ctx.WriteRequest) {
|
|
||||||
return remotewrite.ErrQueueFullHTTPRetry
|
|
||||||
}
|
|
||||||
rowsInserted.Add(rowsTotal)
|
|
||||||
if at != nil {
|
|
||||||
rowsTenantInserted.Get(at).Add(rowsTotal)
|
|
||||||
}
|
|
||||||
rowsPerInsert.Update(float64(rowsTotal))
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
@@ -27,9 +27,6 @@ vmalert-tool-linux-ppc64le-prod:
|
|||||||
vmalert-tool-linux-386-prod:
|
vmalert-tool-linux-386-prod:
|
||||||
APP_NAME=vmalert-tool $(MAKE) app-via-docker-linux-386
|
APP_NAME=vmalert-tool $(MAKE) app-via-docker-linux-386
|
||||||
|
|
||||||
vmalert-tool-linux-s390x-prod:
|
|
||||||
APP_NAME=vmalert-tool $(MAKE) app-via-docker-linux-s390x
|
|
||||||
|
|
||||||
vmalert-tool-darwin-amd64-prod:
|
vmalert-tool-darwin-amd64-prod:
|
||||||
APP_NAME=vmalert-tool $(MAKE) app-via-docker-darwin-amd64
|
APP_NAME=vmalert-tool $(MAKE) app-via-docker-darwin-amd64
|
||||||
|
|
||||||
|
|||||||
@@ -41,7 +41,7 @@ func TestParseInputValue_Success(t *testing.T) {
|
|||||||
if len(outputExpected) != len(output) {
|
if len(outputExpected) != len(output) {
|
||||||
t.Fatalf("unexpected output length; got %d; want %d", len(outputExpected), len(output))
|
t.Fatalf("unexpected output length; got %d; want %d", len(outputExpected), len(output))
|
||||||
}
|
}
|
||||||
for i := range outputExpected {
|
for i := 0; i < len(outputExpected); i++ {
|
||||||
if outputExpected[i].Omitted != output[i].Omitted {
|
if outputExpected[i].Omitted != output[i].Omitted {
|
||||||
t.Fatalf("unexpected Omitted field in the output\ngot\n%v\nwant\n%v", output, outputExpected)
|
t.Fatalf("unexpected Omitted field in the output\ngot\n%v\nwant\n%v", output, outputExpected)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -4,7 +4,6 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"flag"
|
"flag"
|
||||||
"fmt"
|
"fmt"
|
||||||
"maps"
|
|
||||||
"net"
|
"net"
|
||||||
"net/http"
|
"net/http"
|
||||||
"net/http/httptest"
|
"net/http/httptest"
|
||||||
@@ -13,7 +12,6 @@ import (
|
|||||||
"os/signal"
|
"os/signal"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"reflect"
|
"reflect"
|
||||||
"slices"
|
|
||||||
"sort"
|
"sort"
|
||||||
"strings"
|
"strings"
|
||||||
"syscall"
|
"syscall"
|
||||||
@@ -36,7 +34,6 @@ import (
|
|||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil"
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutil"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutil"
|
||||||
"github.com/VictoriaMetrics/metrics"
|
"github.com/VictoriaMetrics/metrics"
|
||||||
)
|
)
|
||||||
@@ -87,8 +84,7 @@ func UnitTest(files []string, disableGroupLabel bool, externalLabels []string, e
|
|||||||
defer server.Close()
|
defer server.Close()
|
||||||
} else {
|
} else {
|
||||||
httpListenAddr = httpListenPort
|
httpListenAddr = httpListenPort
|
||||||
|
ln, err := net.Listen("tcp", fmt.Sprintf(":%s", httpListenPort))
|
||||||
ln, err := net.Listen(netutil.GetTCPNetwork(), fmt.Sprintf(":%s", httpListenPort))
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logger.Fatalf("cannot listen on port %s: %v", httpListenPort, err)
|
logger.Fatalf("cannot listen on port %s: %v", httpListenPort, err)
|
||||||
}
|
}
|
||||||
@@ -134,7 +130,7 @@ func UnitTest(files []string, disableGroupLabel bool, externalLabels []string, e
|
|||||||
}
|
}
|
||||||
labels[s[:n]] = s[n+1:]
|
labels[s[:n]] = s[n+1:]
|
||||||
}
|
}
|
||||||
err = notifier.Init(labels, externalURL)
|
_, err = notifier.Init(nil, labels, externalURL)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logger.Fatalf("failed to init notifier: %v", err)
|
logger.Fatalf("failed to init notifier: %v", err)
|
||||||
}
|
}
|
||||||
@@ -350,7 +346,9 @@ func (tg *testGroup) test(evalInterval time.Duration, groupOrderMap map[string]i
|
|||||||
for k := range alertEvalTimesMap {
|
for k := range alertEvalTimesMap {
|
||||||
alertEvalTimes = append(alertEvalTimes, k)
|
alertEvalTimes = append(alertEvalTimes, k)
|
||||||
}
|
}
|
||||||
slices.Sort(alertEvalTimes)
|
sort.Slice(alertEvalTimes, func(i, j int) bool {
|
||||||
|
return alertEvalTimes[i] < alertEvalTimes[j]
|
||||||
|
})
|
||||||
|
|
||||||
// sort group eval order according to the given "group_eval_order".
|
// sort group eval order according to the given "group_eval_order".
|
||||||
sort.Slice(testGroups, func(i, j int) bool {
|
sort.Slice(testGroups, func(i, j int) bool {
|
||||||
@@ -361,8 +359,12 @@ func (tg *testGroup) test(evalInterval time.Duration, groupOrderMap map[string]i
|
|||||||
var groups []*rule.Group
|
var groups []*rule.Group
|
||||||
for _, group := range testGroups {
|
for _, group := range testGroups {
|
||||||
mergedExternalLabels := make(map[string]string)
|
mergedExternalLabels := make(map[string]string)
|
||||||
maps.Copy(mergedExternalLabels, tg.ExternalLabels)
|
for k, v := range tg.ExternalLabels {
|
||||||
maps.Copy(mergedExternalLabels, externalLabels)
|
mergedExternalLabels[k] = v
|
||||||
|
}
|
||||||
|
for k, v := range externalLabels {
|
||||||
|
mergedExternalLabels[k] = v
|
||||||
|
}
|
||||||
ng := rule.NewGroup(group, q, time.Minute, mergedExternalLabels)
|
ng := rule.NewGroup(group, q, time.Minute, mergedExternalLabels)
|
||||||
ng.Init()
|
ng.Init()
|
||||||
groups = append(groups, ng)
|
groups = append(groups, ng)
|
||||||
@@ -375,7 +377,7 @@ func (tg *testGroup) test(evalInterval time.Duration, groupOrderMap map[string]i
|
|||||||
if len(g.Rules) == 0 {
|
if len(g.Rules) == 0 {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
errs := g.ExecOnce(context.Background(), rw, ts)
|
errs := g.ExecOnce(context.Background(), func() []notifier.Notifier { return nil }, rw, ts)
|
||||||
for err := range errs {
|
for err := range errs {
|
||||||
if err != nil {
|
if err != nil {
|
||||||
checkErrs = append(checkErrs, fmt.Errorf("\nfailed to exec group: %q, time: %s, err: %w", g.Name,
|
checkErrs = append(checkErrs, fmt.Errorf("\nfailed to exec group: %q, time: %s, err: %w", g.Name,
|
||||||
|
|||||||
@@ -27,9 +27,6 @@ vmalert-linux-ppc64le-prod:
|
|||||||
vmalert-linux-386-prod:
|
vmalert-linux-386-prod:
|
||||||
APP_NAME=vmalert $(MAKE) app-via-docker-linux-386
|
APP_NAME=vmalert $(MAKE) app-via-docker-linux-386
|
||||||
|
|
||||||
vmalert-linux-s390x-prod:
|
|
||||||
APP_NAME=vmalert $(MAKE) app-via-docker-linux-s390x
|
|
||||||
|
|
||||||
vmalert-darwin-amd64-prod:
|
vmalert-darwin-amd64-prod:
|
||||||
APP_NAME=vmalert $(MAKE) app-via-docker-darwin-amd64
|
APP_NAME=vmalert $(MAKE) app-via-docker-darwin-amd64
|
||||||
|
|
||||||
|
|||||||
@@ -31,7 +31,7 @@ type Group struct {
|
|||||||
// EvalDelay will adjust the `time` parameter of rule evaluation requests to compensate intentional query delay from datasource.
|
// EvalDelay will adjust the `time` parameter of rule evaluation requests to compensate intentional query delay from datasource.
|
||||||
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5155
|
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5155
|
||||||
EvalDelay *promutil.Duration `yaml:"eval_delay,omitempty"`
|
EvalDelay *promutil.Duration `yaml:"eval_delay,omitempty"`
|
||||||
Limit *int `yaml:"limit,omitempty"`
|
Limit int `yaml:"limit,omitempty"`
|
||||||
Rules []Rule `yaml:"rules"`
|
Rules []Rule `yaml:"rules"`
|
||||||
Concurrency int `yaml:"concurrency"`
|
Concurrency int `yaml:"concurrency"`
|
||||||
// Labels is a set of label value pairs, that will be added to every rule.
|
// Labels is a set of label value pairs, that will be added to every rule.
|
||||||
@@ -91,8 +91,8 @@ func (g *Group) Validate(validateTplFn ValidateTplFn, validateExpressions bool)
|
|||||||
if g.EvalOffset != nil && g.EvalDelay != nil {
|
if g.EvalOffset != nil && g.EvalDelay != nil {
|
||||||
return fmt.Errorf("eval_offset cannot be used with eval_delay")
|
return fmt.Errorf("eval_offset cannot be used with eval_delay")
|
||||||
}
|
}
|
||||||
if g.Limit != nil && *g.Limit < 0 {
|
if g.Limit < 0 {
|
||||||
return fmt.Errorf("invalid limit %d, shouldn't be less than 0", *g.Limit)
|
return fmt.Errorf("invalid limit %d, shouldn't be less than 0", g.Limit)
|
||||||
}
|
}
|
||||||
if g.Concurrency < 0 {
|
if g.Concurrency < 0 {
|
||||||
return fmt.Errorf("invalid concurrency %d, shouldn't be less than 0", g.Concurrency)
|
return fmt.Errorf("invalid concurrency %d, shouldn't be less than 0", g.Concurrency)
|
||||||
|
|||||||
@@ -116,7 +116,7 @@ func TestParse_Failure(t *testing.T) {
|
|||||||
|
|
||||||
f([]string{"testdata/rules/rules_interval_bad.rules"}, "eval_offset should be smaller than interval")
|
f([]string{"testdata/rules/rules_interval_bad.rules"}, "eval_offset should be smaller than interval")
|
||||||
f([]string{"testdata/rules/rules0-bad.rules"}, "unexpected token")
|
f([]string{"testdata/rules/rules0-bad.rules"}, "unexpected token")
|
||||||
f([]string{"testdata/dir/rules0-bad.rules"}, "invalid annotations")
|
f([]string{"testdata/dir/rules0-bad.rules"}, "error parsing annotation")
|
||||||
f([]string{"testdata/dir/rules1-bad.rules"}, "duplicate in file")
|
f([]string{"testdata/dir/rules1-bad.rules"}, "duplicate in file")
|
||||||
f([]string{"testdata/dir/rules2-bad.rules"}, "function \"unknown\" not defined")
|
f([]string{"testdata/dir/rules2-bad.rules"}, "function \"unknown\" not defined")
|
||||||
f([]string{"testdata/dir/rules3-bad.rules"}, "either `record` or `alert` must be set")
|
f([]string{"testdata/dir/rules3-bad.rules"}, "either `record` or `alert` must be set")
|
||||||
@@ -181,10 +181,9 @@ func TestGroupValidate_Failure(t *testing.T) {
|
|||||||
EvalOffset: promutil.NewDuration(2 * time.Minute),
|
EvalOffset: promutil.NewDuration(2 * time.Minute),
|
||||||
}, false, "eval_offset should be smaller than interval")
|
}, false, "eval_offset should be smaller than interval")
|
||||||
|
|
||||||
limit := -1
|
|
||||||
f(&Group{
|
f(&Group{
|
||||||
Name: "wrong limit",
|
Name: "wrong limit",
|
||||||
Limit: &limit,
|
Limit: -1,
|
||||||
}, false, "invalid limit")
|
}, false, "invalid limit")
|
||||||
|
|
||||||
f(&Group{
|
f(&Group{
|
||||||
@@ -343,6 +342,7 @@ func TestGroupValidate_Failure(t *testing.T) {
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
}, true, "bad prometheus expr")
|
}, true, "bad prometheus expr")
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestGroupValidate_Success(t *testing.T) {
|
func TestGroupValidate_Success(t *testing.T) {
|
||||||
|
|||||||
@@ -2,7 +2,6 @@ package config
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"slices"
|
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"github.com/VictoriaMetrics/VictoriaLogs/lib/logstorage"
|
"github.com/VictoriaMetrics/VictoriaLogs/lib/logstorage"
|
||||||
@@ -77,12 +76,13 @@ func (t *Type) ValidateExpr(expr string) error {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("bad LogsQL expr: %q, err: %w", expr, err)
|
return fmt.Errorf("bad LogsQL expr: %q, err: %w", expr, err)
|
||||||
}
|
}
|
||||||
labels, err := q.GetStatsLabels()
|
fields, _ := q.GetStatsByFields()
|
||||||
if err != nil {
|
for i := range fields {
|
||||||
return fmt.Errorf("cannot obtain labels from LogsQL expr: %q, err: %w", expr, err)
|
// VictoriaLogs inserts `_time` field as a label in result when query with `stats by (_time:step)`,
|
||||||
}
|
// making the result meaningless and may lead to cardinality issues.
|
||||||
if slices.Contains(labels, "_time") {
|
if fields[i] == "_time" {
|
||||||
return fmt.Errorf("bad LogsQL expr: %q, err: cannot contain time buckets stats pipe `stats by (_time:step)`", expr)
|
return fmt.Errorf("bad LogsQL expr: %q, err: cannot contain time buckets stats pipe `stats by (_time:step)`", expr)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
return fmt.Errorf("unknown datasource type=%q", t.Name)
|
return fmt.Errorf("unknown datasource type=%q", t.Name)
|
||||||
|
|||||||
@@ -5,7 +5,6 @@ import (
|
|||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"maps"
|
|
||||||
"net/http"
|
"net/http"
|
||||||
"net/url"
|
"net/url"
|
||||||
"strings"
|
"strings"
|
||||||
@@ -92,7 +91,9 @@ func (c *Client) Clone() *Client {
|
|||||||
ns.extraHeaders = make([]keyValue, len(c.extraHeaders))
|
ns.extraHeaders = make([]keyValue, len(c.extraHeaders))
|
||||||
copy(ns.extraHeaders, c.extraHeaders)
|
copy(ns.extraHeaders, c.extraHeaders)
|
||||||
}
|
}
|
||||||
maps.Copy(ns.extraParams, c.extraParams)
|
for k, v := range c.extraParams {
|
||||||
|
ns.extraParams[k] = v
|
||||||
|
}
|
||||||
|
|
||||||
return ns
|
return ns
|
||||||
}
|
}
|
||||||
@@ -172,26 +173,22 @@ func (c *Client) Query(ctx context.Context, query string, ts time.Time) (Result,
|
|||||||
return Result{}, nil, fmt.Errorf("second attempt: %w", err)
|
return Result{}, nil, fmt.Errorf("second attempt: %w", err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
defer func() { _ = resp.Body.Close() }()
|
|
||||||
|
|
||||||
// Process the received response.
|
// Process the received response.
|
||||||
var parseFn func(resp *http.Response) (Result, error)
|
var parseFn func(req *http.Request, resp *http.Response) (Result, error)
|
||||||
switch c.dataSourceType {
|
switch c.dataSourceType {
|
||||||
case datasourcePrometheus:
|
case datasourcePrometheus:
|
||||||
parseFn = parsePrometheusInstantResponse
|
parseFn = parsePrometheusResponse
|
||||||
case datasourceGraphite:
|
case datasourceGraphite:
|
||||||
parseFn = parseGraphiteResponse
|
parseFn = parseGraphiteResponse
|
||||||
case datasourceVLogs:
|
case datasourceVLogs:
|
||||||
parseFn = parseVLogsInstantResponse
|
parseFn = parseVLogsResponse
|
||||||
default:
|
default:
|
||||||
logger.Panicf("BUG: unsupported datasource type %q to parse query response", c.dataSourceType)
|
logger.Panicf("BUG: unsupported datasource type %q to parse query response", c.dataSourceType)
|
||||||
}
|
}
|
||||||
|
result, err := parseFn(req, resp)
|
||||||
result, err := parseFn(resp)
|
_ = resp.Body.Close()
|
||||||
if err != nil {
|
return result, req, err
|
||||||
return Result{}, nil, fmt.Errorf("error parsing response from %q: %w", req.URL.Redacted(), err)
|
|
||||||
}
|
|
||||||
return result, req, nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// QueryRange executes the given query on the given time range.
|
// QueryRange executes the given query on the given time range.
|
||||||
@@ -232,23 +229,19 @@ func (c *Client) QueryRange(ctx context.Context, query string, start, end time.T
|
|||||||
return res, fmt.Errorf("second attempt: %w", err)
|
return res, fmt.Errorf("second attempt: %w", err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
defer func() { _ = resp.Body.Close() }()
|
|
||||||
|
|
||||||
// Process the received response.
|
// Process the received response.
|
||||||
var parseFn func(resp *http.Response) (Result, error)
|
var parseFn func(req *http.Request, resp *http.Response) (Result, error)
|
||||||
switch c.dataSourceType {
|
switch c.dataSourceType {
|
||||||
case datasourcePrometheus:
|
case datasourcePrometheus:
|
||||||
parseFn = parsePrometheusRangeResponse
|
parseFn = parsePrometheusResponse
|
||||||
case datasourceVLogs:
|
case datasourceVLogs:
|
||||||
parseFn = parseVLogsRangeResponse
|
parseFn = parseVLogsResponse
|
||||||
default:
|
default:
|
||||||
logger.Panicf("BUG: unsupported datasource type %q to parse query range response", c.dataSourceType)
|
logger.Panicf("BUG: unsupported datasource type %q to parse query range response", c.dataSourceType)
|
||||||
}
|
}
|
||||||
|
res, err = parseFn(req, resp)
|
||||||
res, err = parseFn(resp)
|
_ = resp.Body.Close()
|
||||||
if err != nil {
|
|
||||||
return Result{}, fmt.Errorf("error parsing response from %q: %w", req.URL.Redacted(), err)
|
|
||||||
}
|
|
||||||
return res, err
|
return res, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -33,10 +33,10 @@ func (r graphiteResponse) metrics() []Metric {
|
|||||||
return ms
|
return ms
|
||||||
}
|
}
|
||||||
|
|
||||||
func parseGraphiteResponse(resp *http.Response) (Result, error) {
|
func parseGraphiteResponse(req *http.Request, resp *http.Response) (Result, error) {
|
||||||
r := &graphiteResponse{}
|
r := &graphiteResponse{}
|
||||||
if err := json.NewDecoder(resp.Body).Decode(r); err != nil {
|
if err := json.NewDecoder(resp.Body).Decode(r); err != nil {
|
||||||
return Result{}, fmt.Errorf("error parsing graphite metrics: %w", err)
|
return Result{}, fmt.Errorf("error parsing graphite metrics for %s: %w", req.URL.Redacted(), err)
|
||||||
}
|
}
|
||||||
return Result{Data: r.metrics()}, nil
|
return Result{Data: r.metrics()}, nil
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -34,7 +34,7 @@ type promResponse struct {
|
|||||||
// Stats supported by VictoriaMetrics since v1.90
|
// Stats supported by VictoriaMetrics since v1.90
|
||||||
Stats struct {
|
Stats struct {
|
||||||
SeriesFetched *string `json:"seriesFetched,omitempty"`
|
SeriesFetched *string `json:"seriesFetched,omitempty"`
|
||||||
} `json:"stats"`
|
} `json:"stats,omitempty"`
|
||||||
// IsPartial supported by VictoriaMetrics
|
// IsPartial supported by VictoriaMetrics
|
||||||
IsPartial *bool `json:"isPartial,omitempty"`
|
IsPartial *bool `json:"isPartial,omitempty"`
|
||||||
}
|
}
|
||||||
@@ -172,26 +172,17 @@ const (
|
|||||||
rtVector, rtMatrix, rScalar = "vector", "matrix", "scalar"
|
rtVector, rtMatrix, rScalar = "vector", "matrix", "scalar"
|
||||||
)
|
)
|
||||||
|
|
||||||
func parsePromResponse(resp *http.Response) (*promResponse, error) {
|
func parsePrometheusResponse(req *http.Request, resp *http.Response) (res Result, err error) {
|
||||||
r := &promResponse{}
|
r := &promResponse{}
|
||||||
if err := json.NewDecoder(resp.Body).Decode(r); err != nil {
|
if err = json.NewDecoder(resp.Body).Decode(r); err != nil {
|
||||||
return nil, fmt.Errorf("failed to decode response: %w", err)
|
return res, fmt.Errorf("error parsing response from %s: %w", req.URL.Redacted(), err)
|
||||||
}
|
}
|
||||||
if r.Status == statusError {
|
if r.Status == statusError {
|
||||||
return nil, fmt.Errorf("response error %q: %s", r.ErrorType, r.Error)
|
return res, fmt.Errorf("response error, query: %s, errorType: %s, error: %s", req.URL.Redacted(), r.ErrorType, r.Error)
|
||||||
}
|
}
|
||||||
if r.Status != statusSuccess {
|
if r.Status != statusSuccess {
|
||||||
return nil, fmt.Errorf("unknown response status %q", r.Status)
|
return res, fmt.Errorf("unknown status: %s, Expected success or error", r.Status)
|
||||||
}
|
}
|
||||||
return r, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func parsePrometheusInstantResponse(resp *http.Response) (res Result, err error) {
|
|
||||||
r, err := parsePromResponse(resp)
|
|
||||||
if err != nil {
|
|
||||||
return res, fmt.Errorf("failed to parse response: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
var parseFn func() ([]Metric, error)
|
var parseFn func() ([]Metric, error)
|
||||||
switch r.Data.ResultType {
|
switch r.Data.ResultType {
|
||||||
case rtVector:
|
case rtVector:
|
||||||
@@ -200,6 +191,12 @@ func parsePrometheusInstantResponse(resp *http.Response) (res Result, err error)
|
|||||||
return res, fmt.Errorf("unmarshal err %w; \n %#v", err, string(r.Data.Result))
|
return res, fmt.Errorf("unmarshal err %w; \n %#v", err, string(r.Data.Result))
|
||||||
}
|
}
|
||||||
parseFn = pi.metrics
|
parseFn = pi.metrics
|
||||||
|
case rtMatrix:
|
||||||
|
var pr promRange
|
||||||
|
if err := json.Unmarshal(r.Data.Result, &pr.Result); err != nil {
|
||||||
|
return res, err
|
||||||
|
}
|
||||||
|
parseFn = pr.metrics
|
||||||
case rScalar:
|
case rScalar:
|
||||||
var ps promScalar
|
var ps promScalar
|
||||||
if err := json.Unmarshal(r.Data.Result, &ps); err != nil {
|
if err := json.Unmarshal(r.Data.Result, &ps); err != nil {
|
||||||
@@ -209,6 +206,7 @@ func parsePrometheusInstantResponse(resp *http.Response) (res Result, err error)
|
|||||||
default:
|
default:
|
||||||
return res, fmt.Errorf("unknown result type %q", r.Data.ResultType)
|
return res, fmt.Errorf("unknown result type %q", r.Data.ResultType)
|
||||||
}
|
}
|
||||||
|
|
||||||
ms, err := parseFn()
|
ms, err := parseFn()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return res, err
|
return res, err
|
||||||
@@ -224,34 +222,6 @@ func parsePrometheusInstantResponse(resp *http.Response) (res Result, err error)
|
|||||||
return res, nil
|
return res, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func parsePrometheusRangeResponse(resp *http.Response) (res Result, err error) {
|
|
||||||
r, err := parsePromResponse(resp)
|
|
||||||
if err != nil {
|
|
||||||
return res, fmt.Errorf("failed to parse response: %w", err)
|
|
||||||
}
|
|
||||||
if r.Data.ResultType != rtMatrix {
|
|
||||||
return res, fmt.Errorf("unexpected result type %q; expected result type %q", r.Data.ResultType, rtMatrix)
|
|
||||||
}
|
|
||||||
|
|
||||||
var pr promRange
|
|
||||||
if err := json.Unmarshal(r.Data.Result, &pr.Result); err != nil {
|
|
||||||
return res, err
|
|
||||||
}
|
|
||||||
ms, err := pr.metrics()
|
|
||||||
if err != nil {
|
|
||||||
return res, err
|
|
||||||
}
|
|
||||||
res = Result{Data: ms, IsPartial: r.IsPartial}
|
|
||||||
if r.Stats.SeriesFetched != nil {
|
|
||||||
intV, err := strconv.Atoi(*r.Stats.SeriesFetched)
|
|
||||||
if err != nil {
|
|
||||||
return res, fmt.Errorf("failed to convert stats.seriesFetched to int: %w", err)
|
|
||||||
}
|
|
||||||
res.SeriesFetched = &intV
|
|
||||||
}
|
|
||||||
return res, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (c *Client) setPrometheusInstantReqParams(r *http.Request, query string, timestamp time.Time) {
|
func (c *Client) setPrometheusInstantReqParams(r *http.Request, query string, timestamp time.Time) {
|
||||||
if c.appendTypePrefix {
|
if c.appendTypePrefix {
|
||||||
r.URL.Path += "/prometheus"
|
r.URL.Path += "/prometheus"
|
||||||
|
|||||||
@@ -65,23 +65,21 @@ func TestVMInstantQuery(t *testing.T) {
|
|||||||
case 3:
|
case 3:
|
||||||
w.Write([]byte(`{"status":"unknown"}`))
|
w.Write([]byte(`{"status":"unknown"}`))
|
||||||
case 4:
|
case 4:
|
||||||
w.Write([]byte(`{"status":"success","data":{"resultType":"vector"}}`))
|
w.Write([]byte(`{"status":"success","data":{"resultType":"matrix"}}`))
|
||||||
case 5:
|
case 5:
|
||||||
w.Write([]byte(`{"status":"success","data":{"resultType":"matrix","result":[{"metric":{"__name__":"vm_rows"},"values":[[1583786142,"13763"]]}]}}`))
|
|
||||||
case 6:
|
|
||||||
w.Write([]byte(`{"status":"success","data":{"resultType":"vector","result":[{"metric":{"__name__":"vm_rows","foo":"bar"},"value":[1583786142,"13763"]},{"metric":{"__name__":"vm_requests","foo":"baz"},"value":[1583786140,"2000"]}]}}`))
|
w.Write([]byte(`{"status":"success","data":{"resultType":"vector","result":[{"metric":{"__name__":"vm_rows","foo":"bar"},"value":[1583786142,"13763"]},{"metric":{"__name__":"vm_requests","foo":"baz"},"value":[1583786140,"2000"]}]}}`))
|
||||||
case 7:
|
case 6:
|
||||||
w.Write([]byte(`{"status":"success","data":{"resultType":"scalar","result":[1583786142, "1"]}}`))
|
w.Write([]byte(`{"status":"success","data":{"resultType":"scalar","result":[1583786142, "1"]}}`))
|
||||||
case 8:
|
case 7:
|
||||||
w.Write([]byte(`{"status":"success","data":{"resultType":"scalar","result":[1583786142, "1"]},"stats":{"seriesFetched": "42"}}`))
|
w.Write([]byte(`{"status":"success","data":{"resultType":"scalar","result":[1583786142, "1"]},"stats":{"seriesFetched": "42"}}`))
|
||||||
case 9:
|
case 8:
|
||||||
w.Write([]byte(`{"status":"success", "isPartial":true, "data":{"resultType":"scalar","result":[1583786142, "1"]}}`))
|
w.Write([]byte(`{"status":"success", "isPartial":true, "data":{"resultType":"scalar","result":[1583786142, "1"]}}`))
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
mux.HandleFunc("/render", func(w http.ResponseWriter, _ *http.Request) {
|
mux.HandleFunc("/render", func(w http.ResponseWriter, _ *http.Request) {
|
||||||
c++
|
c++
|
||||||
switch c {
|
switch c {
|
||||||
case 10:
|
case 9:
|
||||||
w.Write([]byte(`[{"target":"constantLine(10)","tags":{"name":"constantLine(10)"},"datapoints":[[10,1611758343],[10,1611758373],[10,1611758403]]}]`))
|
w.Write([]byte(`[{"target":"constantLine(10)","tags":{"name":"constantLine(10)"},"datapoints":[[10,1611758343],[10,1611758373],[10,1611758403]]}]`))
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
@@ -104,9 +102,9 @@ func TestVMInstantQuery(t *testing.T) {
|
|||||||
t.Fatalf("failed to parse 'time' query param %q: %s", timeParam, err)
|
t.Fatalf("failed to parse 'time' query param %q: %s", timeParam, err)
|
||||||
}
|
}
|
||||||
switch c {
|
switch c {
|
||||||
case 11:
|
case 10:
|
||||||
w.Write([]byte("[]"))
|
w.Write([]byte("[]"))
|
||||||
case 12:
|
case 11:
|
||||||
w.Write([]byte(`{"status":"success","data":{"resultType":"vector","result":[{"metric":{"__name__":"total","foo":"bar"},"value":[1583786142,"13763"]},{"metric":{"__name__":"total","foo":"baz"},"value":[1583786140,"2000"]}]}}`))
|
w.Write([]byte(`{"status":"success","data":{"resultType":"vector","result":[{"metric":{"__name__":"total","foo":"bar"},"value":[1583786142,"13763"]},{"metric":{"__name__":"total","foo":"baz"},"value":[1583786140,"2000"]}]}}`))
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
@@ -125,7 +123,6 @@ func TestVMInstantQuery(t *testing.T) {
|
|||||||
ts := time.Now()
|
ts := time.Now()
|
||||||
|
|
||||||
expErr := func(query, err string) {
|
expErr := func(query, err string) {
|
||||||
t.Helper()
|
|
||||||
_, _, gotErr := pq.Query(ctx, query, ts)
|
_, _, gotErr := pq.Query(ctx, query, ts)
|
||||||
if gotErr == nil {
|
if gotErr == nil {
|
||||||
t.Fatalf("expected %q got nil", err)
|
t.Fatalf("expected %q got nil", err)
|
||||||
@@ -138,11 +135,10 @@ func TestVMInstantQuery(t *testing.T) {
|
|||||||
expErr(vmQuery, "500") // 0
|
expErr(vmQuery, "500") // 0
|
||||||
expErr(vmQuery, "error parsing response") // 1
|
expErr(vmQuery, "error parsing response") // 1
|
||||||
expErr(vmQuery, "response error") // 2
|
expErr(vmQuery, "response error") // 2
|
||||||
expErr(vmQuery, "unknown response status") // 3
|
expErr(vmQuery, "unknown status") // 3
|
||||||
expErr(vmQuery, "unexpected end of JSON input") // 4
|
expErr(vmQuery, "unexpected end of JSON input") // 4
|
||||||
expErr(vmQuery, "unknown result type") // 5
|
|
||||||
|
|
||||||
res, _, err := pq.Query(ctx, vmQuery, ts) // 6 - vector
|
res, _, err := pq.Query(ctx, vmQuery, ts) // 5 - vector
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("unexpected %s", err)
|
t.Fatalf("unexpected %s", err)
|
||||||
}
|
}
|
||||||
@@ -163,7 +159,7 @@ func TestVMInstantQuery(t *testing.T) {
|
|||||||
}
|
}
|
||||||
metricsEqual(t, res.Data, expected)
|
metricsEqual(t, res.Data, expected)
|
||||||
|
|
||||||
res, req, err := pq.Query(ctx, vmQuery, ts) // 7 - scalar
|
res, req, err := pq.Query(ctx, vmQuery, ts) // 6 - scalar
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("unexpected %s", err)
|
t.Fatalf("unexpected %s", err)
|
||||||
}
|
}
|
||||||
@@ -188,7 +184,7 @@ func TestVMInstantQuery(t *testing.T) {
|
|||||||
res.SeriesFetched)
|
res.SeriesFetched)
|
||||||
}
|
}
|
||||||
|
|
||||||
res, _, err = pq.Query(ctx, vmQuery, ts) // 8 - scalar with stats
|
res, _, err = pq.Query(ctx, vmQuery, ts) // 7 - scalar with stats
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("unexpected %s", err)
|
t.Fatalf("unexpected %s", err)
|
||||||
}
|
}
|
||||||
@@ -209,7 +205,7 @@ func TestVMInstantQuery(t *testing.T) {
|
|||||||
*res.SeriesFetched)
|
*res.SeriesFetched)
|
||||||
}
|
}
|
||||||
|
|
||||||
res, _, err = pq.Query(ctx, vmQuery, ts) // 9
|
res, _, err = pq.Query(ctx, vmQuery, ts) // 8
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("unexpected %s", err)
|
t.Fatalf("unexpected %s", err)
|
||||||
}
|
}
|
||||||
@@ -220,7 +216,7 @@ func TestVMInstantQuery(t *testing.T) {
|
|||||||
// test graphite
|
// test graphite
|
||||||
gq := s.BuildWithParams(QuerierParams{DataSourceType: string(datasourceGraphite)})
|
gq := s.BuildWithParams(QuerierParams{DataSourceType: string(datasourceGraphite)})
|
||||||
|
|
||||||
res, _, err = gq.Query(ctx, queryRender, ts) // 10 - graphite
|
res, _, err = gq.Query(ctx, queryRender, ts) // 9 - graphite
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("unexpected %s", err)
|
t.Fatalf("unexpected %s", err)
|
||||||
}
|
}
|
||||||
@@ -240,9 +236,9 @@ func TestVMInstantQuery(t *testing.T) {
|
|||||||
vlogs := datasourceVLogs
|
vlogs := datasourceVLogs
|
||||||
pq = s.BuildWithParams(QuerierParams{DataSourceType: string(vlogs), EvaluationInterval: 15 * time.Second})
|
pq = s.BuildWithParams(QuerierParams{DataSourceType: string(vlogs), EvaluationInterval: 15 * time.Second})
|
||||||
|
|
||||||
expErr(vlogsQuery, "error parsing response") // 11
|
expErr(vlogsQuery, "error parsing response") // 10
|
||||||
|
|
||||||
res, _, err = pq.Query(ctx, vlogsQuery, ts) // 12
|
res, _, err = pq.Query(ctx, vlogsQuery, ts) // 11
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("unexpected %s", err)
|
t.Fatalf("unexpected %s", err)
|
||||||
}
|
}
|
||||||
@@ -394,8 +390,6 @@ func TestVMRangeQuery(t *testing.T) {
|
|||||||
switch c {
|
switch c {
|
||||||
case 0:
|
case 0:
|
||||||
w.Write([]byte(`{"status":"success","data":{"resultType":"matrix","result":[{"metric":{"__name__":"vm_rows"},"values":[[1583786142,"13763"]]}]}}`))
|
w.Write([]byte(`{"status":"success","data":{"resultType":"matrix","result":[{"metric":{"__name__":"vm_rows"},"values":[[1583786142,"13763"]]}]}}`))
|
||||||
case 1:
|
|
||||||
w.Write([]byte(`{"status":"success","data":{"resultType":"vector","result":[1583786142, "1"]}}`))
|
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
mux.HandleFunc("/select/logsql/stats_query_range", func(w http.ResponseWriter, r *http.Request) {
|
mux.HandleFunc("/select/logsql/stats_query_range", func(w http.ResponseWriter, r *http.Request) {
|
||||||
@@ -428,7 +422,7 @@ func TestVMRangeQuery(t *testing.T) {
|
|||||||
t.Fatalf("expected 'step' query param to be 60s; got %q instead", step)
|
t.Fatalf("expected 'step' query param to be 60s; got %q instead", step)
|
||||||
}
|
}
|
||||||
switch c {
|
switch c {
|
||||||
case 2:
|
case 1:
|
||||||
w.Write([]byte(`{"status":"success","data":{"resultType":"matrix","result":[{"metric":{"__name__":"total"},"values":[[1583786142,"10"]]}]}}`))
|
w.Write([]byte(`{"status":"success","data":{"resultType":"matrix","result":[{"metric":{"__name__":"total"},"values":[[1583786142,"10"]]}]}}`))
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
@@ -452,13 +446,13 @@ func TestVMRangeQuery(t *testing.T) {
|
|||||||
|
|
||||||
start, end := time.Now().Add(-time.Minute), time.Now()
|
start, end := time.Now().Add(-time.Minute), time.Now()
|
||||||
|
|
||||||
res, err := pq.QueryRange(ctx, vmQuery, start, end) // case 0
|
res, err := pq.QueryRange(ctx, vmQuery, start, end)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("unexpected %s", err)
|
t.Fatalf("unexpected %s", err)
|
||||||
}
|
}
|
||||||
m := res.Data
|
m := res.Data
|
||||||
if len(m) != 1 {
|
if len(m) != 1 {
|
||||||
t.Fatalf("expected 1 metric got %d in %+v", len(m), m)
|
t.Fatalf("expected 1 metric got %d in %+v", len(m), m)
|
||||||
}
|
}
|
||||||
expected := Metric{
|
expected := Metric{
|
||||||
Labels: []prompb.Label{{Value: "vm_rows", Name: "__name__"}},
|
Labels: []prompb.Label{{Value: "vm_rows", Name: "__name__"}},
|
||||||
@@ -469,9 +463,6 @@ func TestVMRangeQuery(t *testing.T) {
|
|||||||
t.Fatalf("unexpected metric %+v want %+v", m[0], expected)
|
t.Fatalf("unexpected metric %+v want %+v", m[0], expected)
|
||||||
}
|
}
|
||||||
|
|
||||||
_, err = pq.QueryRange(ctx, vmQuery, start, end) // case 1
|
|
||||||
expectError(t, err, "unexpected result type")
|
|
||||||
|
|
||||||
// test unsupported graphite
|
// test unsupported graphite
|
||||||
gq := s.BuildWithParams(QuerierParams{DataSourceType: string(datasourceGraphite)})
|
gq := s.BuildWithParams(QuerierParams{DataSourceType: string(datasourceGraphite)})
|
||||||
|
|
||||||
|
|||||||
@@ -40,28 +40,8 @@ func (c *Client) setVLogsRangeReqParams(r *http.Request, query string, start, en
|
|||||||
c.setReqParams(r, query)
|
c.setReqParams(r, query)
|
||||||
}
|
}
|
||||||
|
|
||||||
func parseVLogsInstantResponse(resp *http.Response) (res Result, err error) {
|
func parseVLogsResponse(req *http.Request, resp *http.Response) (res Result, err error) {
|
||||||
res, err = parsePrometheusInstantResponse(resp)
|
res, err = parsePrometheusResponse(req, resp)
|
||||||
if err != nil {
|
|
||||||
return Result{}, err
|
|
||||||
}
|
|
||||||
for i := range res.Data {
|
|
||||||
m := &res.Data[i]
|
|
||||||
for j := range m.Labels {
|
|
||||||
// reserve the stats func result name with a new label `stats_result` instead of dropping it,
|
|
||||||
// since there could be multiple stats results in a single query, for instance:
|
|
||||||
// _time:5m | stats quantile(0.5, request_duration_seconds) p50, quantile(0.9, request_duration_seconds) p90
|
|
||||||
if m.Labels[j].Name == "__name__" {
|
|
||||||
m.Labels[j].Name = "stats_result"
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
func parseVLogsRangeResponse(resp *http.Response) (res Result, err error) {
|
|
||||||
res, err = parsePrometheusRangeResponse(resp)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return Result{}, err
|
return Result{}, err
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -132,9 +132,12 @@ func (ls Labels) String() string {
|
|||||||
// a=[]Label{{Name: "a", Value: "2"}},b=[]Label{{Name: "a", Value: "1"}}, return 1
|
// a=[]Label{{Name: "a", Value: "2"}},b=[]Label{{Name: "a", Value: "1"}}, return 1
|
||||||
// a=[]Label{{Name: "a", Value: "1"}},b=[]Label{{Name: "a", Value: "1"}}, return 0
|
// a=[]Label{{Name: "a", Value: "1"}},b=[]Label{{Name: "a", Value: "1"}}, return 0
|
||||||
func LabelCompare(a, b Labels) int {
|
func LabelCompare(a, b Labels) int {
|
||||||
l := min(len(b), len(a))
|
l := len(a)
|
||||||
|
if len(b) < l {
|
||||||
|
l = len(b)
|
||||||
|
}
|
||||||
|
|
||||||
for i := range l {
|
for i := 0; i < l; i++ {
|
||||||
if a[i].Name != b[i].Name {
|
if a[i].Name != b[i].Name {
|
||||||
if a[i].Name < b[i].Name {
|
if a[i].Name < b[i].Name {
|
||||||
return -1
|
return -1
|
||||||
|
|||||||
@@ -13,7 +13,7 @@ func BenchmarkPromInstantUnmarshal(b *testing.B) {
|
|||||||
|
|
||||||
// BenchmarkParsePrometheusResponse/Instant_std+fastjson-10 1760 668959 ns/op 280147 B/op 5781 allocs/op
|
// BenchmarkParsePrometheusResponse/Instant_std+fastjson-10 1760 668959 ns/op 280147 B/op 5781 allocs/op
|
||||||
b.Run("Instant std+fastjson", func(b *testing.B) {
|
b.Run("Instant std+fastjson", func(b *testing.B) {
|
||||||
for range b.N {
|
for i := 0; i < b.N; i++ {
|
||||||
var pi promInstant
|
var pi promInstant
|
||||||
err = pi.Unmarshal(data)
|
err = pi.Unmarshal(data)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|||||||
@@ -7,6 +7,7 @@ import (
|
|||||||
"net/url"
|
"net/url"
|
||||||
"os"
|
"os"
|
||||||
"sort"
|
"sort"
|
||||||
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
@@ -76,12 +77,15 @@ absolute path to all .tpl files in root.
|
|||||||
`Link to VMUI: -external.alert.source='vmui/#/?g0.expr={{.Expr|queryEscape}}'. `+
|
`Link to VMUI: -external.alert.source='vmui/#/?g0.expr={{.Expr|queryEscape}}'. `+
|
||||||
`If empty 'vmalert/alert?group_id={{.GroupID}}&alert_id={{.AlertID}}' is used.`)
|
`If empty 'vmalert/alert?group_id={{.GroupID}}&alert_id={{.AlertID}}' is used.`)
|
||||||
externalLabels = flagutil.NewArrayString("external.label", "Optional label in the form 'Name=value' to add to all generated recording rules and alerts. "+
|
externalLabels = flagutil.NewArrayString("external.label", "Optional label in the form 'Name=value' to add to all generated recording rules and alerts. "+
|
||||||
"In case of conflicts, original labels are kept with prefix 'exported_'.")
|
"In case of conflicts, original labels are kept with prefix `exported_`.")
|
||||||
|
|
||||||
dryRun = flag.Bool("dryRun", false, "Whether to check only config files without running vmalert. The rules file are validated. The -rule flag must be specified.")
|
dryRun = flag.Bool("dryRun", false, "Whether to check only config files without running vmalert. The rules file are validated. The -rule flag must be specified.")
|
||||||
)
|
)
|
||||||
|
|
||||||
var extURL *url.URL
|
var (
|
||||||
|
alertURLGeneratorFn notifier.AlertURLGenerator
|
||||||
|
extURL *url.URL
|
||||||
|
)
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
// Write flags and help message to stdout, since it is easier to grep or pipe.
|
// Write flags and help message to stdout, since it is easier to grep or pipe.
|
||||||
@@ -117,7 +121,7 @@ func main() {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
err = notifier.InitAlertURLGeneratorFn(extURL, *externalAlertSource, *validateTemplates)
|
alertURLGeneratorFn, err = getAlertURLGenerator(extURL, *externalAlertSource, *validateTemplates)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logger.Fatalf("failed to init `external.alert.source`: %s", err)
|
logger.Fatalf("failed to init `external.alert.source`: %s", err)
|
||||||
}
|
}
|
||||||
@@ -159,7 +163,7 @@ func main() {
|
|||||||
ctx, cancel := context.WithCancel(context.Background())
|
ctx, cancel := context.WithCancel(context.Background())
|
||||||
manager, err := newManager(ctx)
|
manager, err := newManager(ctx)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logger.Fatalf("failed to create manager: %s", err)
|
logger.Fatalf("failed to init: %s", err)
|
||||||
}
|
}
|
||||||
logger.Infof("reading rules configuration file from %q", strings.Join(*rulePath, ";"))
|
logger.Infof("reading rules configuration file from %q", strings.Join(*rulePath, ";"))
|
||||||
groupsCfg, err := config.Parse(*rulePath, validateTplFn, *validateExpressions)
|
groupsCfg, err := config.Parse(*rulePath, validateTplFn, *validateExpressions)
|
||||||
@@ -224,13 +228,14 @@ func newManager(ctx context.Context) (*manager, error) {
|
|||||||
labels[s[:n]] = s[n+1:]
|
labels[s[:n]] = s[n+1:]
|
||||||
}
|
}
|
||||||
|
|
||||||
err = notifier.Init(labels, *externalURL)
|
nts, err := notifier.Init(alertURLGeneratorFn, labels, *externalURL)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("failed to init notifier: %w", err)
|
return nil, fmt.Errorf("failed to init notifier: %w", err)
|
||||||
}
|
}
|
||||||
manager := &manager{
|
manager := &manager{
|
||||||
groups: make(map[uint64]*rule.Group),
|
groups: make(map[uint64]*rule.Group),
|
||||||
querierBuilder: q,
|
querierBuilder: q,
|
||||||
|
notifiers: nts,
|
||||||
labels: labels,
|
labels: labels,
|
||||||
}
|
}
|
||||||
rw, err := remotewrite.Init(ctx)
|
rw, err := remotewrite.Init(ctx)
|
||||||
@@ -287,6 +292,35 @@ func getHostnameAsExternalURL(addr string, isSecure bool) (*url.URL, error) {
|
|||||||
return url.Parse(fmt.Sprintf("%s%s%s", schema, hname, port))
|
return url.Parse(fmt.Sprintf("%s%s%s", schema, hname, port))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func getAlertURLGenerator(externalURL *url.URL, externalAlertSource string, validateTemplate bool) (notifier.AlertURLGenerator, error) {
|
||||||
|
if externalAlertSource == "" {
|
||||||
|
return func(a notifier.Alert) string {
|
||||||
|
gID, aID := strconv.FormatUint(a.GroupID, 10), strconv.FormatUint(a.ID, 10)
|
||||||
|
return fmt.Sprintf("%s/vmalert/alert?%s=%s&%s=%s", externalURL, paramGroupID, gID, paramAlertID, aID)
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
if validateTemplate {
|
||||||
|
if err := notifier.ValidateTemplates(map[string]string{
|
||||||
|
"tpl": externalAlertSource,
|
||||||
|
}); err != nil {
|
||||||
|
return nil, fmt.Errorf("error validating source template %s: %w", externalAlertSource, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
m := map[string]string{
|
||||||
|
"tpl": externalAlertSource,
|
||||||
|
}
|
||||||
|
return func(alert notifier.Alert) string {
|
||||||
|
qFn := func(_ string) ([]datasource.Metric, error) {
|
||||||
|
return nil, fmt.Errorf("`query` template isn't supported for alert source template")
|
||||||
|
}
|
||||||
|
templated, err := alert.ExecTemplate(qFn, alert.Labels, m)
|
||||||
|
if err != nil {
|
||||||
|
logger.Errorf("cannot template alert source: %s", err)
|
||||||
|
}
|
||||||
|
return fmt.Sprintf("%s/%s", externalURL, templated["tpl"])
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
func usage() {
|
func usage() {
|
||||||
const s = `
|
const s = `
|
||||||
vmalert processes alerts and recording rules.
|
vmalert processes alerts and recording rules.
|
||||||
|
|||||||
@@ -49,6 +49,30 @@ func TestGetExternalURL(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestGetAlertURLGenerator(t *testing.T) {
|
||||||
|
testAlert := notifier.Alert{GroupID: 42, ID: 2, Value: 4, Labels: map[string]string{"tenant": "baz"}}
|
||||||
|
u, _ := url.Parse("https://victoriametrics.com/path")
|
||||||
|
fn, err := getAlertURLGenerator(u, "", false)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("unexpected error %s", err)
|
||||||
|
}
|
||||||
|
exp := fmt.Sprintf("https://victoriametrics.com/path/vmalert/alert?%s=42&%s=2", paramGroupID, paramAlertID)
|
||||||
|
if exp != fn(testAlert) {
|
||||||
|
t.Fatalf("unexpected url want %s, got %s", exp, fn(testAlert))
|
||||||
|
}
|
||||||
|
_, err = getAlertURLGenerator(nil, "foo?{{invalid}}", true)
|
||||||
|
if err == nil {
|
||||||
|
t.Fatalf("expected template validation error got nil")
|
||||||
|
}
|
||||||
|
fn, err = getAlertURLGenerator(u, "foo?query={{$value}}&ds={{ $labels.tenant }}", true)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("unexpected error %s", err)
|
||||||
|
}
|
||||||
|
if exp := "https://victoriametrics.com/path/foo?query=4&ds=baz"; exp != fn(testAlert) {
|
||||||
|
t.Fatalf("unexpected url want %s, got %s", exp, fn(testAlert))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestConfigReload(t *testing.T) {
|
func TestConfigReload(t *testing.T) {
|
||||||
originalRulePath := *rulePath
|
originalRulePath := *rulePath
|
||||||
originalExternalURL := extURL
|
originalExternalURL := extURL
|
||||||
@@ -96,10 +120,9 @@ groups:
|
|||||||
querierBuilder: &datasource.FakeQuerier{},
|
querierBuilder: &datasource.FakeQuerier{},
|
||||||
groups: make(map[uint64]*rule.Group),
|
groups: make(map[uint64]*rule.Group),
|
||||||
labels: map[string]string{},
|
labels: map[string]string{},
|
||||||
|
notifiers: func() []notifier.Notifier { return []notifier.Notifier{¬ifier.FakeNotifier{}} },
|
||||||
rw: &remotewrite.Client{},
|
rw: &remotewrite.Client{},
|
||||||
}
|
}
|
||||||
_, cleanup := notifier.InitFakeNotifier()
|
|
||||||
defer cleanup()
|
|
||||||
|
|
||||||
syncCh := make(chan struct{})
|
syncCh := make(chan struct{})
|
||||||
sighupCh := procutil.NewSighupChan()
|
sighupCh := procutil.NewSighupChan()
|
||||||
|
|||||||
@@ -3,7 +3,6 @@ package main
|
|||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
"strconv"
|
|
||||||
"sync"
|
"sync"
|
||||||
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config"
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config"
|
||||||
@@ -17,6 +16,7 @@ import (
|
|||||||
// manager controls group states
|
// manager controls group states
|
||||||
type manager struct {
|
type manager struct {
|
||||||
querierBuilder datasource.QuerierBuilder
|
querierBuilder datasource.QuerierBuilder
|
||||||
|
notifiers func() []notifier.Notifier
|
||||||
|
|
||||||
rw remotewrite.RWClient
|
rw remotewrite.RWClient
|
||||||
// remote read builder.
|
// remote read builder.
|
||||||
@@ -29,8 +29,25 @@ type manager struct {
|
|||||||
groups map[uint64]*rule.Group
|
groups map[uint64]*rule.Group
|
||||||
}
|
}
|
||||||
|
|
||||||
// groupAPI generates apiGroup object from group by its ID(hash)
|
// ruleAPI generates apiRule object from alert by its ID(hash)
|
||||||
func (m *manager) groupAPI(gID uint64) (*rule.ApiGroup, error) {
|
func (m *manager) ruleAPI(gID, rID uint64) (apiRule, error) {
|
||||||
|
m.groupsMu.RLock()
|
||||||
|
defer m.groupsMu.RUnlock()
|
||||||
|
|
||||||
|
g, ok := m.groups[gID]
|
||||||
|
if !ok {
|
||||||
|
return apiRule{}, fmt.Errorf("can't find group with id %d", gID)
|
||||||
|
}
|
||||||
|
for _, rule := range g.Rules {
|
||||||
|
if rule.ID() == rID {
|
||||||
|
return ruleToAPI(rule), nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return apiRule{}, fmt.Errorf("can't find rule with id %d in group %q", rID, g.Name)
|
||||||
|
}
|
||||||
|
|
||||||
|
// alertAPI generates apiAlert object from alert by its ID(hash)
|
||||||
|
func (m *manager) alertAPI(gID, aID uint64) (*apiAlert, error) {
|
||||||
m.groupsMu.RLock()
|
m.groupsMu.RLock()
|
||||||
defer m.groupsMu.RUnlock()
|
defer m.groupsMu.RUnlock()
|
||||||
|
|
||||||
@@ -38,47 +55,13 @@ func (m *manager) groupAPI(gID uint64) (*rule.ApiGroup, error) {
|
|||||||
if !ok {
|
if !ok {
|
||||||
return nil, fmt.Errorf("can't find group with id %d", gID)
|
return nil, fmt.Errorf("can't find group with id %d", gID)
|
||||||
}
|
}
|
||||||
return g.ToAPI(), nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// ruleAPI generates apiRule object from alert by its ID(hash)
|
|
||||||
func (m *manager) ruleAPI(gID, rID uint64) (rule.ApiRule, error) {
|
|
||||||
m.groupsMu.RLock()
|
|
||||||
defer m.groupsMu.RUnlock()
|
|
||||||
|
|
||||||
group, ok := m.groups[gID]
|
|
||||||
if !ok {
|
|
||||||
return rule.ApiRule{}, fmt.Errorf("can't find group with id %d", gID)
|
|
||||||
}
|
|
||||||
g := group.ToAPI()
|
|
||||||
ruleID := strconv.FormatUint(rID, 10)
|
|
||||||
for _, r := range g.Rules {
|
for _, r := range g.Rules {
|
||||||
if r.ID == ruleID {
|
ar, ok := r.(*rule.AlertingRule)
|
||||||
return r, nil
|
if !ok {
|
||||||
}
|
|
||||||
}
|
|
||||||
return rule.ApiRule{}, fmt.Errorf("can't find rule with id %d in group %q", rID, g.Name)
|
|
||||||
}
|
|
||||||
|
|
||||||
// alertAPI generates apiAlert object from alert by its ID(hash)
|
|
||||||
func (m *manager) alertAPI(gID, aID uint64) (*rule.ApiAlert, error) {
|
|
||||||
m.groupsMu.RLock()
|
|
||||||
defer m.groupsMu.RUnlock()
|
|
||||||
|
|
||||||
group, ok := m.groups[gID]
|
|
||||||
if !ok {
|
|
||||||
return nil, fmt.Errorf("can't find group with id %d", gID)
|
|
||||||
}
|
|
||||||
g := group.ToAPI()
|
|
||||||
for _, r := range g.Rules {
|
|
||||||
if r.Type != rule.TypeAlerting {
|
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
alertID := strconv.FormatUint(aID, 10)
|
if apiAlert := alertToAPI(ar, aID); apiAlert != nil {
|
||||||
for _, a := range r.Alerts {
|
return apiAlert, nil
|
||||||
if a.ID == alertID {
|
|
||||||
return a, nil
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return nil, fmt.Errorf("can't find alert with id %d in group %q", aID, g.Name)
|
return nil, fmt.Errorf("can't find alert with id %d in group %q", aID, g.Name)
|
||||||
@@ -99,16 +82,17 @@ func (m *manager) close() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (m *manager) startGroup(ctx context.Context, g *rule.Group, restore bool) error {
|
func (m *manager) startGroup(ctx context.Context, g *rule.Group, restore bool) error {
|
||||||
|
m.wg.Add(1)
|
||||||
id := g.GetID()
|
id := g.GetID()
|
||||||
g.Init()
|
g.Init()
|
||||||
m.wg.Go(func() {
|
go func() {
|
||||||
|
defer m.wg.Done()
|
||||||
if restore {
|
if restore {
|
||||||
g.Start(ctx, m.rw, m.rr)
|
g.Start(ctx, m.notifiers, m.rw, m.rr)
|
||||||
} else {
|
} else {
|
||||||
g.Start(ctx, m.rw, nil)
|
g.Start(ctx, m.notifiers, m.rw, nil)
|
||||||
}
|
}
|
||||||
})
|
}()
|
||||||
|
|
||||||
m.groups[id] = g
|
m.groups[id] = g
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
@@ -135,7 +119,7 @@ func (m *manager) update(ctx context.Context, groupsCfg []config.Group, restore
|
|||||||
if rrPresent && m.rw == nil {
|
if rrPresent && m.rw == nil {
|
||||||
return fmt.Errorf("config contains recording rules but `-remoteWrite.url` isn't set")
|
return fmt.Errorf("config contains recording rules but `-remoteWrite.url` isn't set")
|
||||||
}
|
}
|
||||||
if arPresent && notifier.GetTargets() == nil {
|
if arPresent && m.notifiers == nil {
|
||||||
return fmt.Errorf("config contains alerting rules but neither `-notifier.url` nor `-notifier.config` nor `-notifier.blackhole` aren't set")
|
return fmt.Errorf("config contains alerting rules but neither `-notifier.url` nor `-notifier.config` nor `-notifier.blackhole` aren't set")
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -172,15 +156,15 @@ func (m *manager) update(ctx context.Context, groupsCfg []config.Group, restore
|
|||||||
if len(toUpdate) > 0 {
|
if len(toUpdate) > 0 {
|
||||||
var wg sync.WaitGroup
|
var wg sync.WaitGroup
|
||||||
for _, item := range toUpdate {
|
for _, item := range toUpdate {
|
||||||
oldG := item.old
|
wg.Add(1)
|
||||||
newG := item.new
|
// cancel evaluation so the Update will be applied as fast as possible.
|
||||||
wg.Go(func() {
|
// it is important to call InterruptEval before the update, because cancel fn
|
||||||
// cancel evaluation so the Update will be applied as fast as possible.
|
// can be re-assigned during the update.
|
||||||
// it is important to call InterruptEval before the update, because cancel fn
|
item.old.InterruptEval()
|
||||||
// can be re-assigned during the update.
|
go func(oldGroup *rule.Group, newGroup *rule.Group) {
|
||||||
oldG.InterruptEval()
|
oldGroup.UpdateWith(newGroup)
|
||||||
oldG.UpdateWith(newG)
|
wg.Done()
|
||||||
})
|
}(item.old, item.new)
|
||||||
}
|
}
|
||||||
wg.Wait()
|
wg.Wait()
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -40,11 +40,10 @@ func TestManagerEmptyRulesDir(t *testing.T) {
|
|||||||
// execution of configuration update.
|
// execution of configuration update.
|
||||||
// Should be executed with -race flag
|
// Should be executed with -race flag
|
||||||
func TestManagerUpdateConcurrent(t *testing.T) {
|
func TestManagerUpdateConcurrent(t *testing.T) {
|
||||||
_, cleanup := notifier.InitFakeNotifier()
|
|
||||||
defer cleanup()
|
|
||||||
m := &manager{
|
m := &manager{
|
||||||
groups: make(map[uint64]*rule.Group),
|
groups: make(map[uint64]*rule.Group),
|
||||||
querierBuilder: &datasource.FakeQuerier{},
|
querierBuilder: &datasource.FakeQuerier{},
|
||||||
|
notifiers: func() []notifier.Notifier { return []notifier.Notifier{&notifier.FakeNotifier{}} },
|
||||||
}
|
}
|
||||||
paths := []string{
|
paths := []string{
|
||||||
"config/testdata/dir/rules0-good.rules",
|
"config/testdata/dir/rules0-good.rules",
|
||||||
@@ -65,11 +64,13 @@ func TestManagerUpdateConcurrent(t *testing.T) {
|
|||||||
|
|
||||||
const workers = 500
|
const workers = 500
|
||||||
const iterations = 10
|
const iterations = 10
|
||||||
var wg sync.WaitGroup
|
wg := sync.WaitGroup{}
|
||||||
for n := range workers {
|
wg.Add(workers)
|
||||||
wg.Go(func() {
|
for i := 0; i < workers; i++ {
|
||||||
|
go func(n int) {
|
||||||
|
defer wg.Done()
|
||||||
r := rand.New(rand.NewSource(int64(n)))
|
r := rand.New(rand.NewSource(int64(n)))
|
||||||
for range iterations {
|
for i := 0; i < iterations; i++ {
|
||||||
rnd := r.Intn(len(paths))
|
rnd := r.Intn(len(paths))
|
||||||
cfg, err := config.Parse([]string{paths[rnd]}, notifier.ValidateTemplates, true)
|
cfg, err := config.Parse([]string{paths[rnd]}, notifier.ValidateTemplates, true)
|
||||||
if err != nil { // update can fail and this is expected
|
if err != nil { // update can fail and this is expected
|
||||||
@@ -77,7 +78,7 @@ func TestManagerUpdateConcurrent(t *testing.T) {
|
|||||||
}
|
}
|
||||||
_ = m.update(context.Background(), cfg, false)
|
_ = m.update(context.Background(), cfg, false)
|
||||||
}
|
}
|
||||||
})
|
}(i)
|
||||||
}
|
}
|
||||||
wg.Wait()
|
wg.Wait()
|
||||||
}
|
}
|
||||||
@@ -126,9 +127,8 @@ func TestManagerUpdate_Success(t *testing.T) {
|
|||||||
m := &manager{
|
m := &manager{
|
||||||
groups: make(map[uint64]*rule.Group),
|
groups: make(map[uint64]*rule.Group),
|
||||||
querierBuilder: &datasource.FakeQuerier{},
|
querierBuilder: &datasource.FakeQuerier{},
|
||||||
|
notifiers: func() []notifier.Notifier { return []notifier.Notifier{&notifier.FakeNotifier{}} },
|
||||||
}
|
}
|
||||||
_, cleanup := notifier.InitFakeNotifier()
|
|
||||||
defer cleanup()
|
|
||||||
|
|
||||||
cfgInit := loadCfg(t, []string{initPath}, true, true)
|
cfgInit := loadCfg(t, []string{initPath}, true, true)
|
||||||
if err := m.update(ctx, cfgInit, false); err != nil {
|
if err := m.update(ctx, cfgInit, false); err != nil {
|
||||||
@@ -259,7 +259,7 @@ func compareGroups(t *testing.T, a, b *rule.Group) {
|
|||||||
for i, r := range a.Rules {
|
for i, r := range a.Rules {
|
||||||
got, want := r, b.Rules[i]
|
got, want := r, b.Rules[i]
|
||||||
if a.CreateID() != b.CreateID() {
|
if a.CreateID() != b.CreateID() {
|
||||||
t.Fatalf("expected to have rule %d; got %d", want.ID(), got.ID())
|
t.Fatalf("expected to have rule %q; got %q", want.ID(), got.ID())
|
||||||
}
|
}
|
||||||
if err := rule.CompareRules(t, want, got); err != nil {
|
if err := rule.CompareRules(t, want, got); err != nil {
|
||||||
t.Fatalf("comparison error: %s", err)
|
t.Fatalf("comparison error: %s", err)
|
||||||
@@ -277,8 +277,7 @@ func TestManagerUpdate_Failure(t *testing.T) {
|
|||||||
rw: rw,
|
rw: rw,
|
||||||
}
|
}
|
||||||
if notifiers != nil {
|
if notifiers != nil {
|
||||||
_, cleanup := notifier.InitFakeNotifier()
|
m.notifiers = func() []notifier.Notifier { return notifiers }
|
||||||
defer cleanup()
|
|
||||||
}
|
}
|
||||||
err := m.update(context.Background(), []config.Group{cfg}, false)
|
err := m.update(context.Background(), []config.Group{cfg}, false)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
|
|||||||
@@ -80,15 +80,14 @@ func (as AlertState) String() string {
|
|||||||
|
|
||||||
// AlertTplData is used to execute templating
|
// AlertTplData is used to execute templating
|
||||||
type AlertTplData struct {
|
type AlertTplData struct {
|
||||||
Type string
|
Type string
|
||||||
Labels map[string]string
|
Labels map[string]string
|
||||||
Value float64
|
Value float64
|
||||||
Expr string
|
Expr string
|
||||||
AlertID uint64
|
AlertID uint64
|
||||||
GroupID uint64
|
GroupID uint64
|
||||||
ActiveAt time.Time
|
ActiveAt time.Time
|
||||||
For time.Duration
|
For time.Duration
|
||||||
IsPartial bool
|
|
||||||
}
|
}
|
||||||
|
|
||||||
var tplHeaders = []string{
|
var tplHeaders = []string{
|
||||||
@@ -102,7 +101,6 @@ var tplHeaders = []string{
|
|||||||
"{{ $groupID := .GroupID }}",
|
"{{ $groupID := .GroupID }}",
|
||||||
"{{ $activeAt := .ActiveAt }}",
|
"{{ $activeAt := .ActiveAt }}",
|
||||||
"{{ $for := .For }}",
|
"{{ $for := .For }}",
|
||||||
"{{ $isPartial := .IsPartial }}",
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// ExecTemplate executes the Alert template for given
|
// ExecTemplate executes the Alert template for given
|
||||||
@@ -168,8 +166,8 @@ func templateAnnotations(annotations map[string]string, data AlertTplData, tmpl
|
|||||||
ctmpl, _ := tmpl.Clone()
|
ctmpl, _ := tmpl.Clone()
|
||||||
ctmpl = ctmpl.Option("missingkey=zero")
|
ctmpl = ctmpl.Option("missingkey=zero")
|
||||||
if err := templateAnnotation(&buf, builder.String(), tData, ctmpl, execute); err != nil {
|
if err := templateAnnotation(&buf, builder.String(), tData, ctmpl, execute); err != nil {
|
||||||
r[key] = err.Error()
|
r[key] = text
|
||||||
eg.Add(fmt.Errorf("(key: %q, value: %q): %w", key, text, err))
|
eg.Add(fmt.Errorf("key %q, template %q: %w", key, text, err))
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
r[key] = buf.String()
|
r[key] = buf.String()
|
||||||
@@ -186,13 +184,13 @@ type tplData struct {
|
|||||||
func templateAnnotation(dst io.Writer, text string, data tplData, tpl *textTpl.Template, execute bool) error {
|
func templateAnnotation(dst io.Writer, text string, data tplData, tpl *textTpl.Template, execute bool) error {
|
||||||
tpl, err := tpl.Parse(text)
|
tpl, err := tpl.Parse(text)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("error parsing template: %w", err)
|
return fmt.Errorf("error parsing annotation template: %w", err)
|
||||||
}
|
}
|
||||||
if !execute {
|
if !execute {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
if err = tpl.Execute(dst, data); err != nil {
|
if err = tpl.Execute(dst, data); err != nil {
|
||||||
return fmt.Errorf("error evaluating template: %w", err)
|
return fmt.Errorf("error evaluating annotation template: %w", err)
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -20,7 +20,7 @@ func TestAlertExecTemplate(t *testing.T) {
|
|||||||
)
|
)
|
||||||
extLabels["cluster"] = extCluster
|
extLabels["cluster"] = extCluster
|
||||||
extLabels["dc"] = extDC
|
extLabels["dc"] = extDC
|
||||||
err := Init(extLabels, extURL)
|
_, err := Init(nil, extLabels, extURL)
|
||||||
checkErr(t, err)
|
checkErr(t, err)
|
||||||
|
|
||||||
f := func(alert *Alert, annotations map[string]string, tplExpected map[string]string) {
|
f := func(alert *Alert, annotations map[string]string, tplExpected map[string]string) {
|
||||||
|
|||||||
@@ -3,7 +3,6 @@ package notifier
|
|||||||
import (
|
import (
|
||||||
"bytes"
|
"bytes"
|
||||||
"context"
|
"context"
|
||||||
"errors"
|
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"net/http"
|
"net/http"
|
||||||
@@ -14,6 +13,7 @@ import (
|
|||||||
"github.com/VictoriaMetrics/metrics"
|
"github.com/VictoriaMetrics/metrics"
|
||||||
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/vmalertutil"
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/vmalertutil"
|
||||||
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httputil"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
|
||||||
@@ -22,11 +22,10 @@ import (
|
|||||||
// AlertManager represents integration provider with Prometheus alert manager
|
// AlertManager represents integration provider with Prometheus alert manager
|
||||||
// https://github.com/prometheus/alertmanager
|
// https://github.com/prometheus/alertmanager
|
||||||
type AlertManager struct {
|
type AlertManager struct {
|
||||||
addr *url.URL
|
addr *url.URL
|
||||||
argFunc AlertURLGenerator
|
argFunc AlertURLGenerator
|
||||||
client *http.Client
|
client *http.Client
|
||||||
timeout time.Duration
|
timeout time.Duration
|
||||||
lastError string
|
|
||||||
|
|
||||||
authCfg *promauth.Config
|
authCfg *promauth.Config
|
||||||
// stores already parsed RelabelConfigs object
|
// stores already parsed RelabelConfigs object
|
||||||
@@ -72,42 +71,24 @@ func (am AlertManager) Addr() string {
|
|||||||
return am.addr.Redacted()
|
return am.addr.Redacted()
|
||||||
}
|
}
|
||||||
|
|
||||||
func (am *AlertManager) LastError() string {
|
|
||||||
return am.lastError
|
|
||||||
}
|
|
||||||
|
|
||||||
// Send an alert or resolve message
|
// Send an alert or resolve message
|
||||||
func (am *AlertManager) Send(ctx context.Context, alerts []Alert, alertLabels [][]prompb.Label, headers map[string]string) error {
|
func (am *AlertManager) Send(ctx context.Context, alerts []Alert, headers map[string]string) error {
|
||||||
if len(alerts) != len(alertLabels) {
|
|
||||||
return fmt.Errorf("mismatched number of alerts and label sets after global alert relabeling")
|
|
||||||
}
|
|
||||||
am.metrics.alertsSent.Add(len(alerts))
|
am.metrics.alertsSent.Add(len(alerts))
|
||||||
startTime := time.Now()
|
startTime := time.Now()
|
||||||
err := am.send(ctx, alerts, alertLabels, headers)
|
err := am.send(ctx, alerts, headers)
|
||||||
am.metrics.alertsSendDuration.UpdateDuration(startTime)
|
am.metrics.alertsSendDuration.UpdateDuration(startTime)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
// the context can be cancelled on graceful shutdown
|
|
||||||
// or on group update. So no need to handle the error as usual.
|
|
||||||
if errors.Is(err, context.Canceled) {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
am.metrics.alertsSendErrors.Add(len(alerts))
|
am.metrics.alertsSendErrors.Add(len(alerts))
|
||||||
am.lastError = err.Error()
|
|
||||||
} else {
|
|
||||||
am.lastError = ""
|
|
||||||
}
|
}
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
func (am *AlertManager) send(ctx context.Context, alerts []Alert, alertLabels [][]prompb.Label, headers map[string]string) error {
|
func (am *AlertManager) send(ctx context.Context, alerts []Alert, headers map[string]string) error {
|
||||||
b := &bytes.Buffer{}
|
b := &bytes.Buffer{}
|
||||||
alertsToSend := make([]Alert, 0, len(alerts))
|
alertsToSend := make([]Alert, 0, len(alerts))
|
||||||
lblss := make([][]prompb.Label, 0, len(alerts))
|
lblss := make([][]prompb.Label, 0, len(alerts))
|
||||||
for i, a := range alerts {
|
for _, a := range alerts {
|
||||||
lbls := alertLabels[i]
|
lbls := a.applyRelabelingIfNeeded(am.relabelConfigs)
|
||||||
if am.relabelConfigs != nil {
|
|
||||||
lbls = am.relabelConfigs.Apply(lbls, 0)
|
|
||||||
}
|
|
||||||
if len(lbls) == 0 {
|
if len(lbls) == 0 {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
@@ -171,6 +152,11 @@ const alertManagerPath = "/api/v2/alerts"
|
|||||||
func NewAlertManager(alertManagerURL string, fn AlertURLGenerator, authCfg promauth.HTTPClientConfig,
|
func NewAlertManager(alertManagerURL string, fn AlertURLGenerator, authCfg promauth.HTTPClientConfig,
|
||||||
relabelCfg *promrelabel.ParsedConfigs, timeout time.Duration,
|
relabelCfg *promrelabel.ParsedConfigs, timeout time.Duration,
|
||||||
) (*AlertManager, error) {
|
) (*AlertManager, error) {
|
||||||
|
|
||||||
|
if err := httputil.CheckURL(alertManagerURL); err != nil {
|
||||||
|
return nil, fmt.Errorf("invalid alertmanager URL: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
tls := &promauth.TLSConfig{}
|
tls := &promauth.TLSConfig{}
|
||||||
if authCfg.TLSConfig != nil {
|
if authCfg.TLSConfig != nil {
|
||||||
tls = authCfg.TLSConfig
|
tls = authCfg.TLSConfig
|
||||||
|
|||||||
@@ -11,7 +11,6 @@ import (
|
|||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -146,11 +145,11 @@ func TestAlertManager_Send(t *testing.T) {
|
|||||||
t.Fatalf("unexpected error: %s", err)
|
t.Fatalf("unexpected error: %s", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := am.Send(context.Background(), []Alert{{Labels: map[string]string{"a": "b"}}}, [][]prompb.Label{{{Name: "a", Value: "b"}}}, nil); err == nil {
|
if err := am.Send(context.Background(), []Alert{{Labels: map[string]string{"a": "b"}}}, nil); err == nil {
|
||||||
t.Fatalf("expected connection error got nil")
|
t.Fatalf("expected connection error got nil")
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := am.Send(context.Background(), []Alert{{Labels: map[string]string{"a": "b"}}}, [][]prompb.Label{{{Name: "a", Value: "b"}}}, nil); err == nil {
|
if err := am.Send(context.Background(), []Alert{{Labels: map[string]string{"a": "b"}}}, nil); err == nil {
|
||||||
t.Fatalf("expected wrong http code error got nil")
|
t.Fatalf("expected wrong http code error got nil")
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -161,7 +160,7 @@ func TestAlertManager_Send(t *testing.T) {
|
|||||||
End: time.Now().UTC(),
|
End: time.Now().UTC(),
|
||||||
Labels: map[string]string{"alertname": "alert0"},
|
Labels: map[string]string{"alertname": "alert0"},
|
||||||
Annotations: map[string]string{"a": "b", "c": "d"},
|
Annotations: map[string]string{"a": "b", "c": "d"},
|
||||||
}}, [][]prompb.Label{{{Name: "alertname", Value: "alert0"}}}, map[string]string{headerKey: "bar"}); err != nil {
|
}}, map[string]string{headerKey: "bar"}); err != nil {
|
||||||
t.Fatalf("unexpected error %s", err)
|
t.Fatalf("unexpected error %s", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -175,7 +174,7 @@ func TestAlertManager_Send(t *testing.T) {
|
|||||||
Name: "alert2",
|
Name: "alert2",
|
||||||
Labels: map[string]string{"rule": "test", "tenant": "1"},
|
Labels: map[string]string{"rule": "test", "tenant": "1"},
|
||||||
},
|
},
|
||||||
}, [][]prompb.Label{{{Name: "rule", Value: "test"}, {Name: "tenant", Value: "0"}}, {{Name: "rule", Value: "test"}, {Name: "tenant", Value: "1"}}}, map[string]string{headerKey: "bar"}); err != nil {
|
}, map[string]string{headerKey: "bar"}); err != nil {
|
||||||
t.Fatalf("unexpected error %s", err)
|
t.Fatalf("unexpected error %s", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -188,7 +187,7 @@ func TestAlertManager_Send(t *testing.T) {
|
|||||||
Name: "alert2",
|
Name: "alert2",
|
||||||
Labels: map[string]string{},
|
Labels: map[string]string{},
|
||||||
},
|
},
|
||||||
}, [][]prompb.Label{{{Name: "rule", Value: "test"}}, {{}}}, map[string]string{}); err != nil {
|
}, map[string]string{}); err != nil {
|
||||||
t.Fatalf("unexpected error %s", err)
|
t.Fatalf("unexpected error %s", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -27,9 +27,15 @@ type Config struct {
|
|||||||
// PathPrefix is added to URL path before adding alertManagerPath value
|
// PathPrefix is added to URL path before adding alertManagerPath value
|
||||||
PathPrefix string `yaml:"path_prefix,omitempty"`
|
PathPrefix string `yaml:"path_prefix,omitempty"`
|
||||||
|
|
||||||
ConsulSDConfigs []ConsulSDConfigs `yaml:"consul_sd_configs,omitempty"`
|
// ConsulSDConfigs contains list of settings for service discovery via Consul
|
||||||
DNSSDConfigs []DNSSDConfigs `yaml:"dns_sd_configs,omitempty"`
|
// see https://prometheus.io/docs/prometheus/latest/configuration/configuration/#consul_sd_config
|
||||||
StaticConfigs []StaticConfig `yaml:"static_configs,omitempty"`
|
ConsulSDConfigs []consul.SDConfig `yaml:"consul_sd_configs,omitempty"`
|
||||||
|
// DNSSDConfigs contains list of settings for service discovery via DNS.
|
||||||
|
// See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#dns_sd_config
|
||||||
|
DNSSDConfigs []dns.SDConfig `yaml:"dns_sd_configs,omitempty"`
|
||||||
|
|
||||||
|
// StaticConfigs contains list of static targets
|
||||||
|
StaticConfigs []StaticConfig `yaml:"static_configs,omitempty"`
|
||||||
|
|
||||||
// HTTPClientConfig contains HTTP configuration for Notifier clients
|
// HTTPClientConfig contains HTTP configuration for Notifier clients
|
||||||
HTTPClientConfig promauth.HTTPClientConfig `yaml:",inline"`
|
HTTPClientConfig promauth.HTTPClientConfig `yaml:",inline"`
|
||||||
@@ -56,29 +62,14 @@ type Config struct {
|
|||||||
parsedAlertRelabelConfigs *promrelabel.ParsedConfigs
|
parsedAlertRelabelConfigs *promrelabel.ParsedConfigs
|
||||||
}
|
}
|
||||||
|
|
||||||
// staticConfig contains list of static targets in the following form:
|
// StaticConfig contains list of static targets in the following form:
|
||||||
//
|
//
|
||||||
// targets:
|
// targets:
|
||||||
// [ - '<host>' ]
|
// [ - '<host>' ]
|
||||||
type StaticConfig struct {
|
type StaticConfig struct {
|
||||||
Targets []string `yaml:"targets"`
|
Targets []string `yaml:"targets"`
|
||||||
// HTTPClientConfig contains HTTP configuration for the Targets
|
// HTTPClientConfig contains HTTP configuration for the Targets
|
||||||
HTTPClientConfig promauth.HTTPClientConfig `yaml:",inline"`
|
HTTPClientConfig promauth.HTTPClientConfig `yaml:",inline"`
|
||||||
AlertRelabelConfigs []promrelabel.RelabelConfig `yaml:"alert_relabel_configs,omitempty"`
|
|
||||||
}
|
|
||||||
|
|
||||||
// ConsulSDConfigs contains list of settings for service discovery via Consul,
|
|
||||||
// see https://prometheus.io/docs/prometheus/latest/configuration/configuration/#consul_sd_config
|
|
||||||
type ConsulSDConfigs struct {
|
|
||||||
consul.SDConfig `yaml:",inline"`
|
|
||||||
AlertRelabelConfigs []promrelabel.RelabelConfig `yaml:"alert_relabel_configs,omitempty"`
|
|
||||||
}
|
|
||||||
|
|
||||||
// DNSSDConfigs contains list of settings for service discovery via DNS,
|
|
||||||
// See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#dns_sd_config
|
|
||||||
type DNSSDConfigs struct {
|
|
||||||
dns.SDConfig `yaml:",inline"`
|
|
||||||
AlertRelabelConfigs []promrelabel.RelabelConfig `yaml:"alert_relabel_configs,omitempty"`
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// UnmarshalYAML implements the yaml.Unmarshaler interface.
|
// UnmarshalYAML implements the yaml.Unmarshaler interface.
|
||||||
@@ -104,31 +95,6 @@ func (cfg *Config) UnmarshalYAML(unmarshal func(any) error) error {
|
|||||||
}
|
}
|
||||||
cfg.parsedAlertRelabelConfigs = arCfg
|
cfg.parsedAlertRelabelConfigs = arCfg
|
||||||
|
|
||||||
for _, s := range cfg.StaticConfigs {
|
|
||||||
if len(s.AlertRelabelConfigs) > 0 {
|
|
||||||
_, err := promrelabel.ParseRelabelConfigs(s.AlertRelabelConfigs)
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("failed to parse alert_relabel_configs in static_config: %w", err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for _, s := range cfg.ConsulSDConfigs {
|
|
||||||
if len(s.AlertRelabelConfigs) > 0 {
|
|
||||||
_, err := promrelabel.ParseRelabelConfigs(s.AlertRelabelConfigs)
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("failed to parse alert_relabel_configs in consul_sd_config: %w", err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for _, s := range cfg.DNSSDConfigs {
|
|
||||||
if len(s.AlertRelabelConfigs) > 0 {
|
|
||||||
_, err := promrelabel.ParseRelabelConfigs(s.AlertRelabelConfigs)
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("failed to parse alert_relabel_configs in dns_sd_config: %w", err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
b, err := yaml.Marshal(cfg)
|
b, err := yaml.Marshal(cfg)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("failed to marshal configuration for checksum: %w", err)
|
return fmt.Errorf("failed to marshal configuration for checksum: %w", err)
|
||||||
|
|||||||
@@ -35,6 +35,4 @@ func TestParseConfig_Failure(t *testing.T) {
|
|||||||
|
|
||||||
f("testdata/unknownFields.bad.yaml", "unknown field")
|
f("testdata/unknownFields.bad.yaml", "unknown field")
|
||||||
f("non-existing-file", "error reading")
|
f("non-existing-file", "error reading")
|
||||||
f("testdata/consul.bad.yaml", "failed to parse alert_relabel_configs in consul_sd_config")
|
|
||||||
f("testdata/dns.bad.yaml", "failed to parse alert relabeling config")
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -8,7 +8,6 @@ import (
|
|||||||
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discovery/consul"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discovery/consul"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discovery/dns"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discovery/dns"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutil"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutil"
|
||||||
@@ -29,7 +28,11 @@ type configWatcher struct {
|
|||||||
targets map[TargetType][]Target
|
targets map[TargetType][]Target
|
||||||
}
|
}
|
||||||
|
|
||||||
func newWatcher(cfg *Config, gen AlertURLGenerator) (*configWatcher, error) {
|
func newWatcher(path string, gen AlertURLGenerator) (*configWatcher, error) {
|
||||||
|
cfg, err := parseConfig(path)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
cw := &configWatcher{
|
cw := &configWatcher{
|
||||||
cfg: cfg,
|
cfg: cfg,
|
||||||
wg: sync.WaitGroup{},
|
wg: sync.WaitGroup{},
|
||||||
@@ -85,15 +88,18 @@ func (cw *configWatcher) reload(path string) error {
|
|||||||
return cw.start()
|
return cw.start()
|
||||||
}
|
}
|
||||||
|
|
||||||
func (cw *configWatcher) add(typeK TargetType, interval time.Duration, targetsFn getTargets) error {
|
func (cw *configWatcher) add(typeK TargetType, interval time.Duration, labelsFn getLabels) error {
|
||||||
targetMetadata, errors := getTargetMetadata(targetsFn, cw.cfg)
|
targetMetadata, errors := getTargetMetadata(labelsFn, cw.cfg)
|
||||||
for _, err := range errors {
|
for _, err := range errors {
|
||||||
return fmt.Errorf("failed to init notifier for %q: %w", typeK, err)
|
return fmt.Errorf("failed to init notifier for %q: %w", typeK, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
cw.updateTargets(typeK, targetMetadata, cw.cfg, cw.genFn)
|
cw.updateTargets(typeK, targetMetadata, cw.cfg, cw.genFn)
|
||||||
|
|
||||||
cw.wg.Go(func() {
|
cw.wg.Add(1)
|
||||||
|
go func() {
|
||||||
|
defer cw.wg.Done()
|
||||||
|
|
||||||
ticker := time.NewTicker(interval)
|
ticker := time.NewTicker(interval)
|
||||||
defer ticker.Stop()
|
defer ticker.Stop()
|
||||||
|
|
||||||
@@ -103,77 +109,62 @@ func (cw *configWatcher) add(typeK TargetType, interval time.Duration, targetsFn
|
|||||||
return
|
return
|
||||||
case <-ticker.C:
|
case <-ticker.C:
|
||||||
}
|
}
|
||||||
targetMetadata, errors := getTargetMetadata(targetsFn, cw.cfg)
|
targetMetadata, errors := getTargetMetadata(labelsFn, cw.cfg)
|
||||||
for _, err := range errors {
|
for _, err := range errors {
|
||||||
logger.Errorf("failed to init notifier for %q: %w", typeK, err)
|
logger.Errorf("failed to init notifier for %q: %w", typeK, err)
|
||||||
}
|
}
|
||||||
cw.updateTargets(typeK, targetMetadata, cw.cfg, cw.genFn)
|
cw.updateTargets(typeK, targetMetadata, cw.cfg, cw.genFn)
|
||||||
}
|
}
|
||||||
})
|
}()
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
type targetMetadata struct {
|
func getTargetMetadata(labelsFn getLabels, cfg *Config) (map[string]*promutil.Labels, []error) {
|
||||||
*promutil.Labels
|
metaLabels, err := labelsFn()
|
||||||
alertRelabelConfigs *promrelabel.ParsedConfigs
|
|
||||||
}
|
|
||||||
|
|
||||||
func getTargetMetadata(targetsFn getTargets, cfg *Config) (map[string]targetMetadata, []error) {
|
|
||||||
metaLabelsList, alertRelabelCfgs, err := targetsFn()
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, []error{fmt.Errorf("failed to get labels: %w", err)}
|
return nil, []error{fmt.Errorf("failed to get labels: %w", err)}
|
||||||
}
|
}
|
||||||
targetMts := make(map[string]targetMetadata, len(metaLabelsList))
|
targetMetadata := make(map[string]*promutil.Labels, len(metaLabels))
|
||||||
var errors []error
|
var errors []error
|
||||||
duplicates := make(map[string]struct{})
|
duplicates := make(map[string]struct{})
|
||||||
for i := range metaLabelsList {
|
for _, labels := range metaLabels {
|
||||||
metaLabels := metaLabelsList[i]
|
target := labels.Get("__address__")
|
||||||
alertRelabelCfg := alertRelabelCfgs[i]
|
u, processedLabels, err := parseLabels(target, labels, cfg)
|
||||||
for _, labels := range metaLabels {
|
if err != nil {
|
||||||
target := labels.Get("__address__")
|
errors = append(errors, err)
|
||||||
u, processedLabels, err := parseLabels(target, labels, cfg)
|
continue
|
||||||
if err != nil {
|
|
||||||
errors = append(errors, err)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
if len(u) == 0 {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
// check for duplicated targets
|
|
||||||
// targets with same address but different alert_relabel_configs are still considered duplicates since it's mostly due to misconfiguration and could cause duplicated notifications.
|
|
||||||
if _, ok := duplicates[u]; ok {
|
|
||||||
if !*suppressDuplicateTargetErrors {
|
|
||||||
logger.Errorf("skipping duplicate target with identical address %q; "+
|
|
||||||
"make sure service discovery and relabeling is set up properly; "+
|
|
||||||
"original labels: %s; resulting labels: %s",
|
|
||||||
u, labels, processedLabels)
|
|
||||||
}
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
duplicates[u] = struct{}{}
|
|
||||||
targetMts[u] = targetMetadata{
|
|
||||||
Labels: processedLabels,
|
|
||||||
alertRelabelConfigs: alertRelabelCfg,
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
if len(u) == 0 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if _, ok := duplicates[u]; ok { // check for duplicates
|
||||||
|
if !*suppressDuplicateTargetErrors {
|
||||||
|
logger.Errorf("skipping duplicate target with identical address %q; "+
|
||||||
|
"make sure service discovery and relabeling is set up properly; "+
|
||||||
|
"original labels: %s; resulting labels: %s",
|
||||||
|
u, labels, processedLabels)
|
||||||
|
}
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
duplicates[u] = struct{}{}
|
||||||
|
targetMetadata[u] = processedLabels
|
||||||
}
|
}
|
||||||
return targetMts, errors
|
return targetMetadata, errors
|
||||||
}
|
}
|
||||||
|
|
||||||
type getTargets func() ([][]*promutil.Labels, []*promrelabel.ParsedConfigs, error)
|
type getLabels func() ([]*promutil.Labels, error)
|
||||||
|
|
||||||
func (cw *configWatcher) start() error {
|
func (cw *configWatcher) start() error {
|
||||||
if len(cw.cfg.StaticConfigs) > 0 {
|
if len(cw.cfg.StaticConfigs) > 0 {
|
||||||
var targets []Target
|
var targets []Target
|
||||||
for i, cfg := range cw.cfg.StaticConfigs {
|
for _, cfg := range cw.cfg.StaticConfigs {
|
||||||
alertRelabelConfig, _ := promrelabel.ParseRelabelConfigs(cw.cfg.StaticConfigs[i].AlertRelabelConfigs)
|
|
||||||
httpCfg := mergeHTTPClientConfigs(cw.cfg.HTTPClientConfig, cfg.HTTPClientConfig)
|
httpCfg := mergeHTTPClientConfigs(cw.cfg.HTTPClientConfig, cfg.HTTPClientConfig)
|
||||||
for _, target := range cfg.Targets {
|
for _, target := range cfg.Targets {
|
||||||
address, labels, err := parseLabels(target, nil, cw.cfg)
|
address, labels, err := parseLabels(target, nil, cw.cfg)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("failed to parse labels for target %q: %w", target, err)
|
return fmt.Errorf("failed to parse labels for target %q: %w", target, err)
|
||||||
}
|
}
|
||||||
notifier, err := NewAlertManager(address, cw.genFn, httpCfg, alertRelabelConfig, cw.cfg.Timeout.Duration())
|
notifier, err := NewAlertManager(address, cw.genFn, httpCfg, cw.cfg.parsedAlertRelabelConfigs, cw.cfg.Timeout.Duration())
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("failed to init alertmanager for addr %q: %w", address, err)
|
return fmt.Errorf("failed to init alertmanager for addr %q: %w", address, err)
|
||||||
}
|
}
|
||||||
@@ -187,20 +178,17 @@ func (cw *configWatcher) start() error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if len(cw.cfg.ConsulSDConfigs) > 0 {
|
if len(cw.cfg.ConsulSDConfigs) > 0 {
|
||||||
err := cw.add(TargetConsul, *consul.SDCheckInterval, func() ([][]*promutil.Labels, []*promrelabel.ParsedConfigs, error) {
|
err := cw.add(TargetConsul, *consul.SDCheckInterval, func() ([]*promutil.Labels, error) {
|
||||||
var labels [][]*promutil.Labels
|
var labels []*promutil.Labels
|
||||||
var alertRelabelConfigs []*promrelabel.ParsedConfigs
|
|
||||||
for i := range cw.cfg.ConsulSDConfigs {
|
for i := range cw.cfg.ConsulSDConfigs {
|
||||||
alertRelabelConfig, _ := promrelabel.ParseRelabelConfigs(cw.cfg.ConsulSDConfigs[i].AlertRelabelConfigs)
|
|
||||||
sdc := &cw.cfg.ConsulSDConfigs[i]
|
sdc := &cw.cfg.ConsulSDConfigs[i]
|
||||||
targetLabels, err := sdc.GetLabels(cw.cfg.baseDir)
|
targetLabels, err := sdc.GetLabels(cw.cfg.baseDir)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, nil, fmt.Errorf("got labels err: %w", err)
|
return nil, fmt.Errorf("got labels err: %w", err)
|
||||||
}
|
}
|
||||||
labels = append(labels, targetLabels)
|
labels = append(labels, targetLabels...)
|
||||||
alertRelabelConfigs = append(alertRelabelConfigs, alertRelabelConfig)
|
|
||||||
}
|
}
|
||||||
return labels, alertRelabelConfigs, nil
|
return labels, nil
|
||||||
})
|
})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("failed to start consulSD discovery: %w", err)
|
return fmt.Errorf("failed to start consulSD discovery: %w", err)
|
||||||
@@ -208,21 +196,17 @@ func (cw *configWatcher) start() error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if len(cw.cfg.DNSSDConfigs) > 0 {
|
if len(cw.cfg.DNSSDConfigs) > 0 {
|
||||||
err := cw.add(TargetDNS, *dns.SDCheckInterval, func() ([][]*promutil.Labels, []*promrelabel.ParsedConfigs, error) {
|
err := cw.add(TargetDNS, *dns.SDCheckInterval, func() ([]*promutil.Labels, error) {
|
||||||
var labels [][]*promutil.Labels
|
var labels []*promutil.Labels
|
||||||
var alertRelabelConfigs []*promrelabel.ParsedConfigs
|
|
||||||
for i := range cw.cfg.DNSSDConfigs {
|
for i := range cw.cfg.DNSSDConfigs {
|
||||||
alertRelabelConfig, _ := promrelabel.ParseRelabelConfigs(cw.cfg.DNSSDConfigs[i].AlertRelabelConfigs)
|
|
||||||
sdc := &cw.cfg.DNSSDConfigs[i]
|
sdc := &cw.cfg.DNSSDConfigs[i]
|
||||||
targetLabels, err := sdc.GetLabels(cw.cfg.baseDir)
|
targetLabels, err := sdc.GetLabels(cw.cfg.baseDir)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, nil, fmt.Errorf("got labels err: %w", err)
|
return nil, fmt.Errorf("got labels err: %w", err)
|
||||||
}
|
}
|
||||||
labels = append(labels, targetLabels)
|
labels = append(labels, targetLabels...)
|
||||||
alertRelabelConfigs = append(alertRelabelConfigs, alertRelabelConfig)
|
|
||||||
|
|
||||||
}
|
}
|
||||||
return labels, alertRelabelConfigs, nil
|
return labels, nil
|
||||||
})
|
})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("failed to start DNSSD discovery: %w", err)
|
return fmt.Errorf("failed to start DNSSD discovery: %w", err)
|
||||||
@@ -256,30 +240,30 @@ func (cw *configWatcher) setTargets(key TargetType, targets []Target) {
|
|||||||
cw.targetsMu.Unlock()
|
cw.targetsMu.Unlock()
|
||||||
}
|
}
|
||||||
|
|
||||||
func (cw *configWatcher) updateTargets(key TargetType, targetMts map[string]targetMetadata, cfg *Config, genFn AlertURLGenerator) {
|
func (cw *configWatcher) updateTargets(key TargetType, targetMetadata map[string]*promutil.Labels, cfg *Config, genFn AlertURLGenerator) {
|
||||||
cw.targetsMu.Lock()
|
cw.targetsMu.Lock()
|
||||||
defer cw.targetsMu.Unlock()
|
defer cw.targetsMu.Unlock()
|
||||||
oldTargets := cw.targets[key]
|
oldTargets := cw.targets[key]
|
||||||
var updatedTargets []Target
|
var updatedTargets []Target
|
||||||
for _, ot := range oldTargets {
|
for _, ot := range oldTargets {
|
||||||
if _, ok := targetMts[ot.Addr()]; !ok {
|
if _, ok := targetMetadata[ot.Addr()]; !ok {
|
||||||
// if target not exists in currentTargets, close it
|
// if target not exists in currentTargets, close it
|
||||||
ot.Close()
|
ot.Close()
|
||||||
} else {
|
} else {
|
||||||
updatedTargets = append(updatedTargets, ot)
|
updatedTargets = append(updatedTargets, ot)
|
||||||
delete(targetMts, ot.Addr())
|
delete(targetMetadata, ot.Addr())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// create new resources for the new targets
|
// create new resources for the new targets
|
||||||
for addr, metadata := range targetMts {
|
for addr, labels := range targetMetadata {
|
||||||
am, err := NewAlertManager(addr, genFn, cfg.HTTPClientConfig, metadata.alertRelabelConfigs, cfg.Timeout.Duration())
|
am, err := NewAlertManager(addr, genFn, cfg.HTTPClientConfig, cfg.parsedAlertRelabelConfigs, cfg.Timeout.Duration())
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logger.Errorf("failed to init %s notifier with addr %q: %w", key, addr, err)
|
logger.Errorf("failed to init %s notifier with addr %q: %w", key, addr, err)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
updatedTargets = append(updatedTargets, Target{
|
updatedTargets = append(updatedTargets, Target{
|
||||||
Notifier: am,
|
Notifier: am,
|
||||||
Labels: metadata.Labels,
|
Labels: labels,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -7,7 +7,6 @@ import (
|
|||||||
"net/http/httptest"
|
"net/http/httptest"
|
||||||
"os"
|
"os"
|
||||||
"sync"
|
"sync"
|
||||||
"sync/atomic"
|
|
||||||
"testing"
|
"testing"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
@@ -29,11 +28,7 @@ static_configs:
|
|||||||
- localhost:9093
|
- localhost:9093
|
||||||
- localhost:9094
|
- localhost:9094
|
||||||
`)
|
`)
|
||||||
cfg, err := parseConfig(f.Name())
|
cw, err := newWatcher(f.Name(), nil)
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("failed to parse config: %s", err)
|
|
||||||
}
|
|
||||||
cw, err := newWatcher(cfg, nil)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("failed to start config watcher: %s", err)
|
t.Fatalf("failed to start config watcher: %s", err)
|
||||||
}
|
}
|
||||||
@@ -88,64 +83,33 @@ consul_sd_configs:
|
|||||||
- server: %s
|
- server: %s
|
||||||
services:
|
services:
|
||||||
- alertmanager
|
- alertmanager
|
||||||
- server: %s
|
`, consulSDServer.URL))
|
||||||
services:
|
|
||||||
- alertmanager
|
|
||||||
alert_relabel_configs:
|
|
||||||
- target_label: "foo"
|
|
||||||
replacement: "tar"
|
|
||||||
`, consulSDServer.URL, consulSDServer.URL))
|
|
||||||
|
|
||||||
cfg, err := parseConfig(consulSDFile.Name())
|
cw, err := newWatcher(consulSDFile.Name(), nil)
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("failed to parse config: %s", err)
|
|
||||||
}
|
|
||||||
cw, err := newWatcher(cfg, nil)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("failed to start config watcher: %s", err)
|
t.Fatalf("failed to start config watcher: %s", err)
|
||||||
}
|
}
|
||||||
defer cw.mustStop()
|
defer cw.mustStop()
|
||||||
|
|
||||||
if len(cw.notifiers()) != 3 {
|
if len(cw.notifiers()) != 2 {
|
||||||
t.Fatalf("expected to get 3 notifiers; got %d", len(cw.notifiers()))
|
t.Fatalf("expected to get 2 notifiers; got %d", len(cw.notifiers()))
|
||||||
}
|
}
|
||||||
|
|
||||||
expAddr1 := fmt.Sprintf("https://%s/proxy/api/v2/alerts", fakeConsulService1)
|
expAddr1 := fmt.Sprintf("https://%s/proxy/api/v2/alerts", fakeConsulService1)
|
||||||
expAddr2 := fmt.Sprintf("https://%s/proxy/api/v2/alerts", fakeConsulService2)
|
expAddr2 := fmt.Sprintf("https://%s/proxy/api/v2/alerts", fakeConsulService2)
|
||||||
expAddr3 := fmt.Sprintf("https://%s/proxy/api/v2/alerts", fakeConsulService3)
|
|
||||||
|
|
||||||
n1, n2, n3 := cw.notifiers()[0], cw.notifiers()[1], cw.notifiers()[2]
|
n1, n2 := cw.notifiers()[0], cw.notifiers()[1]
|
||||||
if n1.Addr() != expAddr1 {
|
if n1.Addr() != expAddr1 {
|
||||||
t.Fatalf("exp address %q; got %q", expAddr1, n1.Addr())
|
t.Fatalf("exp address %q; got %q", expAddr1, n1.Addr())
|
||||||
}
|
}
|
||||||
if n2.Addr() != expAddr2 {
|
if n2.Addr() != expAddr2 {
|
||||||
t.Fatalf("exp address %q; got %q", expAddr2, n2.Addr())
|
t.Fatalf("exp address %q; got %q", expAddr2, n2.Addr())
|
||||||
}
|
}
|
||||||
if n3.Addr() != expAddr3 {
|
|
||||||
t.Fatalf("exp address %q; got %q", expAddr3, n3.Addr())
|
|
||||||
}
|
|
||||||
|
|
||||||
if n1.(*AlertManager).relabelConfigs.String() != "" {
|
|
||||||
t.Fatalf("unexpected relabel configs: %q", n1.(*AlertManager).relabelConfigs.String())
|
|
||||||
}
|
|
||||||
if n2.(*AlertManager).relabelConfigs.String() != "" {
|
|
||||||
t.Fatalf("unexpected relabel configs: %q", n2.(*AlertManager).relabelConfigs.String())
|
|
||||||
}
|
|
||||||
if n3.(*AlertManager).relabelConfigs.String() != "- target_label: foo\n replacement: tar\n" {
|
|
||||||
t.Fatalf("unexpected relabel configs: %q", n3.(*AlertManager).relabelConfigs.String())
|
|
||||||
}
|
|
||||||
|
|
||||||
f := func() bool { return len(cw.notifiers()) == 1 }
|
f := func() bool { return len(cw.notifiers()) == 1 }
|
||||||
if !waitFor(f, time.Second) {
|
if !waitFor(f, time.Second) {
|
||||||
t.Fatalf("expected to get 1 notifiers; got %d", len(cw.notifiers()))
|
t.Fatalf("expected to get 1 notifiers; got %d", len(cw.notifiers()))
|
||||||
}
|
}
|
||||||
n3 = cw.notifiers()[0]
|
|
||||||
if n3.Addr() != expAddr3 {
|
|
||||||
t.Fatalf("exp address %q; got %q", expAddr3, n3.Addr())
|
|
||||||
}
|
|
||||||
if n3.(*AlertManager).relabelConfigs.String() != "- target_label: foo\n replacement: tar\n" {
|
|
||||||
t.Fatalf("unexpected relabel configs: %q", n3.(*AlertManager).relabelConfigs.String())
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// TestConfigWatcherReloadConcurrent supposed to test concurrent
|
// TestConfigWatcherReloadConcurrent supposed to test concurrent
|
||||||
@@ -200,11 +164,7 @@ consul_sd_configs:
|
|||||||
"unknownFields.bad.yaml",
|
"unknownFields.bad.yaml",
|
||||||
}
|
}
|
||||||
|
|
||||||
cfg, err := parseConfig(paths[0])
|
cw, err := newWatcher(paths[0], nil)
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("failed to parse config: %s", err)
|
|
||||||
}
|
|
||||||
cw, err := newWatcher(cfg, nil)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("failed to start config watcher: %s", err)
|
t.Fatalf("failed to start config watcher: %s", err)
|
||||||
}
|
}
|
||||||
@@ -212,16 +172,18 @@ consul_sd_configs:
|
|||||||
|
|
||||||
const workers = 500
|
const workers = 500
|
||||||
const iterations = 10
|
const iterations = 10
|
||||||
var wg sync.WaitGroup
|
wg := sync.WaitGroup{}
|
||||||
for n := range workers {
|
wg.Add(workers)
|
||||||
wg.Go(func() {
|
for i := 0; i < workers; i++ {
|
||||||
|
go func(n int) {
|
||||||
|
defer wg.Done()
|
||||||
r := rand.New(rand.NewSource(int64(n)))
|
r := rand.New(rand.NewSource(int64(n)))
|
||||||
for range iterations {
|
for i := 0; i < iterations; i++ {
|
||||||
rnd := r.Intn(len(paths))
|
rnd := r.Intn(len(paths))
|
||||||
_ = cw.reload(paths[rnd]) // update can fail and this is expected
|
_ = cw.reload(paths[rnd]) // update can fail and this is expected
|
||||||
_ = cw.notifiers()
|
_ = cw.notifiers()
|
||||||
}
|
}
|
||||||
})
|
}(i)
|
||||||
}
|
}
|
||||||
wg.Wait()
|
wg.Wait()
|
||||||
}
|
}
|
||||||
@@ -240,11 +202,10 @@ func checkErr(t *testing.T, err error) {
|
|||||||
const (
|
const (
|
||||||
fakeConsulService1 = "127.0.0.1:9093"
|
fakeConsulService1 = "127.0.0.1:9093"
|
||||||
fakeConsulService2 = "127.0.0.1:9095"
|
fakeConsulService2 = "127.0.0.1:9095"
|
||||||
fakeConsulService3 = "127.0.0.1:9097"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
func newFakeConsulServer() *httptest.Server {
|
func newFakeConsulServer() *httptest.Server {
|
||||||
var requestCount atomic.Int32
|
requestCount := 0
|
||||||
mux := http.NewServeMux()
|
mux := http.NewServeMux()
|
||||||
mux.HandleFunc("/v1/agent/self", func(rw http.ResponseWriter, _ *http.Request) {
|
mux.HandleFunc("/v1/agent/self", func(rw http.ResponseWriter, _ *http.Request) {
|
||||||
rw.Write([]byte(`{"Config": {"Datacenter": "dc1"}}`))
|
rw.Write([]byte(`{"Config": {"Datacenter": "dc1"}}`))
|
||||||
@@ -259,7 +220,7 @@ func newFakeConsulServer() *httptest.Server {
|
|||||||
}`))
|
}`))
|
||||||
})
|
})
|
||||||
mux.HandleFunc("/v1/health/service/alertmanager", func(rw http.ResponseWriter, _ *http.Request) {
|
mux.HandleFunc("/v1/health/service/alertmanager", func(rw http.ResponseWriter, _ *http.Request) {
|
||||||
if requestCount.Load() == 0 {
|
if requestCount == 0 {
|
||||||
rw.Header().Set("X-Consul-Index", "1")
|
rw.Header().Set("X-Consul-Index", "1")
|
||||||
rw.Write([]byte(`
|
rw.Write([]byte(`
|
||||||
[
|
[
|
||||||
@@ -399,7 +360,7 @@ func newFakeConsulServer() *httptest.Server {
|
|||||||
}
|
}
|
||||||
]`))
|
]`))
|
||||||
}
|
}
|
||||||
requestCount.Add(1)
|
requestCount++
|
||||||
})
|
})
|
||||||
|
|
||||||
return httptest.NewServer(mux)
|
return httptest.NewServer(mux)
|
||||||
|
|||||||
@@ -5,8 +5,6 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
// FakeNotifier is a mock notifier
|
// FakeNotifier is a mock notifier
|
||||||
@@ -17,32 +15,14 @@ type FakeNotifier struct {
|
|||||||
counter int
|
counter int
|
||||||
}
|
}
|
||||||
|
|
||||||
// InitFakeNotifier initializes global notifier to FakeNotifier,
|
|
||||||
// and returns a cleanup function to restore the original getActiveNotifiers.
|
|
||||||
func InitFakeNotifier() (*FakeNotifier, func()) {
|
|
||||||
originalGetActiveNotifiers := getActiveNotifiers
|
|
||||||
fn := &FakeNotifier{}
|
|
||||||
getActiveNotifiers = func() []Notifier {
|
|
||||||
return []Notifier{fn}
|
|
||||||
}
|
|
||||||
return fn, func() {
|
|
||||||
getActiveNotifiers = originalGetActiveNotifiers
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Close does nothing
|
// Close does nothing
|
||||||
func (*FakeNotifier) Close() {}
|
func (*FakeNotifier) Close() {}
|
||||||
|
|
||||||
// LastError returns last error message
|
|
||||||
func (*FakeNotifier) LastError() string {
|
|
||||||
return ""
|
|
||||||
}
|
|
||||||
|
|
||||||
// Addr returns ""
|
// Addr returns ""
|
||||||
func (*FakeNotifier) Addr() string { return "" }
|
func (*FakeNotifier) Addr() string { return "" }
|
||||||
|
|
||||||
// Send sets alerts and increases counter
|
// Send sets alerts and increases counter
|
||||||
func (fn *FakeNotifier) Send(_ context.Context, alerts []Alert, _ [][]prompb.Label, _ map[string]string) error {
|
func (fn *FakeNotifier) Send(_ context.Context, alerts []Alert, _ map[string]string) error {
|
||||||
fn.Lock()
|
fn.Lock()
|
||||||
defer fn.Unlock()
|
defer fn.Unlock()
|
||||||
fn.counter += len(alerts)
|
fn.counter += len(alerts)
|
||||||
|
|||||||
@@ -1,22 +1,14 @@
|
|||||||
package notifier
|
package notifier
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
|
||||||
"flag"
|
"flag"
|
||||||
"fmt"
|
"fmt"
|
||||||
"net/url"
|
"net/url"
|
||||||
"strconv"
|
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httputil"
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutil"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutil"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -65,61 +57,11 @@ var (
|
|||||||
sendTimeout = flagutil.NewArrayDuration("notifier.sendTimeout", 10*time.Second, "Timeout when sending alerts to the corresponding -notifier.url")
|
sendTimeout = flagutil.NewArrayDuration("notifier.sendTimeout", 10*time.Second, "Timeout when sending alerts to the corresponding -notifier.url")
|
||||||
)
|
)
|
||||||
|
|
||||||
// AlertURLGeneratorFn returns a URL to the passed alert object.
|
// cw holds a configWatcher for configPath configuration file
|
||||||
// Call InitAlertURLGeneratorFn before using this function.
|
// configWatcher provides a list of Notifier objects discovered
|
||||||
var AlertURLGeneratorFn AlertURLGenerator
|
// from static config or via service discovery.
|
||||||
|
// cw is not nil only if configPath is provided.
|
||||||
// InitAlertURLGeneratorFn populates AlertURLGeneratorFn
|
var cw *configWatcher
|
||||||
func InitAlertURLGeneratorFn(externalURL *url.URL, externalAlertSource string, validateTemplate bool) error {
|
|
||||||
if externalAlertSource == "" {
|
|
||||||
AlertURLGeneratorFn = func(a Alert) string {
|
|
||||||
gID, aID := strconv.FormatUint(a.GroupID, 10), strconv.FormatUint(a.ID, 10)
|
|
||||||
return fmt.Sprintf("%s/vmalert/alert?%s=%s&%s=%s", externalURL, "group_id", gID, "alert_id", aID)
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
if validateTemplate {
|
|
||||||
if err := ValidateTemplates(map[string]string{
|
|
||||||
"tpl": externalAlertSource,
|
|
||||||
}); err != nil {
|
|
||||||
return fmt.Errorf("error validating source template %s: %w", externalAlertSource, err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
m := map[string]string{
|
|
||||||
"tpl": externalAlertSource,
|
|
||||||
}
|
|
||||||
AlertURLGeneratorFn = func(alert Alert) string {
|
|
||||||
qFn := func(_ string) ([]datasource.Metric, error) {
|
|
||||||
return nil, fmt.Errorf("`query` template isn't supported for alert source template")
|
|
||||||
}
|
|
||||||
templated, err := alert.ExecTemplate(qFn, alert.Labels, m)
|
|
||||||
if err != nil {
|
|
||||||
logger.Errorf("cannot template alert source: %s", err)
|
|
||||||
}
|
|
||||||
return fmt.Sprintf("%s/%s", externalURL, templated["tpl"])
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
var (
|
|
||||||
// getActiveNotifiers returns the current list of Notifier objects.
|
|
||||||
getActiveNotifiers func() []Notifier
|
|
||||||
// globalRelabelCfg stores the parsed alert relabeling config from the config file if there is
|
|
||||||
globalRelabelCfg *promrelabel.ParsedConfigs
|
|
||||||
|
|
||||||
// cw holds a configWatcher for configPath configuration file
|
|
||||||
// configWatcher provides a list of Notifier objects discovered
|
|
||||||
// from static config or via service discovery.
|
|
||||||
// cw is not nil only if configPath is provided.
|
|
||||||
cw *configWatcher
|
|
||||||
|
|
||||||
// externalLabels is a global variable for holding external labels configured via flags
|
|
||||||
// It is supposed to be inited via Init function only.
|
|
||||||
externalLabels map[string]string
|
|
||||||
// externalURL is a global variable for holding external URL value configured via flag
|
|
||||||
// It is supposed to be inited via Init function only.
|
|
||||||
externalURL string
|
|
||||||
)
|
|
||||||
|
|
||||||
// Reload checks the changes in configPath configuration file
|
// Reload checks the changes in configPath configuration file
|
||||||
// and applies changes if any.
|
// and applies changes if any.
|
||||||
@@ -130,62 +72,66 @@ func Reload() error {
|
|||||||
return cw.reload(*configPath)
|
return cw.reload(*configPath)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var staticNotifiersFn func() []Notifier
|
||||||
|
|
||||||
|
var (
|
||||||
|
// externalLabels is a global variable for holding external labels configured via flags
|
||||||
|
// It is supposed to be inited via Init function only.
|
||||||
|
externalLabels map[string]string
|
||||||
|
// externalURL is a global variable for holding external URL value configured via flag
|
||||||
|
// It is supposed to be inited via Init function only.
|
||||||
|
externalURL string
|
||||||
|
)
|
||||||
|
|
||||||
|
// Init returns a function for retrieving actual list of Notifier objects.
|
||||||
// Init works in two mods:
|
// Init works in two mods:
|
||||||
// - configuration via flags (for backward compatibility). Is always static
|
// - configuration via flags (for backward compatibility). Is always static
|
||||||
// and don't support live reloads.
|
// and don't support live reloads.
|
||||||
// - configuration via file. Supports live reloads and service discovery.
|
// - configuration via file. Supports live reloads and service discovery.
|
||||||
//
|
//
|
||||||
// Init returns an error if both mods are used.
|
// Init returns an error if both mods are used.
|
||||||
func Init(extLabels map[string]string, extURL string) error {
|
func Init(gen AlertURLGenerator, extLabels map[string]string, extURL string) (func() []Notifier, error) {
|
||||||
externalURL = extURL
|
externalURL = extURL
|
||||||
externalLabels = extLabels
|
externalLabels = extLabels
|
||||||
_, err := url.Parse(externalURL)
|
_, err := url.Parse(externalURL)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("failed to parse external URL: %w", err)
|
return nil, fmt.Errorf("failed to parse external URL: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
if *blackHole {
|
if *blackHole {
|
||||||
if len(*addrs) > 0 || *configPath != "" {
|
if len(*addrs) > 0 || *configPath != "" {
|
||||||
return fmt.Errorf("only one of -notifier.blackhole, -notifier.url and -notifier.config flags must be specified")
|
return nil, fmt.Errorf("only one of -notifier.blackhole, -notifier.url and -notifier.config flags must be specified")
|
||||||
}
|
}
|
||||||
notifier := newBlackHoleNotifier()
|
notifier := newBlackHoleNotifier()
|
||||||
getActiveNotifiers = func() []Notifier {
|
staticNotifiersFn = func() []Notifier {
|
||||||
return []Notifier{notifier}
|
return []Notifier{notifier}
|
||||||
}
|
}
|
||||||
return nil
|
return staticNotifiersFn, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
if *configPath == "" && len(*addrs) == 0 {
|
if *configPath == "" && len(*addrs) == 0 {
|
||||||
return nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
if *configPath != "" && len(*addrs) > 0 {
|
if *configPath != "" && len(*addrs) > 0 {
|
||||||
return fmt.Errorf("only one of -notifier.config or -notifier.url flags must be specified")
|
return nil, fmt.Errorf("only one of -notifier.config or -notifier.url flags must be specified")
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(*addrs) > 0 {
|
if len(*addrs) > 0 {
|
||||||
notifiers, err := notifiersFromFlags(AlertURLGeneratorFn)
|
notifiers, err := notifiersFromFlags(gen)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("failed to create notifier from flag values: %w", err)
|
return nil, fmt.Errorf("failed to create notifier from flag values: %w", err)
|
||||||
}
|
}
|
||||||
getActiveNotifiers = func() []Notifier {
|
staticNotifiersFn = func() []Notifier {
|
||||||
return notifiers
|
return notifiers
|
||||||
}
|
}
|
||||||
return nil
|
return staticNotifiersFn, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
cfg, err := parseConfig(*configPath)
|
cw, err = newWatcher(*configPath, gen)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return nil, fmt.Errorf("failed to init config watcher: %w", err)
|
||||||
}
|
}
|
||||||
if cfg.AlertRelabelConfigs != nil {
|
return cw.notifiers, nil
|
||||||
globalRelabelCfg = cfg.parsedAlertRelabelConfigs
|
|
||||||
}
|
|
||||||
cw, err = newWatcher(cfg, AlertURLGeneratorFn)
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("failed to init config watcher: %w", err)
|
|
||||||
}
|
|
||||||
getActiveNotifiers = cw.notifiers
|
|
||||||
return nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// InitSecretFlags must be called after flag.Parse and before any logging
|
// InitSecretFlags must be called after flag.Parse and before any logging
|
||||||
@@ -229,9 +175,6 @@ func notifiersFromFlags(gen AlertURLGenerator) ([]Notifier, error) {
|
|||||||
Headers: []string{headers.GetOptionalArg(i)},
|
Headers: []string{headers.GetOptionalArg(i)},
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := httputil.CheckURL(addr); err != nil {
|
|
||||||
return nil, fmt.Errorf("invalid notifier.url %q: %w", addr, err)
|
|
||||||
}
|
|
||||||
addr = strings.TrimSuffix(addr, "/")
|
addr = strings.TrimSuffix(addr, "/")
|
||||||
am, err := NewAlertManager(addr+alertManagerPath, gen, authCfg, nil, sendTimeout.GetOptionalArg(i))
|
am, err := NewAlertManager(addr+alertManagerPath, gen, authCfg, nil, sendTimeout.GetOptionalArg(i))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -263,58 +206,23 @@ const (
|
|||||||
|
|
||||||
// GetTargets returns list of static or discovered targets
|
// GetTargets returns list of static or discovered targets
|
||||||
// via notifier configuration.
|
// via notifier configuration.
|
||||||
//
|
|
||||||
// Must be called after Init.
|
|
||||||
func GetTargets() map[TargetType][]Target {
|
func GetTargets() map[TargetType][]Target {
|
||||||
if getActiveNotifiers == nil {
|
var targets = make(map[TargetType][]Target)
|
||||||
return nil
|
|
||||||
|
if staticNotifiersFn != nil {
|
||||||
|
for _, ns := range staticNotifiersFn() {
|
||||||
|
targets[TargetStatic] = append(targets[TargetStatic], Target{
|
||||||
|
Notifier: ns,
|
||||||
|
})
|
||||||
|
}
|
||||||
}
|
}
|
||||||
targets := make(map[TargetType][]Target)
|
|
||||||
// use cached targets from configWatcher instead of getActiveNotifiers for the extra target labels
|
|
||||||
if cw != nil {
|
if cw != nil {
|
||||||
cw.targetsMu.RLock()
|
cw.targetsMu.RLock()
|
||||||
for key, ns := range cw.targets {
|
for key, ns := range cw.targets {
|
||||||
targets[key] = append(targets[key], ns...)
|
targets[key] = append(targets[key], ns...)
|
||||||
}
|
}
|
||||||
cw.targetsMu.RUnlock()
|
cw.targetsMu.RUnlock()
|
||||||
return targets
|
|
||||||
}
|
|
||||||
|
|
||||||
// static notifiers don't have labels
|
|
||||||
for _, ns := range getActiveNotifiers() {
|
|
||||||
targets[TargetStatic] = append(targets[TargetStatic], Target{
|
|
||||||
Notifier: ns,
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
return targets
|
return targets
|
||||||
}
|
}
|
||||||
|
|
||||||
// Send sends alerts to all active notifiers
|
|
||||||
func Send(ctx context.Context, alerts []Alert, notifierHeaders map[string]string) chan error {
|
|
||||||
alertsToSend := make([]Alert, 0, len(alerts))
|
|
||||||
lblss := make([][]prompb.Label, 0, len(alerts))
|
|
||||||
// apply global relabel config first without modifying original alerts in alerts
|
|
||||||
for _, a := range alerts {
|
|
||||||
lbls := a.applyRelabelingIfNeeded(globalRelabelCfg)
|
|
||||||
if len(lbls) == 0 {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
alertsToSend = append(alertsToSend, a)
|
|
||||||
lblss = append(lblss, lbls)
|
|
||||||
}
|
|
||||||
|
|
||||||
wg := sync.WaitGroup{}
|
|
||||||
activeNotifiers := getActiveNotifiers()
|
|
||||||
errCh := make(chan error, len(activeNotifiers))
|
|
||||||
defer close(errCh)
|
|
||||||
for i := range activeNotifiers {
|
|
||||||
nt := activeNotifiers[i]
|
|
||||||
wg.Go(func() {
|
|
||||||
if err := nt.Send(ctx, alertsToSend, lblss, notifierHeaders); err != nil {
|
|
||||||
errCh <- fmt.Errorf("failed to send alerts to addr %q: %w", nt.Addr(), err)
|
|
||||||
}
|
|
||||||
})
|
|
||||||
}
|
|
||||||
wg.Wait()
|
|
||||||
return errCh
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -1,17 +1,9 @@
|
|||||||
package notifier
|
package notifier
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
|
||||||
"encoding/json"
|
|
||||||
"fmt"
|
|
||||||
"net/http"
|
|
||||||
"net/http/httptest"
|
|
||||||
"net/url"
|
|
||||||
"os"
|
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestInit(t *testing.T) {
|
func TestInit(t *testing.T) {
|
||||||
@@ -20,13 +12,14 @@ func TestInit(t *testing.T) {
|
|||||||
|
|
||||||
*addrs = flagutil.ArrayString{"127.0.0.1", "127.0.0.2"}
|
*addrs = flagutil.ArrayString{"127.0.0.1", "127.0.0.2"}
|
||||||
|
|
||||||
err := Init(nil, "")
|
fn, err := Init(nil, nil, "")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("%s", err)
|
t.Fatalf("%s", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(getActiveNotifiers()) != 2 {
|
nfs := fn()
|
||||||
t.Fatalf("expected to get 2 notifiers; got %d", len(getActiveNotifiers()))
|
if len(nfs) != 2 {
|
||||||
|
t.Fatalf("expected to get 2 notifiers; got %d", len(nfs))
|
||||||
}
|
}
|
||||||
|
|
||||||
targets := GetTargets()
|
targets := GetTargets()
|
||||||
@@ -55,22 +48,19 @@ func TestInitNegative(t *testing.T) {
|
|||||||
*blackHole = oldBlackHole
|
*blackHole = oldBlackHole
|
||||||
}()
|
}()
|
||||||
|
|
||||||
f := func(path string, addr []string, bh bool) {
|
f := func(path, addr string, bh bool) {
|
||||||
*configPath = path
|
*configPath = path
|
||||||
*addrs = flagutil.ArrayString(addr)
|
*addrs = flagutil.ArrayString{addr}
|
||||||
*blackHole = bh
|
*blackHole = bh
|
||||||
if err := Init(nil, ""); err == nil {
|
if _, err := Init(nil, nil, ""); err == nil {
|
||||||
t.Fatalf("expected to get error; got nil instead")
|
t.Fatalf("expected to get error; got nil instead")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// *configPath, *addrs and *blackhole are mutually exclusive
|
// *configPath, *addrs and *blackhole are mutually exclusive
|
||||||
f("/dummy/path", []string{"127.0.0.1"}, false)
|
f("/dummy/path", "127.0.0.1", false)
|
||||||
f("/dummy/path", []string{}, true)
|
f("/dummy/path", "", true)
|
||||||
f("", []string{"127.0.0.1"}, true)
|
f("", "127.0.0.1", true)
|
||||||
// addr cannot be ""
|
|
||||||
f("", []string{""}, false)
|
|
||||||
f("", []string{"127.0.0.1", ""}, false)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestBlackHole(t *testing.T) {
|
func TestBlackHole(t *testing.T) {
|
||||||
@@ -79,13 +69,14 @@ func TestBlackHole(t *testing.T) {
|
|||||||
|
|
||||||
*blackHole = true
|
*blackHole = true
|
||||||
|
|
||||||
err := Init(nil, "")
|
fn, err := Init(nil, nil, "")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("%s", err)
|
t.Fatalf("%s", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(getActiveNotifiers()) != 1 {
|
nfs := fn()
|
||||||
t.Fatalf("expected to get 1 notifier; got %d", len(getActiveNotifiers()))
|
if len(nfs) != 1 {
|
||||||
|
t.Fatalf("expected to get 1 notifier; got %d", len(nfs))
|
||||||
}
|
}
|
||||||
|
|
||||||
targets := GetTargets()
|
targets := GetTargets()
|
||||||
@@ -100,114 +91,3 @@ func TestBlackHole(t *testing.T) {
|
|||||||
t.Fatalf("expected to get \"blackhole\"; got %q instead", nf1.Addr())
|
t.Fatalf("expected to get \"blackhole\"; got %q instead", nf1.Addr())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestGetAlertURLGenerator(t *testing.T) {
|
|
||||||
oldAlertURLGeneratorFn := AlertURLGeneratorFn
|
|
||||||
defer func() { AlertURLGeneratorFn = oldAlertURLGeneratorFn }()
|
|
||||||
|
|
||||||
testAlert := Alert{GroupID: 42, ID: 2, Value: 4, Labels: map[string]string{"tenant": "baz"}}
|
|
||||||
u, _ := url.Parse("https://victoriametrics.com/path")
|
|
||||||
err := InitAlertURLGeneratorFn(u, "", false)
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("unexpected error %s", err)
|
|
||||||
}
|
|
||||||
exp := fmt.Sprintf("https://victoriametrics.com/path/vmalert/alert?%s=42&%s=2", "group_id", "alert_id")
|
|
||||||
if exp != AlertURLGeneratorFn(testAlert) {
|
|
||||||
t.Fatalf("unexpected url want %s, got %s", exp, AlertURLGeneratorFn(testAlert))
|
|
||||||
}
|
|
||||||
err = InitAlertURLGeneratorFn(nil, "foo?{{invalid}}", true)
|
|
||||||
if err == nil {
|
|
||||||
t.Fatalf("expected template validation error got nil")
|
|
||||||
}
|
|
||||||
err = InitAlertURLGeneratorFn(u, "foo?query={{$value}}&ds={{ $labels.tenant }}", true)
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("unexpected error %s", err)
|
|
||||||
}
|
|
||||||
if exp := "https://victoriametrics.com/path/foo?query=4&ds=baz"; exp != AlertURLGeneratorFn(testAlert) {
|
|
||||||
t.Fatalf("unexpected url want %s, got %s", exp, AlertURLGeneratorFn(testAlert))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestSendAlerts(t *testing.T) {
|
|
||||||
oldAlertURLGeneratorFn := AlertURLGeneratorFn
|
|
||||||
defer func() { AlertURLGeneratorFn = oldAlertURLGeneratorFn }()
|
|
||||||
AlertURLGeneratorFn = func(alert Alert) string {
|
|
||||||
return ""
|
|
||||||
}
|
|
||||||
mux := http.NewServeMux()
|
|
||||||
mux.HandleFunc("/", func(_ http.ResponseWriter, _ *http.Request) {
|
|
||||||
t.Fatalf("should not be called")
|
|
||||||
})
|
|
||||||
mux.HandleFunc(alertManagerPath, func(w http.ResponseWriter, r *http.Request) {
|
|
||||||
var a []struct {
|
|
||||||
Labels map[string]string `json:"labels"`
|
|
||||||
}
|
|
||||||
if err := json.NewDecoder(r.Body).Decode(&a); err != nil {
|
|
||||||
t.Fatalf("can not unmarshal data into alert %s", err)
|
|
||||||
}
|
|
||||||
if len(a) != 2 {
|
|
||||||
t.Fatalf("expected 2 alert in array got %d", len(a))
|
|
||||||
}
|
|
||||||
if len(a[0].Labels) != 4 {
|
|
||||||
t.Fatalf("expected 4 labels got %d", len(a[0].Labels))
|
|
||||||
}
|
|
||||||
if a[0].Labels["env"] != "prod" {
|
|
||||||
t.Fatalf("expected env label to be prod during relabeling, got %s", a[0].Labels["env"])
|
|
||||||
}
|
|
||||||
if a[0].Labels["c"] != "baz" {
|
|
||||||
t.Fatalf("expected c label to be baz during relabeling, got %s", a[0].Labels["c"])
|
|
||||||
}
|
|
||||||
if len(a[1].Labels) != 1 {
|
|
||||||
t.Fatalf("expected 1 labels got %d", len(a[1].Labels))
|
|
||||||
}
|
|
||||||
})
|
|
||||||
srv := httptest.NewServer(mux)
|
|
||||||
defer srv.Close()
|
|
||||||
|
|
||||||
f, err := os.CreateTemp("", "")
|
|
||||||
if err != nil {
|
|
||||||
t.Fatal(err)
|
|
||||||
}
|
|
||||||
defer fs.MustRemovePath(f.Name())
|
|
||||||
|
|
||||||
rawConfig := `
|
|
||||||
static_configs:
|
|
||||||
- targets:
|
|
||||||
- %s
|
|
||||||
alert_relabel_configs:
|
|
||||||
- source_labels: [b]
|
|
||||||
target_label: "c"
|
|
||||||
alert_relabel_configs:
|
|
||||||
- source_labels: [a]
|
|
||||||
target_label: "b"
|
|
||||||
- target_label: "env"
|
|
||||||
replacement: "prod"
|
|
||||||
`
|
|
||||||
config := fmt.Sprintf(rawConfig, srv.URL+alertManagerPath)
|
|
||||||
writeToFile(f.Name(), config)
|
|
||||||
|
|
||||||
oldConfigPath := configPath
|
|
||||||
defer func() { configPath = oldConfigPath }()
|
|
||||||
*configPath = f.Name()
|
|
||||||
err = Init(nil, "")
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("unexpected error when parse notifier config: %s", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
firingAlerts := []Alert{
|
|
||||||
{
|
|
||||||
Name: "alert1",
|
|
||||||
Labels: map[string]string{"a": "baz"},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
Name: "alert2",
|
|
||||||
Labels: map[string]string{},
|
|
||||||
},
|
|
||||||
}
|
|
||||||
errG := Send(context.Background(), firingAlerts, nil)
|
|
||||||
for err := range errG {
|
|
||||||
if err != nil {
|
|
||||||
t.Errorf("unexpected error when sending alerts: %s", err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -1,21 +1,15 @@
|
|||||||
package notifier
|
package notifier
|
||||||
|
|
||||||
import (
|
import "context"
|
||||||
"context"
|
|
||||||
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
|
||||||
)
|
|
||||||
|
|
||||||
// Notifier is a common interface for alert manager provider
|
// Notifier is a common interface for alert manager provider
|
||||||
type Notifier interface {
|
type Notifier interface {
|
||||||
// Send sends the given list of alerts.
|
// Send sends the given list of alerts.
|
||||||
// Returns an error if fails to send the alerts.
|
// Returns an error if fails to send the alerts.
|
||||||
// Must unblock if the given ctx is cancelled.
|
// Must unblock if the given ctx is cancelled.
|
||||||
Send(ctx context.Context, alerts []Alert, alertLabels [][]prompb.Label, notifierHeaders map[string]string) error
|
Send(ctx context.Context, alerts []Alert, notifierHeaders map[string]string) error
|
||||||
// Addr returns address where alerts are sent.
|
// Addr returns address where alerts are sent.
|
||||||
Addr() string
|
Addr() string
|
||||||
// LastError returns error, that occured during last attempt to send data
|
|
||||||
LastError() string
|
|
||||||
// Close is a destructor for the Notifier
|
// Close is a destructor for the Notifier
|
||||||
Close()
|
Close()
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,10 +1,6 @@
|
|||||||
package notifier
|
package notifier
|
||||||
|
|
||||||
import (
|
import "context"
|
||||||
"context"
|
|
||||||
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
|
||||||
)
|
|
||||||
|
|
||||||
// blackHoleNotifier is a Notifier stub, used when no notifications need
|
// blackHoleNotifier is a Notifier stub, used when no notifications need
|
||||||
// to be sent.
|
// to be sent.
|
||||||
@@ -14,7 +10,7 @@ type blackHoleNotifier struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Send will send no notifications, but increase the metric.
|
// Send will send no notifications, but increase the metric.
|
||||||
func (bh *blackHoleNotifier) Send(_ context.Context, alerts []Alert, _ [][]prompb.Label, _ map[string]string) error { //nolint:revive
|
func (bh *blackHoleNotifier) Send(_ context.Context, alerts []Alert, _ map[string]string) error { //nolint:revive
|
||||||
bh.metrics.alertsSent.Add(len(alerts))
|
bh.metrics.alertsSent.Add(len(alerts))
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
@@ -29,11 +25,6 @@ func (bh *blackHoleNotifier) Close() {
|
|||||||
bh.metrics.close()
|
bh.metrics.close()
|
||||||
}
|
}
|
||||||
|
|
||||||
// LastError return last notifier's error
|
|
||||||
func (bh *blackHoleNotifier) LastError() string {
|
|
||||||
return ""
|
|
||||||
}
|
|
||||||
|
|
||||||
// newBlackHoleNotifier creates a new blackHoleNotifier
|
// newBlackHoleNotifier creates a new blackHoleNotifier
|
||||||
func newBlackHoleNotifier() *blackHoleNotifier {
|
func newBlackHoleNotifier() *blackHoleNotifier {
|
||||||
address := "blackhole"
|
address := "blackhole"
|
||||||
|
|||||||
@@ -5,7 +5,6 @@ import (
|
|||||||
"testing"
|
"testing"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
|
||||||
metricset "github.com/VictoriaMetrics/metrics"
|
metricset "github.com/VictoriaMetrics/metrics"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -17,7 +16,7 @@ func TestBlackHoleNotifier_Send(t *testing.T) {
|
|||||||
Start: time.Now().UTC(),
|
Start: time.Now().UTC(),
|
||||||
End: time.Now().UTC(),
|
End: time.Now().UTC(),
|
||||||
Annotations: map[string]string{"a": "b", "c": "d", "e": "f"},
|
Annotations: map[string]string{"a": "b", "c": "d", "e": "f"},
|
||||||
}}, [][]prompb.Label{{}}, nil); err != nil {
|
}}, nil); err != nil {
|
||||||
t.Fatalf("unexpected error %s", err)
|
t.Fatalf("unexpected error %s", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -35,7 +34,7 @@ func TestBlackHoleNotifier_Close(t *testing.T) {
|
|||||||
Start: time.Now().UTC(),
|
Start: time.Now().UTC(),
|
||||||
End: time.Now().UTC(),
|
End: time.Now().UTC(),
|
||||||
Annotations: map[string]string{"a": "b", "c": "d", "e": "f"},
|
Annotations: map[string]string{"a": "b", "c": "d", "e": "f"},
|
||||||
}}, [][]prompb.Label{{}}, nil); err != nil {
|
}}, nil); err != nil {
|
||||||
t.Fatalf("unexpected error %s", err)
|
t.Fatalf("unexpected error %s", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
19
app/vmalert/notifier/testdata/consul.bad.yaml
vendored
19
app/vmalert/notifier/testdata/consul.bad.yaml
vendored
@@ -1,19 +0,0 @@
|
|||||||
consul_sd_configs:
|
|
||||||
- server: localhost:8500
|
|
||||||
scheme: http
|
|
||||||
services:
|
|
||||||
- alertmanager
|
|
||||||
alert_relabel_configs:
|
|
||||||
- action: keep
|
|
||||||
source_labels: [env]
|
|
||||||
regex: "prod"
|
|
||||||
- server: localhost:8500
|
|
||||||
services:
|
|
||||||
- consul
|
|
||||||
alert_relabel_configs:
|
|
||||||
- action: keep
|
|
||||||
source_labels: [env]
|
|
||||||
regex: "(abc"
|
|
||||||
alert_relabel_configs:
|
|
||||||
- target_label: "foo"
|
|
||||||
replacement: "aaa"
|
|
||||||
13
app/vmalert/notifier/testdata/dns.bad.yaml
vendored
13
app/vmalert/notifier/testdata/dns.bad.yaml
vendored
@@ -1,13 +0,0 @@
|
|||||||
dns_sd_configs:
|
|
||||||
- names:
|
|
||||||
- cloudflare.com
|
|
||||||
type: 'A'
|
|
||||||
port: 9093
|
|
||||||
relabel_configs:
|
|
||||||
- source_labels: [__meta_dns_name]
|
|
||||||
replacement: '${1}'
|
|
||||||
target_label: dns_name
|
|
||||||
alert_relabel_configs:
|
|
||||||
- action: keep
|
|
||||||
source_labels: [env]
|
|
||||||
regex: "(abc"
|
|
||||||
15
app/vmalert/notifier/testdata/mixed.good.yaml
vendored
15
app/vmalert/notifier/testdata/mixed.good.yaml
vendored
@@ -2,19 +2,12 @@ static_configs:
|
|||||||
- targets:
|
- targets:
|
||||||
- localhost:9093
|
- localhost:9093
|
||||||
- localhost:9095
|
- localhost:9095
|
||||||
alert_relabel_configs:
|
|
||||||
- action: keep
|
|
||||||
source_labels: [env]
|
|
||||||
regex: "static"
|
|
||||||
consul_sd_configs:
|
consul_sd_configs:
|
||||||
- server: localhost:8500
|
- server: localhost:8500
|
||||||
scheme: http
|
scheme: http
|
||||||
services:
|
services:
|
||||||
- alertmanager
|
- alertmanager
|
||||||
alert_relabel_configs:
|
|
||||||
- action: keep
|
|
||||||
source_labels: [env]
|
|
||||||
regex: "consul"
|
|
||||||
- server: localhost:8500
|
- server: localhost:8500
|
||||||
services:
|
services:
|
||||||
- consul
|
- consul
|
||||||
@@ -24,10 +17,6 @@ dns_sd_configs:
|
|||||||
- cloudflare.com
|
- cloudflare.com
|
||||||
type: 'A'
|
type: 'A'
|
||||||
port: 9093
|
port: 9093
|
||||||
alert_relabel_configs:
|
|
||||||
- action: keep
|
|
||||||
source_labels: [env]
|
|
||||||
regex: "dns"
|
|
||||||
|
|
||||||
relabel_configs:
|
relabel_configs:
|
||||||
- source_labels: [__meta_consul_tags]
|
- source_labels: [__meta_consul_tags]
|
||||||
@@ -36,4 +25,4 @@ relabel_configs:
|
|||||||
target_label: __scheme__
|
target_label: __scheme__
|
||||||
- source_labels: [__meta_dns_name]
|
- source_labels: [__meta_dns_name]
|
||||||
replacement: '${1}'
|
replacement: '${1}'
|
||||||
target_label: dns_name
|
target_label: dns_name
|
||||||
26
app/vmalert/notifier/testdata/static.good.yaml
vendored
26
app/vmalert/notifier/testdata/static.good.yaml
vendored
@@ -1,14 +1,22 @@
|
|||||||
|
headers:
|
||||||
|
- 'CustomHeader: foo'
|
||||||
|
|
||||||
static_configs:
|
static_configs:
|
||||||
- targets:
|
- targets:
|
||||||
- http://192.168.0.101:9093
|
- localhost:9093
|
||||||
alert_relabel_configs:
|
- localhost:9095
|
||||||
- target_label: "foo"
|
- https://localhost:9093/test/api/v2/alerts
|
||||||
replacement: "aaa"
|
basic_auth:
|
||||||
|
username: foo
|
||||||
|
password: bar
|
||||||
|
|
||||||
- targets:
|
- targets:
|
||||||
- http://192.168.0.101:9093
|
- localhost:9096
|
||||||
alert_relabel_configs:
|
- localhost:9097
|
||||||
- target_label: "foo"
|
basic_auth:
|
||||||
replacement: "ccc"
|
username: foo
|
||||||
|
password: baz
|
||||||
|
|
||||||
|
alert_relabel_configs:
|
||||||
|
- target_label: "foo"
|
||||||
|
replacement: "aaa"
|
||||||
|
|||||||
@@ -1,19 +0,0 @@
|
|||||||
package notifier
|
|
||||||
|
|
||||||
// ApiNotifier represents a Notifier configuration for WEB view
|
|
||||||
type ApiNotifier struct {
|
|
||||||
// Kind is a Notifier type
|
|
||||||
Kind TargetType `json:"kind"`
|
|
||||||
// Targets is a list of Notifier targets
|
|
||||||
Targets []*ApiTarget `json:"targets"`
|
|
||||||
}
|
|
||||||
|
|
||||||
// ApiTarget represents a specific Notifier target for WEB view
|
|
||||||
type ApiTarget struct {
|
|
||||||
// Address is a URL for sending notifications
|
|
||||||
Address string `json:"address"`
|
|
||||||
// Labels is a list of labels to add to each sent notification
|
|
||||||
Labels map[string]string `json:"labels"`
|
|
||||||
// LastError contains the error faced while sending to notifier.
|
|
||||||
LastError string `json:"lastError"`
|
|
||||||
}
|
|
||||||
@@ -14,9 +14,9 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
addr = flag.String("remoteRead.url", "", "Optional URL to datasource compatible with MetricsQL. It can be single node VictoriaMetrics or vmselect. "+
|
addr = flag.String("remoteRead.url", "", "Optional URL to datasource compatible with MetricsQL. It can be single node VictoriaMetrics or vmselect."+
|
||||||
"Remote read is used to restore alerts state. "+
|
"Remote read is used to restore alerts state."+
|
||||||
"This configuration makes sense only if vmalert was configured with '-remoteWrite.url' before and has been successfully persisted its state. "+
|
"This configuration makes sense only if `vmalert` was configured with `remoteWrite.url` before and has been successfully persisted its state. "+
|
||||||
"Supports address in the form of IP address with a port (e.g., http://127.0.0.1:8428) or DNS SRV record. "+
|
"Supports address in the form of IP address with a port (e.g., http://127.0.0.1:8428) or DNS SRV record. "+
|
||||||
"See also '-remoteRead.disablePathAppend', '-remoteRead.showURL'.")
|
"See also '-remoteRead.disablePathAppend', '-remoteRead.showURL'.")
|
||||||
|
|
||||||
|
|||||||
@@ -113,7 +113,7 @@ func NewClient(ctx context.Context, cfg Config) (*Client, error) {
|
|||||||
input: make(chan prompb.TimeSeries, cfg.MaxQueueSize),
|
input: make(chan prompb.TimeSeries, cfg.MaxQueueSize),
|
||||||
}
|
}
|
||||||
|
|
||||||
for range cc {
|
for i := 0; i < cc; i++ {
|
||||||
c.run(ctx)
|
c.run(ctx)
|
||||||
}
|
}
|
||||||
return c, nil
|
return c, nil
|
||||||
@@ -173,8 +173,9 @@ func (c *Client) run(ctx context.Context) {
|
|||||||
|
|
||||||
cancel()
|
cancel()
|
||||||
}
|
}
|
||||||
|
c.wg.Add(1)
|
||||||
c.wg.Go(func() {
|
go func() {
|
||||||
|
defer c.wg.Done()
|
||||||
defer ticker.Stop()
|
defer ticker.Stop()
|
||||||
for {
|
for {
|
||||||
select {
|
select {
|
||||||
@@ -196,7 +197,7 @@ func (c *Client) run(ctx context.Context) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
})
|
}()
|
||||||
}
|
}
|
||||||
|
|
||||||
var (
|
var (
|
||||||
@@ -238,10 +239,8 @@ func (c *Client) flush(ctx context.Context, wr *prompb.WriteRequest) {
|
|||||||
defer func() {
|
defer func() {
|
||||||
sendDuration.Add(time.Since(timeStart).Seconds())
|
sendDuration.Add(time.Since(timeStart).Seconds())
|
||||||
}()
|
}()
|
||||||
|
|
||||||
attempts := 0
|
|
||||||
L:
|
L:
|
||||||
for {
|
for attempts := 0; ; attempts++ {
|
||||||
err := c.send(ctx, b)
|
err := c.send(ctx, b)
|
||||||
if err != nil && (errors.Is(err, io.EOF) || netutil.IsTrivialNetworkError(err)) {
|
if err != nil && (errors.Is(err, io.EOF) || netutil.IsTrivialNetworkError(err)) {
|
||||||
// Something in the middle between client and destination might be closing
|
// Something in the middle between client and destination might be closing
|
||||||
@@ -283,7 +282,6 @@ L:
|
|||||||
time.Sleep(retryInterval)
|
time.Sleep(retryInterval)
|
||||||
retryInterval *= 2
|
retryInterval *= 2
|
||||||
|
|
||||||
attempts++
|
|
||||||
}
|
}
|
||||||
|
|
||||||
rwErrors.Inc()
|
rwErrors.Inc()
|
||||||
|
|||||||
@@ -44,7 +44,7 @@ func TestClient_Push(t *testing.T) {
|
|||||||
|
|
||||||
r := rand.New(rand.NewSource(1))
|
r := rand.New(rand.NewSource(1))
|
||||||
const rowsN = int(1e4)
|
const rowsN = int(1e4)
|
||||||
for range rowsN {
|
for i := 0; i < rowsN; i++ {
|
||||||
s := prompb.TimeSeries{
|
s := prompb.TimeSeries{
|
||||||
Samples: []prompb.Sample{{
|
Samples: []prompb.Sample{{
|
||||||
Value: r.Float64(),
|
Value: r.Float64(),
|
||||||
@@ -102,7 +102,7 @@ func TestClient_run_maxBatchSizeDuringShutdown(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// push time series to the client.
|
// push time series to the client.
|
||||||
for range pushCnt {
|
for i := 0; i < pushCnt; i++ {
|
||||||
if err = rwClient.Push(prompb.TimeSeries{}); err != nil {
|
if err = rwClient.Push(prompb.TimeSeries{}); err != nil {
|
||||||
t.Fatalf("cannot time series to the client: %s", err)
|
t.Fatalf("cannot time series to the client: %s", err)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -22,7 +22,7 @@ func TestDebugClient_Push(t *testing.T) {
|
|||||||
|
|
||||||
const rowsN = 100
|
const rowsN = 100
|
||||||
var sent int
|
var sent int
|
||||||
for i := range rowsN {
|
for i := 0; i < rowsN; i++ {
|
||||||
s := prompb.TimeSeries{
|
s := prompb.TimeSeries{
|
||||||
Samples: []prompb.Sample{{
|
Samples: []prompb.Sample{{
|
||||||
Value: float64(i),
|
Value: float64(i),
|
||||||
|
|||||||
@@ -2,7 +2,6 @@ package rule
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"errors"
|
|
||||||
"fmt"
|
"fmt"
|
||||||
"hash/fnv"
|
"hash/fnv"
|
||||||
"math"
|
"math"
|
||||||
@@ -188,54 +187,6 @@ func (ar *AlertingRule) ID() uint64 {
|
|||||||
return ar.RuleID
|
return ar.RuleID
|
||||||
}
|
}
|
||||||
|
|
||||||
// ToAPI returns ApiRule representation of ar
|
|
||||||
func (ar *AlertingRule) ToAPI() ApiRule {
|
|
||||||
state := ar.state
|
|
||||||
lastState := state.getLast()
|
|
||||||
r := ApiRule{
|
|
||||||
Type: TypeAlerting,
|
|
||||||
DatasourceType: ar.Type.String(),
|
|
||||||
Name: ar.Name,
|
|
||||||
Query: ar.Expr,
|
|
||||||
Duration: ar.For.Seconds(),
|
|
||||||
KeepFiringFor: ar.KeepFiringFor.Seconds(),
|
|
||||||
Labels: ar.Labels,
|
|
||||||
Annotations: ar.Annotations,
|
|
||||||
LastEvaluation: lastState.Time,
|
|
||||||
EvaluationTime: lastState.Duration.Seconds(),
|
|
||||||
Health: "ok",
|
|
||||||
State: "inactive",
|
|
||||||
Alerts: ar.AlertsToAPI(),
|
|
||||||
LastSamples: lastState.Samples,
|
|
||||||
LastSeriesFetched: lastState.SeriesFetched,
|
|
||||||
MaxUpdates: state.size(),
|
|
||||||
Updates: state.getAll(),
|
|
||||||
Debug: ar.Debug,
|
|
||||||
|
|
||||||
// encode as strings to avoid rounding in JSON
|
|
||||||
ID: fmt.Sprintf("%d", ar.ID()),
|
|
||||||
GroupID: fmt.Sprintf("%d", ar.GroupID),
|
|
||||||
GroupName: ar.GroupName,
|
|
||||||
File: ar.File,
|
|
||||||
}
|
|
||||||
if lastState.Err != nil {
|
|
||||||
r.LastError = lastState.Err.Error()
|
|
||||||
r.Health = "err"
|
|
||||||
}
|
|
||||||
// satisfy apiRule.State logic
|
|
||||||
if len(r.Alerts) > 0 {
|
|
||||||
r.State = notifier.StatePending.String()
|
|
||||||
stateFiring := notifier.StateFiring.String()
|
|
||||||
for _, a := range r.Alerts {
|
|
||||||
if a.State == stateFiring {
|
|
||||||
r.State = stateFiring
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return r
|
|
||||||
}
|
|
||||||
|
|
||||||
// GetAlerts returns active alerts of rule
|
// GetAlerts returns active alerts of rule
|
||||||
func (ar *AlertingRule) GetAlerts() []*notifier.Alert {
|
func (ar *AlertingRule) GetAlerts() []*notifier.Alert {
|
||||||
ar.alertsMu.RLock()
|
ar.alertsMu.RLock()
|
||||||
@@ -247,6 +198,16 @@ func (ar *AlertingRule) GetAlerts() []*notifier.Alert {
|
|||||||
return alerts
|
return alerts
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// GetAlert returns alert if id exists
|
||||||
|
func (ar *AlertingRule) GetAlert(id uint64) *notifier.Alert {
|
||||||
|
ar.alertsMu.RLock()
|
||||||
|
defer ar.alertsMu.RUnlock()
|
||||||
|
if ar.alerts == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return ar.alerts[id]
|
||||||
|
}
|
||||||
|
|
||||||
func (ar *AlertingRule) logDebugf(at time.Time, a *notifier.Alert, format string, args ...any) {
|
func (ar *AlertingRule) logDebugf(at time.Time, a *notifier.Alert, format string, args ...any) {
|
||||||
if !ar.Debug {
|
if !ar.Debug {
|
||||||
return
|
return
|
||||||
@@ -312,11 +273,6 @@ type labelSet struct {
|
|||||||
// On k conflicts in origin set, the original value is preferred and copied
|
// On k conflicts in origin set, the original value is preferred and copied
|
||||||
// to processed with `exported_%k` key. The copy happens only if passed v isn't equal to origin[k] value.
|
// to processed with `exported_%k` key. The copy happens only if passed v isn't equal to origin[k] value.
|
||||||
func (ls *labelSet) add(k, v string) {
|
func (ls *labelSet) add(k, v string) {
|
||||||
// do not add label with empty value, since it has no meaning.
|
|
||||||
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9984
|
|
||||||
if v == "" {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
ls.processed[k] = v
|
ls.processed[k] = v
|
||||||
ov, ok := ls.origin[k]
|
ov, ok := ls.origin[k]
|
||||||
if !ok {
|
if !ok {
|
||||||
@@ -346,13 +302,14 @@ func (ar *AlertingRule) toLabels(m datasource.Metric, qFn templates.QueryFn) (*l
|
|||||||
ls.processed[l.Name] = l.Value
|
ls.processed[l.Name] = l.Value
|
||||||
}
|
}
|
||||||
|
|
||||||
// labels only support limited templating variables,
|
|
||||||
// including `labels`, `value` and `expr`, to avoid breaking alert states or causing cardinality issue with results
|
|
||||||
extraLabels, err := notifier.ExecTemplate(qFn, ar.Labels, notifier.AlertTplData{
|
extraLabels, err := notifier.ExecTemplate(qFn, ar.Labels, notifier.AlertTplData{
|
||||||
Labels: ls.origin,
|
Labels: ls.origin,
|
||||||
Value: m.Values[0],
|
Value: m.Values[0],
|
||||||
Expr: ar.Expr,
|
Expr: ar.Expr,
|
||||||
})
|
})
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to expand labels: %w", err)
|
||||||
|
}
|
||||||
for k, v := range extraLabels {
|
for k, v := range extraLabels {
|
||||||
ls.add(k, v)
|
ls.add(k, v)
|
||||||
}
|
}
|
||||||
@@ -363,7 +320,7 @@ func (ar *AlertingRule) toLabels(m datasource.Metric, qFn templates.QueryFn) (*l
|
|||||||
if !*disableAlertGroupLabel && ar.GroupName != "" {
|
if !*disableAlertGroupLabel && ar.GroupName != "" {
|
||||||
ls.add(alertGroupNameLabel, ar.GroupName)
|
ls.add(alertGroupNameLabel, ar.GroupName)
|
||||||
}
|
}
|
||||||
return ls, err
|
return ls, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// execRange executes alerting rule on the given time range similarly to exec.
|
// execRange executes alerting rule on the given time range similarly to exec.
|
||||||
@@ -384,12 +341,16 @@ func (ar *AlertingRule) execRange(ctx context.Context, start, end time.Time) ([]
|
|||||||
return []datasource.Metric{{Timestamps: []int64{0}, Values: []float64{math.NaN()}}}, nil
|
return []datasource.Metric{{Timestamps: []int64{0}, Values: []float64{math.NaN()}}}, nil
|
||||||
}
|
}
|
||||||
for _, s := range res.Data {
|
for _, s := range res.Data {
|
||||||
ls, err := ar.expandLabelTemplates(s, qFn)
|
ls, err := ar.expandLabelTemplates(s)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
alertID := hash(ls.processed)
|
alertID := hash(ls.processed)
|
||||||
a := ar.newAlert(s, time.Time{}, ls.processed, nil) // initial alert
|
as, err := ar.expandAnnotationTemplates(s, qFn, time.Time{}, ls)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
a := ar.newAlert(s, time.Time{}, ls.processed, as) // initial alert
|
||||||
|
|
||||||
prevT := time.Time{}
|
prevT := time.Time{}
|
||||||
for i := range s.Values {
|
for i := range s.Values {
|
||||||
@@ -405,6 +366,8 @@ func (ar *AlertingRule) execRange(ctx context.Context, start, end time.Time) ([]
|
|||||||
// reset to Pending if there are gaps > EvalInterval between DPs
|
// reset to Pending if there are gaps > EvalInterval between DPs
|
||||||
a.State = notifier.StatePending
|
a.State = notifier.StatePending
|
||||||
a.ActiveAt = at
|
a.ActiveAt = at
|
||||||
|
// re-template the annotations as active timestamp is changed
|
||||||
|
a.Annotations, _ = ar.expandAnnotationTemplates(s, qFn, at, ls)
|
||||||
a.Start = time.Time{}
|
a.Start = time.Time{}
|
||||||
} else if at.Sub(a.ActiveAt) >= ar.For && a.State != notifier.StateFiring {
|
} else if at.Sub(a.ActiveAt) >= ar.For && a.State != notifier.StateFiring {
|
||||||
a.State = notifier.StateFiring
|
a.State = notifier.StateFiring
|
||||||
@@ -450,7 +413,7 @@ func (ar *AlertingRule) exec(ctx context.Context, ts time.Time, limit int) ([]pr
|
|||||||
|
|
||||||
defer func() {
|
defer func() {
|
||||||
ar.state.add(curState)
|
ar.state.add(curState)
|
||||||
if curState.Err != nil && !errors.Is(curState.Err, context.Canceled) {
|
if curState.Err != nil {
|
||||||
ar.metrics.errors.Inc()
|
ar.metrics.errors.Inc()
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
@@ -459,8 +422,7 @@ func (ar *AlertingRule) exec(ctx context.Context, ts time.Time, limit int) ([]pr
|
|||||||
return nil, fmt.Errorf("failed to execute query %q: %w", ar.Expr, err)
|
return nil, fmt.Errorf("failed to execute query %q: %w", ar.Expr, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
isPartial := isPartialResponse(res)
|
ar.logDebugf(ts, nil, "query returned %d series (elapsed: %s, isPartial: %t)", curState.Samples, curState.Duration, isPartialResponse(res))
|
||||||
ar.logDebugf(ts, nil, "query returned %d series (elapsed: %s, isPartial: %t)", curState.Samples, curState.Duration, isPartial)
|
|
||||||
qFn := func(query string) ([]datasource.Metric, error) {
|
qFn := func(query string) ([]datasource.Metric, error) {
|
||||||
res, _, err := ar.q.Query(ctx, query, ts)
|
res, _, err := ar.q.Query(ctx, query, ts)
|
||||||
return res.Data, err
|
return res.Data, err
|
||||||
@@ -472,11 +434,10 @@ func (ar *AlertingRule) exec(ctx context.Context, ts time.Time, limit int) ([]pr
|
|||||||
expandedLabels := make([]*labelSet, len(res.Data))
|
expandedLabels := make([]*labelSet, len(res.Data))
|
||||||
expandedAnnotations := make([]map[string]string, len(res.Data))
|
expandedAnnotations := make([]map[string]string, len(res.Data))
|
||||||
for i, m := range res.Data {
|
for i, m := range res.Data {
|
||||||
ls, err := ar.expandLabelTemplates(m, qFn)
|
ls, err := ar.expandLabelTemplates(m)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
// only set error in current state, but do not break alert processing
|
|
||||||
curState.Err = err
|
curState.Err = err
|
||||||
logger.Errorf("got templating error in rule %s: %q", ar.Name, err)
|
return nil, curState.Err
|
||||||
}
|
}
|
||||||
at := ts
|
at := ts
|
||||||
alertID := hash(ls.processed)
|
alertID := hash(ls.processed)
|
||||||
@@ -486,11 +447,10 @@ func (ar *AlertingRule) exec(ctx context.Context, ts time.Time, limit int) ([]pr
|
|||||||
at = a.ActiveAt
|
at = a.ActiveAt
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
as, err := ar.expandAnnotationTemplates(m, qFn, at, ls, isPartial)
|
as, err := ar.expandAnnotationTemplates(m, qFn, at, ls)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
// only set error in current state, but do not break alert processing
|
|
||||||
curState.Err = err
|
curState.Err = err
|
||||||
logger.Errorf("got templating error in rule %s: %q", ar.Name, err)
|
return nil, curState.Err
|
||||||
}
|
}
|
||||||
expandedLabels[i] = ls
|
expandedLabels[i] = ls
|
||||||
expandedAnnotations[i] = as
|
expandedAnnotations[i] = as
|
||||||
@@ -596,29 +556,31 @@ func (ar *AlertingRule) exec(ctx context.Context, ts time.Time, limit int) ([]pr
|
|||||||
return append(tss, ar.toTimeSeries(ts.Unix())...), nil
|
return append(tss, ar.toTimeSeries(ts.Unix())...), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (ar *AlertingRule) expandLabelTemplates(m datasource.Metric, qFn templates.QueryFn) (*labelSet, error) {
|
func (ar *AlertingRule) expandLabelTemplates(m datasource.Metric) (*labelSet, error) {
|
||||||
|
qFn := func(_ string) ([]datasource.Metric, error) {
|
||||||
|
return nil, fmt.Errorf("`query` template isn't supported in rule label")
|
||||||
|
}
|
||||||
ls, err := ar.toLabels(m, qFn)
|
ls, err := ar.toLabels(m, qFn)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return ls, fmt.Errorf("failed to expand label templates: %s", err)
|
return nil, fmt.Errorf("failed to expand label templates: %s", err)
|
||||||
}
|
}
|
||||||
return ls, nil
|
return ls, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (ar *AlertingRule) expandAnnotationTemplates(m datasource.Metric, qFn templates.QueryFn, activeAt time.Time, ls *labelSet, isPartial bool) (map[string]string, error) {
|
func (ar *AlertingRule) expandAnnotationTemplates(m datasource.Metric, qFn templates.QueryFn, activeAt time.Time, ls *labelSet) (map[string]string, error) {
|
||||||
tplData := notifier.AlertTplData{
|
tplData := notifier.AlertTplData{
|
||||||
Value: m.Values[0],
|
Value: m.Values[0],
|
||||||
Type: ar.Type.String(),
|
Type: ar.Type.String(),
|
||||||
Labels: ls.origin,
|
Labels: ls.origin,
|
||||||
Expr: ar.Expr,
|
Expr: ar.Expr,
|
||||||
AlertID: hash(ls.processed),
|
AlertID: hash(ls.processed),
|
||||||
GroupID: ar.GroupID,
|
GroupID: ar.GroupID,
|
||||||
ActiveAt: activeAt,
|
ActiveAt: activeAt,
|
||||||
For: ar.For,
|
For: ar.For,
|
||||||
IsPartial: isPartial,
|
|
||||||
}
|
}
|
||||||
as, err := notifier.ExecTemplate(qFn, ar.Annotations, tplData)
|
as, err := notifier.ExecTemplate(qFn, ar.Annotations, tplData)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return as, fmt.Errorf("failed to expand annotation templates: %s", err)
|
return nil, fmt.Errorf("failed to expand annotation templates: %s", err)
|
||||||
}
|
}
|
||||||
return as, nil
|
return as, nil
|
||||||
}
|
}
|
||||||
@@ -818,9 +780,7 @@ func (ar *AlertingRule) restore(ctx context.Context, q datasource.Querier, ts ti
|
|||||||
expr := fmt.Sprintf("default_rollup(%s{%s%s}[%ds])",
|
expr := fmt.Sprintf("default_rollup(%s{%s%s}[%ds])",
|
||||||
alertForStateMetricName, nameStr, labelsFilter, int(lookback.Seconds()))
|
alertForStateMetricName, nameStr, labelsFilter, int(lookback.Seconds()))
|
||||||
|
|
||||||
// query ALERTS_FOR_STATE at `ts-1s` instead `ts` to avoid retrieving data written in the current run,
|
res, _, err := q.Query(ctx, expr, ts)
|
||||||
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10335
|
|
||||||
res, _, err := q.Query(ctx, expr, ts.Add(-1*time.Second))
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("failed to execute restore query %q: %w ", expr, err)
|
return fmt.Errorf("failed to execute restore query %q: %w ", expr, err)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,106 +0,0 @@
|
|||||||
//go:build synctest
|
|
||||||
|
|
||||||
package rule
|
|
||||||
|
|
||||||
import (
|
|
||||||
"context"
|
|
||||||
"strings"
|
|
||||||
"testing"
|
|
||||||
"testing/synctest"
|
|
||||||
"time"
|
|
||||||
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
|
|
||||||
)
|
|
||||||
|
|
||||||
// TestAlertingRule_ActiveAtPreservedInAnnotations ensures that the fix for
|
|
||||||
// https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9543 is preserved
|
|
||||||
// while allowing query templates in labels (https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9783)
|
|
||||||
func TestAlertingRule_ActiveAtPreservedInAnnotations(t *testing.T) {
|
|
||||||
// wrap into synctest because of time manipulations
|
|
||||||
synctest.Test(t, func(t *testing.T) {
|
|
||||||
fq := &datasource.FakeQuerier{}
|
|
||||||
|
|
||||||
ar := &AlertingRule{
|
|
||||||
Name: "TestActiveAtPreservation",
|
|
||||||
Labels: map[string]string{
|
|
||||||
"test_query_in_label": `{{ "static_value" }}`,
|
|
||||||
},
|
|
||||||
Annotations: map[string]string{
|
|
||||||
"description": "Alert active since {{ $activeAt }}",
|
|
||||||
},
|
|
||||||
alerts: make(map[uint64]*notifier.Alert),
|
|
||||||
q: fq,
|
|
||||||
state: &ruleState{
|
|
||||||
entries: make([]StateEntry, 10),
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
// Mock query result - return empty result to make suppress_for_mass_alert = false
|
|
||||||
// (no need to add anything to fq for empty result)
|
|
||||||
|
|
||||||
// Add a metric that should trigger the alert
|
|
||||||
fq.Add(metricWithValueAndLabels(t, 1, "instance", "server1"))
|
|
||||||
|
|
||||||
// First execution - creates new alert
|
|
||||||
ts1 := time.Now()
|
|
||||||
_, err := ar.exec(context.TODO(), ts1, 0)
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("unexpected error on first exec: %s", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
if len(ar.alerts) != 1 {
|
|
||||||
t.Fatalf("expected 1 alert, got %d", len(ar.alerts))
|
|
||||||
}
|
|
||||||
|
|
||||||
firstAlert := ar.GetAlerts()[0]
|
|
||||||
// Verify first execution: activeAt should be ts1 and annotation should reflect it
|
|
||||||
if !firstAlert.ActiveAt.Equal(ts1) {
|
|
||||||
t.Fatalf("expected activeAt to be %v, got %v", ts1, firstAlert.ActiveAt)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Extract time from annotation (format will be like "Alert active since 2025-09-30 08:55:13.638551611 -0400 EDT m=+0.002928464")
|
|
||||||
expectedTimeStr := ts1.Format("2006-01-02 15:04:05")
|
|
||||||
if !strings.Contains(firstAlert.Annotations["description"], expectedTimeStr) {
|
|
||||||
t.Fatalf("first exec annotation should contain time %s, got: %s", expectedTimeStr, firstAlert.Annotations["description"])
|
|
||||||
}
|
|
||||||
|
|
||||||
// Second execution - should preserve activeAt in annotation
|
|
||||||
|
|
||||||
// Ensure different timestamp with different seconds
|
|
||||||
// sleep is non-blocking thanks to synctest
|
|
||||||
time.Sleep(2 * time.Second)
|
|
||||||
ts2 := time.Now()
|
|
||||||
_, err = ar.exec(context.TODO(), ts2, 0)
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("unexpected error on second exec: %s", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Get the alert again (should be the same alert)
|
|
||||||
if len(ar.alerts) != 1 {
|
|
||||||
t.Fatalf("expected 1 alert, got %d", len(ar.alerts))
|
|
||||||
}
|
|
||||||
secondAlert := ar.GetAlerts()[0]
|
|
||||||
|
|
||||||
// Critical test: activeAt should still be ts1, not ts2
|
|
||||||
if !secondAlert.ActiveAt.Equal(ts1) {
|
|
||||||
t.Fatalf("activeAt should be preserved as %v, but got %v", ts1, secondAlert.ActiveAt)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Critical test: annotation should still contain ts1 time, not ts2
|
|
||||||
if !strings.Contains(secondAlert.Annotations["description"], expectedTimeStr) {
|
|
||||||
t.Fatalf("second exec annotation should still contain original time %s, got: %s", expectedTimeStr, secondAlert.Annotations["description"])
|
|
||||||
}
|
|
||||||
|
|
||||||
// Additional verification: annotation should NOT contain ts2 time
|
|
||||||
ts2TimeStr := ts2.Format("2006-01-02 15:04:05")
|
|
||||||
if strings.Contains(secondAlert.Annotations["description"], ts2TimeStr) {
|
|
||||||
t.Fatalf("annotation should NOT contain new eval time %s, got: %s", ts2TimeStr, secondAlert.Annotations["description"])
|
|
||||||
}
|
|
||||||
|
|
||||||
// Verify query template in labels still works (this would fail if query templates were broken)
|
|
||||||
if firstAlert.Labels["test_query_in_label"] != "static_value" {
|
|
||||||
t.Fatalf("expected test_query_in_label=static_value, got %s", firstAlert.Labels["test_query_in_label"])
|
|
||||||
}
|
|
||||||
})
|
|
||||||
}
|
|
||||||
@@ -663,7 +663,7 @@ func TestAlertingRuleExecRange(t *testing.T) {
|
|||||||
Name: "for-pending",
|
Name: "for-pending",
|
||||||
Type: config.NewPrometheusType().String(),
|
Type: config.NewPrometheusType().String(),
|
||||||
Labels: map[string]string{"alertname": "for-pending"},
|
Labels: map[string]string{"alertname": "for-pending"},
|
||||||
Annotations: map[string]string{},
|
Annotations: map[string]string{"activeAt": "5000"},
|
||||||
State: notifier.StatePending,
|
State: notifier.StatePending,
|
||||||
ActiveAt: time.Unix(5, 0),
|
ActiveAt: time.Unix(5, 0),
|
||||||
Value: 1,
|
Value: 1,
|
||||||
@@ -683,7 +683,7 @@ func TestAlertingRuleExecRange(t *testing.T) {
|
|||||||
Name: "for-firing",
|
Name: "for-firing",
|
||||||
Type: config.NewPrometheusType().String(),
|
Type: config.NewPrometheusType().String(),
|
||||||
Labels: map[string]string{"alertname": "for-firing"},
|
Labels: map[string]string{"alertname": "for-firing"},
|
||||||
Annotations: map[string]string{},
|
Annotations: map[string]string{"activeAt": "1000"},
|
||||||
State: notifier.StateFiring,
|
State: notifier.StateFiring,
|
||||||
ActiveAt: time.Unix(1, 0),
|
ActiveAt: time.Unix(1, 0),
|
||||||
Start: time.Unix(5, 0),
|
Start: time.Unix(5, 0),
|
||||||
@@ -704,7 +704,7 @@ func TestAlertingRuleExecRange(t *testing.T) {
|
|||||||
Name: "for-hold-pending",
|
Name: "for-hold-pending",
|
||||||
Type: config.NewPrometheusType().String(),
|
Type: config.NewPrometheusType().String(),
|
||||||
Labels: map[string]string{"alertname": "for-hold-pending"},
|
Labels: map[string]string{"alertname": "for-hold-pending"},
|
||||||
Annotations: map[string]string{},
|
Annotations: map[string]string{"activeAt": "5000"},
|
||||||
State: notifier.StatePending,
|
State: notifier.StatePending,
|
||||||
ActiveAt: time.Unix(5, 0),
|
ActiveAt: time.Unix(5, 0),
|
||||||
Value: 1,
|
Value: 1,
|
||||||
@@ -826,9 +826,12 @@ func TestGroup_Restore(t *testing.T) {
|
|||||||
fg := NewGroup(config.Group{Name: "TestRestore", Rules: rules}, fqr, time.Second, nil)
|
fg := NewGroup(config.Group{Name: "TestRestore", Rules: rules}, fqr, time.Second, nil)
|
||||||
fg.Init()
|
fg.Init()
|
||||||
wg := sync.WaitGroup{}
|
wg := sync.WaitGroup{}
|
||||||
wg.Go(func() {
|
wg.Add(1)
|
||||||
fg.Start(context.Background(), nil, fqr)
|
go func() {
|
||||||
})
|
nts := func() []notifier.Notifier { return []notifier.Notifier{&notifier.FakeNotifier{}} }
|
||||||
|
fg.Start(context.Background(), nts, nil, fqr)
|
||||||
|
wg.Done()
|
||||||
|
}()
|
||||||
fg.Close()
|
fg.Close()
|
||||||
wg.Wait()
|
wg.Wait()
|
||||||
|
|
||||||
@@ -1119,7 +1122,7 @@ func TestAlertingRuleLimit_Success(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func TestAlertingRule_Template(t *testing.T) {
|
func TestAlertingRule_Template(t *testing.T) {
|
||||||
f := func(rule *AlertingRule, metrics []datasource.Metric, isResponsePartial bool, alertsExpected map[uint64]*notifier.Alert) {
|
f := func(rule *AlertingRule, metrics []datasource.Metric, alertsExpected map[uint64]*notifier.Alert) {
|
||||||
t.Helper()
|
t.Helper()
|
||||||
|
|
||||||
fakeGroup := Group{
|
fakeGroup := Group{
|
||||||
@@ -1132,7 +1135,6 @@ func TestAlertingRule_Template(t *testing.T) {
|
|||||||
entries: make([]StateEntry, 10),
|
entries: make([]StateEntry, 10),
|
||||||
}
|
}
|
||||||
fq.Add(metrics...)
|
fq.Add(metrics...)
|
||||||
fq.SetPartialResponse(isResponsePartial)
|
|
||||||
|
|
||||||
if _, err := rule.exec(context.TODO(), time.Now(), 0); err != nil {
|
if _, err := rule.exec(context.TODO(), time.Now(), 0); err != nil {
|
||||||
t.Fatalf("unexpected error: %s", err)
|
t.Fatalf("unexpected error: %s", err)
|
||||||
@@ -1163,7 +1165,7 @@ func TestAlertingRule_Template(t *testing.T) {
|
|||||||
}, []datasource.Metric{
|
}, []datasource.Metric{
|
||||||
metricWithValueAndLabels(t, 1, "instance", "foo"),
|
metricWithValueAndLabels(t, 1, "instance", "foo"),
|
||||||
metricWithValueAndLabels(t, 1, "instance", "bar"),
|
metricWithValueAndLabels(t, 1, "instance", "bar"),
|
||||||
}, false, map[uint64]*notifier.Alert{
|
}, map[uint64]*notifier.Alert{
|
||||||
hash(map[string]string{alertNameLabel: "common", "region": "east", "instance": "foo"}): {
|
hash(map[string]string{alertNameLabel: "common", "region": "east", "instance": "foo"}): {
|
||||||
Annotations: map[string]string{
|
Annotations: map[string]string{
|
||||||
"summary": `common: Too high connection number for "foo"`,
|
"summary": `common: Too high connection number for "foo"`,
|
||||||
@@ -1192,14 +1194,14 @@ func TestAlertingRule_Template(t *testing.T) {
|
|||||||
"instance": "{{ $labels.instance }}",
|
"instance": "{{ $labels.instance }}",
|
||||||
},
|
},
|
||||||
Annotations: map[string]string{
|
Annotations: map[string]string{
|
||||||
"summary": `{{ $labels.__name__ }}: Too high connection number for "{{ $labels.instance }}".{{ if $isPartial }} WARNING: Partial response detected - this alert may be incomplete. Please verify the results manually.{{ end }}`,
|
"summary": `{{ $labels.__name__ }}: Too high connection number for "{{ $labels.instance }}"`,
|
||||||
"description": `{{ $labels.alertname}}: It is {{ $value }} connections for "{{ $labels.instance }}"`,
|
"description": `{{ $labels.alertname}}: It is {{ $value }} connections for "{{ $labels.instance }}"`,
|
||||||
},
|
},
|
||||||
alerts: make(map[uint64]*notifier.Alert),
|
alerts: make(map[uint64]*notifier.Alert),
|
||||||
}, []datasource.Metric{
|
}, []datasource.Metric{
|
||||||
metricWithValueAndLabels(t, 2, "__name__", "first", "instance", "foo", alertNameLabel, "override"),
|
metricWithValueAndLabels(t, 2, "__name__", "first", "instance", "foo", alertNameLabel, "override"),
|
||||||
metricWithValueAndLabels(t, 10, "__name__", "second", "instance", "bar", alertNameLabel, "override"),
|
metricWithValueAndLabels(t, 10, "__name__", "second", "instance", "bar", alertNameLabel, "override"),
|
||||||
}, false, map[uint64]*notifier.Alert{
|
}, map[uint64]*notifier.Alert{
|
||||||
hash(map[string]string{alertNameLabel: "override label", "exported_alertname": "override", "instance": "foo"}): {
|
hash(map[string]string{alertNameLabel: "override label", "exported_alertname": "override", "instance": "foo"}): {
|
||||||
Labels: map[string]string{
|
Labels: map[string]string{
|
||||||
alertNameLabel: "override label",
|
alertNameLabel: "override label",
|
||||||
@@ -1207,7 +1209,7 @@ func TestAlertingRule_Template(t *testing.T) {
|
|||||||
"instance": "foo",
|
"instance": "foo",
|
||||||
},
|
},
|
||||||
Annotations: map[string]string{
|
Annotations: map[string]string{
|
||||||
"summary": `first: Too high connection number for "foo".`,
|
"summary": `first: Too high connection number for "foo"`,
|
||||||
"description": `override: It is 2 connections for "foo"`,
|
"description": `override: It is 2 connections for "foo"`,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
@@ -1218,7 +1220,7 @@ func TestAlertingRule_Template(t *testing.T) {
|
|||||||
"instance": "bar",
|
"instance": "bar",
|
||||||
},
|
},
|
||||||
Annotations: map[string]string{
|
Annotations: map[string]string{
|
||||||
"summary": `second: Too high connection number for "bar".`,
|
"summary": `second: Too high connection number for "bar"`,
|
||||||
"description": `override: It is 10 connections for "bar"`,
|
"description": `override: It is 10 connections for "bar"`,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
@@ -1231,7 +1233,7 @@ func TestAlertingRule_Template(t *testing.T) {
|
|||||||
"instance": "{{ $labels.instance }}",
|
"instance": "{{ $labels.instance }}",
|
||||||
},
|
},
|
||||||
Annotations: map[string]string{
|
Annotations: map[string]string{
|
||||||
"summary": `Alert "{{ $labels.alertname }}({{ $labels.alertgroup }})" for instance {{ $labels.instance }}.{{ if $isPartial }} WARNING: Partial response detected - this alert may be incomplete. Please verify the results manually.{{ end }}`,
|
"summary": `Alert "{{ $labels.alertname }}({{ $labels.alertgroup }})" for instance {{ $labels.instance }}`,
|
||||||
},
|
},
|
||||||
alerts: make(map[uint64]*notifier.Alert),
|
alerts: make(map[uint64]*notifier.Alert),
|
||||||
}, []datasource.Metric{
|
}, []datasource.Metric{
|
||||||
@@ -1239,7 +1241,7 @@ func TestAlertingRule_Template(t *testing.T) {
|
|||||||
alertNameLabel, "originAlertname",
|
alertNameLabel, "originAlertname",
|
||||||
alertGroupNameLabel, "originGroupname",
|
alertGroupNameLabel, "originGroupname",
|
||||||
"instance", "foo"),
|
"instance", "foo"),
|
||||||
}, true, map[uint64]*notifier.Alert{
|
}, map[uint64]*notifier.Alert{
|
||||||
hash(map[string]string{
|
hash(map[string]string{
|
||||||
alertNameLabel: "OriginLabels",
|
alertNameLabel: "OriginLabels",
|
||||||
"exported_alertname": "originAlertname",
|
"exported_alertname": "originAlertname",
|
||||||
@@ -1255,7 +1257,7 @@ func TestAlertingRule_Template(t *testing.T) {
|
|||||||
"instance": "foo",
|
"instance": "foo",
|
||||||
},
|
},
|
||||||
Annotations: map[string]string{
|
Annotations: map[string]string{
|
||||||
"summary": `Alert "originAlertname(originGroupname)" for instance foo. WARNING: Partial response detected - this alert may be incomplete. Please verify the results manually.`,
|
"summary": `Alert "originAlertname(originGroupname)" for instance foo`,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
})
|
})
|
||||||
@@ -1370,10 +1372,8 @@ func TestAlertingRule_ToLabels(t *testing.T) {
|
|||||||
|
|
||||||
ar := &AlertingRule{
|
ar := &AlertingRule{
|
||||||
Labels: map[string]string{
|
Labels: map[string]string{
|
||||||
"instance": "override", // this should override instance with new value
|
"instance": "override", // this should override instance with new value
|
||||||
"group": "vmalert", // this shouldn't have effect since value in metric is equal
|
"group": "vmalert", // this shouldn't have effect since value in metric is equal
|
||||||
"invalid_label": "{{ .Values.mustRuntimeFail }}",
|
|
||||||
"empty_label": "", // this should be dropped
|
|
||||||
},
|
},
|
||||||
Expr: "sum(vmalert_alerting_rules_error) by(instance, group, alertname) > 0",
|
Expr: "sum(vmalert_alerting_rules_error) by(instance, group, alertname) > 0",
|
||||||
Name: "AlertingRulesError",
|
Name: "AlertingRulesError",
|
||||||
@@ -1381,11 +1381,10 @@ func TestAlertingRule_ToLabels(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
expectedOriginLabels := map[string]string{
|
expectedOriginLabels := map[string]string{
|
||||||
"instance": "0.0.0.0:8800",
|
"instance": "0.0.0.0:8800",
|
||||||
"group": "vmalert",
|
"group": "vmalert",
|
||||||
"alertname": "ConfigurationReloadFailure",
|
"alertname": "ConfigurationReloadFailure",
|
||||||
"alertgroup": "vmalert",
|
"alertgroup": "vmalert",
|
||||||
"invalid_label": `error evaluating template: template: :1:298: executing "" at <.Values.mustRuntimeFail>: can't evaluate field Values in type notifier.tplData`,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
expectedProcessedLabels := map[string]string{
|
expectedProcessedLabels := map[string]string{
|
||||||
@@ -1395,12 +1394,11 @@ func TestAlertingRule_ToLabels(t *testing.T) {
|
|||||||
"exported_alertname": "ConfigurationReloadFailure",
|
"exported_alertname": "ConfigurationReloadFailure",
|
||||||
"group": "vmalert",
|
"group": "vmalert",
|
||||||
"alertgroup": "vmalert",
|
"alertgroup": "vmalert",
|
||||||
"invalid_label": `error evaluating template: template: :1:298: executing "" at <.Values.mustRuntimeFail>: can't evaluate field Values in type notifier.tplData`,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
ls, err := ar.toLabels(metric, nil)
|
ls, err := ar.toLabels(metric, nil)
|
||||||
if err == nil || !strings.Contains(err.Error(), "error evaluating template") {
|
if err != nil {
|
||||||
t.Fatalf("unexpected error %q", err.Error())
|
t.Fatalf("unexpected error: %s", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
if !reflect.DeepEqual(ls.origin, expectedOriginLabels) {
|
if !reflect.DeepEqual(ls.origin, expectedOriginLabels) {
|
||||||
@@ -1431,50 +1429,3 @@ func TestAlertingRuleExec_Partial(t *testing.T) {
|
|||||||
t.Fatalf("unexpected error: %s", err)
|
t.Fatalf("unexpected error: %s", err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestAlertingRule_QueryTemplateInLabels(t *testing.T) {
|
|
||||||
fq := &datasource.FakeQuerier{}
|
|
||||||
fakeGroup := Group{
|
|
||||||
Name: "TestQueryTemplateInLabels",
|
|
||||||
}
|
|
||||||
|
|
||||||
ar := &AlertingRule{
|
|
||||||
Name: "test_alert",
|
|
||||||
Labels: map[string]string{
|
|
||||||
"suppress_for_mass_alert": `{{ if (printf "ALERTS{alertname='SomeAlert', alertstate='firing', device='%s'} == 1" $labels.device | query) }}true{{ else }}false{{ end }}`,
|
|
||||||
},
|
|
||||||
Annotations: map[string]string{
|
|
||||||
"summary": "Test alert with query template in labels",
|
|
||||||
},
|
|
||||||
alerts: make(map[uint64]*notifier.Alert),
|
|
||||||
}
|
|
||||||
ar.GroupID = fakeGroup.GetID()
|
|
||||||
ar.q = fq
|
|
||||||
ar.state = &ruleState{
|
|
||||||
entries: make([]StateEntry, 10),
|
|
||||||
}
|
|
||||||
|
|
||||||
// Add a metric that should trigger the alert
|
|
||||||
fq.Add(metricWithValueAndLabels(t, 1, "device", "sda1"))
|
|
||||||
|
|
||||||
ts := time.Now()
|
|
||||||
_, err := ar.exec(context.TODO(), ts, 0)
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("unexpected error with query template in labels: %s", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Verify that the alert was created and the query template was executed
|
|
||||||
if len(ar.alerts) != 1 {
|
|
||||||
t.Fatalf("expected 1 alert, got %d", len(ar.alerts))
|
|
||||||
}
|
|
||||||
|
|
||||||
alert := ar.GetAlerts()[0]
|
|
||||||
suppressLabel, exists := alert.Labels["suppress_for_mass_alert"]
|
|
||||||
if !exists {
|
|
||||||
t.Fatalf("expected 'suppress_for_mass_alert' label to exist")
|
|
||||||
}
|
|
||||||
// The query template should have been executed (even if it returns false due to mock data)
|
|
||||||
if suppressLabel != "true" && suppressLabel != "false" {
|
|
||||||
t.Fatalf("expected 'suppress_for_mass_alert' label to be 'true' or 'false', got '%s'", suppressLabel)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -2,11 +2,11 @@ package rule
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
|
"encoding/json"
|
||||||
"errors"
|
"errors"
|
||||||
"flag"
|
"flag"
|
||||||
"fmt"
|
"fmt"
|
||||||
"hash/fnv"
|
"hash/fnv"
|
||||||
"maps"
|
|
||||||
"net/url"
|
"net/url"
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
@@ -25,14 +25,10 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
ruleResultsLimit = flag.Int("rule.resultsLimit", 0, "Limits the number of alerts or recording results a single rule can produce. "+
|
|
||||||
"Can be overridden by the limit option under group if specified. "+
|
|
||||||
"If exceeded, the rule will be marked with an error and all its results will be discarded. "+
|
|
||||||
"0 means no limit.")
|
|
||||||
ruleUpdateEntriesLimit = flag.Int("rule.updateEntriesLimit", 20, "Defines the max number of rule's state updates stored in-memory. "+
|
ruleUpdateEntriesLimit = flag.Int("rule.updateEntriesLimit", 20, "Defines the max number of rule's state updates stored in-memory. "+
|
||||||
"Rule's updates are available on rule's Details page and are used for debugging purposes. The number of stored updates can be overridden per rule via update_entries_limit param.")
|
"Rule's updates are available on rule's Details page and are used for debugging purposes. The number of stored updates can be overridden per rule via update_entries_limit param.")
|
||||||
resendDelay = flag.Duration("rule.resendDelay", 0, "Minimum amount of time to wait before resending an alert to notifier.")
|
resendDelay = flag.Duration("rule.resendDelay", 0, "MiniMum amount of time to wait before resending an alert to notifier.")
|
||||||
maxResolveDuration = flag.Duration("rule.maxResolveDuration", 0, "Limits the maximum duration for automatic alert expiration, "+
|
maxResolveDuration = flag.Duration("rule.maxResolveDuration", 0, "Limits the maxiMum duration for automatic alert expiration, "+
|
||||||
"which by default is 4 times evaluationInterval of the parent group")
|
"which by default is 4 times evaluationInterval of the parent group")
|
||||||
evalDelay = flag.Duration("rule.evalDelay", 30*time.Second, "Adjustment of the 'time' parameter for rule evaluation requests to compensate intentional data delay from the datasource. "+
|
evalDelay = flag.Duration("rule.evalDelay", 30*time.Second, "Adjustment of the 'time' parameter for rule evaluation requests to compensate intentional data delay from the datasource. "+
|
||||||
"Normally, should be equal to '-search.latencyOffset' (cmd-line flag configured for VictoriaMetrics single-node or vmselect). "+
|
"Normally, should be equal to '-search.latencyOffset' (cmd-line flag configured for VictoriaMetrics single-node or vmselect). "+
|
||||||
@@ -40,8 +36,6 @@ var (
|
|||||||
disableAlertGroupLabel = flag.Bool("disableAlertgroupLabel", false, "Whether to disable adding group's Name as label to generated alerts and time series.")
|
disableAlertGroupLabel = flag.Bool("disableAlertgroupLabel", false, "Whether to disable adding group's Name as label to generated alerts and time series.")
|
||||||
remoteReadLookBack = flag.Duration("remoteRead.lookback", time.Hour, "Lookback defines how far to look into past for alerts timeseries. "+
|
remoteReadLookBack = flag.Duration("remoteRead.lookback", time.Hour, "Lookback defines how far to look into past for alerts timeseries. "+
|
||||||
"For example, if lookback=1h then range from now() to now()-1h will be scanned.")
|
"For example, if lookback=1h then range from now() to now()-1h will be scanned.")
|
||||||
maxStartDelay = flag.Duration("group.maxStartDelay", 5*time.Minute, "Defines the max delay before starting the group evaluation. Group's start is artificially delayed for random duration on interval"+
|
|
||||||
" [0..min(--group.maxStartDelay, group.interval)]. This helps smoothing out the load on the configured datasource, so evaluations aren't executed too close to each other.")
|
|
||||||
)
|
)
|
||||||
|
|
||||||
// Group is an entity for grouping rules
|
// Group is an entity for grouping rules
|
||||||
@@ -98,7 +92,9 @@ type groupMetrics struct {
|
|||||||
// set2 has priority over set1.
|
// set2 has priority over set1.
|
||||||
func mergeLabels(groupName, ruleName string, set1, set2 map[string]string) map[string]string {
|
func mergeLabels(groupName, ruleName string, set1, set2 map[string]string) map[string]string {
|
||||||
r := map[string]string{}
|
r := map[string]string{}
|
||||||
maps.Copy(r, set1)
|
for k, v := range set1 {
|
||||||
|
r[k] = v
|
||||||
|
}
|
||||||
for k, v := range set2 {
|
for k, v := range set2 {
|
||||||
if prevV, ok := r[k]; ok {
|
if prevV, ok := r[k]; ok {
|
||||||
logger.Infof("label %q=%q for rule %q.%q overwritten with external label %q=%q",
|
logger.Infof("label %q=%q for rule %q.%q overwritten with external label %q=%q",
|
||||||
@@ -116,6 +112,7 @@ func NewGroup(cfg config.Group, qb datasource.QuerierBuilder, defaultInterval ti
|
|||||||
Name: cfg.Name,
|
Name: cfg.Name,
|
||||||
File: cfg.File,
|
File: cfg.File,
|
||||||
Interval: cfg.Interval.Duration(),
|
Interval: cfg.Interval.Duration(),
|
||||||
|
Limit: cfg.Limit,
|
||||||
Concurrency: cfg.Concurrency,
|
Concurrency: cfg.Concurrency,
|
||||||
checksum: cfg.Checksum,
|
checksum: cfg.Checksum,
|
||||||
Params: cfg.Params,
|
Params: cfg.Params,
|
||||||
@@ -132,11 +129,6 @@ func NewGroup(cfg config.Group, qb datasource.QuerierBuilder, defaultInterval ti
|
|||||||
if g.Interval == 0 {
|
if g.Interval == 0 {
|
||||||
g.Interval = defaultInterval
|
g.Interval = defaultInterval
|
||||||
}
|
}
|
||||||
if cfg.Limit != nil {
|
|
||||||
g.Limit = *cfg.Limit
|
|
||||||
} else {
|
|
||||||
g.Limit = *ruleResultsLimit
|
|
||||||
}
|
|
||||||
if g.Concurrency < 1 {
|
if g.Concurrency < 1 {
|
||||||
g.Concurrency = 1
|
g.Concurrency = 1
|
||||||
}
|
}
|
||||||
@@ -297,7 +289,7 @@ func (g *Group) InterruptEval() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Close stops the group and its rules, unregisters group metrics
|
// Close stops the group and it's rules, unregisters group metrics
|
||||||
func (g *Group) Close() {
|
func (g *Group) Close() {
|
||||||
if g.doneCh == nil {
|
if g.doneCh == nil {
|
||||||
return
|
return
|
||||||
@@ -306,6 +298,10 @@ func (g *Group) Close() {
|
|||||||
g.InterruptEval()
|
g.InterruptEval()
|
||||||
<-g.finishedCh
|
<-g.finishedCh
|
||||||
|
|
||||||
|
g.closeGroupMetrics()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (g *Group) closeGroupMetrics() {
|
||||||
metrics.UnregisterSet(g.metrics.set, true)
|
metrics.UnregisterSet(g.metrics.set, true)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -331,13 +327,13 @@ func (g *Group) Init() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Start starts group's evaluation
|
// Start starts group's evaluation
|
||||||
func (g *Group) Start(ctx context.Context, rw remotewrite.RWClient, rr datasource.QuerierBuilder) {
|
func (g *Group) Start(ctx context.Context, nts func() []notifier.Notifier, rw remotewrite.RWClient, rr datasource.QuerierBuilder) {
|
||||||
defer func() { close(g.finishedCh) }()
|
defer func() { close(g.finishedCh) }()
|
||||||
evalTS := time.Now()
|
evalTS := time.Now()
|
||||||
// sleep random duration to spread group rules evaluation
|
// sleep random duration to spread group rules evaluation
|
||||||
// over maxStartDelay to reduce the load on datasource.
|
// over time in order to reduce load on datasource.
|
||||||
if !SkipRandSleepOnGroupStart {
|
if !SkipRandSleepOnGroupStart {
|
||||||
sleepBeforeStart := g.delayBeforeStart(evalTS, *maxStartDelay)
|
sleepBeforeStart := delayBeforeStart(evalTS, g.GetID(), g.Interval, g.EvalOffset)
|
||||||
g.infof("will start in %v", sleepBeforeStart)
|
g.infof("will start in %v", sleepBeforeStart)
|
||||||
|
|
||||||
sleepTimer := time.NewTimer(sleepBeforeStart)
|
sleepTimer := time.NewTimer(sleepBeforeStart)
|
||||||
@@ -369,12 +365,13 @@ func (g *Group) Start(ctx context.Context, rw remotewrite.RWClient, rr datasourc
|
|||||||
|
|
||||||
e := &executor{
|
e := &executor{
|
||||||
Rw: rw,
|
Rw: rw,
|
||||||
|
Notifiers: nts,
|
||||||
notifierHeaders: g.NotifierHeaders,
|
notifierHeaders: g.NotifierHeaders,
|
||||||
}
|
}
|
||||||
|
|
||||||
g.infof("started")
|
g.infof("started")
|
||||||
|
|
||||||
eval := func(ctx context.Context, ts time.Time) time.Time {
|
eval := func(ctx context.Context, ts time.Time) {
|
||||||
g.metrics.iterationTotal.Inc()
|
g.metrics.iterationTotal.Inc()
|
||||||
|
|
||||||
start := time.Now()
|
start := time.Now()
|
||||||
@@ -382,7 +379,7 @@ func (g *Group) Start(ctx context.Context, rw remotewrite.RWClient, rr datasourc
|
|||||||
if len(g.Rules) < 1 {
|
if len(g.Rules) < 1 {
|
||||||
g.metrics.iterationDuration.UpdateDuration(start)
|
g.metrics.iterationDuration.UpdateDuration(start)
|
||||||
g.LastEvaluation = start
|
g.LastEvaluation = start
|
||||||
return ts
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
resolveDuration := getResolveDuration(g.Interval, *resendDelay, *maxResolveDuration)
|
resolveDuration := getResolveDuration(g.Interval, *resendDelay, *maxResolveDuration)
|
||||||
@@ -396,7 +393,6 @@ func (g *Group) Start(ctx context.Context, rw remotewrite.RWClient, rr datasourc
|
|||||||
}
|
}
|
||||||
g.metrics.iterationDuration.UpdateDuration(start)
|
g.metrics.iterationDuration.UpdateDuration(start)
|
||||||
g.LastEvaluation = start
|
g.LastEvaluation = start
|
||||||
return ts
|
|
||||||
}
|
}
|
||||||
|
|
||||||
evalCtx, cancel := context.WithCancel(ctx)
|
evalCtx, cancel := context.WithCancel(ctx)
|
||||||
@@ -405,7 +401,7 @@ func (g *Group) Start(ctx context.Context, rw remotewrite.RWClient, rr datasourc
|
|||||||
g.mu.Unlock()
|
g.mu.Unlock()
|
||||||
defer g.evalCancel()
|
defer g.evalCancel()
|
||||||
|
|
||||||
realEvalTS := eval(evalCtx, evalTS)
|
eval(evalCtx, evalTS)
|
||||||
|
|
||||||
t := time.NewTicker(g.Interval)
|
t := time.NewTicker(g.Interval)
|
||||||
defer t.Stop()
|
defer t.Stop()
|
||||||
@@ -413,7 +409,7 @@ func (g *Group) Start(ctx context.Context, rw remotewrite.RWClient, rr datasourc
|
|||||||
// restore the rules state after the first evaluation
|
// restore the rules state after the first evaluation
|
||||||
// so only active alerts can be restored.
|
// so only active alerts can be restored.
|
||||||
if rr != nil {
|
if rr != nil {
|
||||||
err := g.restore(ctx, rr, realEvalTS, *remoteReadLookBack)
|
err := g.restore(ctx, rr, evalTS, *remoteReadLookBack)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logger.Errorf("error while restoring ruleState for group %q: %s", g.Name, err)
|
logger.Errorf("error while restoring ruleState for group %q: %s", g.Name, err)
|
||||||
}
|
}
|
||||||
@@ -476,28 +472,32 @@ func (g *Group) UpdateWith(newGroup *Group) {
|
|||||||
g.updateCh <- newGroup
|
g.updateCh <- newGroup
|
||||||
}
|
}
|
||||||
|
|
||||||
// delayBeforeStart returns duration for delaying the evaluation start
|
// DeepCopy returns a deep copy of group
|
||||||
// based on given ts and Group settings. The delay can't exceed maxDelay.
|
func (g *Group) DeepCopy() *Group {
|
||||||
// maxDelay is ignored if g.EvalOffset != nil.
|
g.mu.RLock()
|
||||||
//
|
data, _ := json.Marshal(g)
|
||||||
// Delaying is important to smooth out the load on the datasource when all groups start at the same time.
|
g.mu.RUnlock()
|
||||||
// delayBeforeStart calculates delay based on Group ID, so all groups will start at different moments of time.
|
newG := Group{}
|
||||||
func (g *Group) delayBeforeStart(ts time.Time, maxDelay time.Duration) time.Duration {
|
_ = json.Unmarshal(data, &newG)
|
||||||
if g.EvalOffset != nil {
|
newG.Rules = g.Rules
|
||||||
// if offset is specified, ignore the maxDelay and return a duration aligned with offset
|
newG.id = g.id
|
||||||
currentOffsetPoint := ts.Truncate(g.Interval).Add(*g.EvalOffset)
|
return &newG
|
||||||
|
}
|
||||||
|
|
||||||
|
// if offset is specified, delayBeforeStart returns a duration to help aligning timestamp with offset;
|
||||||
|
// otherwise, it returns a random duration between [0..interval] based on group key.
|
||||||
|
func delayBeforeStart(ts time.Time, key uint64, interval time.Duration, offset *time.Duration) time.Duration {
|
||||||
|
if offset != nil {
|
||||||
|
currentOffsetPoint := ts.Truncate(interval).Add(*offset)
|
||||||
if currentOffsetPoint.Before(ts) {
|
if currentOffsetPoint.Before(ts) {
|
||||||
// wait until the next offset point
|
// wait until the next offset point
|
||||||
return currentOffsetPoint.Add(g.Interval).Sub(ts)
|
return currentOffsetPoint.Add(interval).Sub(ts)
|
||||||
}
|
}
|
||||||
return currentOffsetPoint.Sub(ts)
|
return currentOffsetPoint.Sub(ts)
|
||||||
}
|
}
|
||||||
|
|
||||||
// otherwise, return a random duration between [0..min(interval, maxDelay)] based on group ID
|
|
||||||
// artificially limit interval, so groups with big intervals could start sooner.
|
|
||||||
interval := min(g.Interval, maxDelay)
|
|
||||||
var randSleep time.Duration
|
var randSleep time.Duration
|
||||||
randSleep = time.Duration(float64(interval) * (float64(g.GetID()) / (1 << 64)))
|
randSleep = time.Duration(float64(interval) * (float64(key) / (1 << 64)))
|
||||||
sleepOffset := time.Duration(ts.UnixNano() % interval.Nanoseconds())
|
sleepOffset := time.Duration(ts.UnixNano() % interval.Nanoseconds())
|
||||||
if randSleep < sleepOffset {
|
if randSleep < sleepOffset {
|
||||||
randSleep += interval
|
randSleep += interval
|
||||||
@@ -559,13 +559,15 @@ func (g *Group) Replay(start, end time.Time, rw remotewrite.RWClient, maxDataPoi
|
|||||||
if !disableProgressBar {
|
if !disableProgressBar {
|
||||||
bar = pb.StartNew(iterations * len(g.Rules))
|
bar = pb.StartNew(iterations * len(g.Rules))
|
||||||
}
|
}
|
||||||
for i := range g.Rules {
|
for _, r := range g.Rules {
|
||||||
rule := g.Rules[i]
|
|
||||||
sem <- struct{}{}
|
sem <- struct{}{}
|
||||||
wg.Go(func() {
|
wg.Add(1)
|
||||||
res <- replayRuleRange(rule, ri, bar, rw, replayRuleRetryAttempts, ruleEvaluationConcurrency)
|
go func(r Rule, ri rangeIterator) {
|
||||||
|
// pass ri as a copy, so it can be modified within the replayRuleRange
|
||||||
|
res <- replayRuleRange(r, ri, bar, rw, replayRuleRetryAttempts, ruleEvaluationConcurrency)
|
||||||
<-sem
|
<-sem
|
||||||
})
|
wg.Done()
|
||||||
|
}(r, ri)
|
||||||
}
|
}
|
||||||
|
|
||||||
wg.Wait()
|
wg.Wait()
|
||||||
@@ -595,10 +597,10 @@ func replayRuleRange(r Rule, ri rangeIterator, bar *pb.ProgressBar, rw remotewri
|
|||||||
res := make(chan int, int(ri.end.Sub(ri.start)/ri.step)+1)
|
res := make(chan int, int(ri.end.Sub(ri.start)/ri.step)+1)
|
||||||
for ri.next() {
|
for ri.next() {
|
||||||
sem <- struct{}{}
|
sem <- struct{}{}
|
||||||
start := ri.s
|
wg.Add(1)
|
||||||
end := ri.e
|
|
||||||
wg.Go(func() {
|
go func(s, e time.Time) {
|
||||||
n, err := replayRule(r, start, end, rw, replayRuleRetryAttempts)
|
n, err := replayRule(r, s, e, rw, replayRuleRetryAttempts)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logger.Fatalf("rule %q: %s", r, err)
|
logger.Fatalf("rule %q: %s", r, err)
|
||||||
}
|
}
|
||||||
@@ -607,7 +609,8 @@ func replayRuleRange(r Rule, ri rangeIterator, bar *pb.ProgressBar, rw remotewri
|
|||||||
}
|
}
|
||||||
res <- n
|
res <- n
|
||||||
<-sem
|
<-sem
|
||||||
})
|
wg.Done()
|
||||||
|
}(ri.s, ri.e)
|
||||||
}
|
}
|
||||||
wg.Wait()
|
wg.Wait()
|
||||||
close(res)
|
close(res)
|
||||||
@@ -621,9 +624,10 @@ func replayRuleRange(r Rule, ri rangeIterator, bar *pb.ProgressBar, rw remotewri
|
|||||||
}
|
}
|
||||||
|
|
||||||
// ExecOnce evaluates all the rules under group for once with given timestamp.
|
// ExecOnce evaluates all the rules under group for once with given timestamp.
|
||||||
func (g *Group) ExecOnce(ctx context.Context, rw remotewrite.RWClient, evalTS time.Time) chan error {
|
func (g *Group) ExecOnce(ctx context.Context, nts func() []notifier.Notifier, rw remotewrite.RWClient, evalTS time.Time) chan error {
|
||||||
e := &executor{
|
e := &executor{
|
||||||
Rw: rw,
|
Rw: rw,
|
||||||
|
Notifiers: nts,
|
||||||
notifierHeaders: g.NotifierHeaders,
|
notifierHeaders: g.NotifierHeaders,
|
||||||
}
|
}
|
||||||
if len(g.Rules) < 1 {
|
if len(g.Rules) < 1 {
|
||||||
@@ -698,6 +702,7 @@ func (g *Group) getEvalDelay() time.Duration {
|
|||||||
|
|
||||||
// executor contains group's notify and rw configs
|
// executor contains group's notify and rw configs
|
||||||
type executor struct {
|
type executor struct {
|
||||||
|
Notifiers func() []notifier.Notifier
|
||||||
notifierHeaders map[string]string
|
notifierHeaders map[string]string
|
||||||
|
|
||||||
Rw remotewrite.RWClient
|
Rw remotewrite.RWClient
|
||||||
@@ -718,13 +723,14 @@ func (e *executor) execConcurrently(ctx context.Context, rules []Rule, ts time.T
|
|||||||
sem := make(chan struct{}, concurrency)
|
sem := make(chan struct{}, concurrency)
|
||||||
go func() {
|
go func() {
|
||||||
wg := sync.WaitGroup{}
|
wg := sync.WaitGroup{}
|
||||||
for i := range rules {
|
for _, r := range rules {
|
||||||
rule := rules[i]
|
|
||||||
sem <- struct{}{}
|
sem <- struct{}{}
|
||||||
wg.Go(func() {
|
wg.Add(1)
|
||||||
res <- e.exec(ctx, rule, ts, resolveDuration, limit)
|
go func(r Rule) {
|
||||||
|
res <- e.exec(ctx, r, ts, resolveDuration, limit)
|
||||||
<-sem
|
<-sem
|
||||||
})
|
wg.Done()
|
||||||
|
}(r)
|
||||||
}
|
}
|
||||||
wg.Wait()
|
wg.Wait()
|
||||||
close(res)
|
close(res)
|
||||||
@@ -753,7 +759,6 @@ func (e *executor) exec(ctx context.Context, r Rule, ts time.Time, resolveDurati
|
|||||||
return fmt.Errorf("rule %q: failed to execute: %w", r, err)
|
return fmt.Errorf("rule %q: failed to execute: %w", r, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
var errG vmalertutil.ErrGroup
|
|
||||||
if e.Rw != nil {
|
if e.Rw != nil {
|
||||||
pushToRW := func(tss []prompb.TimeSeries) error {
|
pushToRW := func(tss []prompb.TimeSeries) error {
|
||||||
var lastErr error
|
var lastErr error
|
||||||
@@ -765,26 +770,31 @@ func (e *executor) exec(ctx context.Context, r Rule, ts time.Time, resolveDurati
|
|||||||
return lastErr
|
return lastErr
|
||||||
}
|
}
|
||||||
if err := pushToRW(tss); err != nil {
|
if err := pushToRW(tss); err != nil {
|
||||||
errG.Add(err)
|
return err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
ar, ok := r.(*AlertingRule)
|
ar, ok := r.(*AlertingRule)
|
||||||
if !ok {
|
if !ok {
|
||||||
return errG.Err()
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
alerts := ar.alertsToSend(resolveDuration, *resendDelay)
|
alerts := ar.alertsToSend(resolveDuration, *resendDelay)
|
||||||
if len(alerts) < 1 {
|
if len(alerts) < 1 {
|
||||||
return errG.Err()
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
notifierErr := notifier.Send(ctx, alerts, e.notifierHeaders)
|
wg := sync.WaitGroup{}
|
||||||
for err := range notifierErr {
|
errGr := new(vmalertutil.ErrGroup)
|
||||||
if err != nil {
|
for _, nt := range e.Notifiers() {
|
||||||
errG.Add(fmt.Errorf("rule %q: notifier failure: %w", r, err))
|
wg.Add(1)
|
||||||
}
|
go func(nt notifier.Notifier) {
|
||||||
|
if err := nt.Send(ctx, alerts, e.notifierHeaders); err != nil {
|
||||||
|
errGr.Add(fmt.Errorf("rule %q: failed to send alerts to addr %q: %w", r, nt.Addr(), err))
|
||||||
|
}
|
||||||
|
wg.Done()
|
||||||
|
}(nt)
|
||||||
}
|
}
|
||||||
|
wg.Wait()
|
||||||
return errG.Err()
|
return errGr.Err()
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -262,7 +262,7 @@ func TestUpdateDuringRandSleep(t *testing.T) {
|
|||||||
updateCh: make(chan *Group),
|
updateCh: make(chan *Group),
|
||||||
}
|
}
|
||||||
g.Init()
|
g.Init()
|
||||||
go g.Start(context.Background(), nil, nil)
|
go g.Start(context.Background(), nil, nil, nil)
|
||||||
|
|
||||||
rule1 := AlertingRule{
|
rule1 := AlertingRule{
|
||||||
Name: "jobDown",
|
Name: "jobDown",
|
||||||
@@ -346,8 +346,7 @@ func TestGroupStart(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fs := &datasource.FakeQuerier{}
|
fs := &datasource.FakeQuerier{}
|
||||||
fn, cleanup := notifier.InitFakeNotifier()
|
fn := ¬ifier.FakeNotifier{}
|
||||||
defer cleanup()
|
|
||||||
|
|
||||||
const evalInterval = time.Millisecond
|
const evalInterval = time.Millisecond
|
||||||
g := NewGroup(groups[0], fs, evalInterval, map[string]string{"cluster": "east-1"})
|
g := NewGroup(groups[0], fs, evalInterval, map[string]string{"cluster": "east-1"})
|
||||||
@@ -396,7 +395,7 @@ func TestGroupStart(t *testing.T) {
|
|||||||
fs.Add(m2)
|
fs.Add(m2)
|
||||||
g.Init()
|
g.Init()
|
||||||
go func() {
|
go func() {
|
||||||
g.Start(context.Background(), nil, fs)
|
g.Start(context.Background(), func() []notifier.Notifier { return []notifier.Notifier{fn} }, nil, fs)
|
||||||
close(finished)
|
close(finished)
|
||||||
}()
|
}()
|
||||||
|
|
||||||
@@ -405,8 +404,7 @@ func TestGroupStart(t *testing.T) {
|
|||||||
|
|
||||||
var cur uint64
|
var cur uint64
|
||||||
prev := g.metrics.iterationTotal.Get()
|
prev := g.metrics.iterationTotal.Get()
|
||||||
i := 0
|
for i := 0; ; i++ {
|
||||||
for {
|
|
||||||
if i > 40 {
|
if i > 40 {
|
||||||
t.Fatalf("group wasn't able to perform %d evaluations during %d eval intervals", n, i)
|
t.Fatalf("group wasn't able to perform %d evaluations during %d eval intervals", n, i)
|
||||||
}
|
}
|
||||||
@@ -415,7 +413,6 @@ func TestGroupStart(t *testing.T) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
time.Sleep(interval)
|
time.Sleep(interval)
|
||||||
i++
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -475,10 +472,15 @@ func TestFaultyNotifier(t *testing.T) {
|
|||||||
r := newTestAlertingRule("instant", 0)
|
r := newTestAlertingRule("instant", 0)
|
||||||
r.q = fq
|
r.q = fq
|
||||||
|
|
||||||
fn, cleanup := notifier.InitFakeNotifier()
|
fn := ¬ifier.FakeNotifier{}
|
||||||
defer cleanup()
|
e := &executor{
|
||||||
|
Notifiers: func() []notifier.Notifier {
|
||||||
e := &executor{}
|
return []notifier.Notifier{
|
||||||
|
¬ifier.FaultyNotifier{},
|
||||||
|
fn,
|
||||||
|
}
|
||||||
|
},
|
||||||
|
}
|
||||||
delay := 5 * time.Second
|
delay := 5 * time.Second
|
||||||
ctx, cancel := context.WithTimeout(context.Background(), delay)
|
ctx, cancel := context.WithTimeout(context.Background(), delay)
|
||||||
defer cancel()
|
defer cancel()
|
||||||
@@ -551,7 +553,7 @@ func TestCloseWithEvalInterruption(t *testing.T) {
|
|||||||
g := NewGroup(groups[0], fq, evalInterval, nil)
|
g := NewGroup(groups[0], fq, evalInterval, nil)
|
||||||
g.Init()
|
g.Init()
|
||||||
|
|
||||||
go g.Start(context.Background(), nil, nil)
|
go g.Start(context.Background(), nil, nil, nil)
|
||||||
|
|
||||||
time.Sleep(evalInterval * 20)
|
time.Sleep(evalInterval * 20)
|
||||||
|
|
||||||
@@ -569,10 +571,9 @@ func TestCloseWithEvalInterruption(t *testing.T) {
|
|||||||
|
|
||||||
func TestGroupStartDelay(t *testing.T) {
|
func TestGroupStartDelay(t *testing.T) {
|
||||||
g := &Group{}
|
g := &Group{}
|
||||||
g.id = uint64(math.MaxUint64 / 10)
|
|
||||||
// interval of 5min and key generate a static delay of 30s
|
// interval of 5min and key generate a static delay of 30s
|
||||||
g.Interval = time.Minute * 5
|
g.Interval = time.Minute * 5
|
||||||
maxDelay := time.Minute * 5
|
key := uint64(math.MaxUint64 / 10)
|
||||||
|
|
||||||
f := func(atS, expS string) {
|
f := func(atS, expS string) {
|
||||||
t.Helper()
|
t.Helper()
|
||||||
@@ -584,7 +585,7 @@ func TestGroupStartDelay(t *testing.T) {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
delay := g.delayBeforeStart(at, maxDelay)
|
delay := delayBeforeStart(at, key, g.Interval, g.EvalOffset)
|
||||||
gotStart := at.Add(delay)
|
gotStart := at.Add(delay)
|
||||||
if expTS != gotStart {
|
if expTS != gotStart {
|
||||||
t.Fatalf("expected to get %v; got %v instead", expTS, gotStart)
|
t.Fatalf("expected to get %v; got %v instead", expTS, gotStart)
|
||||||
@@ -605,15 +606,6 @@ func TestGroupStartDelay(t *testing.T) {
|
|||||||
f("2023-01-01T00:01:00.000+00:00", "2023-01-01T00:03:00.000+00:00")
|
f("2023-01-01T00:01:00.000+00:00", "2023-01-01T00:03:00.000+00:00")
|
||||||
f("2023-01-01T00:03:30.000+00:00", "2023-01-01T00:08:00.000+00:00")
|
f("2023-01-01T00:03:30.000+00:00", "2023-01-01T00:08:00.000+00:00")
|
||||||
f("2023-01-01T00:08:00.000+00:00", "2023-01-01T00:08:00.000+00:00")
|
f("2023-01-01T00:08:00.000+00:00", "2023-01-01T00:08:00.000+00:00")
|
||||||
|
|
||||||
maxDelay = time.Minute * 1
|
|
||||||
g.EvalOffset = nil
|
|
||||||
|
|
||||||
// test group with maxDelay, and offset disabled
|
|
||||||
f("2023-01-01T00:00:00.000+00:00", "2023-01-01T00:00:06.000+00:00")
|
|
||||||
f("2023-01-01T00:00:01.000+00:00", "2023-01-01T00:00:06.000+00:00")
|
|
||||||
f("2023-01-01T00:00:06.100+00:00", "2023-01-01T00:01:06.000+00:00")
|
|
||||||
f("2023-01-01T00:00:11.000+00:00", "2023-01-01T00:01:06.000+00:00")
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestGetPrometheusReqTimestamp(t *testing.T) {
|
func TestGetPrometheusReqTimestamp(t *testing.T) {
|
||||||
|
|||||||
@@ -2,7 +2,6 @@ package rule
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"errors"
|
|
||||||
"fmt"
|
"fmt"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
@@ -82,37 +81,6 @@ func (rr *RecordingRule) ID() uint64 {
|
|||||||
return rr.RuleID
|
return rr.RuleID
|
||||||
}
|
}
|
||||||
|
|
||||||
// ToAPI returns ApiRule representation of rr
|
|
||||||
func (rr *RecordingRule) ToAPI() ApiRule {
|
|
||||||
state := rr.state
|
|
||||||
lastState := state.getLast()
|
|
||||||
r := ApiRule{
|
|
||||||
Type: TypeRecording,
|
|
||||||
DatasourceType: rr.Type.String(),
|
|
||||||
Name: rr.Name,
|
|
||||||
Query: rr.Expr,
|
|
||||||
Labels: rr.Labels,
|
|
||||||
LastEvaluation: lastState.Time,
|
|
||||||
EvaluationTime: lastState.Duration.Seconds(),
|
|
||||||
Health: "ok",
|
|
||||||
LastSamples: lastState.Samples,
|
|
||||||
LastSeriesFetched: lastState.SeriesFetched,
|
|
||||||
MaxUpdates: state.size(),
|
|
||||||
Updates: state.getAll(),
|
|
||||||
|
|
||||||
// encode as strings to avoid rounding
|
|
||||||
ID: fmt.Sprintf("%d", rr.ID()),
|
|
||||||
GroupID: fmt.Sprintf("%d", rr.GroupID),
|
|
||||||
GroupName: rr.GroupName,
|
|
||||||
File: rr.File,
|
|
||||||
}
|
|
||||||
if lastState.Err != nil {
|
|
||||||
r.LastError = lastState.Err.Error()
|
|
||||||
r.Health = "err"
|
|
||||||
}
|
|
||||||
return r
|
|
||||||
}
|
|
||||||
|
|
||||||
// NewRecordingRule creates a new RecordingRule
|
// NewRecordingRule creates a new RecordingRule
|
||||||
func NewRecordingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rule) *RecordingRule {
|
func NewRecordingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rule) *RecordingRule {
|
||||||
debug := group.Debug
|
debug := group.Debug
|
||||||
@@ -198,7 +166,7 @@ func (rr *RecordingRule) exec(ctx context.Context, ts time.Time, limit int) ([]p
|
|||||||
|
|
||||||
defer func() {
|
defer func() {
|
||||||
rr.state.add(curState)
|
rr.state.add(curState)
|
||||||
if curState.Err != nil && !errors.Is(curState.Err, context.Canceled) {
|
if curState.Err != nil {
|
||||||
rr.metrics.errors.Inc()
|
rr.metrics.errors.Inc()
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
@@ -237,8 +205,7 @@ func (rr *RecordingRule) exec(ctx context.Context, ts time.Time, limit int) ([]p
|
|||||||
Labels: stringToLabels(k),
|
Labels: stringToLabels(k),
|
||||||
Samples: []prompb.Sample{
|
Samples: []prompb.Sample{
|
||||||
{Value: decimal.StaleNaN, Timestamp: ts.UnixNano() / 1e6},
|
{Value: decimal.StaleNaN, Timestamp: ts.UnixNano() / 1e6},
|
||||||
},
|
}})
|
||||||
})
|
|
||||||
}
|
}
|
||||||
rr.lastEvaluation = curEvaluation
|
rr.lastEvaluation = curEvaluation
|
||||||
return tss, nil
|
return tss, nil
|
||||||
@@ -293,11 +260,6 @@ func (rr *RecordingRule) toTimeSeries(m datasource.Metric) prompb.TimeSeries {
|
|||||||
}
|
}
|
||||||
// add extra labels configured by user
|
// add extra labels configured by user
|
||||||
for k := range rr.Labels {
|
for k := range rr.Labels {
|
||||||
// do not add label with empty value, since it has no meaning.
|
|
||||||
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9984
|
|
||||||
if rr.Labels[k] == "" {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
existingLabel := promrelabel.GetLabelByName(m.Labels, k)
|
existingLabel := promrelabel.GetLabelByName(m.Labels, k)
|
||||||
if existingLabel != nil { // there is a conflict between extra and existing label
|
if existingLabel != nil { // there is a conflict between extra and existing label
|
||||||
if existingLabel.Value == rr.Labels[k] {
|
if existingLabel.Value == rr.Labels[k] {
|
||||||
|
|||||||
@@ -21,8 +21,6 @@ type Rule interface {
|
|||||||
// ID returns unique ID that may be used for
|
// ID returns unique ID that may be used for
|
||||||
// identifying this Rule among others.
|
// identifying this Rule among others.
|
||||||
ID() uint64
|
ID() uint64
|
||||||
// ToAPI returns ApiRule representation of Rule
|
|
||||||
ToAPI() ApiRule
|
|
||||||
// exec executes the rule with given context at the given timestamp and limit.
|
// exec executes the rule with given context at the given timestamp and limit.
|
||||||
// returns an err if number of resulting time series exceeds the limit.
|
// returns an err if number of resulting time series exceeds the limit.
|
||||||
exec(ctx context.Context, ts time.Time, limit int) ([]prompb.TimeSeries, error)
|
exec(ctx context.Context, ts time.Time, limit int) ([]prompb.TimeSeries, error)
|
||||||
@@ -70,6 +68,39 @@ type StateEntry struct {
|
|||||||
Curl string `json:"curl"`
|
Curl string `json:"curl"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// GetLastEntry returns latest stateEntry of rule
|
||||||
|
func GetLastEntry(r Rule) StateEntry {
|
||||||
|
if rule, ok := r.(*AlertingRule); ok {
|
||||||
|
return rule.state.getLast()
|
||||||
|
}
|
||||||
|
if rule, ok := r.(*RecordingRule); ok {
|
||||||
|
return rule.state.getLast()
|
||||||
|
}
|
||||||
|
return StateEntry{}
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetRuleStateSize returns size of rule stateEntry
|
||||||
|
func GetRuleStateSize(r Rule) int {
|
||||||
|
if rule, ok := r.(*AlertingRule); ok {
|
||||||
|
return rule.state.size()
|
||||||
|
}
|
||||||
|
if rule, ok := r.(*RecordingRule); ok {
|
||||||
|
return rule.state.size()
|
||||||
|
}
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetAllRuleState returns rule entire stateEntries
|
||||||
|
func GetAllRuleState(r Rule) []StateEntry {
|
||||||
|
if rule, ok := r.(*AlertingRule); ok {
|
||||||
|
return rule.state.getAll()
|
||||||
|
}
|
||||||
|
if rule, ok := r.(*RecordingRule); ok {
|
||||||
|
return rule.state.getAll()
|
||||||
|
}
|
||||||
|
return []StateEntry{}
|
||||||
|
}
|
||||||
|
|
||||||
func (s *ruleState) size() int {
|
func (s *ruleState) size() int {
|
||||||
s.RLock()
|
s.RLock()
|
||||||
defer s.RUnlock()
|
defer s.RUnlock()
|
||||||
@@ -121,7 +152,7 @@ func (s *ruleState) add(e StateEntry) {
|
|||||||
func replayRule(r Rule, start, end time.Time, rw remotewrite.RWClient, replayRuleRetryAttempts int) (int, error) {
|
func replayRule(r Rule, start, end time.Time, rw remotewrite.RWClient, replayRuleRetryAttempts int) (int, error) {
|
||||||
var err error
|
var err error
|
||||||
var tss []prompb.TimeSeries
|
var tss []prompb.TimeSeries
|
||||||
for i := range replayRuleRetryAttempts {
|
for i := 0; i < replayRuleRetryAttempts; i++ {
|
||||||
tss, err = r.execRange(context.Background(), start, end)
|
tss, err = r.execRange(context.Background(), start, end)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
break
|
break
|
||||||
|
|||||||
@@ -40,7 +40,7 @@ func TestRule_state(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
var last time.Time
|
var last time.Time
|
||||||
for range stateEntriesN * 2 {
|
for i := 0; i < stateEntriesN*2; i++ {
|
||||||
last = time.Now()
|
last = time.Now()
|
||||||
r.state.add(StateEntry{At: last})
|
r.state.add(StateEntry{At: last})
|
||||||
}
|
}
|
||||||
@@ -65,15 +65,17 @@ func TestRule_stateConcurrent(_ *testing.T) {
|
|||||||
r := &AlertingRule{state: &ruleState{entries: make([]StateEntry, 20)}}
|
r := &AlertingRule{state: &ruleState{entries: make([]StateEntry, 20)}}
|
||||||
const workers = 50
|
const workers = 50
|
||||||
const iterations = 100
|
const iterations = 100
|
||||||
var wg sync.WaitGroup
|
wg := sync.WaitGroup{}
|
||||||
for range workers {
|
wg.Add(workers)
|
||||||
wg.Go(func() {
|
for i := 0; i < workers; i++ {
|
||||||
for range iterations {
|
go func() {
|
||||||
|
defer wg.Done()
|
||||||
|
for i := 0; i < iterations; i++ {
|
||||||
r.state.add(StateEntry{At: time.Now()})
|
r.state.add(StateEntry{At: time.Now()})
|
||||||
r.state.getAll()
|
r.state.getAll()
|
||||||
r.state.getLast()
|
r.state.getLast()
|
||||||
}
|
}
|
||||||
})
|
}()
|
||||||
}
|
}
|
||||||
wg.Wait()
|
wg.Wait()
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -19,13 +19,13 @@ func CompareRules(t *testing.T, a, b Rule) error {
|
|||||||
case *AlertingRule:
|
case *AlertingRule:
|
||||||
br, ok := b.(*AlertingRule)
|
br, ok := b.(*AlertingRule)
|
||||||
if !ok {
|
if !ok {
|
||||||
return fmt.Errorf("rule %d supposed to be of type AlertingRule", b.ID())
|
return fmt.Errorf("rule %q supposed to be of type AlertingRule", b.ID())
|
||||||
}
|
}
|
||||||
return compareAlertingRules(t, v, br)
|
return compareAlertingRules(t, v, br)
|
||||||
case *RecordingRule:
|
case *RecordingRule:
|
||||||
br, ok := b.(*RecordingRule)
|
br, ok := b.(*RecordingRule)
|
||||||
if !ok {
|
if !ok {
|
||||||
return fmt.Errorf("rule %d supposed to be of type RecordingRule", b.ID())
|
return fmt.Errorf("rule %q supposed to be of type RecordingRule", b.ID())
|
||||||
}
|
}
|
||||||
return compareRecordingRules(t, v, br)
|
return compareRecordingRules(t, v, br)
|
||||||
default:
|
default:
|
||||||
|
|||||||
@@ -34,12 +34,11 @@ body {
|
|||||||
padding-top: 4.5rem;
|
padding-top: 4.5rem;
|
||||||
}
|
}
|
||||||
|
|
||||||
.vm-group {
|
.group-items {
|
||||||
cursor: pointer;
|
cursor: pointer;
|
||||||
padding: 5px;
|
padding: 5px;
|
||||||
margin-top: 5px;
|
margin-top: 5px;
|
||||||
position: relative;
|
position: relative;
|
||||||
display: none;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
.btn svg, .dropdown-item svg {
|
.btn svg, .dropdown-item svg {
|
||||||
@@ -56,22 +55,14 @@ body {
|
|||||||
height: 38px;
|
height: 38px;
|
||||||
}
|
}
|
||||||
|
|
||||||
.vm-item:not(.vm-found) {
|
.group-items:not(:has(.sub-item:not(.d-none))) {
|
||||||
display: none;
|
display: none !important;
|
||||||
}
|
}
|
||||||
|
|
||||||
.vm-group:has(.vm-item:is(.vm-found)), .vm-group:is(.vm-found) {
|
.group-items:hover {
|
||||||
display: flex;
|
|
||||||
}
|
|
||||||
|
|
||||||
.vm-group:hover {
|
|
||||||
background-color: #f8f9fa!important;
|
background-color: #f8f9fa!important;
|
||||||
}
|
}
|
||||||
|
|
||||||
.vm-group:is(.vm-found) .vm-item {
|
|
||||||
display: table-row;
|
|
||||||
}
|
|
||||||
|
|
||||||
.table {
|
.table {
|
||||||
table-layout: fixed;
|
table-layout: fixed;
|
||||||
}
|
}
|
||||||
@@ -120,9 +111,3 @@ textarea.curl-area {
|
|||||||
.w-60 {
|
.w-60 {
|
||||||
width: 60%;
|
width: 60%;
|
||||||
}
|
}
|
||||||
|
|
||||||
.annotations {
|
|
||||||
white-space: pre-wrap;
|
|
||||||
color: gray;
|
|
||||||
word-wrap: break-word;
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -65,34 +65,32 @@ function getParamURL(key) {
|
|||||||
return url.searchParams.get(key)
|
return url.searchParams.get(key)
|
||||||
}
|
}
|
||||||
|
|
||||||
function matchText(search, item) {
|
|
||||||
const text = item.innerText.toLowerCase();
|
|
||||||
return text.indexOf(search) >= 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
function filterRules(searchPhrase) {
|
function filterRules(searchPhrase) {
|
||||||
document.querySelectorAll('.vm-group').forEach((group) => {
|
document.querySelectorAll('.sub-items').forEach((rules) => {
|
||||||
if (!searchPhrase) {
|
let found = false;
|
||||||
group.classList.add('vm-found');
|
rules.querySelectorAll('.sub-item').forEach((rule) => {
|
||||||
return;
|
if (searchPhrase) {
|
||||||
}
|
const ruleName = rule.innerText.toLowerCase();
|
||||||
for (const item of group.querySelectorAll('.vm-group-search')) {
|
const matches = []
|
||||||
if (matchText(searchPhrase, item)) {
|
const hasValue = ruleName.indexOf(searchPhrase) >= 0;
|
||||||
group.classList.add('vm-found');
|
rule.querySelectorAll('.label').forEach((label) => {
|
||||||
return;
|
const text = label.innerText.toLowerCase();
|
||||||
|
if (text.indexOf(searchPhrase) >= 0) {
|
||||||
|
matches.push(text);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
if (!matches.length && !hasValue) {
|
||||||
|
rule.classList.add('d-none');
|
||||||
|
return;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
rule.classList.remove('d-none');
|
||||||
group.classList.remove('vm-found');
|
found = true;
|
||||||
for (const item of group.querySelectorAll('.vm-item')) {
|
});
|
||||||
if (matchText(searchPhrase, item)) {
|
if (found && searchPhrase || !searchPhrase) {
|
||||||
item.classList.add('vm-found');
|
rules.classList.remove('d-none');
|
||||||
continue;
|
} else {
|
||||||
}
|
rules.classList.add('d-none');
|
||||||
if (Array.from(item.querySelectorAll('.label')).find(l => matchText(searchPhrase, l))) {
|
|
||||||
item.classList.add('vm-found');
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
item.classList.remove('vm-found');
|
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -485,12 +485,6 @@ func templateFuncs() textTpl.FuncMap {
|
|||||||
|
|
||||||
/* Helpers */
|
/* Helpers */
|
||||||
|
|
||||||
// now returns the Unix timestamp in seconds at the time of the template evaluation.
|
|
||||||
// For example: {{ (now | toTime).Sub $activeAt }} will return the duration the alert has been active.
|
|
||||||
"now": func() float64 {
|
|
||||||
return float64(time.Now().Unix())
|
|
||||||
},
|
|
||||||
|
|
||||||
// Converts a list of objects to a map with keys arg0, arg1 etc.
|
// Converts a list of objects to a map with keys arg0, arg1 etc.
|
||||||
// This is intended to allow multiple arguments to be passed to templates.
|
// This is intended to allow multiple arguments to be passed to templates.
|
||||||
"args": func(args ...any) map[string]any {
|
"args": func(args ...any) map[string]any {
|
||||||
|
|||||||
@@ -45,7 +45,7 @@ func (eg *ErrGroup) Error() string {
|
|||||||
return ""
|
return ""
|
||||||
}
|
}
|
||||||
var b strings.Builder
|
var b strings.Builder
|
||||||
fmt.Fprintf(&b, "errors(%d): \n", len(eg.errs))
|
fmt.Fprintf(&b, "errors(%d): ", len(eg.errs))
|
||||||
for i, err := range eg.errs {
|
for i, err := range eg.errs {
|
||||||
b.WriteString(err.Error())
|
b.WriteString(err.Error())
|
||||||
if i != len(eg.errs)-1 {
|
if i != len(eg.errs)-1 {
|
||||||
|
|||||||
@@ -30,8 +30,8 @@ func TestErrGroup(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
f(nil, "")
|
f(nil, "")
|
||||||
f([]error{errors.New("timeout")}, "errors(1): \ntimeout")
|
f([]error{errors.New("timeout")}, "errors(1): timeout")
|
||||||
f([]error{errors.New("timeout"), errors.New("deadline")}, "errors(2): \ntimeout\ndeadline")
|
f([]error{errors.New("timeout"), errors.New("deadline")}, "errors(2): timeout\ndeadline")
|
||||||
}
|
}
|
||||||
|
|
||||||
// TestErrGroupConcurrent supposed to test concurrent
|
// TestErrGroupConcurrent supposed to test concurrent
|
||||||
@@ -42,7 +42,7 @@ func TestErrGroupConcurrent(_ *testing.T) {
|
|||||||
|
|
||||||
const writersN = 4
|
const writersN = 4
|
||||||
payload := make(chan error, writersN)
|
payload := make(chan error, writersN)
|
||||||
for range writersN {
|
for i := 0; i < writersN; i++ {
|
||||||
go func() {
|
go func() {
|
||||||
for err := range payload {
|
for err := range payload {
|
||||||
eg.Add(err)
|
eg.Add(err)
|
||||||
@@ -51,7 +51,7 @@ func TestErrGroupConcurrent(_ *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
const iterations = 500
|
const iterations = 500
|
||||||
for i := range iterations {
|
for i := 0; i < iterations; i++ {
|
||||||
payload <- fmt.Errorf("error %d", i)
|
payload <- fmt.Errorf("error %d", i)
|
||||||
if i%10 == 0 {
|
if i%10 == 0 {
|
||||||
_ = eg.Err()
|
_ = eg.Err()
|
||||||
|
|||||||
@@ -29,9 +29,7 @@ var (
|
|||||||
{"api/v1/rules", "list all loaded groups and rules"},
|
{"api/v1/rules", "list all loaded groups and rules"},
|
||||||
{"api/v1/alerts", "list all active alerts"},
|
{"api/v1/alerts", "list all active alerts"},
|
||||||
{"api/v1/notifiers", "list all notifiers"},
|
{"api/v1/notifiers", "list all notifiers"},
|
||||||
{fmt.Sprintf("api/v1/alert?%s=<int>&%s=<int>", rule.ParamGroupID, rule.ParamAlertID), "get alert status by group and alert ID"},
|
{fmt.Sprintf("api/v1/alert?%s=<int>&%s=<int>", paramGroupID, paramAlertID), "get alert status by group and alert ID"},
|
||||||
{fmt.Sprintf("api/v1/rule?%s=<int>&%s=<int>", rule.ParamGroupID, rule.ParamRuleID), "get rule status by group and rule ID"},
|
|
||||||
{fmt.Sprintf("api/v1/group?%s=<int>", rule.ParamGroupID), "get group status by group ID"},
|
|
||||||
}
|
}
|
||||||
systemLinks = [][2]string{
|
systemLinks = [][2]string{
|
||||||
{"vmalert/groups", "UI"},
|
{"vmalert/groups", "UI"},
|
||||||
@@ -47,8 +45,8 @@ var (
|
|||||||
{Name: "Docs", URL: "https://docs.victoriametrics.com/victoriametrics/vmalert/"},
|
{Name: "Docs", URL: "https://docs.victoriametrics.com/victoriametrics/vmalert/"},
|
||||||
}
|
}
|
||||||
ruleTypeMap = map[string]string{
|
ruleTypeMap = map[string]string{
|
||||||
"alert": rule.TypeAlerting,
|
"alert": ruleTypeAlerting,
|
||||||
"record": rule.TypeRecording,
|
"record": ruleTypeRecording,
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -114,7 +112,7 @@ func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
|
|||||||
case "/rules":
|
case "/rules":
|
||||||
// Grafana makes an extra request to `/rules`
|
// Grafana makes an extra request to `/rules`
|
||||||
// handler in addition to `/api/v1/rules` calls in alerts UI
|
// handler in addition to `/api/v1/rules` calls in alerts UI
|
||||||
var data []*rule.ApiGroup
|
var data []*apiGroup
|
||||||
rf, err := newRulesFilter(r)
|
rf, err := newRulesFilter(r)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
httpserver.Errorf(w, r, "%s", err)
|
httpserver.Errorf(w, r, "%s", err)
|
||||||
@@ -180,14 +178,14 @@ func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
|
|||||||
w.Write(data)
|
w.Write(data)
|
||||||
return true
|
return true
|
||||||
case "/vmalert/api/v1/rule", "/api/v1/rule":
|
case "/vmalert/api/v1/rule", "/api/v1/rule":
|
||||||
apiRule, err := rh.getRule(r)
|
rule, err := rh.getRule(r)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
httpserver.Errorf(w, r, "%s", err)
|
httpserver.Errorf(w, r, "%s", err)
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
rwu := rule.ApiRuleWithUpdates{
|
rwu := apiRuleWithUpdates{
|
||||||
ApiRule: apiRule,
|
apiRule: rule,
|
||||||
StateUpdates: apiRule.Updates,
|
StateUpdates: rule.Updates,
|
||||||
}
|
}
|
||||||
data, err := json.Marshal(rwu)
|
data, err := json.Marshal(rwu)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -197,20 +195,6 @@ func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
|
|||||||
w.Header().Set("Content-Type", "application/json")
|
w.Header().Set("Content-Type", "application/json")
|
||||||
w.Write(data)
|
w.Write(data)
|
||||||
return true
|
return true
|
||||||
case "/vmalert/api/v1/group", "/api/v1/group":
|
|
||||||
group, err := rh.getGroup(r)
|
|
||||||
if err != nil {
|
|
||||||
httpserver.Errorf(w, r, "%s", err)
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
data, err := json.Marshal(group)
|
|
||||||
if err != nil {
|
|
||||||
httpserver.Errorf(w, r, "failed to marshal group: %s", err)
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
w.Header().Set("Content-Type", "application/json")
|
|
||||||
w.Write(data)
|
|
||||||
return true
|
|
||||||
case "/-/reload":
|
case "/-/reload":
|
||||||
if !httpserver.CheckAuthFlag(w, r, reloadAuthKey) {
|
if !httpserver.CheckAuthFlag(w, r, reloadAuthKey) {
|
||||||
return true
|
return true
|
||||||
@@ -225,42 +209,30 @@ func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (rh *requestHandler) getGroup(r *http.Request) (*rule.ApiGroup, error) {
|
func (rh *requestHandler) getRule(r *http.Request) (apiRule, error) {
|
||||||
groupID, err := strconv.ParseUint(r.FormValue(rule.ParamGroupID), 10, 64)
|
groupID, err := strconv.ParseUint(r.FormValue(paramGroupID), 10, 64)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("failed to read %q param: %w", rule.ParamGroupID, err)
|
return apiRule{}, fmt.Errorf("failed to read %q param: %w", paramGroupID, err)
|
||||||
}
|
}
|
||||||
obj, err := rh.m.groupAPI(groupID)
|
ruleID, err := strconv.ParseUint(r.FormValue(paramRuleID), 10, 64)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, errResponse(err, http.StatusNotFound)
|
return apiRule{}, fmt.Errorf("failed to read %q param: %w", paramRuleID, err)
|
||||||
}
|
|
||||||
return obj, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (rh *requestHandler) getRule(r *http.Request) (rule.ApiRule, error) {
|
|
||||||
groupID, err := strconv.ParseUint(r.FormValue(rule.ParamGroupID), 10, 64)
|
|
||||||
if err != nil {
|
|
||||||
return rule.ApiRule{}, fmt.Errorf("failed to read %q param: %w", rule.ParamGroupID, err)
|
|
||||||
}
|
|
||||||
ruleID, err := strconv.ParseUint(r.FormValue(rule.ParamRuleID), 10, 64)
|
|
||||||
if err != nil {
|
|
||||||
return rule.ApiRule{}, fmt.Errorf("failed to read %q param: %w", rule.ParamRuleID, err)
|
|
||||||
}
|
}
|
||||||
obj, err := rh.m.ruleAPI(groupID, ruleID)
|
obj, err := rh.m.ruleAPI(groupID, ruleID)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return rule.ApiRule{}, errResponse(err, http.StatusNotFound)
|
return apiRule{}, errResponse(err, http.StatusNotFound)
|
||||||
}
|
}
|
||||||
return obj, nil
|
return obj, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (rh *requestHandler) getAlert(r *http.Request) (*rule.ApiAlert, error) {
|
func (rh *requestHandler) getAlert(r *http.Request) (*apiAlert, error) {
|
||||||
groupID, err := strconv.ParseUint(r.FormValue(rule.ParamGroupID), 10, 64)
|
groupID, err := strconv.ParseUint(r.FormValue(paramGroupID), 10, 64)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("failed to read %q param: %w", rule.ParamGroupID, err)
|
return nil, fmt.Errorf("failed to read %q param: %w", paramGroupID, err)
|
||||||
}
|
}
|
||||||
alertID, err := strconv.ParseUint(r.FormValue(rule.ParamAlertID), 10, 64)
|
alertID, err := strconv.ParseUint(r.FormValue(paramAlertID), 10, 64)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("failed to read %q param: %w", rule.ParamAlertID, err)
|
return nil, fmt.Errorf("failed to read %q param: %w", paramAlertID, err)
|
||||||
}
|
}
|
||||||
a, err := rh.m.alertAPI(groupID, alertID)
|
a, err := rh.m.alertAPI(groupID, alertID)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -272,7 +244,7 @@ func (rh *requestHandler) getAlert(r *http.Request) (*rule.ApiAlert, error) {
|
|||||||
type listGroupsResponse struct {
|
type listGroupsResponse struct {
|
||||||
Status string `json:"status"`
|
Status string `json:"status"`
|
||||||
Data struct {
|
Data struct {
|
||||||
Groups []*rule.ApiGroup `json:"groups"`
|
Groups []*apiGroup `json:"groups"`
|
||||||
} `json:"data"`
|
} `json:"data"`
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -338,19 +310,19 @@ func (rf *rulesFilter) matchesGroup(group *rule.Group) bool {
|
|||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
func (rh *requestHandler) groups(rf *rulesFilter) []*rule.ApiGroup {
|
func (rh *requestHandler) groups(rf *rulesFilter) []*apiGroup {
|
||||||
rh.m.groupsMu.RLock()
|
rh.m.groupsMu.RLock()
|
||||||
defer rh.m.groupsMu.RUnlock()
|
defer rh.m.groupsMu.RUnlock()
|
||||||
|
|
||||||
groups := make([]*rule.ApiGroup, 0)
|
groups := make([]*apiGroup, 0)
|
||||||
for _, group := range rh.m.groups {
|
for _, group := range rh.m.groups {
|
||||||
if !rf.matchesGroup(group) {
|
if !rf.matchesGroup(group) {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
g := group.ToAPI()
|
g := groupToAPI(group)
|
||||||
// the returned list should always be non-nil
|
// the returned list should always be non-nil
|
||||||
// https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4221
|
// https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4221
|
||||||
filteredRules := make([]rule.ApiRule, 0)
|
filteredRules := make([]apiRule, 0)
|
||||||
for _, rule := range g.Rules {
|
for _, rule := range g.Rules {
|
||||||
if rf.ruleType != "" && rf.ruleType != rule.Type {
|
if rf.ruleType != "" && rf.ruleType != rule.Type {
|
||||||
continue
|
continue
|
||||||
@@ -378,7 +350,7 @@ func (rh *requestHandler) groups(rf *rulesFilter) []*rule.ApiGroup {
|
|||||||
groups = append(groups, g)
|
groups = append(groups, g)
|
||||||
}
|
}
|
||||||
// sort list of groups for deterministic output
|
// sort list of groups for deterministic output
|
||||||
slices.SortFunc(groups, func(a, b *rule.ApiGroup) int {
|
slices.SortFunc(groups, func(a, b *apiGroup) int {
|
||||||
if a.Name != b.Name {
|
if a.Name != b.Name {
|
||||||
return strings.Compare(a.Name, b.Name)
|
return strings.Compare(a.Name, b.Name)
|
||||||
}
|
}
|
||||||
@@ -403,32 +375,32 @@ func (rh *requestHandler) listGroups(rf *rulesFilter) ([]byte, error) {
|
|||||||
type listAlertsResponse struct {
|
type listAlertsResponse struct {
|
||||||
Status string `json:"status"`
|
Status string `json:"status"`
|
||||||
Data struct {
|
Data struct {
|
||||||
Alerts []*rule.ApiAlert `json:"alerts"`
|
Alerts []*apiAlert `json:"alerts"`
|
||||||
} `json:"data"`
|
} `json:"data"`
|
||||||
}
|
}
|
||||||
|
|
||||||
func (rh *requestHandler) groupAlerts() []rule.GroupAlerts {
|
func (rh *requestHandler) groupAlerts() []groupAlerts {
|
||||||
rh.m.groupsMu.RLock()
|
rh.m.groupsMu.RLock()
|
||||||
defer rh.m.groupsMu.RUnlock()
|
defer rh.m.groupsMu.RUnlock()
|
||||||
|
|
||||||
var gAlerts []rule.GroupAlerts
|
var gAlerts []groupAlerts
|
||||||
for _, group := range rh.m.groups {
|
for _, g := range rh.m.groups {
|
||||||
var alerts []*rule.ApiAlert
|
var alerts []*apiAlert
|
||||||
g := group.ToAPI()
|
|
||||||
for _, r := range g.Rules {
|
for _, r := range g.Rules {
|
||||||
if r.Type != rule.TypeAlerting {
|
a, ok := r.(*rule.AlertingRule)
|
||||||
|
if !ok {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
alerts = append(alerts, r.Alerts...)
|
alerts = append(alerts, ruleToAPIAlert(a)...)
|
||||||
}
|
}
|
||||||
if len(alerts) > 0 {
|
if len(alerts) > 0 {
|
||||||
gAlerts = append(gAlerts, rule.GroupAlerts{
|
gAlerts = append(gAlerts, groupAlerts{
|
||||||
Group: g,
|
Group: groupToAPI(g),
|
||||||
Alerts: alerts,
|
Alerts: alerts,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
slices.SortFunc(gAlerts, func(a, b rule.GroupAlerts) int {
|
slices.SortFunc(gAlerts, func(a, b groupAlerts) int {
|
||||||
return strings.Compare(a.Group.Name, b.Group.Name)
|
return strings.Compare(a.Group.Name, b.Group.Name)
|
||||||
})
|
})
|
||||||
return gAlerts
|
return gAlerts
|
||||||
@@ -439,22 +411,22 @@ func (rh *requestHandler) listAlerts(rf *rulesFilter) ([]byte, error) {
|
|||||||
defer rh.m.groupsMu.RUnlock()
|
defer rh.m.groupsMu.RUnlock()
|
||||||
|
|
||||||
lr := listAlertsResponse{Status: "success"}
|
lr := listAlertsResponse{Status: "success"}
|
||||||
lr.Data.Alerts = make([]*rule.ApiAlert, 0)
|
lr.Data.Alerts = make([]*apiAlert, 0)
|
||||||
for _, group := range rh.m.groups {
|
for _, group := range rh.m.groups {
|
||||||
if !rf.matchesGroup(group) {
|
if !rf.matchesGroup(group) {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
g := group.ToAPI()
|
for _, r := range group.Rules {
|
||||||
for _, r := range g.Rules {
|
a, ok := r.(*rule.AlertingRule)
|
||||||
if r.Type != rule.TypeAlerting {
|
if !ok {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
lr.Data.Alerts = append(lr.Data.Alerts, r.Alerts...)
|
lr.Data.Alerts = append(lr.Data.Alerts, ruleToAPIAlert(a)...)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// sort list of alerts for deterministic output
|
// sort list of alerts for deterministic output
|
||||||
slices.SortFunc(lr.Data.Alerts, func(a, b *rule.ApiAlert) int {
|
slices.SortFunc(lr.Data.Alerts, func(a, b *apiAlert) int {
|
||||||
return strings.Compare(a.ID, b.ID)
|
return strings.Compare(a.ID, b.ID)
|
||||||
})
|
})
|
||||||
|
|
||||||
@@ -471,7 +443,7 @@ func (rh *requestHandler) listAlerts(rf *rulesFilter) ([]byte, error) {
|
|||||||
type listNotifiersResponse struct {
|
type listNotifiersResponse struct {
|
||||||
Status string `json:"status"`
|
Status string `json:"status"`
|
||||||
Data struct {
|
Data struct {
|
||||||
Notifiers []*notifier.ApiNotifier `json:"notifiers"`
|
Notifiers []*apiNotifier `json:"notifiers"`
|
||||||
} `json:"data"`
|
} `json:"data"`
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -479,20 +451,19 @@ func (rh *requestHandler) listNotifiers() ([]byte, error) {
|
|||||||
targets := notifier.GetTargets()
|
targets := notifier.GetTargets()
|
||||||
|
|
||||||
lr := listNotifiersResponse{Status: "success"}
|
lr := listNotifiersResponse{Status: "success"}
|
||||||
lr.Data.Notifiers = make([]*notifier.ApiNotifier, 0)
|
lr.Data.Notifiers = make([]*apiNotifier, 0)
|
||||||
for protoName, protoTargets := range targets {
|
for protoName, protoTargets := range targets {
|
||||||
nr := &notifier.ApiNotifier{
|
notifier := &apiNotifier{
|
||||||
Kind: protoName,
|
Kind: string(protoName),
|
||||||
Targets: make([]*notifier.ApiTarget, 0, len(protoTargets)),
|
Targets: make([]*apiTarget, 0, len(protoTargets)),
|
||||||
}
|
}
|
||||||
for _, target := range protoTargets {
|
for _, target := range protoTargets {
|
||||||
nr.Targets = append(nr.Targets, &notifier.ApiTarget{
|
notifier.Targets = append(notifier.Targets, &apiTarget{
|
||||||
Address: target.Addr(),
|
Address: target.Addr(),
|
||||||
Labels: target.Labels.ToMap(),
|
Labels: target.Labels.ToMap(),
|
||||||
LastError: target.LastError(),
|
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
lr.Data.Notifiers = append(lr.Data.Notifiers, nr)
|
lr.Data.Notifiers = append(lr.Data.Notifiers, notifier)
|
||||||
}
|
}
|
||||||
|
|
||||||
b, err := json.Marshal(lr)
|
b, err := json.Marshal(lr)
|
||||||
|
|||||||
@@ -8,8 +8,6 @@
|
|||||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/tpl"
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/tpl"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/vmalertutil"
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/vmalertutil"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/rule"
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
|
|
||||||
) %}
|
) %}
|
||||||
|
|
||||||
{% func Controls(prefix, currentIcon, currentText string, icons, filters map[string]string, search bool) %}
|
{% func Controls(prefix, currentIcon, currentText string, icons, filters map[string]string, search bool) %}
|
||||||
@@ -79,8 +77,6 @@
|
|||||||
{% func Welcome(r *http.Request) %}
|
{% func Welcome(r *http.Request) %}
|
||||||
{%= tpl.Header(r, navItems, "vmalert", getLastConfigError()) %}
|
{%= tpl.Header(r, navItems, "vmalert", getLastConfigError()) %}
|
||||||
<p>
|
<p>
|
||||||
Version {%s buildinfo.Version %} <br>
|
|
||||||
|
|
||||||
API:<br>
|
API:<br>
|
||||||
{% for _, p := range apiLinks %}
|
{% for _, p := range apiLinks %}
|
||||||
{%code p, doc := p[0], p[1] %}
|
{%code p, doc := p[0], p[1] %}
|
||||||
@@ -97,7 +93,7 @@
|
|||||||
{%= tpl.Footer(r) %}
|
{%= tpl.Footer(r) %}
|
||||||
{% endfunc %}
|
{% endfunc %}
|
||||||
|
|
||||||
{% func ListGroups(r *http.Request, groups []*rule.ApiGroup, filter string) %}
|
{% func ListGroups(r *http.Request, groups []*apiGroup, filter string) %}
|
||||||
{%code
|
{%code
|
||||||
prefix := vmalertutil.Prefix(r.URL.Path)
|
prefix := vmalertutil.Prefix(r.URL.Path)
|
||||||
filters := map[string]string{
|
filters := map[string]string{
|
||||||
@@ -117,17 +113,14 @@
|
|||||||
{%= Controls(prefix, currentIcon, currentText, icons, filters, true) %}
|
{%= Controls(prefix, currentIcon, currentText, icons, filters, true) %}
|
||||||
{% if len(groups) > 0 %}
|
{% if len(groups) > 0 %}
|
||||||
{% for _, g := range groups %}
|
{% for _, g := range groups %}
|
||||||
<div id="group-{%s g.ID %}" class="w-100 border-0 flex-column vm-group{% if g.Unhealthy > 0 %} alert-danger{% endif %}">
|
<div id="group-{%s g.ID %}" class="d-flex w-100 border-0 flex-column group-items{% if g.Unhealthy > 0 %} alert-danger{% endif %}">
|
||||||
<span class="d-flex justify-content-between">
|
<span class="d-flex justify-content-between">
|
||||||
<a
|
<a href="#group-{%s g.ID %}">{%s g.Name %}{% if g.Type != "prometheus" %} ({%s g.Type %}){% endif %} (every {%f.0 g.Interval %}s) #</a>
|
||||||
class="vm-group-search"
|
|
||||||
href="#group-{%s g.ID %}"
|
|
||||||
>{%s g.Name %}{% if g.Type != "prometheus" %} ({%s g.Type %}){% endif %} (every {%f.0 g.Interval %}s) #</a>
|
|
||||||
<span
|
<span
|
||||||
class="flex-grow-1 d-flex justify-content-end"
|
class="flex-grow-1 d-flex justify-content-end"
|
||||||
role="button"
|
role="button"
|
||||||
data-bs-toggle="collapse"
|
data-bs-toggle="collapse"
|
||||||
data-bs-target="#item-{%s g.ID %}"
|
data-bs-target="#sub-{%s g.ID %}"
|
||||||
>
|
>
|
||||||
<span class="d-flex gap-2">
|
<span class="d-flex gap-2">
|
||||||
{% if g.Unhealthy > 0 %}<span class="badge bg-danger" title="Number of rules with status Error">{%d g.Unhealthy %}</span> {% endif %}
|
{% if g.Unhealthy > 0 %}<span class="badge bg-danger" title="Number of rules with status Error">{%d g.Unhealthy %}</span> {% endif %}
|
||||||
@@ -140,9 +133,9 @@
|
|||||||
class="d-flex flex-column row-gap-2 mb-2"
|
class="d-flex flex-column row-gap-2 mb-2"
|
||||||
role="button"
|
role="button"
|
||||||
data-bs-toggle="collapse"
|
data-bs-toggle="collapse"
|
||||||
data-bs-target="#item-{%s g.ID %}"
|
data-bs-target="#sub-{%s g.ID %}"
|
||||||
>
|
>
|
||||||
<span class="fs-6 text-start vm-group-search w-100 fw-lighter">{%s g.File %}</span>
|
<span class="fs-6 text-start w-100 fw-lighter">{%s g.File %}</span>
|
||||||
{% if len(g.Params) > 0 %}
|
{% if len(g.Params) > 0 %}
|
||||||
<span class="fs-6 text-start w-100 d-flex justify-content-between fw-lighter">
|
<span class="fs-6 text-start w-100 d-flex justify-content-between fw-lighter">
|
||||||
<span>Extra params</span>
|
<span>Extra params</span>
|
||||||
@@ -164,7 +157,7 @@
|
|||||||
</span>
|
</span>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
</span>
|
</span>
|
||||||
<div class="collapse" id="item-{%s g.ID %}">
|
<div class="collapse sub-items" id="sub-{%s g.ID %}">
|
||||||
<table class="table table-striped table-hover table-sm">
|
<table class="table table-striped table-hover table-sm">
|
||||||
<thead>
|
<thead>
|
||||||
<tr>
|
<tr>
|
||||||
@@ -175,7 +168,7 @@
|
|||||||
</thead>
|
</thead>
|
||||||
<tbody>
|
<tbody>
|
||||||
{% for _, r := range g.Rules %}
|
{% for _, r := range g.Rules %}
|
||||||
<tr class="vm-item{% if r.LastError != "" %} alert-danger{% endif %}">
|
<tr class="sub-item{% if r.LastError != "" %} alert-danger{% endif %}">
|
||||||
<td>
|
<td>
|
||||||
<div class="row">
|
<div class="row">
|
||||||
<div class="col-12 mb-2">
|
<div class="col-12 mb-2">
|
||||||
@@ -212,12 +205,7 @@
|
|||||||
</div>
|
</div>
|
||||||
</td>
|
</td>
|
||||||
<td class="text-center">{%d r.LastSamples %}</td>
|
<td class="text-center">{%d r.LastSamples %}</td>
|
||||||
<td class="text-center">{% if r.LastEvaluation.IsZero() %}
|
<td class="text-center">{%f.3 time.Since(r.LastEvaluation).Seconds() %}s ago</td>
|
||||||
Never
|
|
||||||
{% else %}
|
|
||||||
{%f.3 time.Since(r.LastEvaluation).Seconds() %}s ago
|
|
||||||
{% endif %}
|
|
||||||
</td>
|
|
||||||
</tr>
|
</tr>
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
</tbody>
|
</tbody>
|
||||||
@@ -234,7 +222,7 @@
|
|||||||
{% endfunc %}
|
{% endfunc %}
|
||||||
|
|
||||||
|
|
||||||
{% func ListAlerts(r *http.Request, groupAlerts []rule.GroupAlerts) %}
|
{% func ListAlerts(r *http.Request, groupAlerts []groupAlerts) %}
|
||||||
{%code prefix := vmalertutil.Prefix(r.URL.Path) %}
|
{%code prefix := vmalertutil.Prefix(r.URL.Path) %}
|
||||||
{%= tpl.Header(r, navItems, "Alerts", getLastConfigError()) %}
|
{%= tpl.Header(r, navItems, "Alerts", getLastConfigError()) %}
|
||||||
{%= Controls(prefix, "", "", nil, nil, true) %}
|
{%= Controls(prefix, "", "", nil, nil, true) %}
|
||||||
@@ -243,7 +231,7 @@
|
|||||||
{%code
|
{%code
|
||||||
g := ga.Group
|
g := ga.Group
|
||||||
var keys []string
|
var keys []string
|
||||||
alertsByRule := make(map[string][]*rule.ApiAlert)
|
alertsByRule := make(map[string][]*apiAlert)
|
||||||
for _, alert := range ga.Alerts {
|
for _, alert := range ga.Alerts {
|
||||||
if len(alertsByRule[alert.RuleID]) < 1 {
|
if len(alertsByRule[alert.RuleID]) < 1 {
|
||||||
keys = append(keys, alert.RuleID)
|
keys = append(keys, alert.RuleID)
|
||||||
@@ -252,14 +240,14 @@
|
|||||||
}
|
}
|
||||||
sort.Strings(keys)
|
sort.Strings(keys)
|
||||||
%}
|
%}
|
||||||
<div class="w-100 flex-column vm-group alert-danger">
|
<div class="d-flex w-100 flex-column group-items alert-danger">
|
||||||
<span id="group-{%s g.ID %}" class="d-flex justify-content-between">
|
<span id="group-{%s g.ID %}" class="d-flex justify-content-between">
|
||||||
<a href="#group-{%s g.ID %}">{%s g.Name %}{% if g.Type != "prometheus" %} ({%s g.Type %}){% endif %}</a>
|
<a href="#group-{%s g.ID %}">{%s g.Name %}{% if g.Type != "prometheus" %} ({%s g.Type %}){% endif %}</a>
|
||||||
<span
|
<span
|
||||||
class="flex-grow-1 d-flex justify-content-end"
|
class="flex-grow-1 d-flex justify-content-end"
|
||||||
role="button"
|
role="button"
|
||||||
data-bs-toggle="collapse"
|
data-bs-toggle="collapse"
|
||||||
data-bs-target="#item-{%s g.ID %}"
|
data-bs-target="#sub-{%s g.ID %}"
|
||||||
>
|
>
|
||||||
<span class="badge bg-danger" title="Number of active alerts">{%d len(ga.Alerts) %}</span>
|
<span class="badge bg-danger" title="Number of active alerts">{%d len(ga.Alerts) %}</span>
|
||||||
</span>
|
</span>
|
||||||
@@ -269,10 +257,10 @@
|
|||||||
class="fs-6 text-start w-100 fw-lighter"
|
class="fs-6 text-start w-100 fw-lighter"
|
||||||
role="button"
|
role="button"
|
||||||
data-bs-toggle="collapse"
|
data-bs-toggle="collapse"
|
||||||
data-bs-target="#item-{%s g.ID %}"
|
data-bs-target="#sub-{%s g.ID %}"
|
||||||
>{%s g.File %}</span>
|
>{%s g.File %}</span>
|
||||||
</span>
|
</span>
|
||||||
<div class="collapse" id="item-{%s g.ID %}">
|
<div class="collapse sub-items" id="sub-{%s g.ID %}">
|
||||||
{% for _, ruleID := range keys %}
|
{% for _, ruleID := range keys %}
|
||||||
{%code
|
{%code
|
||||||
defaultAR := alertsByRule[ruleID][0]
|
defaultAR := alertsByRule[ruleID][0]
|
||||||
@@ -283,7 +271,7 @@
|
|||||||
sort.Strings(labelKeys)
|
sort.Strings(labelKeys)
|
||||||
%}
|
%}
|
||||||
<br>
|
<br>
|
||||||
<div class="vm-item">
|
<div class="sub-item">
|
||||||
<b>alert:</b> {%s defaultAR.Name %} ({%d len(alertsByRule[ruleID]) %})
|
<b>alert:</b> {%s defaultAR.Name %} ({%d len(alertsByRule[ruleID]) %})
|
||||||
| <span><a target="_blank" href="{%s defaultAR.SourceLink %}">Source</a></span>
|
| <span><a target="_blank" href="{%s defaultAR.SourceLink %}">Source</a></span>
|
||||||
<br>
|
<br>
|
||||||
@@ -348,20 +336,20 @@
|
|||||||
typeK, ns := keys[i], targets[notifier.TargetType(keys[i])]
|
typeK, ns := keys[i], targets[notifier.TargetType(keys[i])]
|
||||||
count := len(ns)
|
count := len(ns)
|
||||||
%}
|
%}
|
||||||
<div class="w-100 flex-column vm-group">
|
<div class="d-flex w-100 flex-column group-items">
|
||||||
<span class="d-flex justify-content-between" id="group-{%s typeK %}">
|
<span class="d-flex justify-content-between" id="group-{%s typeK %}">
|
||||||
<a href="#group-{%s typeK %}">{%s typeK %} ({%d count %})</a>
|
<a href="#group-{%s typeK %}">{%s typeK %} ({%d count %})</a>
|
||||||
<span
|
<span
|
||||||
class="flex-grow-1"
|
class="flex-grow-1"
|
||||||
role="button"
|
role="button"
|
||||||
data-bs-toggle="collapse"
|
data-bs-toggle="collapse"
|
||||||
data-bs-target="#item-{%s typeK %}"
|
data-bs-target="#sub-{%s typeK %}"
|
||||||
></span>
|
></span>
|
||||||
</span>
|
</span>
|
||||||
<div id="item-{%s typeK %}" class="collapse show">
|
<div id="sub-{%s typeK %}" class="collapse show sub-items">
|
||||||
<table class="table table-striped table-hover table-sm">
|
<table class="table table-striped table-hover table-sm">
|
||||||
<thead>
|
<thead>
|
||||||
<tr class="vm-item">
|
<tr class="sub-item">
|
||||||
<th scope="col">Labels</th>
|
<th scope="col">Labels</th>
|
||||||
<th scope="col">Address</th>
|
<th scope="col">Address</th>
|
||||||
</tr>
|
</tr>
|
||||||
@@ -390,7 +378,7 @@
|
|||||||
{%= tpl.Footer(r) %}
|
{%= tpl.Footer(r) %}
|
||||||
{% endfunc %}
|
{% endfunc %}
|
||||||
|
|
||||||
{% func Alert(r *http.Request, alert *rule.ApiAlert) %}
|
{% func Alert(r *http.Request, alert *apiAlert) %}
|
||||||
{%code prefix := vmalertutil.Prefix(r.URL.Path) %}
|
{%code prefix := vmalertutil.Prefix(r.URL.Path) %}
|
||||||
{%= tpl.Header(r, navItems, "", getLastConfigError()) %}
|
{%= tpl.Header(r, navItems, "", getLastConfigError()) %}
|
||||||
{%code
|
{%code
|
||||||
@@ -446,7 +434,7 @@
|
|||||||
<div class="col">
|
<div class="col">
|
||||||
{% for _, k := range annotationKeys %}
|
{% for _, k := range annotationKeys %}
|
||||||
<b>{%s k %}:</b><br>
|
<b>{%s k %}:</b><br>
|
||||||
<p class="annotations">{%s alert.Annotations[k] %}</p>
|
<p>{%s alert.Annotations[k] %}</p>
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
@@ -476,7 +464,7 @@
|
|||||||
{% endfunc %}
|
{% endfunc %}
|
||||||
|
|
||||||
|
|
||||||
{% func RuleDetails(r *http.Request, rule rule.ApiRule) %}
|
{% func RuleDetails(r *http.Request, rule apiRule) %}
|
||||||
{%code prefix := vmalertutil.Prefix(r.URL.Path) %}
|
{%code prefix := vmalertutil.Prefix(r.URL.Path) %}
|
||||||
{%= tpl.Header(r, navItems, "", getLastConfigError()) %}
|
{%= tpl.Header(r, navItems, "", getLastConfigError()) %}
|
||||||
{%code
|
{%code
|
||||||
@@ -560,7 +548,7 @@
|
|||||||
<div class="col">
|
<div class="col">
|
||||||
{% for _, k := range annotationKeys %}
|
{% for _, k := range annotationKeys %}
|
||||||
<b>{%s k %}:</b><br>
|
<b>{%s k %}:</b><br>
|
||||||
<p class="annotations">{%s rule.Annotations[k] %}</p>
|
<p>{%s rule.Annotations[k] %}</p>
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
@@ -605,11 +593,11 @@
|
|||||||
<table class="table table-striped table-hover table-sm">
|
<table class="table table-striped table-hover table-sm">
|
||||||
<thead>
|
<thead>
|
||||||
<tr>
|
<tr>
|
||||||
<th scope="col" title="The time when the rule was executed">Updated at</th>
|
<th scope="col" title="The time when event was created">Updated at</th>
|
||||||
<th scope="col" class="w-10 text-center" title="How many series expression returns. Each series will represent an alert.">Series returned</th>
|
<th scope="col" class="w-10 text-center" title="How many series expression returns. Each series will represent an alert.">Series returned</th>
|
||||||
{% if seriesFetchedEnabled %}<th scope="col" class="w-10 text-center" title="How many series were scanned by datasource during the evaluation">Series fetched</th>{% endif %}
|
{% if seriesFetchedEnabled %}<th scope="col" class="w-10 text-center" title="How many series were scanned by datasource during the evaluation">Series fetched</th>{% endif %}
|
||||||
<th scope="col" class="w-10 text-center" title="How many seconds request took">Duration</th>
|
<th scope="col" class="w-10 text-center" title="How many seconds request took">Duration</th>
|
||||||
<th scope="col" class="text-center" title="The time used in execution query request">Execution timestamp</th>
|
<th scope="col" class="text-center" title="Time used for rule execution">Executed at</th>
|
||||||
<th scope="col" class="text-center" title="cURL command with request example">cURL</th>
|
<th scope="col" class="text-center" title="cURL command with request example">cURL</th>
|
||||||
</tr>
|
</tr>
|
||||||
</thead>
|
</thead>
|
||||||
@@ -661,7 +649,7 @@
|
|||||||
<span class="badge bg-warning text-dark" title="This firing state is kept because of `keep_firing_for`">stabilizing</span>
|
<span class="badge bg-warning text-dark" title="This firing state is kept because of `keep_firing_for`">stabilizing</span>
|
||||||
{% endfunc %}
|
{% endfunc %}
|
||||||
|
|
||||||
{% func seriesFetchedWarn(prefix string, r rule.ApiRule) %}
|
{% func seriesFetchedWarn(prefix string, r apiRule) %}
|
||||||
{% if isNoMatch(r) %}
|
{% if isNoMatch(r) %}
|
||||||
<svg
|
<svg
|
||||||
data-bs-toggle="tooltip"
|
data-bs-toggle="tooltip"
|
||||||
@@ -675,7 +663,7 @@
|
|||||||
{% endfunc %}
|
{% endfunc %}
|
||||||
|
|
||||||
{%code
|
{%code
|
||||||
func isNoMatch (r rule.ApiRule) bool {
|
func isNoMatch (r apiRule) bool {
|
||||||
return r.LastSamples == 0 && r.LastSeriesFetched != nil && *r.LastSeriesFetched == 0
|
return r.LastSamples == 0 && r.LastSeriesFetched != nil && *r.LastSeriesFetched == 0
|
||||||
}
|
}
|
||||||
%}
|
%}
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -23,12 +23,8 @@ func TestHandler(t *testing.T) {
|
|||||||
Timestamps: []int64{0},
|
Timestamps: []int64{0},
|
||||||
})
|
})
|
||||||
m := &manager{groups: map[uint64]*rule.Group{}}
|
m := &manager{groups: map[uint64]*rule.Group{}}
|
||||||
_, cleanup := notifier.InitFakeNotifier()
|
|
||||||
defer cleanup()
|
|
||||||
|
|
||||||
var ar *rule.AlertingRule
|
var ar *rule.AlertingRule
|
||||||
var rr *rule.RecordingRule
|
var rr *rule.RecordingRule
|
||||||
var groupIDs []uint64
|
|
||||||
for _, dsType := range []string{"prometheus", "", "graphite"} {
|
for _, dsType := range []string{"prometheus", "", "graphite"} {
|
||||||
g := rule.NewGroup(config.Group{
|
g := rule.NewGroup(config.Group{
|
||||||
Name: "group",
|
Name: "group",
|
||||||
@@ -48,10 +44,8 @@ func TestHandler(t *testing.T) {
|
|||||||
}, fq, 1*time.Minute, nil)
|
}, fq, 1*time.Minute, nil)
|
||||||
ar = g.Rules[0].(*rule.AlertingRule)
|
ar = g.Rules[0].(*rule.AlertingRule)
|
||||||
rr = g.Rules[1].(*rule.RecordingRule)
|
rr = g.Rules[1].(*rule.RecordingRule)
|
||||||
g.ExecOnce(context.Background(), nil, time.Time{})
|
g.ExecOnce(context.Background(), func() []notifier.Notifier { return nil }, nil, time.Time{})
|
||||||
id := g.CreateID()
|
m.groups[g.CreateID()] = g
|
||||||
m.groups[id] = g
|
|
||||||
groupIDs = append(groupIDs, id)
|
|
||||||
}
|
}
|
||||||
rh := &requestHandler{m: m}
|
rh := &requestHandler{m: m}
|
||||||
|
|
||||||
@@ -88,22 +82,22 @@ func TestHandler(t *testing.T) {
|
|||||||
})
|
})
|
||||||
|
|
||||||
t.Run("/vmalert/rule", func(t *testing.T) {
|
t.Run("/vmalert/rule", func(t *testing.T) {
|
||||||
a := ar.ToAPI()
|
a := ruleToAPI(ar)
|
||||||
getResp(t, ts.URL+"/vmalert/"+a.WebLink(), nil, 200)
|
getResp(t, ts.URL+"/vmalert/"+a.WebLink(), nil, 200)
|
||||||
r := rr.ToAPI()
|
r := ruleToAPI(rr)
|
||||||
getResp(t, ts.URL+"/vmalert/"+r.WebLink(), nil, 200)
|
getResp(t, ts.URL+"/vmalert/"+r.WebLink(), nil, 200)
|
||||||
})
|
})
|
||||||
t.Run("/vmalert/alert", func(t *testing.T) {
|
t.Run("/vmalert/alert", func(t *testing.T) {
|
||||||
alerts := ar.AlertsToAPI()
|
alerts := ruleToAPIAlert(ar)
|
||||||
for _, a := range alerts {
|
for _, a := range alerts {
|
||||||
getResp(t, ts.URL+"/vmalert/"+a.WebLink(), nil, 200)
|
getResp(t, ts.URL+"/vmalert/"+a.WebLink(), nil, 200)
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
t.Run("/vmalert/rule?badParam", func(t *testing.T) {
|
t.Run("/vmalert/rule?badParam", func(t *testing.T) {
|
||||||
params := fmt.Sprintf("?%s=0&%s=1", rule.ParamGroupID, rule.ParamRuleID)
|
params := fmt.Sprintf("?%s=0&%s=1", paramGroupID, paramRuleID)
|
||||||
getResp(t, ts.URL+"/vmalert/rule"+params, nil, 404)
|
getResp(t, ts.URL+"/vmalert/rule"+params, nil, 404)
|
||||||
|
|
||||||
params = fmt.Sprintf("?%s=1&%s=0", rule.ParamGroupID, rule.ParamRuleID)
|
params = fmt.Sprintf("?%s=1&%s=0", paramGroupID, paramRuleID)
|
||||||
getResp(t, ts.URL+"/vmalert/rule"+params, nil, 404)
|
getResp(t, ts.URL+"/vmalert/rule"+params, nil, 404)
|
||||||
})
|
})
|
||||||
|
|
||||||
@@ -130,14 +124,14 @@ func TestHandler(t *testing.T) {
|
|||||||
}
|
}
|
||||||
})
|
})
|
||||||
t.Run("/api/v1/alert?alertID&groupID", func(t *testing.T) {
|
t.Run("/api/v1/alert?alertID&groupID", func(t *testing.T) {
|
||||||
expAlert := rule.NewAlertAPI(ar, ar.GetAlerts()[0])
|
expAlert := newAlertAPI(ar, ar.GetAlerts()[0])
|
||||||
alert := &rule.ApiAlert{}
|
alert := &apiAlert{}
|
||||||
getResp(t, ts.URL+"/"+expAlert.APILink(), alert, 200)
|
getResp(t, ts.URL+"/"+expAlert.APILink(), alert, 200)
|
||||||
if !reflect.DeepEqual(alert, expAlert) {
|
if !reflect.DeepEqual(alert, expAlert) {
|
||||||
t.Fatalf("expected %v is equal to %v", alert, expAlert)
|
t.Fatalf("expected %v is equal to %v", alert, expAlert)
|
||||||
}
|
}
|
||||||
|
|
||||||
alert = &rule.ApiAlert{}
|
alert = &apiAlert{}
|
||||||
getResp(t, ts.URL+"/vmalert/"+expAlert.APILink(), alert, 200)
|
getResp(t, ts.URL+"/vmalert/"+expAlert.APILink(), alert, 200)
|
||||||
if !reflect.DeepEqual(alert, expAlert) {
|
if !reflect.DeepEqual(alert, expAlert) {
|
||||||
t.Fatalf("expected %v is equal to %v", alert, expAlert)
|
t.Fatalf("expected %v is equal to %v", alert, expAlert)
|
||||||
@@ -145,16 +139,16 @@ func TestHandler(t *testing.T) {
|
|||||||
})
|
})
|
||||||
|
|
||||||
t.Run("/api/v1/alert?badParams", func(t *testing.T) {
|
t.Run("/api/v1/alert?badParams", func(t *testing.T) {
|
||||||
params := fmt.Sprintf("?%s=0&%s=1", rule.ParamGroupID, rule.ParamAlertID)
|
params := fmt.Sprintf("?%s=0&%s=1", paramGroupID, paramAlertID)
|
||||||
getResp(t, ts.URL+"/api/v1/alert"+params, nil, 404)
|
getResp(t, ts.URL+"/api/v1/alert"+params, nil, 404)
|
||||||
getResp(t, ts.URL+"/vmalert/api/v1/alert"+params, nil, 404)
|
getResp(t, ts.URL+"/vmalert/api/v1/alert"+params, nil, 404)
|
||||||
|
|
||||||
params = fmt.Sprintf("?%s=1&%s=0", rule.ParamGroupID, rule.ParamAlertID)
|
params = fmt.Sprintf("?%s=1&%s=0", paramGroupID, paramAlertID)
|
||||||
getResp(t, ts.URL+"/api/v1/alert"+params, nil, 404)
|
getResp(t, ts.URL+"/api/v1/alert"+params, nil, 404)
|
||||||
getResp(t, ts.URL+"/vmalert/api/v1/alert"+params, nil, 404)
|
getResp(t, ts.URL+"/vmalert/api/v1/alert"+params, nil, 404)
|
||||||
|
|
||||||
// bad request, alertID is missing
|
// bad request, alertID is missing
|
||||||
params = fmt.Sprintf("?%s=1", rule.ParamGroupID)
|
params = fmt.Sprintf("?%s=1", paramGroupID)
|
||||||
getResp(t, ts.URL+"/api/v1/alert"+params, nil, 400)
|
getResp(t, ts.URL+"/api/v1/alert"+params, nil, 400)
|
||||||
getResp(t, ts.URL+"/vmalert/api/v1/alert"+params, nil, 400)
|
getResp(t, ts.URL+"/vmalert/api/v1/alert"+params, nil, 400)
|
||||||
})
|
})
|
||||||
@@ -173,42 +167,27 @@ func TestHandler(t *testing.T) {
|
|||||||
}
|
}
|
||||||
})
|
})
|
||||||
t.Run("/api/v1/rule?ruleID&groupID", func(t *testing.T) {
|
t.Run("/api/v1/rule?ruleID&groupID", func(t *testing.T) {
|
||||||
expRule := ar.ToAPI()
|
expRule := ruleToAPI(ar)
|
||||||
gotRule := rule.ApiRule{}
|
gotRule := apiRule{}
|
||||||
getResp(t, ts.URL+"/"+expRule.APILink(), &gotRule, 200)
|
getResp(t, ts.URL+"/"+expRule.APILink(), &gotRule, 200)
|
||||||
|
|
||||||
if expRule.ID != gotRule.ID {
|
if expRule.ID != gotRule.ID {
|
||||||
t.Fatalf("expected to get Rule %q; got %q instead", expRule.ID, gotRule.ID)
|
t.Fatalf("expected to get Rule %q; got %q instead", expRule.ID, gotRule.ID)
|
||||||
}
|
}
|
||||||
|
|
||||||
gotRule = rule.ApiRule{}
|
gotRule = apiRule{}
|
||||||
getResp(t, ts.URL+"/vmalert/"+expRule.APILink(), &gotRule, 200)
|
getResp(t, ts.URL+"/vmalert/"+expRule.APILink(), &gotRule, 200)
|
||||||
|
|
||||||
if expRule.ID != gotRule.ID {
|
if expRule.ID != gotRule.ID {
|
||||||
t.Fatalf("expected to get Rule %q; got %q instead", expRule.ID, gotRule.ID)
|
t.Fatalf("expected to get Rule %q; got %q instead", expRule.ID, gotRule.ID)
|
||||||
}
|
}
|
||||||
|
|
||||||
gotRuleWithUpdates := rule.ApiRuleWithUpdates{}
|
gotRuleWithUpdates := apiRuleWithUpdates{}
|
||||||
getResp(t, ts.URL+"/"+expRule.APILink(), &gotRuleWithUpdates, 200)
|
getResp(t, ts.URL+"/"+expRule.APILink(), &gotRuleWithUpdates, 200)
|
||||||
if len(gotRuleWithUpdates.StateUpdates) < 1 {
|
if len(gotRuleWithUpdates.StateUpdates) < 1 {
|
||||||
t.Fatalf("expected %+v to have state updates field not empty", gotRuleWithUpdates.StateUpdates)
|
t.Fatalf("expected %+v to have state updates field not empty", gotRuleWithUpdates.StateUpdates)
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
t.Run("/api/v1/group?groupID", func(t *testing.T) {
|
|
||||||
id := groupIDs[0]
|
|
||||||
g := m.groups[id]
|
|
||||||
expGroup := g.ToAPI()
|
|
||||||
gotGroup := rule.ApiGroup{}
|
|
||||||
getResp(t, ts.URL+"/"+expGroup.APILink(), &gotGroup, 200)
|
|
||||||
if expGroup.ID != gotGroup.ID {
|
|
||||||
t.Fatalf("expected to get Group %q; got %q instead", expGroup.ID, gotGroup.ID)
|
|
||||||
}
|
|
||||||
gotGroup = rule.ApiGroup{}
|
|
||||||
getResp(t, ts.URL+"/vmalert/"+expGroup.APILink(), &gotGroup, 200)
|
|
||||||
if expGroup.ID != gotGroup.ID {
|
|
||||||
t.Fatalf("expected to get Group %q; got %q instead", expGroup.ID, gotGroup.ID)
|
|
||||||
}
|
|
||||||
})
|
|
||||||
|
|
||||||
t.Run("/api/v1/rules&filters", func(t *testing.T) {
|
t.Run("/api/v1/rules&filters", func(t *testing.T) {
|
||||||
check := func(url string, statusCode, expGroups, expRules int) {
|
check := func(url string, statusCode, expGroups, expRules int) {
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
package rule
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
@@ -8,28 +8,79 @@ import (
|
|||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
|
||||||
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/rule"
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
// ParamGroupID is group id key in url parameter
|
// paramGroupID is group id key in url parameter
|
||||||
ParamGroupID = "group_id"
|
paramGroupID = "group_id"
|
||||||
// ParamAlertID is alert id key in url parameter
|
// paramAlertID is alert id key in url parameter
|
||||||
ParamAlertID = "alert_id"
|
paramAlertID = "alert_id"
|
||||||
// ParamRuleID is rule id key in url parameter
|
// paramRuleID is rule id key in url parameter
|
||||||
ParamRuleID = "rule_id"
|
paramRuleID = "rule_id"
|
||||||
|
|
||||||
// TypeRecording is a RecordingRule type
|
|
||||||
TypeRecording = "recording"
|
|
||||||
// TypeAlerting is an AlertingRule type
|
|
||||||
TypeAlerting = "alerting"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
// ApiGroup represents a Group for web view
|
type apiNotifier struct {
|
||||||
type ApiGroup struct {
|
Kind string `json:"kind"`
|
||||||
|
Targets []*apiTarget `json:"targets"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type apiTarget struct {
|
||||||
|
Address string `json:"address"`
|
||||||
|
Labels map[string]string `json:"labels"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// apiAlert represents a notifier.AlertingRule state
|
||||||
|
// for WEB view
|
||||||
|
// https://github.com/prometheus/compliance/blob/main/alert_generator/specification.md#get-apiv1rules
|
||||||
|
type apiAlert struct {
|
||||||
|
State string `json:"state"`
|
||||||
|
Name string `json:"name"`
|
||||||
|
Value string `json:"value"`
|
||||||
|
Labels map[string]string `json:"labels,omitempty"`
|
||||||
|
Annotations map[string]string `json:"annotations"`
|
||||||
|
ActiveAt time.Time `json:"activeAt"`
|
||||||
|
|
||||||
|
// Additional fields
|
||||||
|
|
||||||
|
// ID is an unique Alert's ID within a group
|
||||||
|
ID string `json:"id"`
|
||||||
|
// RuleID is an unique Rule's ID within a group
|
||||||
|
RuleID string `json:"rule_id"`
|
||||||
|
// GroupID is an unique Group's ID
|
||||||
|
GroupID string `json:"group_id"`
|
||||||
|
// Expression contains the PromQL/MetricsQL expression
|
||||||
|
// for Rule's evaluation
|
||||||
|
Expression string `json:"expression"`
|
||||||
|
// SourceLink contains a link to a system which should show
|
||||||
|
// why Alert was generated
|
||||||
|
SourceLink string `json:"source"`
|
||||||
|
// Restored shows whether Alert's state was restored on restart
|
||||||
|
Restored bool `json:"restored"`
|
||||||
|
// Stabilizing shows when firing state is kept because of
|
||||||
|
// `keep_firing_for` instead of real alert
|
||||||
|
Stabilizing bool `json:"stabilizing"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// WebLink returns a link to the alert which can be used in UI.
|
||||||
|
func (aa *apiAlert) WebLink() string {
|
||||||
|
return fmt.Sprintf("alert?%s=%s&%s=%s",
|
||||||
|
paramGroupID, aa.GroupID, paramAlertID, aa.ID)
|
||||||
|
}
|
||||||
|
|
||||||
|
// APILink returns a link to the alert's JSON representation.
|
||||||
|
func (aa *apiAlert) APILink() string {
|
||||||
|
return fmt.Sprintf("api/v1/alert?%s=%s&%s=%s",
|
||||||
|
paramGroupID, aa.GroupID, paramAlertID, aa.ID)
|
||||||
|
}
|
||||||
|
|
||||||
|
// apiGroup represents Group for web view
|
||||||
|
// https://github.com/prometheus/compliance/blob/main/alert_generator/specification.md#get-apiv1rules
|
||||||
|
type apiGroup struct {
|
||||||
// Name is the group name as present in the config
|
// Name is the group name as present in the config
|
||||||
Name string `json:"name"`
|
Name string `json:"name"`
|
||||||
// Rules contains both recording and alerting rules
|
// Rules contains both recording and alerting rules
|
||||||
Rules []ApiRule `json:"rules"`
|
Rules []apiRule `json:"rules"`
|
||||||
// Interval is the Group's evaluation interval in float seconds as present in the file.
|
// Interval is the Group's evaluation interval in float seconds as present in the file.
|
||||||
Interval float64 `json:"interval"`
|
Interval float64 `json:"interval"`
|
||||||
// LastEvaluation is the timestamp of the last time the Group was executed
|
// LastEvaluation is the timestamp of the last time the Group was executed
|
||||||
@@ -65,20 +116,15 @@ type ApiGroup struct {
|
|||||||
NoMatch int
|
NoMatch int
|
||||||
}
|
}
|
||||||
|
|
||||||
// APILink returns a link to the group's JSON representation.
|
// groupAlerts represents a group of alerts for WEB view
|
||||||
func (ag *ApiGroup) APILink() string {
|
type groupAlerts struct {
|
||||||
return fmt.Sprintf("api/v1/group?%s=%s", ParamGroupID, ag.ID)
|
Group *apiGroup
|
||||||
|
Alerts []*apiAlert
|
||||||
}
|
}
|
||||||
|
|
||||||
// GroupAlerts represents a Group with its Alerts for web view
|
// apiRule represents a Rule for web view
|
||||||
type GroupAlerts struct {
|
|
||||||
Group *ApiGroup
|
|
||||||
Alerts []*ApiAlert
|
|
||||||
}
|
|
||||||
|
|
||||||
// ApiRule represents a Rule for web view
|
|
||||||
// see https://github.com/prometheus/compliance/blob/main/alert_generator/specification.md#get-apiv1rules
|
// see https://github.com/prometheus/compliance/blob/main/alert_generator/specification.md#get-apiv1rules
|
||||||
type ApiRule struct {
|
type apiRule struct {
|
||||||
// State must be one of these under following scenarios
|
// State must be one of these under following scenarios
|
||||||
// "pending": at least 1 alert in the rule in pending state and no other alert in firing ruleState.
|
// "pending": at least 1 alert in the rule in pending state and no other alert in firing ruleState.
|
||||||
// "firing": at least 1 alert in the rule in firing state.
|
// "firing": at least 1 alert in the rule in firing state.
|
||||||
@@ -100,7 +146,7 @@ type ApiRule struct {
|
|||||||
// LastEvaluation is the timestamp of the last time the rule was executed
|
// LastEvaluation is the timestamp of the last time the rule was executed
|
||||||
LastEvaluation time.Time `json:"lastEvaluation"`
|
LastEvaluation time.Time `json:"lastEvaluation"`
|
||||||
// Alerts is the list of all the alerts in this rule that are currently pending or firing
|
// Alerts is the list of all the alerts in this rule that are currently pending or firing
|
||||||
Alerts []*ApiAlert `json:"alerts,omitempty"`
|
Alerts []*apiAlert `json:"alerts,omitempty"`
|
||||||
// Health is the health of rule evaluation.
|
// Health is the health of rule evaluation.
|
||||||
// It MUST be one of "ok", "err", "unknown"
|
// It MUST be one of "ok", "err", "unknown"
|
||||||
Health string `json:"health"`
|
Health string `json:"health"`
|
||||||
@@ -131,87 +177,143 @@ type ApiRule struct {
|
|||||||
// MaxUpdates is the max number of recorded ruleStateEntry objects
|
// MaxUpdates is the max number of recorded ruleStateEntry objects
|
||||||
MaxUpdates int `json:"max_updates_entries"`
|
MaxUpdates int `json:"max_updates_entries"`
|
||||||
// Updates contains the ordered list of recorded ruleStateEntry objects
|
// Updates contains the ordered list of recorded ruleStateEntry objects
|
||||||
Updates []StateEntry `json:"-"`
|
Updates []rule.StateEntry `json:"-"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// ApiAlert represents a notifier.AlertingRule state
|
// apiRuleWithUpdates represents apiRule but with extra fields for marshalling
|
||||||
// for WEB view
|
type apiRuleWithUpdates struct {
|
||||||
// https://github.com/prometheus/compliance/blob/main/alert_generator/specification.md#get-apiv1rules
|
apiRule
|
||||||
type ApiAlert struct {
|
|
||||||
State string `json:"state"`
|
|
||||||
Name string `json:"name"`
|
|
||||||
Value string `json:"value"`
|
|
||||||
Labels map[string]string `json:"labels,omitempty"`
|
|
||||||
Annotations map[string]string `json:"annotations"`
|
|
||||||
ActiveAt time.Time `json:"activeAt"`
|
|
||||||
|
|
||||||
// Additional fields
|
|
||||||
|
|
||||||
// ID is an unique Alert's ID within a group
|
|
||||||
ID string `json:"id"`
|
|
||||||
// RuleID is an unique Rule's ID within a group
|
|
||||||
RuleID string `json:"rule_id"`
|
|
||||||
// GroupID is an unique Group's ID
|
|
||||||
GroupID string `json:"group_id"`
|
|
||||||
// Expression contains the PromQL/MetricsQL expression
|
|
||||||
// for Rule's evaluation
|
|
||||||
Expression string `json:"expression"`
|
|
||||||
// SourceLink contains a link to a system which should show
|
|
||||||
// why Alert was generated
|
|
||||||
SourceLink string `json:"source"`
|
|
||||||
// Restored shows whether Alert's state was restored on restart
|
|
||||||
Restored bool `json:"restored"`
|
|
||||||
// Stabilizing shows when firing state is kept because of
|
|
||||||
// `keep_firing_for` instead of real alert
|
|
||||||
Stabilizing bool `json:"stabilizing"`
|
|
||||||
}
|
|
||||||
|
|
||||||
// WebLink returns a link to the alert which can be used in UI.
|
|
||||||
func (aa *ApiAlert) WebLink() string {
|
|
||||||
return fmt.Sprintf("alert?%s=%s&%s=%s",
|
|
||||||
ParamGroupID, aa.GroupID, ParamAlertID, aa.ID)
|
|
||||||
}
|
|
||||||
|
|
||||||
// APILink returns a link to the alert's JSON representation.
|
|
||||||
func (aa *ApiAlert) APILink() string {
|
|
||||||
return fmt.Sprintf("api/v1/alert?%s=%s&%s=%s",
|
|
||||||
ParamGroupID, aa.GroupID, ParamAlertID, aa.ID)
|
|
||||||
}
|
|
||||||
|
|
||||||
// ApiRuleWithUpdates represents ApiRule but with extra fields for marshalling
|
|
||||||
type ApiRuleWithUpdates struct {
|
|
||||||
ApiRule
|
|
||||||
// Updates contains the ordered list of recorded ruleStateEntry objects
|
// Updates contains the ordered list of recorded ruleStateEntry objects
|
||||||
StateUpdates []StateEntry `json:"updates,omitempty"`
|
StateUpdates []rule.StateEntry `json:"updates,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// APILink returns a link to the rule's JSON representation.
|
// APILink returns a link to the rule's JSON representation.
|
||||||
func (ar ApiRule) APILink() string {
|
func (ar apiRule) APILink() string {
|
||||||
return fmt.Sprintf("api/v1/rule?%s=%s&%s=%s",
|
return fmt.Sprintf("api/v1/rule?%s=%s&%s=%s",
|
||||||
ParamGroupID, ar.GroupID, ParamRuleID, ar.ID)
|
paramGroupID, ar.GroupID, paramRuleID, ar.ID)
|
||||||
}
|
}
|
||||||
|
|
||||||
// WebLink returns a link to the alert which can be used in UI.
|
// WebLink returns a link to the alert which can be used in UI.
|
||||||
func (ar ApiRule) WebLink() string {
|
func (ar apiRule) WebLink() string {
|
||||||
return fmt.Sprintf("rule?%s=%s&%s=%s",
|
return fmt.Sprintf("rule?%s=%s&%s=%s",
|
||||||
ParamGroupID, ar.GroupID, ParamRuleID, ar.ID)
|
paramGroupID, ar.GroupID, paramRuleID, ar.ID)
|
||||||
}
|
}
|
||||||
|
|
||||||
// AlertsToAPI returns list of ApiAlert objects from existing alerts
|
func ruleToAPI(r any) apiRule {
|
||||||
func (ar *AlertingRule) AlertsToAPI() []*ApiAlert {
|
if ar, ok := r.(*rule.AlertingRule); ok {
|
||||||
var alerts []*ApiAlert
|
return alertingToAPI(ar)
|
||||||
|
}
|
||||||
|
if rr, ok := r.(*rule.RecordingRule); ok {
|
||||||
|
return recordingToAPI(rr)
|
||||||
|
}
|
||||||
|
return apiRule{}
|
||||||
|
}
|
||||||
|
|
||||||
|
const (
|
||||||
|
ruleTypeRecording = "recording"
|
||||||
|
ruleTypeAlerting = "alerting"
|
||||||
|
)
|
||||||
|
|
||||||
|
func recordingToAPI(rr *rule.RecordingRule) apiRule {
|
||||||
|
lastState := rule.GetLastEntry(rr)
|
||||||
|
r := apiRule{
|
||||||
|
Type: ruleTypeRecording,
|
||||||
|
DatasourceType: rr.Type.String(),
|
||||||
|
Name: rr.Name,
|
||||||
|
Query: rr.Expr,
|
||||||
|
Labels: rr.Labels,
|
||||||
|
LastEvaluation: lastState.Time,
|
||||||
|
EvaluationTime: lastState.Duration.Seconds(),
|
||||||
|
Health: "ok",
|
||||||
|
LastSamples: lastState.Samples,
|
||||||
|
LastSeriesFetched: lastState.SeriesFetched,
|
||||||
|
MaxUpdates: rule.GetRuleStateSize(rr),
|
||||||
|
Updates: rule.GetAllRuleState(rr),
|
||||||
|
|
||||||
|
// encode as strings to avoid rounding
|
||||||
|
ID: fmt.Sprintf("%d", rr.ID()),
|
||||||
|
GroupID: fmt.Sprintf("%d", rr.GroupID),
|
||||||
|
GroupName: rr.GroupName,
|
||||||
|
File: rr.File,
|
||||||
|
}
|
||||||
|
if lastState.Err != nil {
|
||||||
|
r.LastError = lastState.Err.Error()
|
||||||
|
r.Health = "err"
|
||||||
|
}
|
||||||
|
return r
|
||||||
|
}
|
||||||
|
|
||||||
|
// alertingToAPI returns Rule representation in form of apiRule
|
||||||
|
func alertingToAPI(ar *rule.AlertingRule) apiRule {
|
||||||
|
lastState := rule.GetLastEntry(ar)
|
||||||
|
r := apiRule{
|
||||||
|
Type: ruleTypeAlerting,
|
||||||
|
DatasourceType: ar.Type.String(),
|
||||||
|
Name: ar.Name,
|
||||||
|
Query: ar.Expr,
|
||||||
|
Duration: ar.For.Seconds(),
|
||||||
|
KeepFiringFor: ar.KeepFiringFor.Seconds(),
|
||||||
|
Labels: ar.Labels,
|
||||||
|
Annotations: ar.Annotations,
|
||||||
|
LastEvaluation: lastState.Time,
|
||||||
|
EvaluationTime: lastState.Duration.Seconds(),
|
||||||
|
Health: "ok",
|
||||||
|
State: "inactive",
|
||||||
|
Alerts: ruleToAPIAlert(ar),
|
||||||
|
LastSamples: lastState.Samples,
|
||||||
|
LastSeriesFetched: lastState.SeriesFetched,
|
||||||
|
MaxUpdates: rule.GetRuleStateSize(ar),
|
||||||
|
Updates: rule.GetAllRuleState(ar),
|
||||||
|
Debug: ar.Debug,
|
||||||
|
|
||||||
|
// encode as strings to avoid rounding in JSON
|
||||||
|
ID: fmt.Sprintf("%d", ar.ID()),
|
||||||
|
GroupID: fmt.Sprintf("%d", ar.GroupID),
|
||||||
|
GroupName: ar.GroupName,
|
||||||
|
File: ar.File,
|
||||||
|
}
|
||||||
|
if lastState.Err != nil {
|
||||||
|
r.LastError = lastState.Err.Error()
|
||||||
|
r.Health = "err"
|
||||||
|
}
|
||||||
|
// satisfy apiRule.State logic
|
||||||
|
if len(r.Alerts) > 0 {
|
||||||
|
r.State = notifier.StatePending.String()
|
||||||
|
stateFiring := notifier.StateFiring.String()
|
||||||
|
for _, a := range r.Alerts {
|
||||||
|
if a.State == stateFiring {
|
||||||
|
r.State = stateFiring
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return r
|
||||||
|
}
|
||||||
|
|
||||||
|
// ruleToAPIAlert generates list of apiAlert objects from existing alerts
|
||||||
|
func ruleToAPIAlert(ar *rule.AlertingRule) []*apiAlert {
|
||||||
|
var alerts []*apiAlert
|
||||||
for _, a := range ar.GetAlerts() {
|
for _, a := range ar.GetAlerts() {
|
||||||
if a.State == notifier.StateInactive {
|
if a.State == notifier.StateInactive {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
alerts = append(alerts, NewAlertAPI(ar, a))
|
alerts = append(alerts, newAlertAPI(ar, a))
|
||||||
}
|
}
|
||||||
return alerts
|
return alerts
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// alertToAPI generates apiAlert object from alert by its id(hash)
|
||||||
|
func alertToAPI(ar *rule.AlertingRule, id uint64) *apiAlert {
|
||||||
|
a := ar.GetAlert(id)
|
||||||
|
if a == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return newAlertAPI(ar, a)
|
||||||
|
}
|
||||||
|
|
||||||
// NewAlertAPI creates apiAlert for notifier.Alert
|
// NewAlertAPI creates apiAlert for notifier.Alert
|
||||||
func NewAlertAPI(ar *AlertingRule, a *notifier.Alert) *ApiAlert {
|
func newAlertAPI(ar *rule.AlertingRule, a *notifier.Alert) *apiAlert {
|
||||||
aa := &ApiAlert{
|
aa := &apiAlert{
|
||||||
// encode as strings to avoid rounding
|
// encode as strings to avoid rounding
|
||||||
ID: fmt.Sprintf("%d", a.ID),
|
ID: fmt.Sprintf("%d", a.ID),
|
||||||
GroupID: fmt.Sprintf("%d", a.GroupID),
|
GroupID: fmt.Sprintf("%d", a.GroupID),
|
||||||
@@ -226,8 +328,8 @@ func NewAlertAPI(ar *AlertingRule, a *notifier.Alert) *ApiAlert {
|
|||||||
Restored: a.Restored,
|
Restored: a.Restored,
|
||||||
Value: strconv.FormatFloat(a.Value, 'f', -1, 32),
|
Value: strconv.FormatFloat(a.Value, 'f', -1, 32),
|
||||||
}
|
}
|
||||||
if notifier.AlertURLGeneratorFn != nil {
|
if alertURLGeneratorFn != nil {
|
||||||
aa.SourceLink = notifier.AlertURLGeneratorFn(*a)
|
aa.SourceLink = alertURLGeneratorFn(*a)
|
||||||
}
|
}
|
||||||
if a.State == notifier.StateFiring && !a.KeepFiringSince.IsZero() {
|
if a.State == notifier.StateFiring && !a.KeepFiringSince.IsZero() {
|
||||||
aa.Stabilizing = true
|
aa.Stabilizing = true
|
||||||
@@ -235,11 +337,9 @@ func NewAlertAPI(ar *AlertingRule, a *notifier.Alert) *ApiAlert {
|
|||||||
return aa
|
return aa
|
||||||
}
|
}
|
||||||
|
|
||||||
// ToAPI returns ApiGroup representation of g
|
func groupToAPI(g *rule.Group) *apiGroup {
|
||||||
func (g *Group) ToAPI() *ApiGroup {
|
g = g.DeepCopy()
|
||||||
g.mu.RLock()
|
ag := apiGroup{
|
||||||
defer g.mu.RUnlock()
|
|
||||||
ag := ApiGroup{
|
|
||||||
// encode as string to avoid rounding
|
// encode as string to avoid rounding
|
||||||
ID: strconv.FormatUint(g.GetID(), 10),
|
ID: strconv.FormatUint(g.GetID(), 10),
|
||||||
Name: g.Name,
|
Name: g.Name,
|
||||||
@@ -259,9 +359,9 @@ func (g *Group) ToAPI() *ApiGroup {
|
|||||||
if g.EvalDelay != nil {
|
if g.EvalDelay != nil {
|
||||||
ag.EvalDelay = g.EvalDelay.Seconds()
|
ag.EvalDelay = g.EvalDelay.Seconds()
|
||||||
}
|
}
|
||||||
ag.Rules = make([]ApiRule, 0)
|
ag.Rules = make([]apiRule, 0)
|
||||||
for _, r := range g.Rules {
|
for _, r := range g.Rules {
|
||||||
ag.Rules = append(ag.Rules, r.ToAPI())
|
ag.Rules = append(ag.Rules, ruleToAPI(r))
|
||||||
}
|
}
|
||||||
return &ag
|
return &ag
|
||||||
}
|
}
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
package rule
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
@@ -8,6 +8,7 @@ import (
|
|||||||
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config"
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
|
||||||
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/rule"
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestRecordingToApi(t *testing.T) {
|
func TestRecordingToApi(t *testing.T) {
|
||||||
@@ -16,7 +17,7 @@ func TestRecordingToApi(t *testing.T) {
|
|||||||
Values: []float64{1}, Timestamps: []int64{0},
|
Values: []float64{1}, Timestamps: []int64{0},
|
||||||
})
|
})
|
||||||
entriesLimit := 44
|
entriesLimit := 44
|
||||||
g := NewGroup(config.Group{
|
g := rule.NewGroup(config.Group{
|
||||||
Name: "group",
|
Name: "group",
|
||||||
File: "rules.yaml",
|
File: "rules.yaml",
|
||||||
Concurrency: 1,
|
Concurrency: 1,
|
||||||
@@ -30,24 +31,24 @@ func TestRecordingToApi(t *testing.T) {
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
}, fq, 1*time.Minute, nil)
|
}, fq, 1*time.Minute, nil)
|
||||||
rr := g.Rules[0].(*RecordingRule)
|
rr := g.Rules[0].(*rule.RecordingRule)
|
||||||
|
|
||||||
expectedRes := ApiRule{
|
expectedRes := apiRule{
|
||||||
Name: "record_name",
|
Name: "record_name",
|
||||||
Query: "up",
|
Query: "up",
|
||||||
Labels: map[string]string{"label": "value"},
|
Labels: map[string]string{"label": "value"},
|
||||||
Health: "ok",
|
Health: "ok",
|
||||||
Type: TypeRecording,
|
Type: ruleTypeRecording,
|
||||||
DatasourceType: "prometheus",
|
DatasourceType: "prometheus",
|
||||||
ID: "1248",
|
ID: "1248",
|
||||||
GroupID: fmt.Sprintf("%d", g.CreateID()),
|
GroupID: fmt.Sprintf("%d", g.CreateID()),
|
||||||
GroupName: "group",
|
GroupName: "group",
|
||||||
File: "rules.yaml",
|
File: "rules.yaml",
|
||||||
MaxUpdates: 44,
|
MaxUpdates: 44,
|
||||||
Updates: make([]StateEntry, 0),
|
Updates: make([]rule.StateEntry, 0),
|
||||||
}
|
}
|
||||||
|
|
||||||
res := rr.ToAPI()
|
res := recordingToAPI(rr)
|
||||||
|
|
||||||
if !reflect.DeepEqual(res, expectedRes) {
|
if !reflect.DeepEqual(res, expectedRes) {
|
||||||
t.Fatalf("expected to have: \n%v;\ngot: \n%v", expectedRes, res)
|
t.Fatalf("expected to have: \n%v;\ngot: \n%v", expectedRes, res)
|
||||||
@@ -27,9 +27,6 @@ vmauth-linux-ppc64le-prod:
|
|||||||
vmauth-linux-386-prod:
|
vmauth-linux-386-prod:
|
||||||
APP_NAME=vmauth $(MAKE) app-via-docker-linux-386
|
APP_NAME=vmauth $(MAKE) app-via-docker-linux-386
|
||||||
|
|
||||||
vmauth-linux-s390x-prod:
|
|
||||||
APP_NAME=vmauth $(MAKE) app-via-docker-linux-s390x
|
|
||||||
|
|
||||||
vmauth-darwin-amd64-prod:
|
vmauth-darwin-amd64-prod:
|
||||||
APP_NAME=vmauth $(MAKE) app-via-docker-darwin-amd64
|
APP_NAME=vmauth $(MAKE) app-via-docker-darwin-amd64
|
||||||
|
|
||||||
|
|||||||
@@ -4,7 +4,6 @@ import (
|
|||||||
"bytes"
|
"bytes"
|
||||||
"context"
|
"context"
|
||||||
"encoding/base64"
|
"encoding/base64"
|
||||||
"errors"
|
|
||||||
"flag"
|
"flag"
|
||||||
"fmt"
|
"fmt"
|
||||||
"math"
|
"math"
|
||||||
@@ -42,9 +41,6 @@ var (
|
|||||||
"See https://docs.victoriametrics.com/victoriametrics/vmauth/#load-balancing for details")
|
"See https://docs.victoriametrics.com/victoriametrics/vmauth/#load-balancing for details")
|
||||||
defaultLoadBalancingPolicy = flag.String("loadBalancingPolicy", "least_loaded", "The default load balancing policy to use for backend urls specified inside url_prefix section. "+
|
defaultLoadBalancingPolicy = flag.String("loadBalancingPolicy", "least_loaded", "The default load balancing policy to use for backend urls specified inside url_prefix section. "+
|
||||||
"Supported policies: least_loaded, first_available. See https://docs.victoriametrics.com/victoriametrics/vmauth/#load-balancing")
|
"Supported policies: least_loaded, first_available. See https://docs.victoriametrics.com/victoriametrics/vmauth/#load-balancing")
|
||||||
defaultMergeQueryArgs = flagutil.NewArrayString("mergeQueryArgs", "An optional list of client query arg names, which must be merged with args at backend urls. "+
|
|
||||||
"The rest of client query args are replaced by the corresponding query args from backend urls for security reasons; "+
|
|
||||||
"see https://docs.victoriametrics.com/victoriametrics/vmauth/#query-args-handling")
|
|
||||||
discoverBackendIPsGlobal = flag.Bool("discoverBackendIPs", false, "Whether to discover backend IPs via periodic DNS queries to hostnames specified in url_prefix. "+
|
discoverBackendIPsGlobal = flag.Bool("discoverBackendIPs", false, "Whether to discover backend IPs via periodic DNS queries to hostnames specified in url_prefix. "+
|
||||||
"This may be useful when url_prefix points to a hostname with dynamically scaled instances behind it. See https://docs.victoriametrics.com/victoriametrics/vmauth/#discovering-backend-ips")
|
"This may be useful when url_prefix points to a hostname with dynamically scaled instances behind it. See https://docs.victoriametrics.com/victoriametrics/vmauth/#discovering-backend-ips")
|
||||||
discoverBackendIPsInterval = flag.Duration("discoverBackendIPsInterval", 10*time.Second, "The interval for re-discovering backend IPs if -discoverBackendIPs command-line flag is set. "+
|
discoverBackendIPsInterval = flag.Duration("discoverBackendIPsInterval", 10*time.Second, "The interval for re-discovering backend IPs if -discoverBackendIPs command-line flag is set. "+
|
||||||
@@ -65,11 +61,10 @@ type AuthConfig struct {
|
|||||||
type UserInfo struct {
|
type UserInfo struct {
|
||||||
Name string `yaml:"name,omitempty"`
|
Name string `yaml:"name,omitempty"`
|
||||||
|
|
||||||
BearerToken string `yaml:"bearer_token,omitempty"`
|
BearerToken string `yaml:"bearer_token,omitempty"`
|
||||||
JWT *JWTConfig `yaml:"jwt,omitempty"`
|
AuthToken string `yaml:"auth_token,omitempty"`
|
||||||
AuthToken string `yaml:"auth_token,omitempty"`
|
Username string `yaml:"username,omitempty"`
|
||||||
Username string `yaml:"username,omitempty"`
|
Password string `yaml:"password,omitempty"`
|
||||||
Password string `yaml:"password,omitempty"`
|
|
||||||
|
|
||||||
URLPrefix *URLPrefix `yaml:"url_prefix,omitempty"`
|
URLPrefix *URLPrefix `yaml:"url_prefix,omitempty"`
|
||||||
DiscoverBackendIPs *bool `yaml:"discover_backend_ips,omitempty"`
|
DiscoverBackendIPs *bool `yaml:"discover_backend_ips,omitempty"`
|
||||||
@@ -80,7 +75,6 @@ type UserInfo struct {
|
|||||||
DefaultURL *URLPrefix `yaml:"default_url,omitempty"`
|
DefaultURL *URLPrefix `yaml:"default_url,omitempty"`
|
||||||
RetryStatusCodes []int `yaml:"retry_status_codes,omitempty"`
|
RetryStatusCodes []int `yaml:"retry_status_codes,omitempty"`
|
||||||
LoadBalancingPolicy string `yaml:"load_balancing_policy,omitempty"`
|
LoadBalancingPolicy string `yaml:"load_balancing_policy,omitempty"`
|
||||||
MergeQueryArgs []string `yaml:"merge_query_args,omitempty"`
|
|
||||||
DropSrcPathPrefixParts *int `yaml:"drop_src_path_prefix_parts,omitempty"`
|
DropSrcPathPrefixParts *int `yaml:"drop_src_path_prefix_parts,omitempty"`
|
||||||
TLSCAFile string `yaml:"tls_ca_file,omitempty"`
|
TLSCAFile string `yaml:"tls_ca_file,omitempty"`
|
||||||
TLSCertFile string `yaml:"tls_cert_file,omitempty"`
|
TLSCertFile string `yaml:"tls_cert_file,omitempty"`
|
||||||
@@ -96,8 +90,6 @@ type UserInfo struct {
|
|||||||
rt http.RoundTripper
|
rt http.RoundTripper
|
||||||
|
|
||||||
requests *metrics.Counter
|
requests *metrics.Counter
|
||||||
requestErrors *metrics.Counter
|
|
||||||
backendRequests *metrics.Counter
|
|
||||||
backendErrors *metrics.Counter
|
backendErrors *metrics.Counter
|
||||||
requestsDuration *metrics.Summary
|
requestsDuration *metrics.Summary
|
||||||
}
|
}
|
||||||
@@ -109,29 +101,13 @@ type HeadersConf struct {
|
|||||||
KeepOriginalHost *bool `yaml:"keep_original_host,omitempty"`
|
KeepOriginalHost *bool `yaml:"keep_original_host,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
func (ui *UserInfo) beginConcurrencyLimit(ctx context.Context) error {
|
func (ui *UserInfo) beginConcurrencyLimit() error {
|
||||||
select {
|
select {
|
||||||
case ui.concurrencyLimitCh <- struct{}{}:
|
case ui.concurrencyLimitCh <- struct{}{}:
|
||||||
return nil
|
return nil
|
||||||
default:
|
default:
|
||||||
// The number of concurrently executed requests for the given user equals the limt.
|
ui.concurrencyLimitReached.Inc()
|
||||||
// Wait until some of the currently executed requests are finished, so the current request could be executed.
|
return fmt.Errorf("cannot handle more than %d concurrent requests from user %s", ui.getMaxConcurrentRequests(), ui.name())
|
||||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10078
|
|
||||||
select {
|
|
||||||
case ui.concurrencyLimitCh <- struct{}{}:
|
|
||||||
return nil
|
|
||||||
case <-ctx.Done():
|
|
||||||
err := ctx.Err()
|
|
||||||
if errors.Is(err, context.DeadlineExceeded) {
|
|
||||||
// The current request couldn't be executed until the request timeout.
|
|
||||||
ui.concurrencyLimitReached.Inc()
|
|
||||||
return fmt.Errorf("cannot start executing the request during -maxQueueDuration=%s because %d concurrent requests from the user %s are executed",
|
|
||||||
*maxQueueDuration, ui.getMaxConcurrentRequests(), ui.name())
|
|
||||||
}
|
|
||||||
|
|
||||||
return fmt.Errorf("cannot start executing the request because %d concurrent requests from the user %s are executed: %w",
|
|
||||||
ui.getMaxConcurrentRequests(), ui.name(), err)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -147,28 +123,6 @@ func (ui *UserInfo) getMaxConcurrentRequests() int {
|
|||||||
return mcr
|
return mcr
|
||||||
}
|
}
|
||||||
|
|
||||||
func (ui *UserInfo) stopHealthChecks() {
|
|
||||||
if ui == nil {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
if ui.URLPrefix != nil {
|
|
||||||
bus := ui.URLPrefix.bus.Load()
|
|
||||||
bus.stopHealthChecks()
|
|
||||||
}
|
|
||||||
if ui.DefaultURL != nil {
|
|
||||||
bus := ui.DefaultURL.bus.Load()
|
|
||||||
bus.stopHealthChecks()
|
|
||||||
}
|
|
||||||
for i := range ui.URLMaps {
|
|
||||||
um := &ui.URLMaps[i]
|
|
||||||
if um.URLPrefix != nil {
|
|
||||||
bus := um.URLPrefix.bus.Load()
|
|
||||||
bus.stopHealthChecks()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Header is `Name: Value` http header, which must be added to the proxied request.
|
// Header is `Name: Value` http header, which must be added to the proxied request.
|
||||||
type Header struct {
|
type Header struct {
|
||||||
Name string
|
Name string
|
||||||
@@ -228,11 +182,6 @@ type URLMap struct {
|
|||||||
// LoadBalancingPolicy is load balancing policy among UrlPrefix backends.
|
// LoadBalancingPolicy is load balancing policy among UrlPrefix backends.
|
||||||
LoadBalancingPolicy string `yaml:"load_balancing_policy,omitempty"`
|
LoadBalancingPolicy string `yaml:"load_balancing_policy,omitempty"`
|
||||||
|
|
||||||
// MergeQueryArgs is a list of client query args, which must be merged with the existing backend query args.
|
|
||||||
//
|
|
||||||
// The rest of client query args are replaced with the corresponding backend query args for security reasons.
|
|
||||||
MergeQueryArgs []string `yaml:"merge_query_args,omitempty"`
|
|
||||||
|
|
||||||
// DropSrcPathPrefixParts is the number of `/`-delimited request path prefix parts to drop before proxying the request to backend.
|
// DropSrcPathPrefixParts is the number of `/`-delimited request path prefix parts to drop before proxying the request to backend.
|
||||||
DropSrcPathPrefixParts *int `yaml:"drop_src_path_prefix_parts,omitempty"`
|
DropSrcPathPrefixParts *int `yaml:"drop_src_path_prefix_parts,omitempty"`
|
||||||
}
|
}
|
||||||
@@ -279,7 +228,7 @@ func (qa *QueryArg) MarshalYAML() (any, error) {
|
|||||||
return qa.sOriginal, nil
|
return qa.sOriginal, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// URLPrefix represents the `url_prefix` from auth config.
|
// URLPrefix represents passed `url_prefix`
|
||||||
type URLPrefix struct {
|
type URLPrefix struct {
|
||||||
// requests are re-tried on other backend urls for these http response status codes
|
// requests are re-tried on other backend urls for these http response status codes
|
||||||
retryStatusCodes []int
|
retryStatusCodes []int
|
||||||
@@ -287,11 +236,6 @@ type URLPrefix struct {
|
|||||||
// load balancing policy used
|
// load balancing policy used
|
||||||
loadBalancingPolicy string
|
loadBalancingPolicy string
|
||||||
|
|
||||||
// the list of client query args, which must be merged with backend query args.
|
|
||||||
//
|
|
||||||
// By default backend query args replace all the client query args for security reasons.
|
|
||||||
mergeQueryArgs []string
|
|
||||||
|
|
||||||
// how many request path prefix parts to drop before routing the request to backendURL
|
// how many request path prefix parts to drop before routing the request to backendURL
|
||||||
dropSrcPathPrefixParts int
|
dropSrcPathPrefixParts int
|
||||||
|
|
||||||
@@ -304,7 +248,7 @@ type URLPrefix struct {
|
|||||||
// the list of backend urls
|
// the list of backend urls
|
||||||
//
|
//
|
||||||
// the list can be dynamically updated if `discover_backend_ips` option is set.
|
// the list can be dynamically updated if `discover_backend_ips` option is set.
|
||||||
bus atomic.Pointer[backendURLs]
|
bus atomic.Pointer[[]*backendURL]
|
||||||
|
|
||||||
// if this option is set, then backend ips for busOriginal are periodically re-discovered and put to bus.
|
// if this option is set, then backend ips for busOriginal are periodically re-discovered and put to bus.
|
||||||
discoverBackendIPs bool
|
discoverBackendIPs bool
|
||||||
@@ -328,91 +272,21 @@ func (up *URLPrefix) setLoadBalancingPolicy(loadBalancingPolicy string) error {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
type backendURLs struct {
|
|
||||||
healthChecksContext context.Context
|
|
||||||
healthChecksCancel func()
|
|
||||||
healthChecksWG sync.WaitGroup
|
|
||||||
|
|
||||||
bus []*backendURL
|
|
||||||
}
|
|
||||||
|
|
||||||
func newBackendURLs() *backendURLs {
|
|
||||||
ctx, cancel := context.WithCancel(context.Background())
|
|
||||||
return &backendURLs{
|
|
||||||
healthChecksContext: ctx,
|
|
||||||
healthChecksCancel: cancel,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func (bus *backendURLs) add(u *url.URL) {
|
|
||||||
bus.bus = append(bus.bus, &backendURL{
|
|
||||||
url: u,
|
|
||||||
healthCheckContext: bus.healthChecksContext,
|
|
||||||
healthCheckWG: &bus.healthChecksWG,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
func (bus *backendURLs) stopHealthChecks() {
|
|
||||||
bus.healthChecksCancel()
|
|
||||||
bus.healthChecksWG.Wait()
|
|
||||||
}
|
|
||||||
|
|
||||||
type backendURL struct {
|
type backendURL struct {
|
||||||
broken atomic.Bool
|
brokenDeadline atomic.Uint64
|
||||||
|
|
||||||
healthCheckContext context.Context
|
|
||||||
healthCheckWG *sync.WaitGroup
|
|
||||||
|
|
||||||
concurrentRequests atomic.Int32
|
concurrentRequests atomic.Int32
|
||||||
|
|
||||||
url *url.URL
|
url *url.URL
|
||||||
}
|
}
|
||||||
|
|
||||||
func (bu *backendURL) isBroken() bool {
|
func (bu *backendURL) isBroken() bool {
|
||||||
return bu.broken.Load()
|
ct := fasttime.UnixTimestamp()
|
||||||
|
return ct < bu.brokenDeadline.Load()
|
||||||
}
|
}
|
||||||
|
|
||||||
func (bu *backendURL) setBroken() {
|
func (bu *backendURL) setBroken() {
|
||||||
if bu.broken.CompareAndSwap(false, true) {
|
deadline := fasttime.UnixTimestamp() + uint64((*failTimeout).Seconds())
|
||||||
bu.healthCheckWG.Go(func() {
|
bu.brokenDeadline.Store(deadline)
|
||||||
bu.runHealthCheck()
|
|
||||||
bu.broken.Store(false)
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func (bu *backendURL) runHealthCheck() {
|
|
||||||
port := bu.url.Port()
|
|
||||||
if port == "" {
|
|
||||||
port = "80"
|
|
||||||
}
|
|
||||||
addr := net.JoinHostPort(bu.url.Hostname(), port)
|
|
||||||
|
|
||||||
t := time.NewTicker(*failTimeout)
|
|
||||||
defer t.Stop()
|
|
||||||
|
|
||||||
for {
|
|
||||||
select {
|
|
||||||
case <-t.C:
|
|
||||||
// Verify network connectivity via TCP dial before marking backend healthy.
|
|
||||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9997
|
|
||||||
ctx, cancel := context.WithTimeout(bu.healthCheckContext, time.Second)
|
|
||||||
c, err := netutil.Dialer.DialContext(ctx, "tcp", addr)
|
|
||||||
cancel()
|
|
||||||
if err != nil {
|
|
||||||
if errors.Is(bu.healthCheckContext.Err(), context.Canceled) {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
logger.Warnf("ignoring the backend at %s for %s because of dial error: %s", addr, *failTimeout, err)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
_ = c.Close()
|
|
||||||
return
|
|
||||||
case <-bu.healthCheckContext.Done():
|
|
||||||
return
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (bu *backendURL) get() {
|
func (bu *backendURL) get() {
|
||||||
@@ -424,8 +298,8 @@ func (bu *backendURL) put() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (up *URLPrefix) getBackendsCount() int {
|
func (up *URLPrefix) getBackendsCount() int {
|
||||||
bus := up.bus.Load()
|
pbus := up.bus.Load()
|
||||||
return len(bus.bus)
|
return len(*pbus)
|
||||||
}
|
}
|
||||||
|
|
||||||
// getBackendURL returns the backendURL depending on the load balance policy.
|
// getBackendURL returns the backendURL depending on the load balance policy.
|
||||||
@@ -436,15 +310,16 @@ func (up *URLPrefix) getBackendsCount() int {
|
|||||||
func (up *URLPrefix) getBackendURL() *backendURL {
|
func (up *URLPrefix) getBackendURL() *backendURL {
|
||||||
up.discoverBackendAddrsIfNeeded()
|
up.discoverBackendAddrsIfNeeded()
|
||||||
|
|
||||||
bus := up.bus.Load()
|
pbus := up.bus.Load()
|
||||||
if len(bus.bus) == 0 {
|
bus := *pbus
|
||||||
|
if len(bus) == 0 {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
if up.loadBalancingPolicy == "first_available" {
|
if up.loadBalancingPolicy == "first_available" {
|
||||||
return getFirstAvailableBackendURL(bus.bus)
|
return getFirstAvailableBackendURL(bus)
|
||||||
}
|
}
|
||||||
return getLeastLoadedBackendURL(bus.bus, &up.n)
|
return getLeastLoadedBackendURL(bus, &up.n)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (up *URLPrefix) discoverBackendAddrsIfNeeded() {
|
func (up *URLPrefix) discoverBackendAddrsIfNeeded() {
|
||||||
@@ -518,24 +393,25 @@ func (up *URLPrefix) discoverBackendAddrsIfNeeded() {
|
|||||||
cancel()
|
cancel()
|
||||||
|
|
||||||
// generate new backendURLs for the resolved IPs
|
// generate new backendURLs for the resolved IPs
|
||||||
busNew := newBackendURLs()
|
var busNew []*backendURL
|
||||||
for _, bu := range up.busOriginal {
|
for _, bu := range up.busOriginal {
|
||||||
host := bu.Hostname()
|
host := bu.Hostname()
|
||||||
for _, addr := range hostToAddrs[host] {
|
for _, addr := range hostToAddrs[host] {
|
||||||
buCopy := *bu
|
buCopy := *bu
|
||||||
buCopy.Host = addr
|
buCopy.Host = addr
|
||||||
busNew.add(&buCopy)
|
busNew = append(busNew, &backendURL{
|
||||||
|
url: &buCopy,
|
||||||
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bus := up.bus.Load()
|
pbus := up.bus.Load()
|
||||||
if areEqualBackendURLs(bus.bus, busNew.bus) {
|
if areEqualBackendURLs(*pbus, busNew) {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// Store new backend urls
|
// Store new backend urls
|
||||||
up.bus.Store(busNew)
|
up.bus.Store(&busNew)
|
||||||
bus.stopHealthChecks()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func areEqualBackendURLs(a, b []*backendURL) bool {
|
func areEqualBackendURLs(a, b []*backendURL) bool {
|
||||||
@@ -566,66 +442,53 @@ func getFirstAvailableBackendURL(bus []*backendURL) *backendURL {
|
|||||||
for i := 1; i < len(bus); i++ {
|
for i := 1; i < len(bus); i++ {
|
||||||
if !bus[i].isBroken() {
|
if !bus[i].isBroken() {
|
||||||
bu = bus[i]
|
bu = bus[i]
|
||||||
bu.get()
|
break
|
||||||
return bu
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return nil
|
bu.get()
|
||||||
|
return bu
|
||||||
}
|
}
|
||||||
|
|
||||||
// getLeastLoadedBackendURL returns a non-broken backendURL with the lowest number of concurrent requests.
|
// getLeastLoadedBackendURL returns the backendURL with the minimum number of concurrent requests.
|
||||||
//
|
//
|
||||||
// backendURL.put() must be called on the returned backendURL after the request is complete.
|
// backendURL.put() must be called on the returned backendURL after the request is complete.
|
||||||
func getLeastLoadedBackendURL(bus []*backendURL, atomicCounter *atomic.Uint32) *backendURL {
|
func getLeastLoadedBackendURL(bus []*backendURL, atomicCounter *atomic.Uint32) *backendURL {
|
||||||
if len(bus) == 1 {
|
if len(bus) == 1 {
|
||||||
// Fast path - return the only backend url.
|
// Fast path - return the only backend url.
|
||||||
bu := bus[0]
|
bu := bus[0]
|
||||||
if bu.isBroken() {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
bu.get()
|
bu.get()
|
||||||
return bu
|
return bu
|
||||||
}
|
}
|
||||||
|
|
||||||
// Slow path - select other backend urls.
|
// Slow path - select other backend urls.
|
||||||
n := atomicCounter.Add(1) - 1
|
n := atomicCounter.Add(1) - 1
|
||||||
for i := range uint32(len(bus)) {
|
for i := uint32(0); i < uint32(len(bus)); i++ {
|
||||||
idx := (n + i) % uint32(len(bus))
|
idx := (n + i) % uint32(len(bus))
|
||||||
bu := bus[idx]
|
bu := bus[idx]
|
||||||
if bu.isBroken() {
|
if bu.isBroken() {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
if bu.concurrentRequests.Load() == 0 {
|
||||||
// The Load() in front of CompareAndSwap() avoids CAS overhead for items with values bigger than 0.
|
// Fast path - return the backend with zero concurrently executed requests.
|
||||||
if bu.concurrentRequests.Load() == 0 && bu.concurrentRequests.CompareAndSwap(0, 1) {
|
// Do not use CompareAndSwap() instead of Load(), since it is much slower on systems with many CPU cores.
|
||||||
atomicCounter.CompareAndSwap(n+1, idx+1)
|
bu.concurrentRequests.Add(1)
|
||||||
// There is no need in the call bu.get(), because we already incremented bu.concrrentRequests above.
|
|
||||||
return bu
|
return bu
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Slow path - return the backend with the minimum number of concurrently executed requests.
|
// Slow path - return the backend with the minimum number of concurrently executed requests.
|
||||||
buMinIdx := n % uint32(len(bus))
|
buMin := bus[n%uint32(len(bus))]
|
||||||
minRequests := bus[buMinIdx].concurrentRequests.Load()
|
minRequests := buMin.concurrentRequests.Load()
|
||||||
for i := uint32(1); i < uint32(len(bus)); i++ {
|
for _, bu := range bus {
|
||||||
idx := (n + i) % uint32(len(bus))
|
|
||||||
bu := bus[idx]
|
|
||||||
if bu.isBroken() {
|
if bu.isBroken() {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
if n := bu.concurrentRequests.Load(); n < minRequests || buMin.isBroken() {
|
||||||
reqs := bu.concurrentRequests.Load()
|
buMin = bu
|
||||||
if reqs < minRequests || bus[buMinIdx].isBroken() {
|
minRequests = n
|
||||||
buMinIdx = idx
|
|
||||||
minRequests = reqs
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
buMin := bus[buMinIdx]
|
|
||||||
if buMin.isBroken() {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
buMin.get()
|
buMin.get()
|
||||||
atomicCounter.CompareAndSwap(n+1, buMinIdx+1)
|
|
||||||
return buMin
|
return buMin
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -742,9 +605,11 @@ func initAuthConfig() {
|
|||||||
configTimestamp.Set(fasttime.UnixTimestamp())
|
configTimestamp.Set(fasttime.UnixTimestamp())
|
||||||
|
|
||||||
stopCh = make(chan struct{})
|
stopCh = make(chan struct{})
|
||||||
authConfigWG.Go(func() {
|
authConfigWG.Add(1)
|
||||||
|
go func() {
|
||||||
|
defer authConfigWG.Done()
|
||||||
authConfigReloader(sighupCh)
|
authConfigReloader(sighupCh)
|
||||||
})
|
}()
|
||||||
}
|
}
|
||||||
|
|
||||||
func stopAuthConfig() {
|
func stopAuthConfig() {
|
||||||
@@ -800,9 +665,6 @@ var (
|
|||||||
// authUsers contains the currently loaded auth users
|
// authUsers contains the currently loaded auth users
|
||||||
authUsers atomic.Pointer[map[string]*UserInfo]
|
authUsers atomic.Pointer[map[string]*UserInfo]
|
||||||
|
|
||||||
// jwt authentication cache
|
|
||||||
jwtAuthCache atomic.Pointer[jwtCache]
|
|
||||||
|
|
||||||
authConfigWG sync.WaitGroup
|
authConfigWG sync.WaitGroup
|
||||||
stopCh chan struct{}
|
stopCh chan struct{}
|
||||||
)
|
)
|
||||||
@@ -819,7 +681,7 @@ func reloadAuthConfig() (bool, error) {
|
|||||||
|
|
||||||
ok, err := reloadAuthConfigData(data)
|
ok, err := reloadAuthConfigData(data)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return false, fmt.Errorf("failed to parse -auth.config=%q: %w", *authConfigPath, err)
|
return false, fmt.Errorf("failed to pars -auth.config=%q: %w", *authConfigPath, err)
|
||||||
}
|
}
|
||||||
if !ok {
|
if !ok {
|
||||||
return false, nil
|
return false, nil
|
||||||
@@ -842,14 +704,6 @@ func reloadAuthConfigData(data []byte) (bool, error) {
|
|||||||
return false, fmt.Errorf("failed to parse auth config: %w", err)
|
return false, fmt.Errorf("failed to parse auth config: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
jui, err := parseJWTUsers(ac)
|
|
||||||
if err != nil {
|
|
||||||
return false, fmt.Errorf("failed to parse JWT users from auth config: %w", err)
|
|
||||||
}
|
|
||||||
jwtc := &jwtCache{
|
|
||||||
users: jui,
|
|
||||||
}
|
|
||||||
|
|
||||||
m, err := parseAuthConfigUsers(ac)
|
m, err := parseAuthConfigUsers(ac)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return false, fmt.Errorf("failed to parse users from auth config: %w", err)
|
return false, fmt.Errorf("failed to parse users from auth config: %w", err)
|
||||||
@@ -857,11 +711,6 @@ func reloadAuthConfigData(data []byte) (bool, error) {
|
|||||||
|
|
||||||
acPrev := authConfig.Load()
|
acPrev := authConfig.Load()
|
||||||
if acPrev != nil {
|
if acPrev != nil {
|
||||||
acPrev.UnauthorizedUser.stopHealthChecks()
|
|
||||||
for i := range acPrev.Users {
|
|
||||||
acPrev.Users[i].stopHealthChecks()
|
|
||||||
}
|
|
||||||
|
|
||||||
metrics.UnregisterSet(acPrev.ms, true)
|
metrics.UnregisterSet(acPrev.ms, true)
|
||||||
}
|
}
|
||||||
metrics.RegisterSet(ac.ms)
|
metrics.RegisterSet(ac.ms)
|
||||||
@@ -869,7 +718,6 @@ func reloadAuthConfigData(data []byte) (bool, error) {
|
|||||||
authConfig.Store(ac)
|
authConfig.Store(ac)
|
||||||
authConfigData.Store(&data)
|
authConfigData.Store(&data)
|
||||||
authUsers.Store(&m)
|
authUsers.Store(&m)
|
||||||
jwtAuthCache.Store(jwtc)
|
|
||||||
|
|
||||||
return true, nil
|
return true, nil
|
||||||
}
|
}
|
||||||
@@ -894,9 +742,6 @@ func parseAuthConfig(data []byte) (*AuthConfig, error) {
|
|||||||
if ui.BearerToken != "" {
|
if ui.BearerToken != "" {
|
||||||
return nil, fmt.Errorf("field bearer_token can't be specified for unauthorized_user section")
|
return nil, fmt.Errorf("field bearer_token can't be specified for unauthorized_user section")
|
||||||
}
|
}
|
||||||
if ui.JWT != nil {
|
|
||||||
return nil, fmt.Errorf("field jwt can't be specified for unauthorized_user section")
|
|
||||||
}
|
|
||||||
if ui.AuthToken != "" {
|
if ui.AuthToken != "" {
|
||||||
return nil, fmt.Errorf("field auth_token can't be specified for unauthorized_user section")
|
return nil, fmt.Errorf("field auth_token can't be specified for unauthorized_user section")
|
||||||
}
|
}
|
||||||
@@ -912,8 +757,6 @@ func parseAuthConfig(data []byte) (*AuthConfig, error) {
|
|||||||
return nil, fmt.Errorf("cannot parse metric_labels for unauthorized_user: %w", err)
|
return nil, fmt.Errorf("cannot parse metric_labels for unauthorized_user: %w", err)
|
||||||
}
|
}
|
||||||
ui.requests = ac.ms.NewCounter(`vmauth_unauthorized_user_requests_total` + metricLabels)
|
ui.requests = ac.ms.NewCounter(`vmauth_unauthorized_user_requests_total` + metricLabels)
|
||||||
ui.requestErrors = ac.ms.NewCounter(`vmauth_unauthorized_user_request_errors_total` + metricLabels)
|
|
||||||
ui.backendRequests = ac.ms.NewCounter(`vmauth_unauthorized_user_request_backend_requests_total` + metricLabels)
|
|
||||||
ui.backendErrors = ac.ms.NewCounter(`vmauth_unauthorized_user_request_backend_errors_total` + metricLabels)
|
ui.backendErrors = ac.ms.NewCounter(`vmauth_unauthorized_user_request_backend_errors_total` + metricLabels)
|
||||||
ui.requestsDuration = ac.ms.NewSummary(`vmauth_unauthorized_user_request_duration_seconds` + metricLabels)
|
ui.requestsDuration = ac.ms.NewSummary(`vmauth_unauthorized_user_request_duration_seconds` + metricLabels)
|
||||||
ui.concurrencyLimitCh = make(chan struct{}, ui.getMaxConcurrentRequests())
|
ui.concurrencyLimitCh = make(chan struct{}, ui.getMaxConcurrentRequests())
|
||||||
@@ -943,17 +786,10 @@ func parseAuthConfigUsers(ac *AuthConfig) (map[string]*UserInfo, error) {
|
|||||||
}
|
}
|
||||||
for i := range uis {
|
for i := range uis {
|
||||||
ui := &uis[i]
|
ui := &uis[i]
|
||||||
// users with jwt tokens are parsed by parseJWTUsers function.
|
|
||||||
// the function also checks that users with jwt tokens do not have auth tokens, bearer tokens, usernames and passwords.
|
|
||||||
if ui.JWT != nil {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
ats, err := getAuthTokens(ui.AuthToken, ui.BearerToken, ui.Username, ui.Password)
|
ats, err := getAuthTokens(ui.AuthToken, ui.BearerToken, ui.Username, ui.Password)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, at := range ats {
|
for _, at := range ats {
|
||||||
if uiOld := byAuthToken[at]; uiOld != nil {
|
if uiOld := byAuthToken[at]; uiOld != nil {
|
||||||
return nil, fmt.Errorf("duplicate auth token=%q found for username=%q, name=%q; the previous one is set for username=%q, name=%q",
|
return nil, fmt.Errorf("duplicate auth token=%q found for username=%q, name=%q; the previous one is set for username=%q, name=%q",
|
||||||
@@ -969,8 +805,6 @@ func parseAuthConfigUsers(ac *AuthConfig) (map[string]*UserInfo, error) {
|
|||||||
return nil, fmt.Errorf("cannot parse metric_labels: %w", err)
|
return nil, fmt.Errorf("cannot parse metric_labels: %w", err)
|
||||||
}
|
}
|
||||||
ui.requests = ac.ms.GetOrCreateCounter(`vmauth_user_requests_total` + metricLabels)
|
ui.requests = ac.ms.GetOrCreateCounter(`vmauth_user_requests_total` + metricLabels)
|
||||||
ui.requestErrors = ac.ms.GetOrCreateCounter(`vmauth_user_request_errors_total` + metricLabels)
|
|
||||||
ui.backendRequests = ac.ms.GetOrCreateCounter(`vmauth_user_request_backend_requests_total` + metricLabels)
|
|
||||||
ui.backendErrors = ac.ms.GetOrCreateCounter(`vmauth_user_request_backend_errors_total` + metricLabels)
|
ui.backendErrors = ac.ms.GetOrCreateCounter(`vmauth_user_request_backend_errors_total` + metricLabels)
|
||||||
ui.requestsDuration = ac.ms.GetOrCreateSummary(`vmauth_user_request_duration_seconds` + metricLabels)
|
ui.requestsDuration = ac.ms.GetOrCreateSummary(`vmauth_user_request_duration_seconds` + metricLabels)
|
||||||
mcr := ui.getMaxConcurrentRequests()
|
mcr := ui.getMaxConcurrentRequests()
|
||||||
@@ -1022,7 +856,6 @@ func (ui *UserInfo) getMetricLabels() (string, error) {
|
|||||||
func (ui *UserInfo) initURLs() error {
|
func (ui *UserInfo) initURLs() error {
|
||||||
retryStatusCodes := defaultRetryStatusCodes.Values()
|
retryStatusCodes := defaultRetryStatusCodes.Values()
|
||||||
loadBalancingPolicy := *defaultLoadBalancingPolicy
|
loadBalancingPolicy := *defaultLoadBalancingPolicy
|
||||||
mergeQueryArgs := *defaultMergeQueryArgs
|
|
||||||
dropSrcPathPrefixParts := 0
|
dropSrcPathPrefixParts := 0
|
||||||
discoverBackendIPs := *discoverBackendIPsGlobal
|
discoverBackendIPs := *discoverBackendIPsGlobal
|
||||||
if ui.RetryStatusCodes != nil {
|
if ui.RetryStatusCodes != nil {
|
||||||
@@ -1031,9 +864,6 @@ func (ui *UserInfo) initURLs() error {
|
|||||||
if ui.LoadBalancingPolicy != "" {
|
if ui.LoadBalancingPolicy != "" {
|
||||||
loadBalancingPolicy = ui.LoadBalancingPolicy
|
loadBalancingPolicy = ui.LoadBalancingPolicy
|
||||||
}
|
}
|
||||||
if len(ui.MergeQueryArgs) != 0 {
|
|
||||||
mergeQueryArgs = ui.MergeQueryArgs
|
|
||||||
}
|
|
||||||
if ui.DropSrcPathPrefixParts != nil {
|
if ui.DropSrcPathPrefixParts != nil {
|
||||||
dropSrcPathPrefixParts = *ui.DropSrcPathPrefixParts
|
dropSrcPathPrefixParts = *ui.DropSrcPathPrefixParts
|
||||||
}
|
}
|
||||||
@@ -1041,18 +871,16 @@ func (ui *UserInfo) initURLs() error {
|
|||||||
discoverBackendIPs = *ui.DiscoverBackendIPs
|
discoverBackendIPs = *ui.DiscoverBackendIPs
|
||||||
}
|
}
|
||||||
|
|
||||||
up := ui.URLPrefix
|
if ui.URLPrefix != nil {
|
||||||
if up != nil {
|
if err := ui.URLPrefix.sanitizeAndInitialize(); err != nil {
|
||||||
if err := up.sanitizeAndInitialize(); err != nil {
|
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
up.retryStatusCodes = retryStatusCodes
|
ui.URLPrefix.retryStatusCodes = retryStatusCodes
|
||||||
up.dropSrcPathPrefixParts = dropSrcPathPrefixParts
|
ui.URLPrefix.dropSrcPathPrefixParts = dropSrcPathPrefixParts
|
||||||
up.discoverBackendIPs = discoverBackendIPs
|
ui.URLPrefix.discoverBackendIPs = discoverBackendIPs
|
||||||
if err := up.setLoadBalancingPolicy(loadBalancingPolicy); err != nil {
|
if err := ui.URLPrefix.setLoadBalancingPolicy(loadBalancingPolicy); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
up.mergeQueryArgs = mergeQueryArgs
|
|
||||||
}
|
}
|
||||||
if ui.DefaultURL != nil {
|
if ui.DefaultURL != nil {
|
||||||
if err := ui.DefaultURL.sanitizeAndInitialize(); err != nil {
|
if err := ui.DefaultURL.sanitizeAndInitialize(); err != nil {
|
||||||
@@ -1071,7 +899,6 @@ func (ui *UserInfo) initURLs() error {
|
|||||||
}
|
}
|
||||||
rscs := retryStatusCodes
|
rscs := retryStatusCodes
|
||||||
lbp := loadBalancingPolicy
|
lbp := loadBalancingPolicy
|
||||||
mqa := mergeQueryArgs
|
|
||||||
dsp := dropSrcPathPrefixParts
|
dsp := dropSrcPathPrefixParts
|
||||||
dbd := discoverBackendIPs
|
dbd := discoverBackendIPs
|
||||||
if e.RetryStatusCodes != nil {
|
if e.RetryStatusCodes != nil {
|
||||||
@@ -1080,9 +907,6 @@ func (ui *UserInfo) initURLs() error {
|
|||||||
if e.LoadBalancingPolicy != "" {
|
if e.LoadBalancingPolicy != "" {
|
||||||
lbp = e.LoadBalancingPolicy
|
lbp = e.LoadBalancingPolicy
|
||||||
}
|
}
|
||||||
if len(e.MergeQueryArgs) != 0 {
|
|
||||||
mqa = e.MergeQueryArgs
|
|
||||||
}
|
|
||||||
if e.DropSrcPathPrefixParts != nil {
|
if e.DropSrcPathPrefixParts != nil {
|
||||||
dsp = *e.DropSrcPathPrefixParts
|
dsp = *e.DropSrcPathPrefixParts
|
||||||
}
|
}
|
||||||
@@ -1093,7 +917,6 @@ func (ui *UserInfo) initURLs() error {
|
|||||||
if err := e.URLPrefix.setLoadBalancingPolicy(lbp); err != nil {
|
if err := e.URLPrefix.setLoadBalancingPolicy(lbp); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
e.URLPrefix.mergeQueryArgs = mqa
|
|
||||||
e.URLPrefix.dropSrcPathPrefixParts = dsp
|
e.URLPrefix.dropSrcPathPrefixParts = dsp
|
||||||
e.URLPrefix.discoverBackendIPs = dbd
|
e.URLPrefix.discoverBackendIPs = dbd
|
||||||
}
|
}
|
||||||
@@ -1205,11 +1028,13 @@ func (up *URLPrefix) sanitizeAndInitialize() error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Initialize up.bus
|
// Initialize up.bus
|
||||||
bus := newBackendURLs()
|
bus := make([]*backendURL, len(up.busOriginal))
|
||||||
for _, bu := range up.busOriginal {
|
for i, bu := range up.busOriginal {
|
||||||
bus.add(bu)
|
bus[i] = &backendURL{
|
||||||
|
url: bu,
|
||||||
|
}
|
||||||
}
|
}
|
||||||
up.bus.Store(bus)
|
up.bus.Store(&bus)
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -280,7 +280,7 @@ users:
|
|||||||
}
|
}
|
||||||
|
|
||||||
func TestParseAuthConfigSuccess(t *testing.T) {
|
func TestParseAuthConfigSuccess(t *testing.T) {
|
||||||
f := func(s string, expectedAuthConfig map[string]*UserInfo, expectedUnauthorizedUserConfig *UserInfo) {
|
f := func(s string, expectedAuthConfig map[string]*UserInfo) {
|
||||||
t.Helper()
|
t.Helper()
|
||||||
ac, err := parseAuthConfig([]byte(s))
|
ac, err := parseAuthConfig([]byte(s))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -294,19 +294,15 @@ func TestParseAuthConfigSuccess(t *testing.T) {
|
|||||||
if err := areEqualConfigs(m, expectedAuthConfig); err != nil {
|
if err := areEqualConfigs(m, expectedAuthConfig); err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := areEqualConfigs(ac.UnauthorizedUser, expectedUnauthorizedUserConfig); err != nil {
|
|
||||||
t.Fatal(err)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
insecureSkipVerifyTrue := true
|
insecureSkipVerifyTrue := true
|
||||||
|
|
||||||
// Empty config
|
// Empty config
|
||||||
f(``, map[string]*UserInfo{}, nil)
|
f(``, map[string]*UserInfo{})
|
||||||
|
|
||||||
// Empty users
|
// Empty users
|
||||||
f(`users: []`, map[string]*UserInfo{}, nil)
|
f(`users: []`, map[string]*UserInfo{})
|
||||||
|
|
||||||
// Single user
|
// Single user
|
||||||
f(`
|
f(`
|
||||||
@@ -324,7 +320,7 @@ users:
|
|||||||
MaxConcurrentRequests: 5,
|
MaxConcurrentRequests: 5,
|
||||||
TLSInsecureSkipVerify: &insecureSkipVerifyTrue,
|
TLSInsecureSkipVerify: &insecureSkipVerifyTrue,
|
||||||
},
|
},
|
||||||
}, nil)
|
})
|
||||||
|
|
||||||
// Single user with auth_token
|
// Single user with auth_token
|
||||||
f(`
|
f(`
|
||||||
@@ -348,7 +344,7 @@ users:
|
|||||||
TLSCertFile: "foo/baz",
|
TLSCertFile: "foo/baz",
|
||||||
TLSKeyFile: "foo/foo",
|
TLSKeyFile: "foo/foo",
|
||||||
},
|
},
|
||||||
}, nil)
|
})
|
||||||
|
|
||||||
// Multiple url_prefix entries
|
// Multiple url_prefix entries
|
||||||
insecureSkipVerifyFalse := false
|
insecureSkipVerifyFalse := false
|
||||||
@@ -363,7 +359,6 @@ users:
|
|||||||
tls_insecure_skip_verify: false
|
tls_insecure_skip_verify: false
|
||||||
retry_status_codes: [500, 501]
|
retry_status_codes: [500, 501]
|
||||||
load_balancing_policy: first_available
|
load_balancing_policy: first_available
|
||||||
merge_query_args: [foo, bar]
|
|
||||||
drop_src_path_prefix_parts: 1
|
drop_src_path_prefix_parts: 1
|
||||||
discover_backend_ips: true
|
discover_backend_ips: true
|
||||||
`, map[string]*UserInfo{
|
`, map[string]*UserInfo{
|
||||||
@@ -377,11 +372,10 @@ users:
|
|||||||
TLSInsecureSkipVerify: &insecureSkipVerifyFalse,
|
TLSInsecureSkipVerify: &insecureSkipVerifyFalse,
|
||||||
RetryStatusCodes: []int{500, 501},
|
RetryStatusCodes: []int{500, 501},
|
||||||
LoadBalancingPolicy: "first_available",
|
LoadBalancingPolicy: "first_available",
|
||||||
MergeQueryArgs: []string{"foo", "bar"},
|
DropSrcPathPrefixParts: intp(1),
|
||||||
DropSrcPathPrefixParts: new(1),
|
|
||||||
DiscoverBackendIPs: &discoverBackendIPsTrue,
|
DiscoverBackendIPs: &discoverBackendIPsTrue,
|
||||||
},
|
},
|
||||||
}, nil)
|
})
|
||||||
|
|
||||||
// Multiple users
|
// Multiple users
|
||||||
f(`
|
f(`
|
||||||
@@ -399,7 +393,7 @@ users:
|
|||||||
Username: "bar",
|
Username: "bar",
|
||||||
URLPrefix: mustParseURL("https://bar/x/"),
|
URLPrefix: mustParseURL("https://bar/x/"),
|
||||||
},
|
},
|
||||||
}, nil)
|
})
|
||||||
|
|
||||||
// non-empty URLMap
|
// non-empty URLMap
|
||||||
sharedUserInfo := &UserInfo{
|
sharedUserInfo := &UserInfo{
|
||||||
@@ -449,7 +443,7 @@ users:
|
|||||||
`, map[string]*UserInfo{
|
`, map[string]*UserInfo{
|
||||||
getHTTPAuthBearerToken("foo"): sharedUserInfo,
|
getHTTPAuthBearerToken("foo"): sharedUserInfo,
|
||||||
getHTTPAuthBasicToken("foo", ""): sharedUserInfo,
|
getHTTPAuthBasicToken("foo", ""): sharedUserInfo,
|
||||||
}, nil)
|
})
|
||||||
|
|
||||||
// Multiple users with the same name - this should work, since these users have different passwords
|
// Multiple users with the same name - this should work, since these users have different passwords
|
||||||
f(`
|
f(`
|
||||||
@@ -471,7 +465,7 @@ users:
|
|||||||
Password: "bar",
|
Password: "bar",
|
||||||
URLPrefix: mustParseURL("https://bar/x"),
|
URLPrefix: mustParseURL("https://bar/x"),
|
||||||
},
|
},
|
||||||
}, nil)
|
})
|
||||||
|
|
||||||
// with default url
|
// with default url
|
||||||
keepOriginalHost := true
|
keepOriginalHost := true
|
||||||
@@ -487,8 +481,6 @@ users:
|
|||||||
- "foo: bar"
|
- "foo: bar"
|
||||||
- "xxx: y"
|
- "xxx: y"
|
||||||
keep_original_host: true
|
keep_original_host: true
|
||||||
load_balancing_policy: first_available
|
|
||||||
merge_query_args: [foo, bar]
|
|
||||||
default_url:
|
default_url:
|
||||||
- http://default1/select/0/prometheus
|
- http://default1/select/0/prometheus
|
||||||
- http://default2/select/0/prometheus
|
- http://default2/select/0/prometheus
|
||||||
@@ -513,8 +505,6 @@ users:
|
|||||||
},
|
},
|
||||||
KeepOriginalHost: &keepOriginalHost,
|
KeepOriginalHost: &keepOriginalHost,
|
||||||
},
|
},
|
||||||
LoadBalancingPolicy: "first_available",
|
|
||||||
MergeQueryArgs: []string{"foo", "bar"},
|
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
DefaultURL: mustParseURLs([]string{
|
DefaultURL: mustParseURLs([]string{
|
||||||
@@ -542,8 +532,6 @@ users:
|
|||||||
},
|
},
|
||||||
KeepOriginalHost: &keepOriginalHost,
|
KeepOriginalHost: &keepOriginalHost,
|
||||||
},
|
},
|
||||||
LoadBalancingPolicy: "first_available",
|
|
||||||
MergeQueryArgs: []string{"foo", "bar"},
|
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
DefaultURL: mustParseURLs([]string{
|
DefaultURL: mustParseURLs([]string{
|
||||||
@@ -551,7 +539,7 @@ users:
|
|||||||
"http://default2/select/0/prometheus",
|
"http://default2/select/0/prometheus",
|
||||||
}),
|
}),
|
||||||
},
|
},
|
||||||
}, nil)
|
})
|
||||||
|
|
||||||
// With metric_labels
|
// With metric_labels
|
||||||
f(`
|
f(`
|
||||||
@@ -603,40 +591,7 @@ users:
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
}, nil)
|
|
||||||
|
|
||||||
// unauthorized_user
|
|
||||||
f(`
|
|
||||||
unauthorized_user:
|
|
||||||
merge_query_args: [extra_filters]
|
|
||||||
url_map:
|
|
||||||
- src_paths: ["/select/.+"]
|
|
||||||
url_prefix: 'http://victoria-logs:9428/?extra_filters={env="prod"}'
|
|
||||||
`, nil, &UserInfo{
|
|
||||||
MergeQueryArgs: []string{"extra_filters"},
|
|
||||||
URLMaps: []URLMap{
|
|
||||||
{
|
|
||||||
SrcPaths: getRegexs([]string{"/select/.+"}),
|
|
||||||
URLPrefix: mustParseURL(`http://victoria-logs:9428/?extra_filters={env="prod"}`),
|
|
||||||
},
|
|
||||||
},
|
|
||||||
})
|
})
|
||||||
|
|
||||||
// skip user info with jwt, it is parsed by parseJWTUsers
|
|
||||||
f(`
|
|
||||||
users:
|
|
||||||
- username: foo
|
|
||||||
password: bar
|
|
||||||
url_prefix: http://aaa:343/bbb
|
|
||||||
- jwt: {skip_verify: true}
|
|
||||||
url_prefix: http://aaa:343/bbb
|
|
||||||
`, map[string]*UserInfo{
|
|
||||||
getHTTPAuthBasicToken("foo", "bar"): {
|
|
||||||
Username: "foo",
|
|
||||||
Password: "bar",
|
|
||||||
URLPrefix: mustParseURL("http://aaa:343/bbb"),
|
|
||||||
},
|
|
||||||
}, nil)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestParseAuthConfigPassesTLSVerificationConfig(t *testing.T) {
|
func TestParseAuthConfigPassesTLSVerificationConfig(t *testing.T) {
|
||||||
@@ -768,12 +723,10 @@ func TestGetLeastLoadedBackendURL(t *testing.T) {
|
|||||||
})
|
})
|
||||||
up.loadBalancingPolicy = "least_loaded"
|
up.loadBalancingPolicy = "least_loaded"
|
||||||
|
|
||||||
pbus := up.bus.Load()
|
|
||||||
bus := pbus.bus
|
|
||||||
|
|
||||||
fn := func(ns ...int) {
|
fn := func(ns ...int) {
|
||||||
t.Helper()
|
t.Helper()
|
||||||
|
pbus := up.bus.Load()
|
||||||
|
bus := *pbus
|
||||||
for i, b := range bus {
|
for i, b := range bus {
|
||||||
got := int(b.concurrentRequests.Load())
|
got := int(b.concurrentRequests.Load())
|
||||||
exp := ns[i]
|
exp := ns[i]
|
||||||
@@ -785,52 +738,45 @@ func TestGetLeastLoadedBackendURL(t *testing.T) {
|
|||||||
|
|
||||||
up.getBackendURL()
|
up.getBackendURL()
|
||||||
fn(1, 0, 0)
|
fn(1, 0, 0)
|
||||||
|
|
||||||
up.getBackendURL()
|
up.getBackendURL()
|
||||||
fn(1, 1, 0)
|
fn(1, 1, 0)
|
||||||
|
|
||||||
up.getBackendURL()
|
up.getBackendURL()
|
||||||
fn(1, 1, 1)
|
fn(1, 1, 1)
|
||||||
|
|
||||||
bus[1].put()
|
up.getBackendURL()
|
||||||
bus[2].put()
|
up.getBackendURL()
|
||||||
fn(1, 0, 0)
|
fn(2, 2, 1)
|
||||||
|
|
||||||
|
bus := up.bus.Load()
|
||||||
|
pbus := *bus
|
||||||
|
pbus[0].concurrentRequests.Add(2)
|
||||||
|
pbus[2].concurrentRequests.Add(5)
|
||||||
|
fn(4, 2, 6)
|
||||||
|
|
||||||
up.getBackendURL()
|
up.getBackendURL()
|
||||||
fn(1, 1, 0)
|
fn(4, 3, 6)
|
||||||
|
|
||||||
bus[1].put()
|
|
||||||
up.getBackendURL()
|
up.getBackendURL()
|
||||||
fn(1, 0, 1)
|
fn(4, 4, 6)
|
||||||
|
|
||||||
|
up.getBackendURL()
|
||||||
|
fn(4, 5, 6)
|
||||||
|
|
||||||
|
up.getBackendURL()
|
||||||
|
fn(5, 5, 6)
|
||||||
|
|
||||||
|
up.getBackendURL()
|
||||||
|
fn(6, 5, 6)
|
||||||
|
|
||||||
|
up.getBackendURL()
|
||||||
|
fn(6, 6, 6)
|
||||||
|
|
||||||
|
up.getBackendURL()
|
||||||
|
fn(6, 6, 7)
|
||||||
|
|
||||||
up.getBackendURL()
|
up.getBackendURL()
|
||||||
up.getBackendURL()
|
up.getBackendURL()
|
||||||
fn(1, 1, 2)
|
fn(7, 7, 7)
|
||||||
|
|
||||||
bus[0].concurrentRequests.Add(2)
|
|
||||||
bus[2].concurrentRequests.Add(2)
|
|
||||||
fn(3, 1, 4)
|
|
||||||
|
|
||||||
up.getBackendURL()
|
|
||||||
fn(3, 2, 4)
|
|
||||||
|
|
||||||
up.getBackendURL()
|
|
||||||
fn(3, 3, 4)
|
|
||||||
|
|
||||||
up.getBackendURL()
|
|
||||||
fn(4, 3, 4)
|
|
||||||
|
|
||||||
up.getBackendURL()
|
|
||||||
fn(4, 4, 4)
|
|
||||||
|
|
||||||
bus[0].put()
|
|
||||||
bus[2].put()
|
|
||||||
|
|
||||||
up.getBackendURL()
|
|
||||||
fn(3, 4, 4)
|
|
||||||
|
|
||||||
up.getBackendURL()
|
|
||||||
fn(4, 4, 4)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestBrokenBackend(t *testing.T) {
|
func TestBrokenBackend(t *testing.T) {
|
||||||
@@ -841,13 +787,13 @@ func TestBrokenBackend(t *testing.T) {
|
|||||||
})
|
})
|
||||||
up.loadBalancingPolicy = "least_loaded"
|
up.loadBalancingPolicy = "least_loaded"
|
||||||
pbus := up.bus.Load()
|
pbus := up.bus.Load()
|
||||||
bus := pbus.bus
|
bus := *pbus
|
||||||
|
|
||||||
// explicitly mark one of the backends as broken
|
// explicitly mark one of the backends as broken
|
||||||
bus[1].setBroken()
|
bus[1].setBroken()
|
||||||
|
|
||||||
// broken backend should never return while there are healthy backends
|
// broken backend should never return while there are healthy backends
|
||||||
for range int(1e3) {
|
for i := 0; i < 1e3; i++ {
|
||||||
b := up.getBackendURL()
|
b := up.getBackendURL()
|
||||||
if b.isBroken() {
|
if b.isBroken() {
|
||||||
t.Fatalf("unexpected broken backend %q", b.url)
|
t.Fatalf("unexpected broken backend %q", b.url)
|
||||||
@@ -864,7 +810,7 @@ func TestDiscoverBackendIPsWithIPV6(t *testing.T) {
|
|||||||
|
|
||||||
up.discoverBackendAddrsIfNeeded()
|
up.discoverBackendAddrsIfNeeded()
|
||||||
pbus := up.bus.Load()
|
pbus := up.bus.Load()
|
||||||
bus := pbus.bus
|
bus := *pbus
|
||||||
|
|
||||||
if len(bus) != 1 {
|
if len(bus) != 1 {
|
||||||
t.Fatalf("expected url list to be of size 1; got %d instead", len(bus))
|
t.Fatalf("expected url list to be of size 1; got %d instead", len(bus))
|
||||||
@@ -938,7 +884,7 @@ func removeMetrics(m map[string]*UserInfo) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func areEqualConfigs(a, b any) error {
|
func areEqualConfigs(a, b map[string]*UserInfo) error {
|
||||||
aData, err := yaml.Marshal(a)
|
aData, err := yaml.Marshal(a)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("cannot marshal a: %w", err)
|
return fmt.Errorf("cannot marshal a: %w", err)
|
||||||
@@ -958,14 +904,16 @@ func mustParseURL(u string) *URLPrefix {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func mustParseURLs(us []string) *URLPrefix {
|
func mustParseURLs(us []string) *URLPrefix {
|
||||||
bus := newBackendURLs()
|
bus := make([]*backendURL, len(us))
|
||||||
urls := make([]*url.URL, len(us))
|
urls := make([]*url.URL, len(us))
|
||||||
for i, u := range us {
|
for i, u := range us {
|
||||||
pu, err := url.Parse(u)
|
pu, err := url.Parse(u)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
panic(fmt.Errorf("BUG: cannot parse %q: %w", u, err))
|
panic(fmt.Errorf("BUG: cannot parse %q: %w", u, err))
|
||||||
}
|
}
|
||||||
bus.add(pu)
|
bus[i] = &backendURL{
|
||||||
|
url: pu,
|
||||||
|
}
|
||||||
urls[i] = pu
|
urls[i] = pu
|
||||||
}
|
}
|
||||||
up := &URLPrefix{}
|
up := &URLPrefix{}
|
||||||
@@ -974,11 +922,15 @@ func mustParseURLs(us []string) *URLPrefix {
|
|||||||
} else {
|
} else {
|
||||||
up.vOriginal = us
|
up.vOriginal = us
|
||||||
}
|
}
|
||||||
up.bus.Store(bus)
|
up.bus.Store(&bus)
|
||||||
up.busOriginal = urls
|
up.busOriginal = urls
|
||||||
return up
|
return up
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func intp(n int) *int {
|
||||||
|
return &n
|
||||||
|
}
|
||||||
|
|
||||||
func mustNewRegex(s string) *Regex {
|
func mustNewRegex(s string) *Regex {
|
||||||
var re Regex
|
var re Regex
|
||||||
if err := yaml.Unmarshal([]byte(s), &re); err != nil {
|
if err := yaml.Unmarshal([]byte(s), &re); err != nil {
|
||||||
|
|||||||
@@ -1,156 +0,0 @@
|
|||||||
package main
|
|
||||||
|
|
||||||
import (
|
|
||||||
"fmt"
|
|
||||||
"os"
|
|
||||||
"strings"
|
|
||||||
"time"
|
|
||||||
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/jwt"
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
|
||||||
)
|
|
||||||
|
|
||||||
type jwtCache struct {
|
|
||||||
// users contain UserInfo`s from AuthConfig with JWTConfig set
|
|
||||||
users []*UserInfo
|
|
||||||
}
|
|
||||||
|
|
||||||
type JWTConfig struct {
|
|
||||||
PublicKeys []string `yaml:"public_keys,omitempty"`
|
|
||||||
PublicKeyFiles []string `yaml:"public_key_files,omitempty"`
|
|
||||||
SkipVerify bool `yaml:"skip_verify,omitempty"`
|
|
||||||
|
|
||||||
verifierPool *jwt.VerifierPool
|
|
||||||
}
|
|
||||||
|
|
||||||
func parseJWTUsers(ac *AuthConfig) ([]*UserInfo, error) {
|
|
||||||
jui := make([]*UserInfo, 0, len(ac.Users))
|
|
||||||
for _, ui := range ac.Users {
|
|
||||||
jwtToken := ui.JWT
|
|
||||||
if jwtToken == nil {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
if ui.AuthToken != "" || ui.BearerToken != "" || ui.Username != "" || ui.Password != "" {
|
|
||||||
return nil, fmt.Errorf("auth_token, bearer_token, username and password cannot be specified if jwt is set")
|
|
||||||
}
|
|
||||||
if len(jwtToken.PublicKeys) == 0 && len(jwtToken.PublicKeyFiles) == 0 && !jwtToken.SkipVerify {
|
|
||||||
return nil, fmt.Errorf("jwt must contain at least a single public key, public_key_files or have skip_verify=true")
|
|
||||||
}
|
|
||||||
|
|
||||||
if len(jwtToken.PublicKeys) > 0 || len(jwtToken.PublicKeyFiles) > 0 {
|
|
||||||
keys := make([]any, 0, len(jwtToken.PublicKeys)+len(jwtToken.PublicKeyFiles))
|
|
||||||
|
|
||||||
for i := range jwtToken.PublicKeys {
|
|
||||||
k, err := jwt.ParseKey([]byte(jwtToken.PublicKeys[i]))
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
keys = append(keys, k)
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, filePath := range jwtToken.PublicKeyFiles {
|
|
||||||
keyData, err := os.ReadFile(filePath)
|
|
||||||
if err != nil {
|
|
||||||
return nil, fmt.Errorf("cannot read public key from file %q: %w", filePath, err)
|
|
||||||
}
|
|
||||||
k, err := jwt.ParseKey(keyData)
|
|
||||||
if err != nil {
|
|
||||||
return nil, fmt.Errorf("cannot parse public key from file %q: %w", filePath, err)
|
|
||||||
}
|
|
||||||
keys = append(keys, k)
|
|
||||||
}
|
|
||||||
|
|
||||||
vp, err := jwt.NewVerifierPool(keys)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
jwtToken.verifierPool = vp
|
|
||||||
}
|
|
||||||
|
|
||||||
if err := ui.initURLs(); err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
metricLabels, err := ui.getMetricLabels()
|
|
||||||
if err != nil {
|
|
||||||
return nil, fmt.Errorf("cannot parse metric_labels: %w", err)
|
|
||||||
}
|
|
||||||
ui.requests = ac.ms.GetOrCreateCounter(`vmauth_user_requests_total` + metricLabels)
|
|
||||||
ui.requestErrors = ac.ms.GetOrCreateCounter(`vmauth_user_request_errors_total` + metricLabels)
|
|
||||||
ui.backendRequests = ac.ms.GetOrCreateCounter(`vmauth_user_request_backend_requests_total` + metricLabels)
|
|
||||||
ui.backendErrors = ac.ms.GetOrCreateCounter(`vmauth_user_request_backend_errors_total` + metricLabels)
|
|
||||||
ui.requestsDuration = ac.ms.GetOrCreateSummary(`vmauth_user_request_duration_seconds` + metricLabels)
|
|
||||||
mcr := ui.getMaxConcurrentRequests()
|
|
||||||
ui.concurrencyLimitCh = make(chan struct{}, mcr)
|
|
||||||
ui.concurrencyLimitReached = ac.ms.GetOrCreateCounter(`vmauth_user_concurrent_requests_limit_reached_total` + metricLabels)
|
|
||||||
_ = ac.ms.GetOrCreateGauge(`vmauth_user_concurrent_requests_capacity`+metricLabels, func() float64 {
|
|
||||||
return float64(cap(ui.concurrencyLimitCh))
|
|
||||||
})
|
|
||||||
_ = ac.ms.GetOrCreateGauge(`vmauth_user_concurrent_requests_current`+metricLabels, func() float64 {
|
|
||||||
return float64(len(ui.concurrencyLimitCh))
|
|
||||||
})
|
|
||||||
|
|
||||||
rt, err := newRoundTripper(ui.TLSCAFile, ui.TLSCertFile, ui.TLSKeyFile, ui.TLSServerName, ui.TLSInsecureSkipVerify)
|
|
||||||
if err != nil {
|
|
||||||
return nil, fmt.Errorf("cannot initialize HTTP RoundTripper: %w", err)
|
|
||||||
}
|
|
||||||
ui.rt = rt
|
|
||||||
|
|
||||||
jui = append(jui, &ui)
|
|
||||||
}
|
|
||||||
|
|
||||||
// the limitation will be lifted once claim based matching will be implemented
|
|
||||||
if len(jui) > 1 {
|
|
||||||
return nil, fmt.Errorf("multiple users with JWT tokens are not supported; found %d users", len(jui))
|
|
||||||
}
|
|
||||||
|
|
||||||
return jui, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func getUserInfoByJWTToken(ats []string) *UserInfo {
|
|
||||||
js := *jwtAuthCache.Load()
|
|
||||||
if len(js.users) == 0 {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, at := range ats {
|
|
||||||
if strings.Count(at, ".") != 2 {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
at, _ = strings.CutPrefix(at, `http_auth:`)
|
|
||||||
|
|
||||||
tkn, err := jwt.NewToken(at, true)
|
|
||||||
if err != nil {
|
|
||||||
if *logInvalidAuthTokens {
|
|
||||||
logger.Infof("cannot parse jwt token: %s", err)
|
|
||||||
}
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
if tkn.IsExpired(time.Now()) {
|
|
||||||
if *logInvalidAuthTokens {
|
|
||||||
logger.Infof("jwt token is expired")
|
|
||||||
}
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, ui := range js.users {
|
|
||||||
if ui.JWT.SkipVerify {
|
|
||||||
return ui
|
|
||||||
}
|
|
||||||
|
|
||||||
if err := ui.JWT.verifierPool.Verify(tkn); err != nil {
|
|
||||||
if *logInvalidAuthTokens {
|
|
||||||
logger.Infof("cannot verify jwt token: %s", err)
|
|
||||||
}
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
return ui
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
@@ -1,304 +0,0 @@
|
|||||||
package main
|
|
||||||
|
|
||||||
import (
|
|
||||||
"fmt"
|
|
||||||
"os"
|
|
||||||
"path/filepath"
|
|
||||||
"testing"
|
|
||||||
)
|
|
||||||
|
|
||||||
func TestJWTParseAuthConfigFailure(t *testing.T) {
|
|
||||||
validRSAPublicKey := `-----BEGIN PUBLIC KEY-----
|
|
||||||
MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAiX7oPWKOWRQsGFEWvwZO
|
|
||||||
mL2PYsdYUsu9nr0qtPCjxQHUJgLfT3rdKlvKpPFYv7ZmKnqTncg36Wz9uiYmWJ7e
|
|
||||||
IB5Z+fko8kVIMzarCqVvpAJDzYF/pUii68xvuYoK3L9TIOAeyCXv+prwnr2IH+Mw
|
|
||||||
9AONzWbRrYoO74XyTE9vMU5qmI/L1VPk+PR8lqPOSptLvzsfoaIk2ED4yK2nRB+6
|
|
||||||
st+k4nccPqbErqHc8aiXnXfugfnr6b+NPFYUzKsDqkymGOokVijrI8B3jNw6c6Do
|
|
||||||
zphk+D3wgLsXYHfMcZbXIMqffqm/aB8Qg88OpFOkQ3rd2p6R9+hacnZkfkn3Phiw
|
|
||||||
yQIDAQAB
|
|
||||||
-----END PUBLIC KEY-----
|
|
||||||
`
|
|
||||||
// ECDSA with the P-521 curve
|
|
||||||
validECDSAPublicKey := `-----BEGIN PUBLIC KEY-----
|
|
||||||
MIGbMBAGByqGSM49AgEGBSuBBAAjA4GGAAQAU9RmtkCRuYTKCyvLlDn5DtBZOHSe
|
|
||||||
QTa5j9q/oQVpCKqcXVFrH5dgh0GL+P/ZhkeuowPzCZqntGf0+7wPt9OxSJcADVJm
|
|
||||||
dv92m540MXss8zdHf5qtE0gsu2Ved0R7Z8a8QwGZ/1mYZ+kFGGbdQTlSvRqDySTq
|
|
||||||
XOtclIk1uhc03oL9nOQ=
|
|
||||||
-----END PUBLIC KEY-----
|
|
||||||
`
|
|
||||||
|
|
||||||
f := func(s string, expErr string) {
|
|
||||||
t.Helper()
|
|
||||||
ac, err := parseAuthConfig([]byte(s))
|
|
||||||
if err != nil {
|
|
||||||
if expErr != err.Error() {
|
|
||||||
t.Fatalf("unexpected error; got %q; want %q", err.Error(), expErr)
|
|
||||||
}
|
|
||||||
return
|
|
||||||
}
|
|
||||||
users, err := parseJWTUsers(ac)
|
|
||||||
if err != nil {
|
|
||||||
if expErr != err.Error() {
|
|
||||||
t.Fatalf("unexpected error; got %q; want %q", err.Error(), expErr)
|
|
||||||
}
|
|
||||||
return
|
|
||||||
}
|
|
||||||
t.Fatalf("expecting non-nil error; got %v", users)
|
|
||||||
}
|
|
||||||
|
|
||||||
// unauthorized_user cannot be used with jwt
|
|
||||||
f(`
|
|
||||||
unauthorized_user:
|
|
||||||
jwt: {skip_verify: true}
|
|
||||||
url_prefix: http://foo.bar
|
|
||||||
`, `field jwt can't be specified for unauthorized_user section`)
|
|
||||||
|
|
||||||
// username and jwt in a single config
|
|
||||||
f(`
|
|
||||||
users:
|
|
||||||
- username: foo
|
|
||||||
jwt: {skip_verify: true}
|
|
||||||
url_prefix: http://foo.bar
|
|
||||||
`, `auth_token, bearer_token, username and password cannot be specified if jwt is set`)
|
|
||||||
// bearer_token and jwt in a single config
|
|
||||||
f(`
|
|
||||||
users:
|
|
||||||
- bearer_token: foo
|
|
||||||
jwt: {skip_verify: true}
|
|
||||||
url_prefix: http://foo.bar
|
|
||||||
`, `auth_token, bearer_token, username and password cannot be specified if jwt is set`)
|
|
||||||
// bearer_token and jwt in a single config
|
|
||||||
f(`
|
|
||||||
users:
|
|
||||||
- auth_token: "Foo token"
|
|
||||||
jwt: {skip_verify: true}
|
|
||||||
url_prefix: http://foo.bar
|
|
||||||
`, `auth_token, bearer_token, username and password cannot be specified if jwt is set`)
|
|
||||||
|
|
||||||
// jwt public_keys or skip_verify must be set, part 1
|
|
||||||
f(`
|
|
||||||
users:
|
|
||||||
- jwt: {}
|
|
||||||
url_prefix: http://foo.bar
|
|
||||||
`, `jwt must contain at least a single public key, public_key_files or have skip_verify=true`)
|
|
||||||
|
|
||||||
// jwt public_keys or skip_verify must be set, part 2
|
|
||||||
f(`
|
|
||||||
users:
|
|
||||||
- jwt: {public_keys: null}
|
|
||||||
url_prefix: http://foo.bar
|
|
||||||
`, `jwt must contain at least a single public key, public_key_files or have skip_verify=true`)
|
|
||||||
|
|
||||||
// jwt public_keys or skip_verify must be set, part 3
|
|
||||||
f(`
|
|
||||||
users:
|
|
||||||
- jwt: {public_keys: []}
|
|
||||||
url_prefix: http://foo.bar
|
|
||||||
`, `jwt must contain at least a single public key, public_key_files or have skip_verify=true`)
|
|
||||||
|
|
||||||
// jwt public_keys, public_key_files or skip_verify must be set
|
|
||||||
f(`
|
|
||||||
users:
|
|
||||||
- jwt: {public_key_files: []}
|
|
||||||
url_prefix: http://foo.bar
|
|
||||||
`, `jwt must contain at least a single public key, public_key_files or have skip_verify=true`)
|
|
||||||
|
|
||||||
// invalid public key, part 1
|
|
||||||
f(`
|
|
||||||
users:
|
|
||||||
- jwt: {public_keys: [""]}
|
|
||||||
url_prefix: http://foo.bar
|
|
||||||
`, `failed to parse key "": failed to decode PEM block containing public key`)
|
|
||||||
|
|
||||||
// invalid public key, part 2
|
|
||||||
f(`
|
|
||||||
users:
|
|
||||||
- jwt: {public_keys: ["invalid"]}
|
|
||||||
url_prefix: http://foo.bar
|
|
||||||
`, `failed to parse key "invalid": failed to decode PEM block containing public key`)
|
|
||||||
|
|
||||||
// invalid public key, part 2
|
|
||||||
f(fmt.Sprintf(`
|
|
||||||
users:
|
|
||||||
- jwt:
|
|
||||||
public_keys:
|
|
||||||
- %q
|
|
||||||
- %q
|
|
||||||
- "invalid"
|
|
||||||
url_prefix: http://foo.bar
|
|
||||||
`, validRSAPublicKey, validECDSAPublicKey), `failed to parse key "invalid": failed to decode PEM block containing public key`)
|
|
||||||
|
|
||||||
// several jwt users
|
|
||||||
// invalid public key, part 2
|
|
||||||
f(fmt.Sprintf(`
|
|
||||||
users:
|
|
||||||
- jwt:
|
|
||||||
public_keys:
|
|
||||||
- %q
|
|
||||||
url_prefix: http://foo.bar
|
|
||||||
- jwt:
|
|
||||||
public_keys:
|
|
||||||
- %q
|
|
||||||
url_prefix: http://foo.bar
|
|
||||||
`, validRSAPublicKey, validECDSAPublicKey), `multiple users with JWT tokens are not supported; found 2 users`)
|
|
||||||
|
|
||||||
// public key file doesn't exist
|
|
||||||
f(`
|
|
||||||
users:
|
|
||||||
- jwt:
|
|
||||||
public_key_files:
|
|
||||||
- /path/to/nonexistent/file.pem
|
|
||||||
url_prefix: http://foo.bar
|
|
||||||
`, "cannot read public key from file \"/path/to/nonexistent/file.pem\": open /path/to/nonexistent/file.pem: no such file or directory")
|
|
||||||
|
|
||||||
// public key file invalid
|
|
||||||
// auth with key from file
|
|
||||||
publicKeyFile := filepath.Join(t.TempDir(), "a_public_key.pem")
|
|
||||||
if err := os.WriteFile(publicKeyFile, []byte(`invalidPEM`), 0o644); err != nil {
|
|
||||||
t.Fatalf("failed to write public key file: %s", err)
|
|
||||||
}
|
|
||||||
f(`
|
|
||||||
users:
|
|
||||||
- jwt:
|
|
||||||
public_key_files:
|
|
||||||
- `+publicKeyFile+`
|
|
||||||
url_prefix: http://foo.bar
|
|
||||||
`, "cannot parse public key from file \""+publicKeyFile+"\": failed to parse key \"invalidPEM\": failed to decode PEM block containing public key")
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestJWTParseAuthConfigSuccess(t *testing.T) {
|
|
||||||
validRSAPublicKey := `-----BEGIN PUBLIC KEY-----
|
|
||||||
MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAiX7oPWKOWRQsGFEWvwZO
|
|
||||||
mL2PYsdYUsu9nr0qtPCjxQHUJgLfT3rdKlvKpPFYv7ZmKnqTncg36Wz9uiYmWJ7e
|
|
||||||
IB5Z+fko8kVIMzarCqVvpAJDzYF/pUii68xvuYoK3L9TIOAeyCXv+prwnr2IH+Mw
|
|
||||||
9AONzWbRrYoO74XyTE9vMU5qmI/L1VPk+PR8lqPOSptLvzsfoaIk2ED4yK2nRB+6
|
|
||||||
st+k4nccPqbErqHc8aiXnXfugfnr6b+NPFYUzKsDqkymGOokVijrI8B3jNw6c6Do
|
|
||||||
zphk+D3wgLsXYHfMcZbXIMqffqm/aB8Qg88OpFOkQ3rd2p6R9+hacnZkfkn3Phiw
|
|
||||||
yQIDAQAB
|
|
||||||
-----END PUBLIC KEY-----
|
|
||||||
`
|
|
||||||
// ECDSA with the P-521 curve
|
|
||||||
validECDSAPublicKey := `-----BEGIN PUBLIC KEY-----
|
|
||||||
MIGbMBAGByqGSM49AgEGBSuBBAAjA4GGAAQAU9RmtkCRuYTKCyvLlDn5DtBZOHSe
|
|
||||||
QTa5j9q/oQVpCKqcXVFrH5dgh0GL+P/ZhkeuowPzCZqntGf0+7wPt9OxSJcADVJm
|
|
||||||
dv92m540MXss8zdHf5qtE0gsu2Ved0R7Z8a8QwGZ/1mYZ+kFGGbdQTlSvRqDySTq
|
|
||||||
XOtclIk1uhc03oL9nOQ=
|
|
||||||
-----END PUBLIC KEY-----
|
|
||||||
`
|
|
||||||
|
|
||||||
f := func(s string) {
|
|
||||||
t.Helper()
|
|
||||||
ac, err := parseAuthConfig([]byte(s))
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("unexpected error: %s", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
jui, err := parseJWTUsers(ac)
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("unexpected error: %s", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, ui := range jui {
|
|
||||||
if ui.JWT == nil {
|
|
||||||
t.Fatalf("unexpected nil JWTConfig")
|
|
||||||
}
|
|
||||||
|
|
||||||
if ui.JWT.SkipVerify {
|
|
||||||
if ui.JWT.verifierPool != nil {
|
|
||||||
t.Fatalf("unexpected non-nil verifier pool for skip_verify=true")
|
|
||||||
}
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
if ui.JWT.verifierPool == nil {
|
|
||||||
t.Fatalf("unexpected nil verifier pool for non-empty public keys")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
f(fmt.Sprintf(`
|
|
||||||
users:
|
|
||||||
- jwt:
|
|
||||||
public_keys:
|
|
||||||
- %q
|
|
||||||
url_prefix: http://foo.bar
|
|
||||||
`, validRSAPublicKey))
|
|
||||||
|
|
||||||
f(fmt.Sprintf(`
|
|
||||||
users:
|
|
||||||
- jwt:
|
|
||||||
public_keys:
|
|
||||||
- %q
|
|
||||||
url_prefix: http://foo.bar
|
|
||||||
`, validECDSAPublicKey))
|
|
||||||
|
|
||||||
f(fmt.Sprintf(`
|
|
||||||
users:
|
|
||||||
- jwt:
|
|
||||||
public_keys:
|
|
||||||
- %q
|
|
||||||
- %q
|
|
||||||
url_prefix: http://foo.bar
|
|
||||||
`, validRSAPublicKey, validECDSAPublicKey))
|
|
||||||
|
|
||||||
f(`
|
|
||||||
users:
|
|
||||||
- jwt:
|
|
||||||
skip_verify: true
|
|
||||||
url_prefix: http://foo.bar
|
|
||||||
`)
|
|
||||||
|
|
||||||
// combined with other auth methods
|
|
||||||
f(`
|
|
||||||
users:
|
|
||||||
- username: foo
|
|
||||||
password: bar
|
|
||||||
url_prefix: http://foo.bar
|
|
||||||
|
|
||||||
- jwt:
|
|
||||||
skip_verify: true
|
|
||||||
url_prefix: http://foo.bar
|
|
||||||
|
|
||||||
- bearer_token: foo
|
|
||||||
url_prefix: http://foo.bar
|
|
||||||
`)
|
|
||||||
|
|
||||||
rsaKeyFile := filepath.Join(t.TempDir(), "rsa_public_key.pem")
|
|
||||||
if err := os.WriteFile(rsaKeyFile, []byte(validRSAPublicKey), 0o644); err != nil {
|
|
||||||
t.Fatalf("failed to write RSA key file: %s", err)
|
|
||||||
}
|
|
||||||
ecdsaKeyFile := filepath.Join(t.TempDir(), "ecdsa_public_key.pem")
|
|
||||||
if err := os.WriteFile(ecdsaKeyFile, []byte(validECDSAPublicKey), 0o644); err != nil {
|
|
||||||
t.Fatalf("failed to write ECDSA key file: %s", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Test single public key file
|
|
||||||
f(fmt.Sprintf(`
|
|
||||||
users:
|
|
||||||
- jwt:
|
|
||||||
public_key_files:
|
|
||||||
- %q
|
|
||||||
url_prefix: http://foo.bar
|
|
||||||
`, rsaKeyFile))
|
|
||||||
|
|
||||||
// Test multiple public key files
|
|
||||||
f(fmt.Sprintf(`
|
|
||||||
users:
|
|
||||||
- jwt:
|
|
||||||
public_key_files:
|
|
||||||
- %q
|
|
||||||
- %q
|
|
||||||
url_prefix: http://foo.bar
|
|
||||||
`, rsaKeyFile, ecdsaKeyFile))
|
|
||||||
|
|
||||||
// Test combined inline keys and files
|
|
||||||
f(fmt.Sprintf(`
|
|
||||||
users:
|
|
||||||
- jwt:
|
|
||||||
public_keys:
|
|
||||||
- %q
|
|
||||||
public_key_files:
|
|
||||||
- %q
|
|
||||||
url_prefix: http://foo.bar
|
|
||||||
`, validECDSAPublicKey, rsaKeyFile))
|
|
||||||
}
|
|
||||||
@@ -24,7 +24,6 @@ import (
|
|||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httputil"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httputil"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/ioutil"
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
|
||||||
@@ -41,38 +40,22 @@ var (
|
|||||||
useProxyProtocol = flagutil.NewArrayBool("httpListenAddr.useProxyProtocol", "Whether to use proxy protocol for connections accepted at the corresponding -httpListenAddr . "+
|
useProxyProtocol = flagutil.NewArrayBool("httpListenAddr.useProxyProtocol", "Whether to use proxy protocol for connections accepted at the corresponding -httpListenAddr . "+
|
||||||
"See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt . "+
|
"See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt . "+
|
||||||
"With enabled proxy protocol http server cannot serve regular /metrics endpoint. Use -pushmetrics.url for metrics pushing")
|
"With enabled proxy protocol http server cannot serve regular /metrics endpoint. Use -pushmetrics.url for metrics pushing")
|
||||||
maxIdleConnsPerBackend = flag.Int("maxIdleConnsPerBackend", 100, "The maximum number of idle connections vmauth can open per each backend host")
|
maxIdleConnsPerBackend = flag.Int("maxIdleConnsPerBackend", 100, "The maximum number of idle connections vmauth can open per each backend host. "+
|
||||||
idleConnTimeout = flag.Duration("idleConnTimeout", 50*time.Second, "The timeout for HTTP keep-alive connections to backend services. "+
|
"See also -maxConcurrentRequests")
|
||||||
|
idleConnTimeout = flag.Duration("idleConnTimeout", 50*time.Second, "The timeout for HTTP keep-alive connections to backend services. "+
|
||||||
"It is recommended setting this value to values smaller than -http.idleConnTimeout set at backend services")
|
"It is recommended setting this value to values smaller than -http.idleConnTimeout set at backend services")
|
||||||
responseTimeout = flag.Duration("responseTimeout", 5*time.Minute, "The timeout for receiving a response from backend")
|
responseTimeout = flag.Duration("responseTimeout", 5*time.Minute, "The timeout for receiving a response from backend")
|
||||||
|
maxConcurrentRequests = flag.Int("maxConcurrentRequests", 1000, "The maximum number of concurrent requests vmauth can process. Other requests are rejected with "+
|
||||||
requestBufferSize = flagutil.NewBytes("requestBufferSize", 32*1024, "The size of the buffer for reading the request body before proxying the request to backends. "+
|
"'429 Too Many Requests' http status code. See also -maxConcurrentPerUserRequests and -maxIdleConnsPerBackend command-line options")
|
||||||
"This allows reducing the comsumption of backend resources when processing requests from clients connected via slow networks. "+
|
maxConcurrentPerUserRequests = flag.Int("maxConcurrentPerUserRequests", 300, "The maximum number of concurrent requests vmauth can process per each configured user. "+
|
||||||
"Set to 0 to disable request buffering. See https://docs.victoriametrics.com/victoriametrics/vmauth/#request-body-buffering")
|
"Other requests are rejected with '429 Too Many Requests' http status code. See also -maxConcurrentRequests command-line option and max_concurrent_requests option "+
|
||||||
maxRequestBodySizeToRetry = flagutil.NewBytes("maxRequestBodySizeToRetry", 16*1024, "The maximum request body size to buffer in memory for potential retries at other backends. "+
|
"in per-user config")
|
||||||
"Request bodies larger than this size cannot be retried if the backend fails. Zero or negative value disables request body buffering and retries. "+
|
|
||||||
"See also -requestBufferSize")
|
|
||||||
|
|
||||||
maxConcurrentRequests = flag.Int("maxConcurrentRequests", 1000, "The maximum number of concurrent requests vmauth can process simultaneously. "+
|
|
||||||
"Requests exceeding this limit are queued for up to -maxQueueDuration and then rejected with '429 Too Many Requests' http status code if the limit is still reached. "+
|
|
||||||
"This protects vmauth itself from overloading and out-of-memory (OOM) failures. See also -maxConcurrentPerUserRequests "+
|
|
||||||
"and https://docs.victoriametrics.com/victoriametrics/vmauth/#concurrency-limiting")
|
|
||||||
maxConcurrentPerUserRequests = flag.Int("maxConcurrentPerUserRequests", 100, "The maximum number of concurrent requests vmauth can process per each configured user. "+
|
|
||||||
"Requests exceeding this limit are queued for up to -maxQueueDuration and then rejected with '429 Too Many Requests' http status code if the limit is still reached. "+
|
|
||||||
"This provides fairness and isolation between users, preventing a single user from consuming all the available resources. "+
|
|
||||||
"It works in conjunction with -maxConcurrentRequests, which sets the global limit across all users. "+
|
|
||||||
"This default can be overridden for individual users via max_concurrent_requests option in per-user config. "+
|
|
||||||
"See https://docs.victoriametrics.com/victoriametrics/vmauth/#concurrency-limiting")
|
|
||||||
maxQueueDuration = flag.Duration("maxQueueDuration", 10*time.Second, "The maximum duration to wait before rejecting incoming requests if concurrency limit "+
|
|
||||||
"specified via -maxConcurrentRequests or -maxConcurrentPerUserRequests command-line flags is reached. "+
|
|
||||||
"Requests are rejected with '429 Too Many Requests' http status code if the limit is still reached after the -maxQueueDuration duration. "+
|
|
||||||
"This allows graceful handling of short spikes in concurrent requests. See https://docs.victoriametrics.com/victoriametrics/vmauth/#concurrency-limiting")
|
|
||||||
|
|
||||||
reloadAuthKey = flagutil.NewPassword("reloadAuthKey", "Auth key for /-/reload http endpoint. It must be passed via authKey query arg. It overrides -httpAuth.*")
|
reloadAuthKey = flagutil.NewPassword("reloadAuthKey", "Auth key for /-/reload http endpoint. It must be passed via authKey query arg. It overrides -httpAuth.*")
|
||||||
logInvalidAuthTokens = flag.Bool("logInvalidAuthTokens", false, "Whether to log requests with invalid auth tokens. "+
|
logInvalidAuthTokens = flag.Bool("logInvalidAuthTokens", false, "Whether to log requests with invalid auth tokens. "+
|
||||||
`Such requests are always counted at vmauth_http_request_errors_total{reason="invalid_auth_token"} metric, which is exposed at /metrics page`)
|
`Such requests are always counted at vmauth_http_request_errors_total{reason="invalid_auth_token"} metric, which is exposed at /metrics page`)
|
||||||
failTimeout = flag.Duration("failTimeout", 3*time.Second, "Sets a delay period for load balancing to skip a malfunctioning backend")
|
failTimeout = flag.Duration("failTimeout", 3*time.Second, "Sets a delay period for load balancing to skip a malfunctioning backend")
|
||||||
|
maxRequestBodySizeToRetry = flagutil.NewBytes("maxRequestBodySizeToRetry", 16*1024, "The maximum request body size, which can be cached and re-tried at other backends. "+
|
||||||
|
"Bigger values may require more memory. Zero or negative value disables caching of request body. This may be useful when proxying data ingestion requests")
|
||||||
backendTLSInsecureSkipVerify = flag.Bool("backend.tlsInsecureSkipVerify", false, "Whether to skip TLS verification when connecting to backends over HTTPS. "+
|
backendTLSInsecureSkipVerify = flag.Bool("backend.tlsInsecureSkipVerify", false, "Whether to skip TLS verification when connecting to backends over HTTPS. "+
|
||||||
"See https://docs.victoriametrics.com/victoriametrics/vmauth/#backend-tls-setup")
|
"See https://docs.victoriametrics.com/victoriametrics/vmauth/#backend-tls-setup")
|
||||||
backendTLSCAFile = flag.String("backend.TLSCAFile", "", "Optional path to TLS root CA file, which is used for TLS verification when connecting to backends over HTTPS. "+
|
backendTLSCAFile = flag.String("backend.TLSCAFile", "", "Optional path to TLS root CA file, which is used for TLS verification when connecting to backends over HTTPS. "+
|
||||||
@@ -168,6 +151,7 @@ func requestHandlerWithInternalRoutes(w http.ResponseWriter, r *http.Request) bo
|
|||||||
}
|
}
|
||||||
|
|
||||||
func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||||
|
|
||||||
ats := getAuthTokensFromRequest(r)
|
ats := getAuthTokensFromRequest(r)
|
||||||
if len(ats) == 0 {
|
if len(ats) == 0 {
|
||||||
// Process requests for unauthorized users
|
// Process requests for unauthorized users
|
||||||
@@ -181,32 +165,29 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
|||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
if ui := getUserInfoByAuthTokens(ats); ui != nil {
|
ui := getUserInfoByAuthTokens(ats)
|
||||||
processUserRequest(w, r, ui)
|
if ui == nil {
|
||||||
return true
|
uu := authConfig.Load().UnauthorizedUser
|
||||||
}
|
if uu != nil {
|
||||||
if ui := getUserInfoByJWTToken(ats); ui != nil {
|
processUserRequest(w, r, uu)
|
||||||
processUserRequest(w, r, ui)
|
return true
|
||||||
return true
|
|
||||||
}
|
|
||||||
|
|
||||||
uu := authConfig.Load().UnauthorizedUser
|
|
||||||
if uu != nil {
|
|
||||||
processUserRequest(w, r, uu)
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
|
|
||||||
invalidAuthTokenRequests.Inc()
|
|
||||||
if *logInvalidAuthTokens {
|
|
||||||
err := fmt.Errorf("cannot authorize request with auth tokens %q", ats)
|
|
||||||
err = &httpserver.ErrorWithStatusCode{
|
|
||||||
Err: err,
|
|
||||||
StatusCode: http.StatusUnauthorized,
|
|
||||||
}
|
}
|
||||||
httpserver.Errorf(w, r, "%s", err)
|
|
||||||
} else {
|
invalidAuthTokenRequests.Inc()
|
||||||
http.Error(w, "Unauthorized", http.StatusUnauthorized)
|
if *logInvalidAuthTokens {
|
||||||
|
err := fmt.Errorf("cannot authorize request with auth tokens %q", ats)
|
||||||
|
err = &httpserver.ErrorWithStatusCode{
|
||||||
|
Err: err,
|
||||||
|
StatusCode: http.StatusUnauthorized,
|
||||||
|
}
|
||||||
|
httpserver.Errorf(w, r, "%s", err)
|
||||||
|
} else {
|
||||||
|
http.Error(w, "Unauthorized", http.StatusUnauthorized)
|
||||||
|
}
|
||||||
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
processUserRequest(w, r, ui)
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -227,124 +208,26 @@ func processUserRequest(w http.ResponseWriter, r *http.Request, ui *UserInfo) {
|
|||||||
|
|
||||||
ui.requests.Inc()
|
ui.requests.Inc()
|
||||||
|
|
||||||
ctx, cancel := context.WithTimeout(r.Context(), *maxQueueDuration)
|
// Limit the concurrency of requests to backends
|
||||||
defer cancel()
|
|
||||||
|
|
||||||
// Acquire global concurrency limit.
|
|
||||||
if err := beginConcurrencyLimit(ctx); err != nil {
|
|
||||||
handleConcurrencyLimitError(w, r, err)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
defer endConcurrencyLimit()
|
|
||||||
|
|
||||||
// Set read deadline for reading the initial chunk for the request body.
|
|
||||||
rc := http.NewResponseController(w)
|
|
||||||
deadline, ok := ctx.Deadline()
|
|
||||||
if !ok {
|
|
||||||
logger.Panicf("BUG: expecting valid deadline for the context")
|
|
||||||
}
|
|
||||||
if err := rc.SetReadDeadline(deadline); err != nil {
|
|
||||||
logger.Panicf("BUG: cannot set read deadline: %s", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Read the initial chunk for the request body.
|
|
||||||
userName := ui.name()
|
|
||||||
if userName == "" {
|
|
||||||
userName = "unauthorized"
|
|
||||||
}
|
|
||||||
bb, err := bufferRequestBody(ctx, r.Body, userName)
|
|
||||||
if err != nil {
|
|
||||||
httpserver.Errorf(w, r, "%s", err)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
r.Body = bb
|
|
||||||
|
|
||||||
// Disable the read deadline for the rest of the request body.
|
|
||||||
if err := rc.SetReadDeadline(time.Time{}); err != nil {
|
|
||||||
logger.Panicf("BUG: cannot reset read deadline: %s", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Acquire concurrency limit for the given user.
|
|
||||||
if err := ui.beginConcurrencyLimit(ctx); err != nil {
|
|
||||||
handleConcurrencyLimitError(w, r, err)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
defer ui.endConcurrencyLimit()
|
|
||||||
|
|
||||||
// Process the request.
|
|
||||||
processRequest(w, r, ui)
|
|
||||||
}
|
|
||||||
|
|
||||||
func beginConcurrencyLimit(ctx context.Context) error {
|
|
||||||
concurrencyLimitOnce.Do(concurrencyLimitInit)
|
concurrencyLimitOnce.Do(concurrencyLimitInit)
|
||||||
select {
|
select {
|
||||||
case concurrencyLimitCh <- struct{}{}:
|
case concurrencyLimitCh <- struct{}{}:
|
||||||
return nil
|
if err := ui.beginConcurrencyLimit(); err != nil {
|
||||||
|
handleConcurrencyLimitError(w, r, err)
|
||||||
|
<-concurrencyLimitCh
|
||||||
|
return
|
||||||
|
}
|
||||||
default:
|
default:
|
||||||
// The -maxConcurrentRequests are executed. Wait until some of the requests are finished,
|
concurrentRequestsLimitReached.Inc()
|
||||||
// so the current request could be executed.
|
err := fmt.Errorf("cannot serve more than -maxConcurrentRequests=%d concurrent requests", cap(concurrencyLimitCh))
|
||||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10078
|
handleConcurrencyLimitError(w, r, err)
|
||||||
select {
|
return
|
||||||
case concurrencyLimitCh <- struct{}{}:
|
|
||||||
return nil
|
|
||||||
case <-ctx.Done():
|
|
||||||
err := ctx.Err()
|
|
||||||
if errors.Is(err, context.DeadlineExceeded) {
|
|
||||||
// The current request couldn't be executed until the request timeout.
|
|
||||||
concurrentRequestsLimitReached.Inc()
|
|
||||||
return fmt.Errorf("cannot start executing the request during -maxQueueDuration=%s because -maxConcurrentRequests=%d concurrent requests are executed",
|
|
||||||
*maxQueueDuration, cap(concurrencyLimitCh))
|
|
||||||
}
|
|
||||||
return fmt.Errorf("cannot start executing the request because -maxConcurrentRequests=%d concurrent requests are executed: %w", cap(concurrencyLimitCh), err)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
processRequest(w, r, ui)
|
||||||
|
ui.endConcurrencyLimit()
|
||||||
func endConcurrencyLimit() {
|
|
||||||
<-concurrencyLimitCh
|
<-concurrencyLimitCh
|
||||||
}
|
}
|
||||||
|
|
||||||
func bufferRequestBody(ctx context.Context, r io.ReadCloser, userName string) (io.ReadCloser, error) {
|
|
||||||
if r == nil {
|
|
||||||
// This is a GET request with nil reader.
|
|
||||||
return nil, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
maxBufSize := max(requestBufferSize.IntN(), maxRequestBodySizeToRetry.IntN())
|
|
||||||
if maxBufSize <= 0 {
|
|
||||||
return r, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
lr := ioutil.GetLimitedReader(r, int64(maxBufSize))
|
|
||||||
defer ioutil.PutLimitedReader(lr)
|
|
||||||
|
|
||||||
start := time.Now()
|
|
||||||
buf, err := io.ReadAll(lr)
|
|
||||||
bufferRequestBodyDuration.UpdateDuration(start)
|
|
||||||
|
|
||||||
if err != nil {
|
|
||||||
if errors.Is(ctx.Err(), context.DeadlineExceeded) {
|
|
||||||
rejectSlowClientRequests.Inc()
|
|
||||||
|
|
||||||
d := time.Since(start)
|
|
||||||
|
|
||||||
return nil, &httpserver.ErrorWithStatusCode{
|
|
||||||
Err: fmt.Errorf("reject request from the user %s because the request body couldn't be read in -maxQueueDuration=%s; read %d bytes in %s",
|
|
||||||
userName, *maxQueueDuration, len(buf), d.Truncate(time.Second)),
|
|
||||||
StatusCode: http.StatusBadRequest,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil, &httpserver.ErrorWithStatusCode{
|
|
||||||
Err: fmt.Errorf("cannot read request body: %w", err),
|
|
||||||
StatusCode: http.StatusBadRequest,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
bb := newBufferedBody(r, buf, maxBufSize)
|
|
||||||
return bb, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func processRequest(w http.ResponseWriter, r *http.Request, ui *UserInfo) {
|
func processRequest(w http.ResponseWriter, r *http.Request, ui *UserInfo) {
|
||||||
u := normalizeURL(r.URL)
|
u := normalizeURL(r.URL)
|
||||||
up, hc := ui.getURLPrefixAndHeaders(u, r.Host, r.Header)
|
up, hc := ui.getURLPrefixAndHeaders(u, r.Host, r.Header)
|
||||||
@@ -370,26 +253,28 @@ func processRequest(w http.ResponseWriter, r *http.Request, ui *UserInfo) {
|
|||||||
isDefault = true
|
isDefault = true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
rtb := newReadTrackingBody(r.Body, maxRequestBodySizeToRetry.IntN())
|
||||||
|
r.Body = rtb
|
||||||
|
|
||||||
maxAttempts := up.getBackendsCount()
|
maxAttempts := up.getBackendsCount()
|
||||||
for range maxAttempts {
|
for i := 0; i < maxAttempts; i++ {
|
||||||
bu := up.getBackendURL()
|
bu := up.getBackendURL()
|
||||||
if bu == nil {
|
if bu == nil {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
targetURL := bu.url
|
targetURL := bu.url
|
||||||
|
// Don't change path and add request_path query param for default route.
|
||||||
if isDefault {
|
if isDefault {
|
||||||
// Don't change path and add request_path query param for default route.
|
|
||||||
query := targetURL.Query()
|
query := targetURL.Query()
|
||||||
query.Set("request_path", u.String())
|
query.Set("request_path", u.String())
|
||||||
targetURL.RawQuery = query.Encode()
|
targetURL.RawQuery = query.Encode()
|
||||||
} else {
|
} else { // Update path for regular routes.
|
||||||
// Update path for regular routes.
|
targetURL = mergeURLs(targetURL, u, up.dropSrcPathPrefixParts)
|
||||||
targetURL = mergeURLs(targetURL, u, up.dropSrcPathPrefixParts, up.mergeQueryArgs)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
wasLocalRetry := false
|
wasLocalRetry := false
|
||||||
again:
|
again:
|
||||||
ok, needLocalRetry := tryProcessingRequest(w, r, targetURL, hc, up.retryStatusCodes, ui, bu)
|
ok, needLocalRetry := tryProcessingRequest(w, r, targetURL, hc, up.retryStatusCodes, ui)
|
||||||
if needLocalRetry && !wasLocalRetry {
|
if needLocalRetry && !wasLocalRetry {
|
||||||
wasLocalRetry = true
|
wasLocalRetry = true
|
||||||
goto again
|
goto again
|
||||||
@@ -399,20 +284,17 @@ func processRequest(w http.ResponseWriter, r *http.Request, ui *UserInfo) {
|
|||||||
if ok {
|
if ok {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
bu.setBroken()
|
bu.setBroken()
|
||||||
ui.backendErrors.Inc()
|
|
||||||
}
|
}
|
||||||
err := &httpserver.ErrorWithStatusCode{
|
err := &httpserver.ErrorWithStatusCode{
|
||||||
Err: fmt.Errorf("all the %d backends for the user %q are unavailable for proxying the request - check previous WARN logs to see the exact error for each failed backend", up.getBackendsCount(), ui.name()),
|
Err: fmt.Errorf("all the %d backends for the user %q are unavailable", up.getBackendsCount(), ui.name()),
|
||||||
StatusCode: http.StatusBadGateway,
|
StatusCode: http.StatusBadGateway,
|
||||||
}
|
}
|
||||||
httpserver.Errorf(w, r, "%s", err)
|
httpserver.Errorf(w, r, "%s", err)
|
||||||
ui.requestErrors.Inc()
|
ui.backendErrors.Inc()
|
||||||
}
|
}
|
||||||
|
|
||||||
func tryProcessingRequest(w http.ResponseWriter, r *http.Request, targetURL *url.URL, hc HeadersConf, retryStatusCodes []int, ui *UserInfo, bu *backendURL) (bool, bool) {
|
func tryProcessingRequest(w http.ResponseWriter, r *http.Request, targetURL *url.URL, hc HeadersConf, retryStatusCodes []int, ui *UserInfo) (bool, bool) {
|
||||||
ui.backendRequests.Inc()
|
|
||||||
req := sanitizeRequestHeaders(r)
|
req := sanitizeRequestHeaders(r)
|
||||||
|
|
||||||
req.URL = targetURL
|
req.URL = targetURL
|
||||||
@@ -426,19 +308,21 @@ func tryProcessingRequest(w http.ResponseWriter, r *http.Request, targetURL *url
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bb, bbOK := req.Body.(*bufferedBody)
|
rtb, rtbOK := req.Body.(*readTrackingBody)
|
||||||
canRetry := !bbOK || bb.canRetry()
|
|
||||||
|
|
||||||
res, err := ui.rt.RoundTrip(req)
|
res, err := ui.rt.RoundTrip(req)
|
||||||
|
|
||||||
if errors.Is(r.Context().Err(), context.Canceled) {
|
|
||||||
// Do not retry canceled requests.
|
|
||||||
clientCanceledRequests.Inc()
|
|
||||||
return true, false
|
|
||||||
}
|
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if !canRetry {
|
if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) {
|
||||||
|
// Do not retry canceled or timed out requests
|
||||||
|
remoteAddr := httpserver.GetQuotedRemoteAddr(r)
|
||||||
|
requestURI := httpserver.GetRequestURI(r)
|
||||||
|
logger.Warnf("remoteAddr: %s; requestURI: %s; error when proxying response body from %s: %s", remoteAddr, requestURI, targetURL, err)
|
||||||
|
if errors.Is(err, context.DeadlineExceeded) {
|
||||||
|
// Timed out request must be counted as errors, since this usually means that the backend is slow.
|
||||||
|
ui.backendErrors.Inc()
|
||||||
|
}
|
||||||
|
return false, false
|
||||||
|
}
|
||||||
|
if !rtbOK || !rtb.canRetry() {
|
||||||
// Request body cannot be re-sent to another backend. Return the error to the client then.
|
// Request body cannot be re-sent to another backend. Return the error to the client then.
|
||||||
err = &httpserver.ErrorWithStatusCode{
|
err = &httpserver.ErrorWithStatusCode{
|
||||||
Err: fmt.Errorf("cannot proxy the request to %s: %w", targetURL, err),
|
Err: fmt.Errorf("cannot proxy the request to %s: %w", targetURL, err),
|
||||||
@@ -446,51 +330,41 @@ func tryProcessingRequest(w http.ResponseWriter, r *http.Request, targetURL *url
|
|||||||
}
|
}
|
||||||
httpserver.Errorf(w, r, "%s", err)
|
httpserver.Errorf(w, r, "%s", err)
|
||||||
ui.backendErrors.Inc()
|
ui.backendErrors.Inc()
|
||||||
ui.requestErrors.Inc()
|
|
||||||
bu.setBroken()
|
|
||||||
return true, false
|
return true, false
|
||||||
}
|
}
|
||||||
if netutil.IsTrivialNetworkError(err) {
|
if netutil.IsTrivialNetworkError(err) {
|
||||||
// Retry request at the same backend on trivial network errors, such as proxy idle timeout misconfiguration or socket close by OS
|
// Retry request at the same backend on trivial network errors, such as proxy idle timeout misconfiguration or socket close by OS
|
||||||
if bbOK {
|
|
||||||
bb.resetReader()
|
|
||||||
}
|
|
||||||
return false, true
|
return false, true
|
||||||
}
|
}
|
||||||
|
|
||||||
// Retry the request at another backend
|
// Retry the request if its body wasn't read yet. This usually means that the backend isn't reachable.
|
||||||
remoteAddr := httpserver.GetQuotedRemoteAddr(r)
|
remoteAddr := httpserver.GetQuotedRemoteAddr(r)
|
||||||
requestURI := httpserver.GetRequestURI(r)
|
// NOTE: do not use httpserver.GetRequestURI
|
||||||
logger.Warnf("remoteAddr: %s; requestURI: %s; request to %s failed: %s, retrying the request at another backend", remoteAddr, requestURI, targetURL, err)
|
// it explicitly reads request body, which may fail retries.
|
||||||
if bbOK {
|
logger.Warnf("remoteAddr: %s; requestURI: %s; retrying the request to %s because of response error: %s", remoteAddr, req.URL, targetURL, err)
|
||||||
bb.resetReader()
|
|
||||||
}
|
|
||||||
return false, false
|
return false, false
|
||||||
}
|
}
|
||||||
if slices.Contains(retryStatusCodes, res.StatusCode) {
|
if slices.Contains(retryStatusCodes, res.StatusCode) {
|
||||||
if !canRetry {
|
_ = res.Body.Close()
|
||||||
|
if !rtbOK || !rtb.canRetry() {
|
||||||
// If we get an error from the retry_status_codes list, but cannot execute retry,
|
// If we get an error from the retry_status_codes list, but cannot execute retry,
|
||||||
// we consider such a request an error as well.
|
// we consider such a request an error as well.
|
||||||
err := &httpserver.ErrorWithStatusCode{
|
err := &httpserver.ErrorWithStatusCode{
|
||||||
Err: fmt.Errorf("got response status code=%d from %s, but cannot retry the request at another backend, because the request body has been already consumed",
|
Err: fmt.Errorf("got response status code=%d from %s, but cannot retry the request on another backend, because the request has been already consumed",
|
||||||
res.StatusCode, targetURL),
|
res.StatusCode, targetURL),
|
||||||
StatusCode: http.StatusServiceUnavailable,
|
StatusCode: http.StatusServiceUnavailable,
|
||||||
}
|
}
|
||||||
httpserver.Errorf(w, r, "%s", err)
|
httpserver.Errorf(w, r, "%s", err)
|
||||||
ui.backendErrors.Inc()
|
ui.backendErrors.Inc()
|
||||||
ui.requestErrors.Inc()
|
|
||||||
return true, false
|
return true, false
|
||||||
}
|
}
|
||||||
|
|
||||||
// Retry requests at other backends if it matches retryStatusCodes.
|
// Retry requests at other backends if it matches retryStatusCodes.
|
||||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4893
|
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4893
|
||||||
remoteAddr := httpserver.GetQuotedRemoteAddr(r)
|
remoteAddr := httpserver.GetQuotedRemoteAddr(r)
|
||||||
requestURI := httpserver.GetRequestURI(r)
|
// NOTE: do not use httpserver.GetRequestURI
|
||||||
logger.Warnf("remoteAddr: %s; requestURI: %s; request to %s failed, retrying the request at another backend because response status code=%d belongs to retry_status_codes=%d",
|
// it explicitly reads request body, which may fail retries.
|
||||||
remoteAddr, requestURI, targetURL, res.StatusCode, retryStatusCodes)
|
logger.Warnf("remoteAddr: %s; requestURI: %s; retrying the request to %s because response status code=%d belongs to retry_status_codes=%d",
|
||||||
if bbOK {
|
remoteAddr, req.URL, targetURL, res.StatusCode, retryStatusCodes)
|
||||||
bb.resetReader()
|
|
||||||
}
|
|
||||||
return false, false
|
return false, false
|
||||||
}
|
}
|
||||||
removeHopHeaders(res.Header)
|
removeHopHeaders(res.Header)
|
||||||
@@ -498,61 +372,20 @@ func tryProcessingRequest(w http.ResponseWriter, r *http.Request, targetURL *url
|
|||||||
updateHeadersByConfig(w.Header(), hc.ResponseHeaders)
|
updateHeadersByConfig(w.Header(), hc.ResponseHeaders)
|
||||||
w.WriteHeader(res.StatusCode)
|
w.WriteHeader(res.StatusCode)
|
||||||
|
|
||||||
err = copyStreamToClient(w, res.Body)
|
copyBuf := copyBufPool.Get()
|
||||||
|
copyBuf.B = bytesutil.ResizeNoCopyNoOverallocate(copyBuf.B, 16*1024)
|
||||||
|
_, err = io.CopyBuffer(w, res.Body, copyBuf.B)
|
||||||
|
copyBufPool.Put(copyBuf)
|
||||||
_ = res.Body.Close()
|
_ = res.Body.Close()
|
||||||
|
|
||||||
if errors.Is(r.Context().Err(), context.Canceled) {
|
|
||||||
// Do not retry canceled requests.
|
|
||||||
clientCanceledRequests.Inc()
|
|
||||||
return true, false
|
|
||||||
}
|
|
||||||
|
|
||||||
if err != nil && !netutil.IsTrivialNetworkError(err) {
|
if err != nil && !netutil.IsTrivialNetworkError(err) {
|
||||||
remoteAddr := httpserver.GetQuotedRemoteAddr(r)
|
remoteAddr := httpserver.GetQuotedRemoteAddr(r)
|
||||||
requestURI := httpserver.GetRequestURI(r)
|
requestURI := httpserver.GetRequestURI(r)
|
||||||
logger.Warnf("remoteAddr: %s; requestURI: %s; error when proxying response body from %s: %s", remoteAddr, requestURI, targetURL, err)
|
logger.Warnf("remoteAddr: %s; requestURI: %s; error when proxying response body from %s: %s", remoteAddr, requestURI, targetURL, err)
|
||||||
ui.requestErrors.Inc()
|
|
||||||
return true, false
|
return true, false
|
||||||
}
|
}
|
||||||
return true, false
|
return true, false
|
||||||
}
|
}
|
||||||
|
|
||||||
func copyStreamToClient(client io.Writer, backend io.Reader) error {
|
|
||||||
copyBuf := copyBufPool.Get()
|
|
||||||
copyBuf.B = bytesutil.ResizeNoCopyNoOverallocate(copyBuf.B, 16*1024)
|
|
||||||
defer copyBufPool.Put(copyBuf)
|
|
||||||
buf := copyBuf.B
|
|
||||||
|
|
||||||
flusher, ok := client.(http.Flusher)
|
|
||||||
if !ok {
|
|
||||||
logger.Panicf("BUG: client must implement net/http.Flusher interface; got %T", client)
|
|
||||||
}
|
|
||||||
|
|
||||||
for {
|
|
||||||
n, backendErr := backend.Read(buf)
|
|
||||||
if n > 0 {
|
|
||||||
data := buf[:n]
|
|
||||||
n, clientErr := client.Write(data)
|
|
||||||
if clientErr != nil {
|
|
||||||
return fmt.Errorf("cannot write data to client: %w", clientErr)
|
|
||||||
}
|
|
||||||
if n != len(data) {
|
|
||||||
logger.Panicf("BUG: unexpected number of bytes written returned by client.Write; got %d; want %d", n, len(data))
|
|
||||||
}
|
|
||||||
// Flush the read data from the backend to the client as fast as possible
|
|
||||||
// in order to reduce delays for data propagation.
|
|
||||||
// See https://github.com/VictoriaMetrics/VictoriaLogs/issues/667
|
|
||||||
flusher.Flush()
|
|
||||||
}
|
|
||||||
if backendErr != nil {
|
|
||||||
if backendErr == io.EOF {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
return fmt.Errorf("cannot read data from backend: %w", backendErr)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
var copyBufPool bytesutil.ByteBufferPool
|
var copyBufPool bytesutil.ByteBufferPool
|
||||||
|
|
||||||
func copyHeader(dst, src http.Header) {
|
func copyHeader(dst, src http.Header) {
|
||||||
@@ -639,10 +472,6 @@ var (
|
|||||||
configReloadRequests = metrics.NewCounter(`vmauth_http_requests_total{path="/-/reload"}`)
|
configReloadRequests = metrics.NewCounter(`vmauth_http_requests_total{path="/-/reload"}`)
|
||||||
invalidAuthTokenRequests = metrics.NewCounter(`vmauth_http_request_errors_total{reason="invalid_auth_token"}`)
|
invalidAuthTokenRequests = metrics.NewCounter(`vmauth_http_request_errors_total{reason="invalid_auth_token"}`)
|
||||||
missingRouteRequests = metrics.NewCounter(`vmauth_http_request_errors_total{reason="missing_route"}`)
|
missingRouteRequests = metrics.NewCounter(`vmauth_http_request_errors_total{reason="missing_route"}`)
|
||||||
clientCanceledRequests = metrics.NewCounter(`vmauth_http_request_errors_total{reason="client_canceled"}`)
|
|
||||||
rejectSlowClientRequests = metrics.NewCounter(`vmauth_http_request_errors_total{reason="reject_slow_client"}`)
|
|
||||||
|
|
||||||
bufferRequestBodyDuration = metrics.NewSummary(`vmauth_buffer_request_body_duration_seconds`)
|
|
||||||
)
|
)
|
||||||
|
|
||||||
func newRoundTripper(caFileOpt, certFileOpt, keyFileOpt, serverNameOpt string, insecureSkipVerifyP *bool) (http.RoundTripper, error) {
|
func newRoundTripper(caFileOpt, certFileOpt, keyFileOpt, serverNameOpt string, insecureSkipVerifyP *bool) (http.RoundTripper, error) {
|
||||||
@@ -726,13 +555,6 @@ func handleMissingAuthorizationError(w http.ResponseWriter) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func handleConcurrencyLimitError(w http.ResponseWriter, r *http.Request, err error) {
|
func handleConcurrencyLimitError(w http.ResponseWriter, r *http.Request, err error) {
|
||||||
if errors.Is(r.Context().Err(), context.Canceled) {
|
|
||||||
// Do not return any response for the request canceled by the client,
|
|
||||||
// since the connection to the client is already closed.
|
|
||||||
clientCanceledRequests.Inc()
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
w.Header().Add("Retry-After", "10")
|
w.Header().Add("Retry-After", "10")
|
||||||
err = &httpserver.ErrorWithStatusCode{
|
err = &httpserver.ErrorWithStatusCode{
|
||||||
Err: err,
|
Err: err,
|
||||||
@@ -741,76 +563,120 @@ func handleConcurrencyLimitError(w http.ResponseWriter, r *http.Request, err err
|
|||||||
httpserver.Errorf(w, r, "%s", err)
|
httpserver.Errorf(w, r, "%s", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
// bufferedBody serves two purposes:
|
// readTrackingBody must be obtained via getReadTrackingBody()
|
||||||
// 1. Enables request retries when the body size does not exceed maxBodySize
|
type readTrackingBody struct {
|
||||||
// by fully buffering the body in memory.
|
// maxBodySize is the maximum body size to cache in buf.
|
||||||
// 2. Prevents slow clients from reducing effective server capacity by
|
|
||||||
// buffering the request body before acquiring a per-user concurrency slot.
|
|
||||||
//
|
|
||||||
// See bufferRequestBody for details on how bufferedBody is used.
|
|
||||||
type bufferedBody struct {
|
|
||||||
// r contains reader for reading the data after buf is read.
|
|
||||||
//
|
//
|
||||||
// r is nil if buf contains all the data.
|
// Bigger bodies cannot be retried.
|
||||||
|
maxBodySize int
|
||||||
|
|
||||||
|
// r contains reader for initial data reading
|
||||||
r io.ReadCloser
|
r io.ReadCloser
|
||||||
|
|
||||||
// buf contains the initial buffer read from r.
|
// buf is a buffer for data read from r. Buf size is limited by maxBodySize.
|
||||||
|
// If more than maxBodySize is read from r, then cannotRetry is set to true.
|
||||||
buf []byte
|
buf []byte
|
||||||
|
|
||||||
// bufOffset is the offset at buf for already read bytes.
|
// readBuf points to the cached data at buf, which must be read in the next call to Read().
|
||||||
bufOffset int
|
readBuf []byte
|
||||||
|
|
||||||
// cannotRetry is set to true after Close() call on non-nil r.
|
// cannotRetry is set to true when more than maxBodySize bytes are read from r.
|
||||||
|
// In this case the read data cannot fit buf, so it cannot be re-read from buf.
|
||||||
cannotRetry bool
|
cannotRetry bool
|
||||||
|
|
||||||
|
// bufComplete is set to true when buf contains complete request body read from r.
|
||||||
|
bufComplete bool
|
||||||
}
|
}
|
||||||
|
|
||||||
func newBufferedBody(r io.ReadCloser, buf []byte, maxBufSize int) *bufferedBody {
|
func newReadTrackingBody(r io.ReadCloser, maxBodySize int) *readTrackingBody {
|
||||||
// Do not use sync.Pool here, since http.RoundTrip may still use request body after return.
|
// do not use sync.Pool there
|
||||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/8051
|
// since http.RoundTrip may still use request body after return
|
||||||
|
// See this issue for details https://github.com/VictoriaMetrics/VictoriaMetrics/issues/8051
|
||||||
if len(buf) < maxBufSize {
|
rtb := &readTrackingBody{}
|
||||||
// Read the full request body into buf.
|
if maxBodySize < 0 {
|
||||||
r = nil
|
maxBodySize = 0
|
||||||
}
|
}
|
||||||
|
rtb.maxBodySize = maxBodySize
|
||||||
|
|
||||||
return &bufferedBody{
|
if r == nil {
|
||||||
r: r,
|
// This is GET request without request body
|
||||||
buf: buf,
|
r = (*zeroReader)(nil)
|
||||||
}
|
}
|
||||||
|
rtb.r = r
|
||||||
|
return rtb
|
||||||
}
|
}
|
||||||
|
|
||||||
// Read implements io.Reader interface.
|
type zeroReader struct{}
|
||||||
func (bb *bufferedBody) Read(p []byte) (int, error) {
|
|
||||||
if bb.cannotRetry {
|
|
||||||
return 0, fmt.Errorf("cannot read already closed body")
|
|
||||||
}
|
|
||||||
if bb.bufOffset < len(bb.buf) {
|
|
||||||
n := copy(p, bb.buf[bb.bufOffset:])
|
|
||||||
bb.bufOffset += n
|
|
||||||
return n, nil
|
|
||||||
}
|
|
||||||
if bb.r == nil {
|
|
||||||
return 0, io.EOF
|
|
||||||
}
|
|
||||||
return bb.r.Read(p)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (bb *bufferedBody) canRetry() bool {
|
func (r *zeroReader) Read(_ []byte) (int, error) {
|
||||||
return bb.r == nil
|
return 0, io.EOF
|
||||||
}
|
}
|
||||||
|
func (r *zeroReader) Close() error {
|
||||||
// Close implements io.Closer interface.
|
|
||||||
func (bb *bufferedBody) Close() error {
|
|
||||||
bb.resetReader()
|
|
||||||
if bb.r != nil {
|
|
||||||
bb.cannotRetry = true
|
|
||||||
return bb.r.Close()
|
|
||||||
}
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (bb *bufferedBody) resetReader() {
|
// Read implements io.Reader interface.
|
||||||
bb.bufOffset = 0
|
func (rtb *readTrackingBody) Read(p []byte) (int, error) {
|
||||||
|
if len(rtb.readBuf) > 0 {
|
||||||
|
n := copy(p, rtb.readBuf)
|
||||||
|
rtb.readBuf = rtb.readBuf[n:]
|
||||||
|
return n, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
if rtb.r == nil {
|
||||||
|
if rtb.bufComplete {
|
||||||
|
return 0, io.EOF
|
||||||
|
}
|
||||||
|
return 0, fmt.Errorf("cannot read client request body after closing client reader")
|
||||||
|
}
|
||||||
|
|
||||||
|
n, err := rtb.r.Read(p)
|
||||||
|
if rtb.cannotRetry {
|
||||||
|
return n, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(rtb.buf)+n > rtb.maxBodySize {
|
||||||
|
rtb.cannotRetry = true
|
||||||
|
return n, err
|
||||||
|
}
|
||||||
|
rtb.buf = append(rtb.buf, p[:n]...)
|
||||||
|
if err == io.EOF {
|
||||||
|
rtb.bufComplete = true
|
||||||
|
}
|
||||||
|
return n, err
|
||||||
|
}
|
||||||
|
|
||||||
|
func (rtb *readTrackingBody) canRetry() bool {
|
||||||
|
if rtb.cannotRetry {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
if rtb.bufComplete {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
return rtb.r != nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Close implements io.Closer interface.
|
||||||
|
func (rtb *readTrackingBody) Close() error {
|
||||||
|
if !rtb.cannotRetry {
|
||||||
|
rtb.readBuf = rtb.buf
|
||||||
|
} else {
|
||||||
|
rtb.readBuf = nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Close rtb.r only if the request body is completely read or if it is too big.
|
||||||
|
// http.Roundtrip performs body.Close call even without any Read calls,
|
||||||
|
// so this hack allows us to reuse request body.
|
||||||
|
if rtb.bufComplete || rtb.cannotRetry {
|
||||||
|
if rtb.r == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
err := rtb.r.Close()
|
||||||
|
rtb.r = nil
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func debugInfo(u *url.URL, r *http.Request) string {
|
func debugInfo(u *url.URL, r *http.Request) string {
|
||||||
|
|||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user