mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2026-06-17 15:53:29 +03:00
Compare commits
1 Commits
optimize-a
...
weakpointe
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
e9261be945 |
2
.github/ISSUE_TEMPLATE/question.yml
vendored
2
.github/ISSUE_TEMPLATE/question.yml
vendored
@@ -5,7 +5,7 @@ body:
|
|||||||
- type: textarea
|
- type: textarea
|
||||||
id: describe-the-component
|
id: describe-the-component
|
||||||
attributes:
|
attributes:
|
||||||
label: Is your question related to a specific component?
|
label: Is your question request related to a specific component?
|
||||||
placeholder: |
|
placeholder: |
|
||||||
VictoriaMetrics, vmagent, vmalert, vmui, etc...
|
VictoriaMetrics, vmagent, vmalert, vmui, etc...
|
||||||
validations:
|
validations:
|
||||||
|
|||||||
23
.github/copilot-instructions.md
vendored
Normal file
23
.github/copilot-instructions.md
vendored
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
# Project Overview
|
||||||
|
|
||||||
|
VictoriaMetrics is a fast, cost-saving, and scalable solution for monitoring and managing time series data. It delivers high performance and reliability, making it an ideal choice for businesses of all sizes.
|
||||||
|
|
||||||
|
## Folder Structure
|
||||||
|
|
||||||
|
- `/app`: Contains the compilable binaries.
|
||||||
|
- `/lib`: Contains the golang reusable libraries
|
||||||
|
- `/docs/victoriametrics`: Contains documentation for the project.
|
||||||
|
- `/apptest/tests`: Contains integration tests.
|
||||||
|
|
||||||
|
## Libraries and Frameworks
|
||||||
|
|
||||||
|
- Backend: Golang, no framework. Use third-party libraries sparingly.
|
||||||
|
- Frontend: React.
|
||||||
|
|
||||||
|
## Code review guidelines
|
||||||
|
|
||||||
|
Ensure the feature or bugfix includes a changelog entry in /docs/victoriametrics/changelog/CHANGELOG.md.
|
||||||
|
Verify the entry is under the ## tip section and matches the structure and style of existing entries.
|
||||||
|
Chore-only changes may be omitted from the changelog.
|
||||||
|
|
||||||
|
|
||||||
4
.github/dependabot.yml
vendored
4
.github/dependabot.yml
vendored
@@ -4,8 +4,6 @@ updates:
|
|||||||
directory: "/"
|
directory: "/"
|
||||||
schedule:
|
schedule:
|
||||||
interval: "daily"
|
interval: "daily"
|
||||||
cooldown:
|
|
||||||
default-days: 21
|
|
||||||
- package-ecosystem: "gomod"
|
- package-ecosystem: "gomod"
|
||||||
directory: "/"
|
directory: "/"
|
||||||
schedule:
|
schedule:
|
||||||
@@ -25,8 +23,6 @@ updates:
|
|||||||
directory: "/"
|
directory: "/"
|
||||||
schedule:
|
schedule:
|
||||||
interval: "daily"
|
interval: "daily"
|
||||||
cooldown:
|
|
||||||
default-days: 21
|
|
||||||
- package-ecosystem: "npm"
|
- package-ecosystem: "npm"
|
||||||
directory: "/app/vmui/packages/vmui"
|
directory: "/app/vmui/packages/vmui"
|
||||||
schedule:
|
schedule:
|
||||||
|
|||||||
48
.github/scripts/lint-changelog-tip.sh
vendored
48
.github/scripts/lint-changelog-tip.sh
vendored
@@ -1,48 +0,0 @@
|
|||||||
#!/usr/bin/env sh
|
|
||||||
|
|
||||||
set -e
|
|
||||||
|
|
||||||
CHANGELOG_FILE="docs/victoriametrics/changelog/CHANGELOG.md"
|
|
||||||
|
|
||||||
GITHUB_BASE_REF=${GITHUB_BASE_REF:-"master"}
|
|
||||||
GIT_REMOTE=${GIT_REMOTE:-"origin"}
|
|
||||||
|
|
||||||
git diff "${GIT_REMOTE}/${GITHUB_BASE_REF}"...HEAD -- $CHANGELOG_FILE > diff.txt
|
|
||||||
if ! grep -q "^+" diff.txt; then
|
|
||||||
echo "No additions in CHANGELOG.md"
|
|
||||||
exit 0
|
|
||||||
fi
|
|
||||||
|
|
||||||
ADDED_LINES=$(grep "^+\S" diff.txt | sed 's/^+//')
|
|
||||||
|
|
||||||
START_TIP=$(grep -n "^## tip" "$CHANGELOG_FILE" | head -1 | cut -d: -f1)
|
|
||||||
if [ -z "$START_TIP" ]; then
|
|
||||||
echo "ERROR: ${CHANGELOG_FILE} does not contain a ## tip section"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
END_TIP=$(awk "NR>$START_TIP && /^## / {print NR; exit}" "${CHANGELOG_FILE}")
|
|
||||||
if [ -z "$END_TIP" ]; then
|
|
||||||
END_TIP=$(wc -l < "$CHANGELOG_FILE")
|
|
||||||
fi
|
|
||||||
|
|
||||||
BAD=0
|
|
||||||
while IFS= read -r line; do
|
|
||||||
# Grep exact line inside the file and get line numbers
|
|
||||||
MATCHES=$(grep -n -F "$line" "$CHANGELOG_FILE" | cut -d: -f1)
|
|
||||||
for m in $MATCHES; do
|
|
||||||
if [ "$m" -lt "$START_TIP" ] || [ "$m" -gt "$END_TIP" ]; then
|
|
||||||
echo "'$line' on line ${m} is outside ## tip section (lines ${START_TIP}-${END_TIP})"
|
|
||||||
BAD=1
|
|
||||||
fi
|
|
||||||
done
|
|
||||||
done << EOF
|
|
||||||
$ADDED_LINES
|
|
||||||
EOF
|
|
||||||
|
|
||||||
if [ "$BAD" -ne 0 ]; then
|
|
||||||
echo "CHANGELOG modifications must be placed inside the ## tip section."
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
echo "CHANGELOG modifications are valid."
|
|
||||||
9
.github/workflows/build.yml
vendored
9
.github/workflows/build.yml
vendored
@@ -47,8 +47,6 @@ jobs:
|
|||||||
arch: arm
|
arch: arm
|
||||||
- os: linux
|
- os: linux
|
||||||
arch: ppc64le
|
arch: ppc64le
|
||||||
- os: linux
|
|
||||||
arch: s390x
|
|
||||||
- os: darwin
|
- os: darwin
|
||||||
arch: amd64
|
arch: amd64
|
||||||
- os: darwin
|
- os: darwin
|
||||||
@@ -61,18 +59,17 @@ jobs:
|
|||||||
arch: amd64
|
arch: amd64
|
||||||
steps:
|
steps:
|
||||||
- name: Code checkout
|
- name: Code checkout
|
||||||
uses: actions/checkout@v6
|
uses: actions/checkout@v5
|
||||||
|
|
||||||
- name: Setup Go
|
- name: Setup Go
|
||||||
id: go
|
id: go
|
||||||
uses: actions/setup-go@v6
|
uses: actions/setup-go@v5
|
||||||
with:
|
with:
|
||||||
cache-dependency-path: |
|
cache-dependency-path: |
|
||||||
go.sum
|
go.sum
|
||||||
Makefile
|
Makefile
|
||||||
app/**/Makefile
|
app/**/Makefile
|
||||||
go-version-file: 'go.mod'
|
go-version: stable
|
||||||
- run: go version
|
|
||||||
|
|
||||||
- name: Build victoria-metrics for ${{ matrix.os }}-${{ matrix.arch }}
|
- name: Build victoria-metrics for ${{ matrix.os }}-${{ matrix.arch }}
|
||||||
run: make victoria-metrics-${{ matrix.os }}-${{ matrix.arch }}
|
run: make victoria-metrics-${{ matrix.os }}-${{ matrix.arch }}
|
||||||
|
|||||||
19
.github/workflows/changelog-linter.yml
vendored
19
.github/workflows/changelog-linter.yml
vendored
@@ -1,19 +0,0 @@
|
|||||||
name: 'changelog-linter'
|
|
||||||
|
|
||||||
on:
|
|
||||||
pull_request:
|
|
||||||
paths:
|
|
||||||
- "docs/victoriametrics/changelog/CHANGELOG.md"
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
tip-lint:
|
|
||||||
runs-on: 'ubuntu-latest'
|
|
||||||
steps:
|
|
||||||
- uses: 'actions/checkout@v6'
|
|
||||||
with:
|
|
||||||
# needed for proper diff
|
|
||||||
fetch-depth: 0
|
|
||||||
|
|
||||||
- name: 'Validate that changelog changes are under ## tip'
|
|
||||||
run: |
|
|
||||||
GITHUB_BASE_REF=${{ github.base_ref }} ./.github/scripts/lint-changelog-tip.sh
|
|
||||||
47
.github/workflows/check-commit-signed.yml
vendored
47
.github/workflows/check-commit-signed.yml
vendored
@@ -1,47 +0,0 @@
|
|||||||
name: check-commit-signed
|
|
||||||
|
|
||||||
on:
|
|
||||||
pull_request:
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
check-commit-signed:
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
steps:
|
|
||||||
- name: Checkout code
|
|
||||||
uses: actions/checkout@v6
|
|
||||||
with:
|
|
||||||
fetch-depth: 0 # we need full history for commit verification
|
|
||||||
|
|
||||||
- name: Check commit signatures
|
|
||||||
run: |
|
|
||||||
if [ "${{ github.event_name }}" != "pull_request" ]; then
|
|
||||||
echo "Not a PR event, skipping signature check"
|
|
||||||
exit 0
|
|
||||||
fi
|
|
||||||
|
|
||||||
RANGE="${{ github.event.pull_request.base.sha }}..${{ github.event.pull_request.head.sha }}"
|
|
||||||
echo "Checking commits in PR range: $RANGE"
|
|
||||||
|
|
||||||
if [ -z "$(git rev-list $RANGE)" ]; then
|
|
||||||
echo "No new commits in this PR, skipping signature check"
|
|
||||||
exit 0
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Check raw commit objects for a "gpgsig" header as a fast early signal for
|
|
||||||
# contributors. Both GPG and SSH signatures use this header.
|
|
||||||
# This avoids relying on %G? which returns N for SSH commits.
|
|
||||||
# This check is not a security enforcement — unsigned commits cannot be merged
|
|
||||||
# anyway due to the GitHub repository merge policy.
|
|
||||||
unsigned=""
|
|
||||||
for sha in $(git rev-list $RANGE); do
|
|
||||||
if ! git cat-file commit "$sha" | grep -q "^gpgsig"; then
|
|
||||||
unsigned="$unsigned $sha"
|
|
||||||
fi
|
|
||||||
done
|
|
||||||
if [ -n "$unsigned" ]; then
|
|
||||||
echo "Found unsigned commits:"
|
|
||||||
echo "$unsigned"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
echo "All commits in PR are signed (GPG or SSH)"
|
|
||||||
8
.github/workflows/check-licenses.yml
vendored
8
.github/workflows/check-licenses.yml
vendored
@@ -19,13 +19,11 @@ jobs:
|
|||||||
|
|
||||||
- name: Setup Go
|
- name: Setup Go
|
||||||
id: go
|
id: go
|
||||||
uses: actions/setup-go@v6
|
uses: actions/setup-go@v5
|
||||||
with:
|
with:
|
||||||
go-version-file: 'go.mod'
|
go-version: stable
|
||||||
cache: false
|
cache: false
|
||||||
|
|
||||||
- run: go version
|
|
||||||
|
|
||||||
- name: Cache Go artifacts
|
- name: Cache Go artifacts
|
||||||
uses: actions/cache@v4
|
uses: actions/cache@v4
|
||||||
with:
|
with:
|
||||||
@@ -34,7 +32,7 @@ jobs:
|
|||||||
~/go/pkg/mod
|
~/go/pkg/mod
|
||||||
~/go/bin
|
~/go/bin
|
||||||
key: go-artifacts-${{ runner.os }}-check-licenses-${{ steps.go.outputs.go-version }}-${{ hashFiles('go.sum', 'Makefile', 'app/**/Makefile') }}
|
key: go-artifacts-${{ runner.os }}-check-licenses-${{ steps.go.outputs.go-version }}-${{ hashFiles('go.sum', 'Makefile', 'app/**/Makefile') }}
|
||||||
restore-keys: go-artifacts-${{ runner.os }}-check-licenses-${{ steps.go.outputs.go-version }}-
|
restore-keys: go-artifacts-${{ runner.os }}-check-licenses-
|
||||||
|
|
||||||
- name: Check License
|
- name: Check License
|
||||||
run: make check-licenses
|
run: make check-licenses
|
||||||
|
|||||||
15
.github/workflows/codeql-analysis-go.yml
vendored
15
.github/workflows/codeql-analysis-go.yml
vendored
@@ -29,15 +29,14 @@ jobs:
|
|||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout repository
|
- name: Checkout repository
|
||||||
uses: actions/checkout@v6
|
uses: actions/checkout@v5
|
||||||
|
|
||||||
- name: Set up Go
|
- name: Set up Go
|
||||||
id: go
|
id: go
|
||||||
uses: actions/setup-go@v6
|
uses: actions/setup-go@v5
|
||||||
with:
|
with:
|
||||||
cache: false
|
cache: false
|
||||||
go-version-file: 'go.mod'
|
go-version: stable
|
||||||
- run: go version
|
|
||||||
|
|
||||||
- name: Cache Go artifacts
|
- name: Cache Go artifacts
|
||||||
uses: actions/cache@v4
|
uses: actions/cache@v4
|
||||||
@@ -47,17 +46,17 @@ jobs:
|
|||||||
~/go/bin
|
~/go/bin
|
||||||
~/go/pkg/mod
|
~/go/pkg/mod
|
||||||
key: go-artifacts-${{ runner.os }}-codeql-analyze-${{ steps.go.outputs.go-version }}-${{ hashFiles('go.sum', 'Makefile', 'app/**/Makefile') }}
|
key: go-artifacts-${{ runner.os }}-codeql-analyze-${{ steps.go.outputs.go-version }}-${{ hashFiles('go.sum', 'Makefile', 'app/**/Makefile') }}
|
||||||
restore-keys: go-artifacts-${{ runner.os }}-codeql-analyze-${{ steps.go.outputs.go-version }}-
|
restore-keys: go-artifacts-${{ runner.os }}-codeql-analyze-
|
||||||
|
|
||||||
- name: Initialize CodeQL
|
- name: Initialize CodeQL
|
||||||
uses: github/codeql-action/init@v4
|
uses: github/codeql-action/init@v3
|
||||||
with:
|
with:
|
||||||
languages: go
|
languages: go
|
||||||
|
|
||||||
- name: Autobuild
|
- name: Autobuild
|
||||||
uses: github/codeql-action/autobuild@v4
|
uses: github/codeql-action/autobuild@v3
|
||||||
|
|
||||||
- name: Perform CodeQL Analysis
|
- name: Perform CodeQL Analysis
|
||||||
uses: github/codeql-action/analyze@v4
|
uses: github/codeql-action/analyze@v3
|
||||||
with:
|
with:
|
||||||
category: 'language:go'
|
category: 'language:go'
|
||||||
|
|||||||
6
.github/workflows/docs.yaml
vendored
6
.github/workflows/docs.yaml
vendored
@@ -16,19 +16,19 @@ jobs:
|
|||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- name: Code checkout
|
- name: Code checkout
|
||||||
uses: actions/checkout@v6
|
uses: actions/checkout@v5
|
||||||
with:
|
with:
|
||||||
path: __vm
|
path: __vm
|
||||||
|
|
||||||
- name: Checkout private code
|
- name: Checkout private code
|
||||||
uses: actions/checkout@v6
|
uses: actions/checkout@v5
|
||||||
with:
|
with:
|
||||||
repository: VictoriaMetrics/vmdocs
|
repository: VictoriaMetrics/vmdocs
|
||||||
token: ${{ secrets.VM_BOT_GH_TOKEN }}
|
token: ${{ secrets.VM_BOT_GH_TOKEN }}
|
||||||
path: __vm-docs
|
path: __vm-docs
|
||||||
|
|
||||||
- name: Import GPG key
|
- name: Import GPG key
|
||||||
uses: crazy-max/ghaction-import-gpg@v7
|
uses: crazy-max/ghaction-import-gpg@v6
|
||||||
id: import-gpg
|
id: import-gpg
|
||||||
with:
|
with:
|
||||||
gpg_private_key: ${{ secrets.VM_BOT_GPG_PRIVATE_KEY }}
|
gpg_private_key: ${{ secrets.VM_BOT_GPG_PRIVATE_KEY }}
|
||||||
|
|||||||
44
.github/workflows/test.yml
vendored
44
.github/workflows/test.yml
vendored
@@ -32,19 +32,18 @@ jobs:
|
|||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- name: Code checkout
|
- name: Code checkout
|
||||||
uses: actions/checkout@v6
|
uses: actions/checkout@v5
|
||||||
|
|
||||||
- name: Setup Go
|
- name: Setup Go
|
||||||
id: go
|
id: go
|
||||||
uses: actions/setup-go@v6
|
uses: actions/setup-go@v5
|
||||||
with:
|
with:
|
||||||
cache-dependency-path: |
|
cache-dependency-path: |
|
||||||
go.sum
|
go.sum
|
||||||
Makefile
|
Makefile
|
||||||
app/**/Makefile
|
app/**/Makefile
|
||||||
go-version-file: 'go.mod'
|
go-version: stable
|
||||||
|
|
||||||
- run: go version
|
|
||||||
|
|
||||||
- name: Cache golangci-lint
|
- name: Cache golangci-lint
|
||||||
uses: actions/cache@v4
|
uses: actions/cache@v4
|
||||||
@@ -52,7 +51,7 @@ jobs:
|
|||||||
path: |
|
path: |
|
||||||
~/.cache/golangci-lint
|
~/.cache/golangci-lint
|
||||||
~/go/bin
|
~/go/bin
|
||||||
key: golangci-lint-${{ runner.os }}-${{ steps.go.outputs.go-version }}-${{ hashFiles('.golangci.yml') }}
|
key: golangci-lint-${{ runner.os }}-${{ hashFiles('.golangci.yml') }}
|
||||||
|
|
||||||
- name: Run check-all
|
- name: Run check-all
|
||||||
run: |
|
run: |
|
||||||
@@ -66,46 +65,49 @@ jobs:
|
|||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
scenario:
|
scenario:
|
||||||
- 'test'
|
- 'test-full'
|
||||||
- 'test-386'
|
- 'test-full-386'
|
||||||
- 'test-pure'
|
- 'test-pure'
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Code checkout
|
- name: Code checkout
|
||||||
uses: actions/checkout@v6
|
uses: actions/checkout@v5
|
||||||
|
|
||||||
- name: Setup Go
|
- name: Setup Go
|
||||||
id: go
|
id: go
|
||||||
uses: actions/setup-go@v6
|
uses: actions/setup-go@v5
|
||||||
with:
|
with:
|
||||||
cache-dependency-path: |
|
cache-dependency-path: |
|
||||||
go.sum
|
go.sum
|
||||||
Makefile
|
Makefile
|
||||||
app/**/Makefile
|
app/**/Makefile
|
||||||
go-version-file: 'go.mod'
|
go-version: stable
|
||||||
- run: go version
|
|
||||||
|
|
||||||
- name: Run tests
|
- name: Run tests
|
||||||
run: make ${{ matrix.scenario}}
|
run: GOGC=10 make ${{ matrix.scenario}}
|
||||||
|
|
||||||
apptest:
|
- name: Publish coverage
|
||||||
name: apptest
|
uses: codecov/codecov-action@v5
|
||||||
runs-on: apptest
|
with:
|
||||||
|
files: ./coverage.txt
|
||||||
|
|
||||||
|
integration:
|
||||||
|
name: integration
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Code checkout
|
- name: Code checkout
|
||||||
uses: actions/checkout@v6
|
uses: actions/checkout@v5
|
||||||
|
|
||||||
- name: Setup Go
|
- name: Setup Go
|
||||||
id: go
|
id: go
|
||||||
uses: actions/setup-go@v6
|
uses: actions/setup-go@v5
|
||||||
with:
|
with:
|
||||||
cache-dependency-path: |
|
cache-dependency-path: |
|
||||||
go.sum
|
go.sum
|
||||||
Makefile
|
Makefile
|
||||||
app/**/Makefile
|
app/**/Makefile
|
||||||
go-version-file: 'go.mod'
|
go-version: stable
|
||||||
- run: go version
|
|
||||||
|
|
||||||
- name: Run app tests
|
- name: Run integration tests
|
||||||
run: make apptest
|
run: make integration-test
|
||||||
|
|||||||
28
.github/workflows/vmui.yml
vendored
28
.github/workflows/vmui.yml
vendored
@@ -32,41 +32,35 @@ jobs:
|
|||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- name: Code checkout
|
- name: Code checkout
|
||||||
uses: actions/checkout@v6
|
uses: actions/checkout@v5
|
||||||
|
|
||||||
- name: Cache node_modules
|
- name: Setup Node
|
||||||
id: cache
|
uses: actions/setup-node@v4
|
||||||
uses: actions/cache@v5
|
|
||||||
with:
|
with:
|
||||||
path: app/vmui/packages/vmui/node_modules
|
node-version: '24.x'
|
||||||
key: vmui-deps-${{ runner.os }}-${{ hashFiles('app/vmui/packages/vmui/package-lock.json', 'app/vmui/Dockerfile-build') }}
|
|
||||||
restore-keys: |
|
|
||||||
vmui-deps-${{ runner.os }}-
|
|
||||||
|
|
||||||
- name: Install dependencies
|
- name: Cache node-modules
|
||||||
if: steps.cache.outputs.cache-hit != 'true'
|
uses: actions/cache@v4
|
||||||
run: make vmui-install
|
with:
|
||||||
|
path: |
|
||||||
|
app/vmui/packages/vmui/node_modules
|
||||||
|
key: vmui-artifacts-${{ runner.os }}-${{ hashFiles('package-lock.json') }}
|
||||||
|
restore-keys: vmui-artifacts-${{ runner.os }}-
|
||||||
|
|
||||||
- name: Run lint
|
- name: Run lint
|
||||||
id: lint
|
id: lint
|
||||||
run: make vmui-lint
|
run: make vmui-lint
|
||||||
continue-on-error: true
|
continue-on-error: true
|
||||||
env:
|
|
||||||
VMUI_SKIP_INSTALL: true
|
|
||||||
|
|
||||||
- name: Run tests
|
- name: Run tests
|
||||||
id: test
|
id: test
|
||||||
run: make vmui-test
|
run: make vmui-test
|
||||||
continue-on-error: true
|
continue-on-error: true
|
||||||
env:
|
|
||||||
VMUI_SKIP_INSTALL: true
|
|
||||||
|
|
||||||
- name: Run typecheck
|
- name: Run typecheck
|
||||||
id: typecheck
|
id: typecheck
|
||||||
run: make vmui-typecheck
|
run: make vmui-typecheck
|
||||||
continue-on-error: true
|
continue-on-error: true
|
||||||
env:
|
|
||||||
VMUI_SKIP_INSTALL: true
|
|
||||||
|
|
||||||
- name: Annotate Code Linting Results
|
- name: Annotate Code Linting Results
|
||||||
uses: ataylorme/eslint-annotate-action@v3
|
uses: ataylorme/eslint-annotate-action@v3
|
||||||
|
|||||||
2
LICENSE
2
LICENSE
@@ -175,7 +175,7 @@
|
|||||||
|
|
||||||
END OF TERMS AND CONDITIONS
|
END OF TERMS AND CONDITIONS
|
||||||
|
|
||||||
Copyright 2019-2026 VictoriaMetrics, Inc.
|
Copyright 2019-2025 VictoriaMetrics, Inc.
|
||||||
|
|
||||||
Licensed under the Apache License, Version 2.0 (the "License");
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
you may not use this file except in compliance with the License.
|
you may not use this file except in compliance with the License.
|
||||||
|
|||||||
74
Makefile
74
Makefile
@@ -17,7 +17,7 @@ EXTRA_GO_BUILD_TAGS ?=
|
|||||||
GO_BUILDINFO = -X '$(PKG_PREFIX)/lib/buildinfo.Version=$(APP_NAME)-$(DATEINFO_TAG)-$(BUILDINFO_TAG)'
|
GO_BUILDINFO = -X '$(PKG_PREFIX)/lib/buildinfo.Version=$(APP_NAME)-$(DATEINFO_TAG)-$(BUILDINFO_TAG)'
|
||||||
TAR_OWNERSHIP ?= --owner=1000 --group=1000
|
TAR_OWNERSHIP ?= --owner=1000 --group=1000
|
||||||
|
|
||||||
GOLANGCI_LINT_VERSION := 2.9.0
|
GOLANGCI_LINT_VERSION := 2.4.0
|
||||||
|
|
||||||
.PHONY: $(MAKECMDGOALS)
|
.PHONY: $(MAKECMDGOALS)
|
||||||
|
|
||||||
@@ -125,15 +125,6 @@ vmutils-linux-ppc64le: \
|
|||||||
vmrestore-linux-ppc64le \
|
vmrestore-linux-ppc64le \
|
||||||
vmctl-linux-ppc64le
|
vmctl-linux-ppc64le
|
||||||
|
|
||||||
vmutils-linux-s390x: \
|
|
||||||
vmagent-linux-s390x \
|
|
||||||
vmalert-linux-s390x \
|
|
||||||
vmalert-tool-linux-s390x \
|
|
||||||
vmauth-linux-s390x \
|
|
||||||
vmbackup-linux-s390x \
|
|
||||||
vmrestore-linux-s390x \
|
|
||||||
vmctl-linux-s390x
|
|
||||||
|
|
||||||
vmutils-darwin-amd64: \
|
vmutils-darwin-amd64: \
|
||||||
vmagent-darwin-amd64 \
|
vmagent-darwin-amd64 \
|
||||||
vmalert-darwin-amd64 \
|
vmalert-darwin-amd64 \
|
||||||
@@ -266,7 +257,6 @@ release-victoria-metrics: \
|
|||||||
release-victoria-metrics-linux-amd64 \
|
release-victoria-metrics-linux-amd64 \
|
||||||
release-victoria-metrics-linux-arm \
|
release-victoria-metrics-linux-arm \
|
||||||
release-victoria-metrics-linux-arm64 \
|
release-victoria-metrics-linux-arm64 \
|
||||||
release-victoria-metrics-linux-s390x \
|
|
||||||
release-victoria-metrics-darwin-amd64 \
|
release-victoria-metrics-darwin-amd64 \
|
||||||
release-victoria-metrics-darwin-arm64 \
|
release-victoria-metrics-darwin-arm64 \
|
||||||
release-victoria-metrics-freebsd-amd64 \
|
release-victoria-metrics-freebsd-amd64 \
|
||||||
@@ -285,9 +275,6 @@ release-victoria-metrics-linux-arm:
|
|||||||
release-victoria-metrics-linux-arm64:
|
release-victoria-metrics-linux-arm64:
|
||||||
GOOS=linux GOARCH=arm64 $(MAKE) release-victoria-metrics-goos-goarch
|
GOOS=linux GOARCH=arm64 $(MAKE) release-victoria-metrics-goos-goarch
|
||||||
|
|
||||||
release-victoria-metrics-linux-s390x:
|
|
||||||
GOOS=linux GOARCH=s390x $(MAKE) release-victoria-metrics-goos-goarch
|
|
||||||
|
|
||||||
release-victoria-metrics-darwin-amd64:
|
release-victoria-metrics-darwin-amd64:
|
||||||
GOOS=darwin GOARCH=amd64 $(MAKE) release-victoria-metrics-goos-goarch
|
GOOS=darwin GOARCH=amd64 $(MAKE) release-victoria-metrics-goos-goarch
|
||||||
|
|
||||||
@@ -327,7 +314,6 @@ release-vmutils: \
|
|||||||
release-vmutils-linux-amd64 \
|
release-vmutils-linux-amd64 \
|
||||||
release-vmutils-linux-arm64 \
|
release-vmutils-linux-arm64 \
|
||||||
release-vmutils-linux-arm \
|
release-vmutils-linux-arm \
|
||||||
release-vmutils-linux-s390x \
|
|
||||||
release-vmutils-darwin-amd64 \
|
release-vmutils-darwin-amd64 \
|
||||||
release-vmutils-darwin-arm64 \
|
release-vmutils-darwin-arm64 \
|
||||||
release-vmutils-freebsd-amd64 \
|
release-vmutils-freebsd-amd64 \
|
||||||
@@ -346,9 +332,6 @@ release-vmutils-linux-arm64:
|
|||||||
release-vmutils-linux-arm:
|
release-vmutils-linux-arm:
|
||||||
GOOS=linux GOARCH=arm $(MAKE) release-vmutils-goos-goarch
|
GOOS=linux GOARCH=arm $(MAKE) release-vmutils-goos-goarch
|
||||||
|
|
||||||
release-vmutils-linux-s390x:
|
|
||||||
GOOS=linux GOARCH=s390x $(MAKE) release-vmutils-goos-goarch
|
|
||||||
|
|
||||||
release-vmutils-darwin-amd64:
|
release-vmutils-darwin-amd64:
|
||||||
GOOS=darwin GOARCH=amd64 $(MAKE) release-vmutils-goos-goarch
|
GOOS=darwin GOARCH=amd64 $(MAKE) release-vmutils-goos-goarch
|
||||||
|
|
||||||
@@ -435,7 +418,7 @@ release-vmutils-windows-goarch: \
|
|||||||
vmctl-windows-$(GOARCH)-prod.exe
|
vmctl-windows-$(GOARCH)-prod.exe
|
||||||
|
|
||||||
pprof-cpu:
|
pprof-cpu:
|
||||||
go tool pprof -trim_path=github.com/VictoriaMetrics/VictoriaMetrics $(PPROF_FILE)
|
go tool pprof -trim_path=github.com/VictoriaMetrics/VictoriaMetrics@ $(PPROF_FILE)
|
||||||
|
|
||||||
fmt:
|
fmt:
|
||||||
gofmt -l -w -s ./lib
|
gofmt -l -w -s ./lib
|
||||||
@@ -443,7 +426,7 @@ fmt:
|
|||||||
gofmt -l -w -s ./apptest
|
gofmt -l -w -s ./apptest
|
||||||
|
|
||||||
vet:
|
vet:
|
||||||
go vet -tags 'synctest' ./lib/...
|
GOEXPERIMENT=synctest go vet ./lib/...
|
||||||
go vet ./app/...
|
go vet ./app/...
|
||||||
go vet ./apptest/...
|
go vet ./apptest/...
|
||||||
|
|
||||||
@@ -452,55 +435,39 @@ check-all: fmt vet golangci-lint govulncheck
|
|||||||
clean-checkers: remove-golangci-lint remove-govulncheck
|
clean-checkers: remove-golangci-lint remove-govulncheck
|
||||||
|
|
||||||
test:
|
test:
|
||||||
go test -tags 'synctest' ./lib/... ./app/...
|
GOEXPERIMENT=synctest go test ./lib/... ./app/...
|
||||||
|
|
||||||
test-race:
|
test-race:
|
||||||
go test -tags 'synctest' -race ./lib/... ./app/...
|
GOEXPERIMENT=synctest go test -race ./lib/... ./app/...
|
||||||
|
|
||||||
test-386:
|
|
||||||
GOARCH=386 go test -tags 'synctest' ./lib/... ./app/...
|
|
||||||
|
|
||||||
test-pure:
|
test-pure:
|
||||||
CGO_ENABLED=0 go test -tags 'synctest' ./lib/... ./app/...
|
GOEXPERIMENT=synctest CGO_ENABLED=0 go test ./lib/... ./app/...
|
||||||
|
|
||||||
test-full:
|
test-full:
|
||||||
go test -tags 'synctest' -coverprofile=coverage.txt -covermode=atomic ./lib/... ./app/...
|
GOEXPERIMENT=synctest go test -coverprofile=coverage.txt -covermode=atomic ./lib/... ./app/...
|
||||||
|
|
||||||
test-full-386:
|
test-full-386:
|
||||||
GOARCH=386 go test -tags 'synctest' -coverprofile=coverage.txt -covermode=atomic ./lib/... ./app/...
|
GOEXPERIMENT=synctest GOARCH=386 go test -coverprofile=coverage.txt -covermode=atomic ./lib/... ./app/...
|
||||||
|
|
||||||
|
integration-test:
|
||||||
|
$(MAKE) apptest
|
||||||
|
|
||||||
apptest:
|
apptest:
|
||||||
$(MAKE) victoria-metrics-race vmagent-race vmalert-race vmauth-race vmctl-race vmbackup-race vmrestore-race
|
$(MAKE) victoria-metrics vmagent vmalert vmauth vmctl vmbackup vmrestore
|
||||||
go test ./apptest/... -skip="^Test(Cluster|Legacy).*"
|
go test ./apptest/... -skip="^TestCluster.*"
|
||||||
|
|
||||||
apptest-legacy: victoria-metrics-race vmbackup-race vmrestore-race
|
|
||||||
OS=$$(uname | tr '[:upper:]' '[:lower:]'); \
|
|
||||||
ARCH=$$(uname -m | tr '[:upper:]' '[:lower:]' | sed 's/x86_64/amd64/'); \
|
|
||||||
VERSION=v1.132.0; \
|
|
||||||
VMSINGLE=victoria-metrics-$${OS}-$${ARCH}-$${VERSION}.tar.gz; \
|
|
||||||
VMCLUSTER=victoria-metrics-$${OS}-$${ARCH}-$${VERSION}-cluster.tar.gz; \
|
|
||||||
URL=https://github.com/VictoriaMetrics/VictoriaMetrics/releases/download/$${VERSION}; \
|
|
||||||
DIR=/tmp/$${VERSION}; \
|
|
||||||
test -d $${DIR} || (mkdir $${DIR} && \
|
|
||||||
curl --output-dir /tmp -LO $${URL}/$${VMSINGLE} && tar xzf /tmp/$${VMSINGLE} -C $${DIR} && \
|
|
||||||
curl --output-dir /tmp -LO $${URL}/$${VMCLUSTER} && tar xzf /tmp/$${VMCLUSTER} -C $${DIR} \
|
|
||||||
); \
|
|
||||||
VM_LEGACY_VMSINGLE_PATH=$${DIR}/victoria-metrics-prod \
|
|
||||||
VM_LEGACY_VMSTORAGE_PATH=$${DIR}/vmstorage-prod \
|
|
||||||
go test ./apptest/tests -run="^TestLegacySingle.*"
|
|
||||||
|
|
||||||
benchmark:
|
benchmark:
|
||||||
go test -run=NO_TESTS -bench=. ./lib/...
|
GOEXPERIMENT=synctest go test -bench=. ./lib/...
|
||||||
go test -run=NO_TESTS -bench=. ./app/...
|
go test -bench=. ./app/...
|
||||||
|
|
||||||
benchmark-pure:
|
benchmark-pure:
|
||||||
CGO_ENABLED=0 go test -run=NO_TESTS -bench=. ./lib/...
|
GOEXPERIMENT=synctest CGO_ENABLED=0 go test -bench=. ./lib/...
|
||||||
CGO_ENABLED=0 go test -run=NO_TESTS -bench=. ./app/...
|
CGO_ENABLED=0 go test -bench=. ./app/...
|
||||||
|
|
||||||
vendor-update:
|
vendor-update:
|
||||||
go get -u ./lib/...
|
go get -u ./lib/...
|
||||||
go get -u ./app/...
|
go get -u ./app/...
|
||||||
go mod tidy -compat=1.26
|
go mod tidy -compat=1.24
|
||||||
go mod vendor
|
go mod vendor
|
||||||
|
|
||||||
app-local:
|
app-local:
|
||||||
@@ -516,15 +483,14 @@ app-local-windows-goarch:
|
|||||||
CGO_ENABLED=0 GOOS=windows GOARCH=$(GOARCH) go build $(RACE) -ldflags "$(GO_BUILDINFO)" -tags "$(EXTRA_GO_BUILD_TAGS)" -o bin/$(APP_NAME)-windows-$(GOARCH)$(RACE).exe $(PKG_PREFIX)/app/$(APP_NAME)
|
CGO_ENABLED=0 GOOS=windows GOARCH=$(GOARCH) go build $(RACE) -ldflags "$(GO_BUILDINFO)" -tags "$(EXTRA_GO_BUILD_TAGS)" -o bin/$(APP_NAME)-windows-$(GOARCH)$(RACE).exe $(PKG_PREFIX)/app/$(APP_NAME)
|
||||||
|
|
||||||
quicktemplate-gen: install-qtc
|
quicktemplate-gen: install-qtc
|
||||||
qtc -dir=lib
|
qtc
|
||||||
qtc -dir=app
|
|
||||||
|
|
||||||
install-qtc:
|
install-qtc:
|
||||||
which qtc || go install github.com/valyala/quicktemplate/qtc@latest
|
which qtc || go install github.com/valyala/quicktemplate/qtc@latest
|
||||||
|
|
||||||
|
|
||||||
golangci-lint: install-golangci-lint
|
golangci-lint: install-golangci-lint
|
||||||
golangci-lint run --build-tags 'synctest'
|
GOEXPERIMENT=synctest golangci-lint run
|
||||||
|
|
||||||
install-golangci-lint:
|
install-golangci-lint:
|
||||||
which golangci-lint && (golangci-lint --version | grep -q $(GOLANGCI_LINT_VERSION)) || curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(shell go env GOPATH)/bin v$(GOLANGCI_LINT_VERSION)
|
which golangci-lint && (golangci-lint --version | grep -q $(GOLANGCI_LINT_VERSION)) || curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(shell go env GOPATH)/bin v$(GOLANGCI_LINT_VERSION)
|
||||||
|
|||||||
26
README.md
26
README.md
@@ -1,11 +1,12 @@
|
|||||||
# VictoriaMetrics
|
# VictoriaMetrics
|
||||||
|
|
||||||
[](https://github.com/VictoriaMetrics/VictoriaMetrics/releases)
|
[](https://github.com/VictoriaMetrics/VictoriaMetrics/releases)
|
||||||
[](https://hub.docker.com/u/victoriametrics)
|

|
||||||
[](https://goreportcard.com/report/github.com/VictoriaMetrics/VictoriaMetrics)
|
[](https://goreportcard.com/report/github.com/VictoriaMetrics/VictoriaMetrics)
|
||||||
[](https://github.com/VictoriaMetrics/VictoriaMetrics/actions/workflows/build.yml)
|
[](https://github.com/VictoriaMetrics/VictoriaMetrics/actions/workflows/main.yml)
|
||||||
|
[](https://app.codecov.io/gh/VictoriaMetrics/VictoriaMetrics)
|
||||||
[](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/LICENSE)
|
[](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/LICENSE)
|
||||||
[](https://slack.victoriametrics.com)
|

|
||||||
[](https://x.com/VictoriaMetrics/)
|
[](https://x.com/VictoriaMetrics/)
|
||||||
[](https://www.reddit.com/r/VictoriaMetrics/)
|
[](https://www.reddit.com/r/VictoriaMetrics/)
|
||||||
|
|
||||||
@@ -15,21 +16,16 @@
|
|||||||
<img src="docs/victoriametrics/logo.webp" width="300" alt="VictoriaMetrics logo">
|
<img src="docs/victoriametrics/logo.webp" width="300" alt="VictoriaMetrics logo">
|
||||||
</picture>
|
</picture>
|
||||||
|
|
||||||
VictoriaMetrics is a fast, cost-effective, and scalable solution for monitoring and managing time series data. It delivers high performance and reliability, making it an ideal choice for businesses of all sizes.
|
VictoriaMetrics is a fast, cost-saving, and scalable solution for monitoring and managing time series data. It delivers high performance and reliability, making it an ideal choice for businesses of all sizes.
|
||||||
|
|
||||||
Here are some resources and information about VictoriaMetrics:
|
Here are some resources and information about VictoriaMetrics:
|
||||||
|
|
||||||
- **Case studies**: [Grammarly, Roblox, Wix, Spotify,...](https://docs.victoriametrics.com/victoriametrics/casestudies/).
|
- Documentation: [docs.victoriametrics.com](https://docs.victoriametrics.com)
|
||||||
- **Available**: [Binary releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest), Docker images on [Docker Hub](https://hub.docker.com/r/victoriametrics/victoria-metrics/) and [Quay](https://quay.io/repository/victoriametrics/victoria-metrics), [Source code](https://github.com/VictoriaMetrics/VictoriaMetrics).
|
- Case studies: [Grammarly, Roblox, Wix,...](https://docs.victoriametrics.com/victoriametrics/casestudies/).
|
||||||
- **Deployment types**: [Single-node version](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) and [Cluster version](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/) under [Apache License 2.0](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/LICENSE).
|
- Available: [Binary releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest), docker images [Docker Hub](https://hub.docker.com/r/victoriametrics/victoria-metrics/) and [Quay](https://quay.io/repository/victoriametrics/victoria-metrics), [Source code](https://github.com/VictoriaMetrics/VictoriaMetrics)
|
||||||
- **Getting started:** Read [key concepts](https://docs.victoriametrics.com/victoriametrics/keyconcepts/) and follow the
|
- Deployment types: [Single-node version](https://docs.victoriametrics.com/), [Cluster version](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/), and [Enterprise version](https://docs.victoriametrics.com/victoriametrics/enterprise/)
|
||||||
[quick start guide](https://docs.victoriametrics.com/victoriametrics/quick-start/).
|
- Changelog: [CHANGELOG](https://docs.victoriametrics.com/victoriametrics/changelog/), and [How to upgrade](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-upgrade-victoriametrics)
|
||||||
- **Community**: [Slack](https://slack.victoriametrics.com/) (join via [Slack Inviter](https://slack.victoriametrics.com/)), [X (Twitter)](https://x.com/VictoriaMetrics), [YouTube](https://www.youtube.com/@VictoriaMetrics). See full list [here](https://docs.victoriametrics.com/victoriametrics/#community-and-contributions).
|
- Community: [Slack](https://slack.victoriametrics.com/), [X (Twitter)](https://x.com/VictoriaMetrics), [LinkedIn](https://www.linkedin.com/company/victoriametrics/), [YouTube](https://www.youtube.com/@VictoriaMetrics)
|
||||||
- **Changelog**: Project evolves fast - check the [CHANGELOG](https://docs.victoriametrics.com/victoriametrics/changelog/), and [How to upgrade](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-upgrade-victoriametrics).
|
|
||||||
- **Enterprise support:** [Contact us](mailto:info@victoriametrics.com) for commercial support with additional [enterprise features](https://docs.victoriametrics.com/victoriametrics/enterprise/).
|
|
||||||
- **Enterprise releases:** Enterprise and [long-term support releases (LTS)](https://docs.victoriametrics.com/victoriametrics/lts-releases/) are publicly available and can be evaluated for free
|
|
||||||
using a [free trial license](https://victoriametrics.com/products/enterprise/trial/).
|
|
||||||
- **Security:** we achieved [security certifications](https://victoriametrics.com/security/) for Database Software Development and Software-Based Monitoring Services.
|
|
||||||
|
|
||||||
Yes, we open-source both the single-node VictoriaMetrics and the cluster version.
|
Yes, we open-source both the single-node VictoriaMetrics and the cluster version.
|
||||||
|
|
||||||
|
|||||||
36
SECURITY.md
36
SECURITY.md
@@ -4,39 +4,15 @@
|
|||||||
|
|
||||||
The following versions of VictoriaMetrics receive regular security fixes:
|
The following versions of VictoriaMetrics receive regular security fixes:
|
||||||
|
|
||||||
| Version | Supported |
|
| Version | Supported |
|
||||||
|--------------------------------------------------------------------------------|--------------------|
|
|---------|--------------------|
|
||||||
| [Latest release](https://docs.victoriametrics.com/victoriametrics/changelog/) | :white_check_mark: |
|
| [latest release](https://docs.victoriametrics.com/victoriametrics/changelog/) | :white_check_mark: |
|
||||||
| [LTS releases](https://docs.victoriametrics.com/victoriametrics/lts-releases/) | :white_check_mark: |
|
| v1.102.x [LTS line](https://docs.victoriametrics.com/victoriametrics/lts-releases/) | :white_check_mark: |
|
||||||
| other releases | :x: |
|
| v1.110.x [LTS line](https://docs.victoriametrics.com/victoriametrics/lts-releases/) | :white_check_mark: |
|
||||||
|
| other releases | :x: |
|
||||||
|
|
||||||
See [this page](https://victoriametrics.com/security/) for more details.
|
See [this page](https://victoriametrics.com/security/) for more details.
|
||||||
|
|
||||||
## Software Bill of Materials (SBOM)
|
|
||||||
|
|
||||||
Every VictoriaMetrics container{{% available_from "#" %}} image published to
|
|
||||||
[Docker Hub](https://hub.docker.com/u/victoriametrics)
|
|
||||||
and [Quay.io](https://quay.io/organization/victoriametrics)
|
|
||||||
includes an [SPDX](https://spdx.dev/) SBOM attestation
|
|
||||||
generated automatically by BuildKit during
|
|
||||||
`docker buildx build`.
|
|
||||||
|
|
||||||
To inspect the SBOM for an image:
|
|
||||||
|
|
||||||
```sh
|
|
||||||
docker buildx imagetools inspect \
|
|
||||||
docker.io/victoriametrics/victoria-metrics:latest \
|
|
||||||
--format "{{ json .SBOM }}"
|
|
||||||
```
|
|
||||||
|
|
||||||
To scan an image using its SBOM attestation with
|
|
||||||
[Trivy](https://github.com/aquasecurity/trivy):
|
|
||||||
|
|
||||||
```sh
|
|
||||||
trivy image --sbom-sources oci \
|
|
||||||
docker.io/victoriametrics/victoria-metrics:latest
|
|
||||||
```
|
|
||||||
|
|
||||||
## Reporting a Vulnerability
|
## Reporting a Vulnerability
|
||||||
|
|
||||||
Please report any security issues to <security@victoriametrics.com>
|
Please report any security issues to <security@victoriametrics.com>
|
||||||
|
|||||||
@@ -27,9 +27,6 @@ victoria-metrics-linux-ppc64le-prod:
|
|||||||
victoria-metrics-linux-386-prod:
|
victoria-metrics-linux-386-prod:
|
||||||
APP_NAME=victoria-metrics $(MAKE) app-via-docker-linux-386
|
APP_NAME=victoria-metrics $(MAKE) app-via-docker-linux-386
|
||||||
|
|
||||||
victoria-metrics-linux-s390x-prod:
|
|
||||||
APP_NAME=victoria-metrics $(MAKE) app-via-docker-linux-s390x
|
|
||||||
|
|
||||||
victoria-metrics-darwin-amd64-prod:
|
victoria-metrics-darwin-amd64-prod:
|
||||||
APP_NAME=victoria-metrics $(MAKE) app-via-docker-darwin-amd64
|
APP_NAME=victoria-metrics $(MAKE) app-via-docker-darwin-amd64
|
||||||
|
|
||||||
|
|||||||
@@ -134,7 +134,6 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
|||||||
}
|
}
|
||||||
w.Header().Add("Content-Type", "text/html; charset=utf-8")
|
w.Header().Add("Content-Type", "text/html; charset=utf-8")
|
||||||
fmt.Fprintf(w, "<h2>Single-node VictoriaMetrics</h2></br>")
|
fmt.Fprintf(w, "<h2>Single-node VictoriaMetrics</h2></br>")
|
||||||
fmt.Fprintf(w, "Version %s<br>", buildinfo.Version)
|
|
||||||
fmt.Fprintf(w, "See docs at <a href='https://docs.victoriametrics.com/'>https://docs.victoriametrics.com/</a></br>")
|
fmt.Fprintf(w, "See docs at <a href='https://docs.victoriametrics.com/'>https://docs.victoriametrics.com/</a></br>")
|
||||||
fmt.Fprintf(w, "Useful endpoints:</br>")
|
fmt.Fprintf(w, "Useful endpoints:</br>")
|
||||||
httpserver.WriteAPIHelp(w, [][2]string{
|
httpserver.WriteAPIHelp(w, [][2]string{
|
||||||
@@ -170,7 +169,7 @@ func usage() {
|
|||||||
const s = `
|
const s = `
|
||||||
victoria-metrics is a time series database and monitoring solution.
|
victoria-metrics is a time series database and monitoring solution.
|
||||||
|
|
||||||
See the docs at https://docs.victoriametrics.com/victoriametrics/
|
See the docs at https://docs.victoriametrics.com/
|
||||||
`
|
`
|
||||||
flagutil.Usage(s)
|
flagutil.Usage(s)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -10,11 +10,9 @@ import (
|
|||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/decimal"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/decimal"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prommetadata"
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/prometheus"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/prometheus"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage/metricsmetadata"
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timeserieslimits"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timeserieslimits"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -29,9 +27,11 @@ var selfScraperWG sync.WaitGroup
|
|||||||
|
|
||||||
func startSelfScraper() {
|
func startSelfScraper() {
|
||||||
selfScraperStopCh = make(chan struct{})
|
selfScraperStopCh = make(chan struct{})
|
||||||
selfScraperWG.Go(func() {
|
selfScraperWG.Add(1)
|
||||||
|
go func() {
|
||||||
|
defer selfScraperWG.Done()
|
||||||
selfScraper(*selfScrapeInterval)
|
selfScraper(*selfScrapeInterval)
|
||||||
})
|
}()
|
||||||
}
|
}
|
||||||
|
|
||||||
func stopSelfScraper() {
|
func stopSelfScraper() {
|
||||||
@@ -48,7 +48,6 @@ func selfScraper(scrapeInterval time.Duration) {
|
|||||||
|
|
||||||
var bb bytesutil.ByteBuffer
|
var bb bytesutil.ByteBuffer
|
||||||
var rows prometheus.Rows
|
var rows prometheus.Rows
|
||||||
var metadataRows prometheus.MetadataRows
|
|
||||||
var mrs []storage.MetricRow
|
var mrs []storage.MetricRow
|
||||||
var labels []prompb.Label
|
var labels []prompb.Label
|
||||||
t := time.NewTicker(scrapeInterval)
|
t := time.NewTicker(scrapeInterval)
|
||||||
@@ -58,12 +57,8 @@ func selfScraper(scrapeInterval time.Duration) {
|
|||||||
appmetrics.WritePrometheusMetrics(&bb)
|
appmetrics.WritePrometheusMetrics(&bb)
|
||||||
s := bytesutil.ToUnsafeString(bb.B)
|
s := bytesutil.ToUnsafeString(bb.B)
|
||||||
rows.Reset()
|
rows.Reset()
|
||||||
// Parse metrics and optionally metadata when enabled
|
// VictoriaMetrics components don't expose metadata yet, only need to parse samples
|
||||||
if prommetadata.IsEnabled() {
|
rows.UnmarshalWithErrLogger(s, nil)
|
||||||
rows, metadataRows = prometheus.UnmarshalWithMetadata(rows, metadataRows, s, nil)
|
|
||||||
} else {
|
|
||||||
rows.UnmarshalWithErrLogger(s, nil)
|
|
||||||
}
|
|
||||||
mrs = mrs[:0]
|
mrs = mrs[:0]
|
||||||
for i := range rows.Rows {
|
for i := range rows.Rows {
|
||||||
r := &rows.Rows[i]
|
r := &rows.Rows[i]
|
||||||
@@ -96,19 +91,6 @@ func selfScraper(scrapeInterval time.Duration) {
|
|||||||
if err := vmstorage.AddRows(mrs); err != nil {
|
if err := vmstorage.AddRows(mrs); err != nil {
|
||||||
logger.Errorf("cannot store self-scraped metrics: %s", err)
|
logger.Errorf("cannot store self-scraped metrics: %s", err)
|
||||||
}
|
}
|
||||||
if len(metadataRows.Rows) > 0 {
|
|
||||||
mms := make([]metricsmetadata.Row, 0, len(metadataRows.Rows))
|
|
||||||
for _, mm := range metadataRows.Rows {
|
|
||||||
mms = append(mms, metricsmetadata.Row{
|
|
||||||
MetricFamilyName: bytesutil.ToUnsafeBytes(mm.Metric),
|
|
||||||
Help: bytesutil.ToUnsafeBytes(mm.Help),
|
|
||||||
Type: mm.Type,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
if err := vmstorage.AddMetadataRows(mms); err != nil {
|
|
||||||
logger.Errorf("cannot store self-scraped metrics metadata: %s", err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
for {
|
for {
|
||||||
select {
|
select {
|
||||||
|
|||||||
@@ -33,13 +33,13 @@ func PopulateTimeTpl(b []byte, tGlobal time.Time) []byte {
|
|||||||
}
|
}
|
||||||
switch strings.TrimSpace(parts[0]) {
|
switch strings.TrimSpace(parts[0]) {
|
||||||
case `TIME_S`:
|
case `TIME_S`:
|
||||||
return fmt.Appendf(nil, "%d", t.Unix())
|
return []byte(fmt.Sprintf("%d", t.Unix()))
|
||||||
case `TIME_MSZ`:
|
case `TIME_MSZ`:
|
||||||
return fmt.Appendf(nil, "%d", t.Unix()*1e3)
|
return []byte(fmt.Sprintf("%d", t.Unix()*1e3))
|
||||||
case `TIME_MS`:
|
case `TIME_MS`:
|
||||||
return fmt.Appendf(nil, "%d", timeToMillis(t))
|
return []byte(fmt.Sprintf("%d", timeToMillis(t)))
|
||||||
case `TIME_NS`:
|
case `TIME_NS`:
|
||||||
return fmt.Appendf(nil, "%d", t.UnixNano())
|
return []byte(fmt.Sprintf("%d", t.UnixNano()))
|
||||||
default:
|
default:
|
||||||
log.Fatalf("unknown time pattern %s in %s", parts[0], repl)
|
log.Fatalf("unknown time pattern %s in %s", parts[0], repl)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -27,9 +27,6 @@ vmagent-linux-ppc64le-prod:
|
|||||||
vmagent-linux-386-prod:
|
vmagent-linux-386-prod:
|
||||||
APP_NAME=vmagent $(MAKE) app-via-docker-linux-386
|
APP_NAME=vmagent $(MAKE) app-via-docker-linux-386
|
||||||
|
|
||||||
vmagent-linux-s390x-prod:
|
|
||||||
APP_NAME=vmagent $(MAKE) app-via-docker-linux-s390x
|
|
||||||
|
|
||||||
vmagent-darwin-amd64-prod:
|
vmagent-darwin-amd64-prod:
|
||||||
APP_NAME=vmagent $(MAKE) app-via-docker-darwin-amd64
|
APP_NAME=vmagent $(MAKE) app-via-docker-darwin-amd64
|
||||||
|
|
||||||
|
|||||||
@@ -49,11 +49,6 @@ func insertRows(at *auth.Token, sketches []*datadogsketches.Sketch, extraLabels
|
|||||||
Name: "__name__",
|
Name: "__name__",
|
||||||
Value: m.Name,
|
Value: m.Name,
|
||||||
})
|
})
|
||||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10557
|
|
||||||
labels = append(labels, prompb.Label{
|
|
||||||
Name: "host",
|
|
||||||
Value: sketch.Host,
|
|
||||||
})
|
|
||||||
for _, label := range m.Labels {
|
for _, label := range m.Labels {
|
||||||
labels = append(labels, prompb.Label{
|
labels = append(labels, prompb.Label{
|
||||||
Name: label.Name,
|
Name: label.Name,
|
||||||
@@ -62,6 +57,9 @@ func insertRows(at *auth.Token, sketches []*datadogsketches.Sketch, extraLabels
|
|||||||
}
|
}
|
||||||
for _, tag := range sketch.Tags {
|
for _, tag := range sketch.Tags {
|
||||||
name, value := datadogutil.SplitTag(tag)
|
name, value := datadogutil.SplitTag(tag)
|
||||||
|
if name == "host" {
|
||||||
|
name = "exported_host"
|
||||||
|
}
|
||||||
labels = append(labels, prompb.Label{
|
labels = append(labels, prompb.Label{
|
||||||
Name: name,
|
Name: name,
|
||||||
Value: value,
|
Value: value,
|
||||||
|
|||||||
@@ -27,7 +27,6 @@ import (
|
|||||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/promremotewrite"
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/promremotewrite"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/vmimport"
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/vmimport"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/zabbixconnector"
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||||
@@ -75,7 +74,7 @@ var (
|
|||||||
"See also -opentsdbHTTPListenAddr.useProxyProtocol")
|
"See also -opentsdbHTTPListenAddr.useProxyProtocol")
|
||||||
opentsdbHTTPUseProxyProtocol = flag.Bool("opentsdbHTTPListenAddr.useProxyProtocol", false, "Whether to use proxy protocol for connections accepted "+
|
opentsdbHTTPUseProxyProtocol = flag.Bool("opentsdbHTTPListenAddr.useProxyProtocol", false, "Whether to use proxy protocol for connections accepted "+
|
||||||
"at -opentsdbHTTPListenAddr . See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt")
|
"at -opentsdbHTTPListenAddr . See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt")
|
||||||
configAuthKey = flagutil.NewPassword("configAuthKey", "Authorization key for accessing /config and /remotewrite-.*-config pages. It must be passed via authKey query arg. It overrides -httpAuth.*")
|
configAuthKey = flagutil.NewPassword("configAuthKey", "Authorization key for accessing /config page. It must be passed via authKey query arg. It overrides -httpAuth.*")
|
||||||
reloadAuthKey = flagutil.NewPassword("reloadAuthKey", "Auth key for /-/reload http endpoint. It must be passed via authKey query arg. It overrides -httpAuth.*")
|
reloadAuthKey = flagutil.NewPassword("reloadAuthKey", "Auth key for /-/reload http endpoint. It must be passed via authKey query arg. It overrides -httpAuth.*")
|
||||||
dryRun = flag.Bool("dryRun", false, "Whether to check config files without running vmagent. The following files are checked: "+
|
dryRun = flag.Bool("dryRun", false, "Whether to check config files without running vmagent. The following files are checked: "+
|
||||||
"-promscrape.config, -remoteWrite.relabelConfig, -remoteWrite.urlRelabelConfig, -remoteWrite.streamAggr.config . "+
|
"-promscrape.config, -remoteWrite.relabelConfig, -remoteWrite.urlRelabelConfig, -remoteWrite.streamAggr.config . "+
|
||||||
@@ -245,7 +244,6 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
|||||||
}
|
}
|
||||||
w.Header().Add("Content-Type", "text/html; charset=utf-8")
|
w.Header().Add("Content-Type", "text/html; charset=utf-8")
|
||||||
fmt.Fprintf(w, "<h2>vmagent</h2>")
|
fmt.Fprintf(w, "<h2>vmagent</h2>")
|
||||||
fmt.Fprintf(w, "Version %s<br>", buildinfo.Version)
|
|
||||||
fmt.Fprintf(w, "See docs at <a href='https://docs.victoriametrics.com/victoriametrics/vmagent/'>https://docs.victoriametrics.com/victoriametrics/vmagent/</a></br>")
|
fmt.Fprintf(w, "See docs at <a href='https://docs.victoriametrics.com/victoriametrics/vmagent/'>https://docs.victoriametrics.com/victoriametrics/vmagent/</a></br>")
|
||||||
fmt.Fprintf(w, "Useful endpoints:</br>")
|
fmt.Fprintf(w, "Useful endpoints:</br>")
|
||||||
httpserver.WriteAPIHelp(w, [][2]string{
|
httpserver.WriteAPIHelp(w, [][2]string{
|
||||||
@@ -254,8 +252,6 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
|||||||
{"metric-relabel-debug", "debug metric relabeling"},
|
{"metric-relabel-debug", "debug metric relabeling"},
|
||||||
{"api/v1/targets", "advanced information about discovered targets in JSON format"},
|
{"api/v1/targets", "advanced information about discovered targets in JSON format"},
|
||||||
{"config", "-promscrape.config contents"},
|
{"config", "-promscrape.config contents"},
|
||||||
{"remotewrite-relabel-config", "-remoteWrite.relabelConfig contents"},
|
|
||||||
{"remotewrite-url-relabel-config", "-remoteWrite.urlRelabelConfig contents"},
|
|
||||||
{"metrics", "available service metrics"},
|
{"metrics", "available service metrics"},
|
||||||
{"flags", "command-line flags"},
|
{"flags", "command-line flags"},
|
||||||
{"-/reload", "reload configuration"},
|
{"-/reload", "reload configuration"},
|
||||||
@@ -352,17 +348,6 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
|||||||
}
|
}
|
||||||
firehose.WriteSuccessResponse(w, r)
|
firehose.WriteSuccessResponse(w, r)
|
||||||
return true
|
return true
|
||||||
case "/zabbixconnector/api/v1/history":
|
|
||||||
zabbixconnectorHistoryRequests.Inc()
|
|
||||||
if err := zabbixconnector.InsertHandlerForHTTP(nil, r); err != nil {
|
|
||||||
zabbixconnectorHistoryErrors.Inc()
|
|
||||||
w.Header().Set("Content-Type", "application/json")
|
|
||||||
w.WriteHeader(http.StatusBadRequest)
|
|
||||||
fmt.Fprintf(w, `{"error":%q}`, err.Error())
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
w.WriteHeader(http.StatusOK)
|
|
||||||
return true
|
|
||||||
case "/newrelic":
|
case "/newrelic":
|
||||||
newrelicCheckRequest.Inc()
|
newrelicCheckRequest.Inc()
|
||||||
w.Header().Set("Content-Type", "application/json")
|
w.Header().Set("Content-Type", "application/json")
|
||||||
@@ -492,42 +477,6 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
|||||||
promscrape.WriteConfigData(&bb)
|
promscrape.WriteConfigData(&bb)
|
||||||
fmt.Fprintf(w, `{"status":"success","data":{"yaml":%s}}`, stringsutil.JSONString(string(bb.B)))
|
fmt.Fprintf(w, `{"status":"success","data":{"yaml":%s}}`, stringsutil.JSONString(string(bb.B)))
|
||||||
return true
|
return true
|
||||||
case "/remotewrite-relabel-config":
|
|
||||||
if !httpserver.CheckAuthFlag(w, r, configAuthKey) {
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
remoteWriteRelabelConfigRequests.Inc()
|
|
||||||
w.Header().Set("Content-Type", "text/plain; charset=utf-8")
|
|
||||||
remotewrite.WriteRelabelConfigData(w)
|
|
||||||
return true
|
|
||||||
case "/api/v1/status/remotewrite-relabel-config":
|
|
||||||
if !httpserver.CheckAuthFlag(w, r, configAuthKey) {
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
remoteWriteStatusRelabelConfigRequests.Inc()
|
|
||||||
w.Header().Set("Content-Type", "application/json")
|
|
||||||
var bb bytesutil.ByteBuffer
|
|
||||||
remotewrite.WriteRelabelConfigData(&bb)
|
|
||||||
fmt.Fprintf(w, `{"status":"success","data":{"yaml":%s}}`, stringsutil.JSONString(string(bb.B)))
|
|
||||||
return true
|
|
||||||
case "/remotewrite-url-relabel-config":
|
|
||||||
if !httpserver.CheckAuthFlag(w, r, configAuthKey) {
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
remoteWriteURLRelabelConfigRequests.Inc()
|
|
||||||
w.Header().Set("Content-Type", "text/plain; charset=utf-8")
|
|
||||||
remotewrite.WriteURLRelabelConfigData(w)
|
|
||||||
return true
|
|
||||||
case "/api/v1/status/remotewrite-url-relabel-config":
|
|
||||||
if !httpserver.CheckAuthFlag(w, r, configAuthKey) {
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
remoteWriteStatusURLRelabelConfigRequests.Inc()
|
|
||||||
w.Header().Set("Content-Type", "application/json")
|
|
||||||
var bb bytesutil.ByteBuffer
|
|
||||||
remotewrite.WriteURLRelabelConfigData(&bb)
|
|
||||||
fmt.Fprintf(w, `{"status":"success","data":{"yaml":%s}}`, stringsutil.JSONString(string(bb.B)))
|
|
||||||
return true
|
|
||||||
case "/prometheus/-/reload", "/-/reload":
|
case "/prometheus/-/reload", "/-/reload":
|
||||||
if !httpserver.CheckAuthFlag(w, r, reloadAuthKey) {
|
if !httpserver.CheckAuthFlag(w, r, reloadAuthKey) {
|
||||||
return true
|
return true
|
||||||
@@ -657,17 +606,6 @@ func processMultitenantRequest(w http.ResponseWriter, r *http.Request, path stri
|
|||||||
}
|
}
|
||||||
firehose.WriteSuccessResponse(w, r)
|
firehose.WriteSuccessResponse(w, r)
|
||||||
return true
|
return true
|
||||||
case "zabbixconnector/api/v1/history":
|
|
||||||
zabbixconnectorHistoryRequests.Inc()
|
|
||||||
if err := zabbixconnector.InsertHandlerForHTTP(at, r); err != nil {
|
|
||||||
zabbixconnectorHistoryErrors.Inc()
|
|
||||||
w.Header().Set("Content-Type", "application/json")
|
|
||||||
w.WriteHeader(http.StatusBadRequest)
|
|
||||||
fmt.Fprintf(w, `{"error":%q}`, err.Error())
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
w.WriteHeader(http.StatusOK)
|
|
||||||
return true
|
|
||||||
case "newrelic":
|
case "newrelic":
|
||||||
newrelicCheckRequest.Inc()
|
newrelicCheckRequest.Inc()
|
||||||
w.Header().Set("Content-Type", "application/json")
|
w.Header().Set("Content-Type", "application/json")
|
||||||
@@ -789,9 +727,6 @@ var (
|
|||||||
opentelemetryPushRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/opentelemetry/v1/metrics", protocol="opentelemetry"}`)
|
opentelemetryPushRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/opentelemetry/v1/metrics", protocol="opentelemetry"}`)
|
||||||
opentelemetryPushErrors = metrics.NewCounter(`vmagent_http_request_errors_total{path="/opentelemetry/v1/metrics", protocol="opentelemetry"}`)
|
opentelemetryPushErrors = metrics.NewCounter(`vmagent_http_request_errors_total{path="/opentelemetry/v1/metrics", protocol="opentelemetry"}`)
|
||||||
|
|
||||||
zabbixconnectorHistoryRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/zabbixconnector/api/v1/history", protocol="zabbixconnector"}`)
|
|
||||||
zabbixconnectorHistoryErrors = metrics.NewCounter(`vmagent_http_request_errors_total{path="/zabbixconnector/api/v1/history", protocol="zabbixconnector"}`)
|
|
||||||
|
|
||||||
newrelicWriteRequests = metrics.NewCounter(`vm_http_requests_total{path="/newrelic/infra/v2/metrics/events/bulk", protocol="newrelic"}`)
|
newrelicWriteRequests = metrics.NewCounter(`vm_http_requests_total{path="/newrelic/infra/v2/metrics/events/bulk", protocol="newrelic"}`)
|
||||||
newrelicWriteErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/newrelic/infra/v2/metrics/events/bulk", protocol="newrelic"}`)
|
newrelicWriteErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/newrelic/infra/v2/metrics/events/bulk", protocol="newrelic"}`)
|
||||||
|
|
||||||
@@ -812,12 +747,6 @@ var (
|
|||||||
promscrapeConfigRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/config"}`)
|
promscrapeConfigRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/config"}`)
|
||||||
promscrapeStatusConfigRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/api/v1/status/config"}`)
|
promscrapeStatusConfigRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/api/v1/status/config"}`)
|
||||||
|
|
||||||
remoteWriteRelabelConfigRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/remotewrite-relabel-config"}`)
|
|
||||||
remoteWriteStatusRelabelConfigRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/api/v1/status/remotewrite-relabel-config"}`)
|
|
||||||
|
|
||||||
remoteWriteURLRelabelConfigRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/remotewrite-url-relabel-config"}`)
|
|
||||||
remoteWriteStatusURLRelabelConfigRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/api/v1/status/remotewrite-url-relabel-config"}`)
|
|
||||||
|
|
||||||
promscrapeConfigReloadRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/-/reload"}`)
|
promscrapeConfigReloadRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/-/reload"}`)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@@ -78,7 +78,7 @@ func insertRows(at *auth.Token, rows []newrelic.Row, extraLabels []prompb.Label)
|
|||||||
if !remotewrite.TryPush(at, &ctx.WriteRequest) {
|
if !remotewrite.TryPush(at, &ctx.WriteRequest) {
|
||||||
return remotewrite.ErrQueueFullHTTPRetry
|
return remotewrite.ErrQueueFullHTTPRetry
|
||||||
}
|
}
|
||||||
rowsInserted.Add(samplesCount)
|
rowsInserted.Add(len(rows))
|
||||||
if at != nil {
|
if at != nil {
|
||||||
rowsTenantInserted.Get(at).Add(samplesCount)
|
rowsTenantInserted.Get(at).Add(samplesCount)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -2,14 +2,13 @@ package opentelemetry
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
|
||||||
"net/http"
|
"net/http"
|
||||||
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prommetadata"
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||||
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/opentelemetry/firehose"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/opentelemetry/firehose"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/opentelemetry/stream"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/opentelemetry/stream"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/protoparserutil"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/protoparserutil"
|
||||||
@@ -25,13 +24,6 @@ var (
|
|||||||
rowsPerInsert = metrics.NewHistogram(`vmagent_rows_per_insert{type="opentelemetry"}`)
|
rowsPerInsert = metrics.NewHistogram(`vmagent_rows_per_insert{type="opentelemetry"}`)
|
||||||
)
|
)
|
||||||
|
|
||||||
// InsertHandlerForReader processes metrics from given reader.
|
|
||||||
func InsertHandlerForReader(at *auth.Token, r io.Reader, encoding string) error {
|
|
||||||
return stream.ParseStream(r, encoding, nil, func(tss []prompb.TimeSeries, mms []prompb.MetricMetadata) error {
|
|
||||||
return insertRows(at, tss, mms, nil)
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
// InsertHandler processes opentelemetry metrics.
|
// InsertHandler processes opentelemetry metrics.
|
||||||
func InsertHandler(at *auth.Token, req *http.Request) error {
|
func InsertHandler(at *auth.Token, req *http.Request) error {
|
||||||
extraLabels, err := protoparserutil.GetExtraLabels(req)
|
extraLabels, err := protoparserutil.GetExtraLabels(req)
|
||||||
@@ -76,7 +68,7 @@ func insertRows(at *auth.Token, tss []prompb.TimeSeries, mms []prompb.MetricMeta
|
|||||||
ctx.WriteRequest.Timeseries = tssDst
|
ctx.WriteRequest.Timeseries = tssDst
|
||||||
|
|
||||||
var metadataTotal int
|
var metadataTotal int
|
||||||
if prommetadata.IsEnabled() {
|
if promscrape.IsMetadataEnabled() {
|
||||||
var accountID, projectID uint32
|
var accountID, projectID uint32
|
||||||
if at != nil {
|
if at != nil {
|
||||||
accountID = at.AccountID
|
accountID = at.AccountID
|
||||||
|
|||||||
@@ -7,8 +7,8 @@ import (
|
|||||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prommetadata"
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||||
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/prometheus"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/prometheus"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/prometheus/stream"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/prometheus/stream"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/protoparserutil"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/protoparserutil"
|
||||||
@@ -36,7 +36,7 @@ func InsertHandler(at *auth.Token, req *http.Request) error {
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
encoding := req.Header.Get("Content-Encoding")
|
encoding := req.Header.Get("Content-Encoding")
|
||||||
return stream.Parse(req.Body, defaultTimestamp, encoding, true, prommetadata.IsEnabled(), func(rows []prometheus.Row, mms []prometheus.Metadata) error {
|
return stream.Parse(req.Body, defaultTimestamp, encoding, true, promscrape.IsMetadataEnabled(), func(rows []prometheus.Row, mms []prometheus.Metadata) error {
|
||||||
return insertRows(at, rows, mms, extraLabels)
|
return insertRows(at, rows, mms, extraLabels)
|
||||||
}, func(s string) {
|
}, func(s string) {
|
||||||
httpserver.LogError(req, s)
|
httpserver.LogError(req, s)
|
||||||
|
|||||||
@@ -6,8 +6,8 @@ import (
|
|||||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prommetadata"
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||||
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/promremotewrite/stream"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/promremotewrite/stream"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/protoparserutil"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/protoparserutil"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
|
||||||
@@ -71,7 +71,7 @@ func insertRows(at *auth.Token, timeseries []prompb.TimeSeries, mms []prompb.Met
|
|||||||
ctx.WriteRequest.Timeseries = tssDst
|
ctx.WriteRequest.Timeseries = tssDst
|
||||||
|
|
||||||
var metadataTotal int
|
var metadataTotal int
|
||||||
if prommetadata.IsEnabled() {
|
if promscrape.IsMetadataEnabled() {
|
||||||
var accountID, projectID uint32
|
var accountID, projectID uint32
|
||||||
if at != nil {
|
if at != nil {
|
||||||
accountID = at.AccountID
|
accountID = at.AccountID
|
||||||
|
|||||||
@@ -13,18 +13,19 @@ import (
|
|||||||
"sync/atomic"
|
"sync/atomic"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/VictoriaMetrics/metrics"
|
|
||||||
"github.com/golang/snappy"
|
|
||||||
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/awsapi"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/awsapi"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
|
||||||
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding/zstd"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httputil"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httputil"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/persistentqueue"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/persistentqueue"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/ratelimiter"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/ratelimiter"
|
||||||
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timerpool"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timeutil"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timeutil"
|
||||||
|
"github.com/VictoriaMetrics/metrics"
|
||||||
|
"github.com/golang/snappy"
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
@@ -202,10 +203,14 @@ func (c *client) init(argIdx, concurrency int, sanitizedURL string) {
|
|||||||
c.retriesCount = metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_retries_count_total{url=%q}`, c.sanitizedURL))
|
c.retriesCount = metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_retries_count_total{url=%q}`, c.sanitizedURL))
|
||||||
c.sendDuration = metrics.GetOrCreateFloatCounter(fmt.Sprintf(`vmagent_remotewrite_send_duration_seconds_total{url=%q}`, c.sanitizedURL))
|
c.sendDuration = metrics.GetOrCreateFloatCounter(fmt.Sprintf(`vmagent_remotewrite_send_duration_seconds_total{url=%q}`, c.sanitizedURL))
|
||||||
metrics.GetOrCreateGauge(fmt.Sprintf(`vmagent_remotewrite_queues{url=%q}`, c.sanitizedURL), func() float64 {
|
metrics.GetOrCreateGauge(fmt.Sprintf(`vmagent_remotewrite_queues{url=%q}`, c.sanitizedURL), func() float64 {
|
||||||
return float64(concurrency)
|
return float64(*queues)
|
||||||
})
|
})
|
||||||
for range concurrency {
|
for i := 0; i < concurrency; i++ {
|
||||||
c.wg.Go(c.runWorker)
|
c.wg.Add(1)
|
||||||
|
go func() {
|
||||||
|
defer c.wg.Done()
|
||||||
|
c.runWorker()
|
||||||
|
}()
|
||||||
}
|
}
|
||||||
logger.Infof("initialized client for -remoteWrite.url=%q", c.sanitizedURL)
|
logger.Infof("initialized client for -remoteWrite.url=%q", c.sanitizedURL)
|
||||||
}
|
}
|
||||||
@@ -290,7 +295,7 @@ func getAWSAPIConfig(argIdx int) (*awsapi.Config, error) {
|
|||||||
accessKey := awsAccessKey.GetOptionalArg(argIdx)
|
accessKey := awsAccessKey.GetOptionalArg(argIdx)
|
||||||
secretKey := awsSecretKey.GetOptionalArg(argIdx)
|
secretKey := awsSecretKey.GetOptionalArg(argIdx)
|
||||||
service := awsService.GetOptionalArg(argIdx)
|
service := awsService.GetOptionalArg(argIdx)
|
||||||
cfg, err := awsapi.NewConfig(ec2Endpoint, stsEndpoint, region, roleARN, accessKey, secretKey, service, "")
|
cfg, err := awsapi.NewConfig(ec2Endpoint, stsEndpoint, region, roleARN, accessKey, secretKey, service)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
@@ -405,7 +410,8 @@ func (c *client) newRequest(url string, body []byte) (*http.Request, error) {
|
|||||||
// Otherwise, it tries sending the block to remote storage indefinitely.
|
// Otherwise, it tries sending the block to remote storage indefinitely.
|
||||||
func (c *client) sendBlockHTTP(block []byte) bool {
|
func (c *client) sendBlockHTTP(block []byte) bool {
|
||||||
c.rl.Register(len(block))
|
c.rl.Register(len(block))
|
||||||
bt := timeutil.NewBackoffTimer(c.retryMinInterval, c.retryMaxInterval)
|
maxRetryDuration := timeutil.AddJitterToDuration(c.retryMaxInterval)
|
||||||
|
retryDuration := timeutil.AddJitterToDuration(c.retryMinInterval)
|
||||||
retriesCount := 0
|
retriesCount := 0
|
||||||
|
|
||||||
again:
|
again:
|
||||||
@@ -414,10 +420,19 @@ again:
|
|||||||
c.requestDuration.UpdateDuration(startTime)
|
c.requestDuration.UpdateDuration(startTime)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
c.errorsCount.Inc()
|
c.errorsCount.Inc()
|
||||||
remoteWriteRetryLogger.Warnf("couldn't send a block with size %d bytes to %q: %s; re-sending the block in %s",
|
retryDuration *= 2
|
||||||
len(block), c.sanitizedURL, err, bt.CurrentDelay())
|
if retryDuration > maxRetryDuration {
|
||||||
if !bt.Wait(c.stopCh) {
|
retryDuration = maxRetryDuration
|
||||||
|
}
|
||||||
|
remoteWriteRetryLogger.Warnf("couldn't send a block with size %d bytes to %q: %s; re-sending the block in %.3f seconds",
|
||||||
|
len(block), c.sanitizedURL, err, retryDuration.Seconds())
|
||||||
|
t := timerpool.Get(retryDuration)
|
||||||
|
select {
|
||||||
|
case <-c.stopCh:
|
||||||
|
timerpool.Put(t)
|
||||||
return false
|
return false
|
||||||
|
case <-t.C:
|
||||||
|
timerpool.Put(t)
|
||||||
}
|
}
|
||||||
c.retriesCount.Inc()
|
c.retriesCount.Inc()
|
||||||
goto again
|
goto again
|
||||||
@@ -448,6 +463,12 @@ again:
|
|||||||
// - Real-world implementations of v1 use both 400 and 415 status codes.
|
// - Real-world implementations of v1 use both 400 and 415 status codes.
|
||||||
// See more in research: https://github.com/VictoriaMetrics/VictoriaMetrics/pull/8462#issuecomment-2786918054
|
// See more in research: https://github.com/VictoriaMetrics/VictoriaMetrics/pull/8462#issuecomment-2786918054
|
||||||
case 415, 400:
|
case 415, 400:
|
||||||
|
if c.canDowngradeVMProto.Swap(false) {
|
||||||
|
logger.Infof("received unsupported media type or bad request from remote storage at %q. Downgrading protocol from VictoriaMetrics to Prometheus remote write for all future requests. "+
|
||||||
|
"See https://docs.victoriametrics.com/victoriametrics/vmagent/#victoriametrics-remote-write-protocol", c.sanitizedURL)
|
||||||
|
c.useVMProto.Store(false)
|
||||||
|
}
|
||||||
|
|
||||||
if encoding.IsZstd(block) {
|
if encoding.IsZstd(block) {
|
||||||
logger.Infof("received unsupported media type or bad request from remote storage at %q. Re-packing the block to Prometheus remote write and retrying."+
|
logger.Infof("received unsupported media type or bad request from remote storage at %q. Re-packing the block to Prometheus remote write and retrying."+
|
||||||
"See https://docs.victoriametrics.com/victoriametrics/vmagent/#victoriametrics-remote-write-protocol", c.sanitizedURL)
|
"See https://docs.victoriametrics.com/victoriametrics/vmagent/#victoriametrics-remote-write-protocol", c.sanitizedURL)
|
||||||
@@ -483,10 +504,7 @@ again:
|
|||||||
// Unexpected status code returned
|
// Unexpected status code returned
|
||||||
retriesCount++
|
retriesCount++
|
||||||
retryAfterHeader := parseRetryAfterHeader(resp.Header.Get("Retry-After"))
|
retryAfterHeader := parseRetryAfterHeader(resp.Header.Get("Retry-After"))
|
||||||
// retryAfterDuration has the highest priority duration
|
retryDuration = getRetryDuration(retryAfterHeader, retryDuration, maxRetryDuration)
|
||||||
if retryAfterHeader > 0 {
|
|
||||||
bt.SetDelay(retryAfterHeader)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Handle response
|
// Handle response
|
||||||
body, err := io.ReadAll(resp.Body)
|
body, err := io.ReadAll(resp.Body)
|
||||||
@@ -495,10 +513,15 @@ again:
|
|||||||
logger.Errorf("cannot read response body from %q during retry #%d: %s", c.sanitizedURL, retriesCount, err)
|
logger.Errorf("cannot read response body from %q during retry #%d: %s", c.sanitizedURL, retriesCount, err)
|
||||||
} else {
|
} else {
|
||||||
logger.Errorf("unexpected status code received after sending a block with size %d bytes to %q during retry #%d: %d; response body=%q; "+
|
logger.Errorf("unexpected status code received after sending a block with size %d bytes to %q during retry #%d: %d; response body=%q; "+
|
||||||
"re-sending the block in %s", len(block), c.sanitizedURL, retriesCount, statusCode, body, bt.CurrentDelay())
|
"re-sending the block in %.3f seconds", len(block), c.sanitizedURL, retriesCount, statusCode, body, retryDuration.Seconds())
|
||||||
}
|
}
|
||||||
if !bt.Wait(c.stopCh) {
|
t := timerpool.Get(retryDuration)
|
||||||
|
select {
|
||||||
|
case <-c.stopCh:
|
||||||
|
timerpool.Put(t)
|
||||||
return false
|
return false
|
||||||
|
case <-t.C:
|
||||||
|
timerpool.Put(t)
|
||||||
}
|
}
|
||||||
c.retriesCount.Inc()
|
c.retriesCount.Inc()
|
||||||
goto again
|
goto again
|
||||||
@@ -507,6 +530,27 @@ again:
|
|||||||
var remoteWriteRejectedLogger = logger.WithThrottler("remoteWriteRejected", 5*time.Second)
|
var remoteWriteRejectedLogger = logger.WithThrottler("remoteWriteRejected", 5*time.Second)
|
||||||
var remoteWriteRetryLogger = logger.WithThrottler("remoteWriteRetry", 5*time.Second)
|
var remoteWriteRetryLogger = logger.WithThrottler("remoteWriteRetry", 5*time.Second)
|
||||||
|
|
||||||
|
// getRetryDuration returns retry duration.
|
||||||
|
// retryAfterDuration has the highest priority.
|
||||||
|
// If retryAfterDuration is not specified, retryDuration gets doubled.
|
||||||
|
// retryDuration can't exceed maxRetryDuration.
|
||||||
|
//
|
||||||
|
// Also see: https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6097
|
||||||
|
func getRetryDuration(retryAfterDuration, retryDuration, maxRetryDuration time.Duration) time.Duration {
|
||||||
|
// retryAfterDuration has the highest priority duration
|
||||||
|
if retryAfterDuration > 0 {
|
||||||
|
return timeutil.AddJitterToDuration(retryAfterDuration)
|
||||||
|
}
|
||||||
|
|
||||||
|
// default backoff retry policy
|
||||||
|
retryDuration *= 2
|
||||||
|
if retryDuration > maxRetryDuration {
|
||||||
|
retryDuration = maxRetryDuration
|
||||||
|
}
|
||||||
|
|
||||||
|
return retryDuration
|
||||||
|
}
|
||||||
|
|
||||||
// repackBlockFromZstdToSnappy repacks the given zstd-compressed block to snappy-compressed block.
|
// repackBlockFromZstdToSnappy repacks the given zstd-compressed block to snappy-compressed block.
|
||||||
//
|
//
|
||||||
// The input block may be corrupted, for example, if vmagent was shut down ungracefully and
|
// The input block may be corrupted, for example, if vmagent was shut down ungracefully and
|
||||||
@@ -516,9 +560,9 @@ var remoteWriteRetryLogger = logger.WithThrottler("remoteWriteRetry", 5*time.Sec
|
|||||||
// For more details, see: https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9417
|
// For more details, see: https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9417
|
||||||
func repackBlockFromZstdToSnappy(zstdBlock []byte) ([]byte, error) {
|
func repackBlockFromZstdToSnappy(zstdBlock []byte) ([]byte, error) {
|
||||||
plainBlock := make([]byte, 0, len(zstdBlock)*2)
|
plainBlock := make([]byte, 0, len(zstdBlock)*2)
|
||||||
plainBlock, err := encoding.DecompressZSTD(plainBlock, zstdBlock)
|
plainBlock, err := zstd.Decompress(plainBlock, zstdBlock)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, fmt.Errorf("zstd: decompress: %s", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
return snappy.Encode(nil, plainBlock), nil
|
return snappy.Encode(nil, plainBlock), nil
|
||||||
@@ -537,20 +581,24 @@ func logBlockRejected(block []byte, sanitizedURL string, resp *http.Response) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// parseRetryAfterHeader parses `Retry-After` value retrieved from HTTP response header.
|
// parseRetryAfterHeader parses `Retry-After` value retrieved from HTTP response header.
|
||||||
//
|
// retryAfterString should be in either HTTP-date or a number of seconds.
|
||||||
// s should be in either HTTP-date or a number of seconds.
|
// It will return time.Duration(0) if `retryAfterString` does not follow RFC 7231.
|
||||||
// It returns time.Duration(0) if s does not follow RFC 7231.
|
func parseRetryAfterHeader(retryAfterString string) (retryAfterDuration time.Duration) {
|
||||||
func parseRetryAfterHeader(s string) time.Duration {
|
if retryAfterString == "" {
|
||||||
if s == "" {
|
return retryAfterDuration
|
||||||
return 0
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
defer func() {
|
||||||
|
v := retryAfterDuration.Seconds()
|
||||||
|
logger.Infof("'Retry-After: %s' parsed into %.2f second(s)", retryAfterString, v)
|
||||||
|
}()
|
||||||
|
|
||||||
// Retry-After could be in "Mon, 02 Jan 2006 15:04:05 GMT" format.
|
// Retry-After could be in "Mon, 02 Jan 2006 15:04:05 GMT" format.
|
||||||
if parsedTime, err := time.Parse(http.TimeFormat, s); err == nil {
|
if parsedTime, err := time.Parse(http.TimeFormat, retryAfterString); err == nil {
|
||||||
return time.Duration(time.Until(parsedTime).Seconds()) * time.Second
|
return time.Duration(time.Until(parsedTime).Seconds()) * time.Second
|
||||||
}
|
}
|
||||||
// Retry-After could be in seconds.
|
// Retry-After could be in seconds.
|
||||||
if seconds, err := strconv.Atoi(s); err == nil {
|
if seconds, err := strconv.Atoi(retryAfterString); err == nil {
|
||||||
return time.Duration(seconds) * time.Second
|
return time.Duration(seconds) * time.Second
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -6,11 +6,66 @@ import (
|
|||||||
"testing"
|
"testing"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/golang/snappy"
|
|
||||||
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
|
||||||
|
"github.com/golang/snappy"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
func TestCalculateRetryDuration(t *testing.T) {
|
||||||
|
// `testFunc` call `calculateRetryDuration` for `n` times
|
||||||
|
// and evaluate if the result of `calculateRetryDuration` is
|
||||||
|
// 1. >= expectMinDuration
|
||||||
|
// 2. <= expectMinDuration + 10% (see timeutil.AddJitterToDuration)
|
||||||
|
f := func(retryAfterDuration, retryDuration time.Duration, n int, expectMinDuration time.Duration) {
|
||||||
|
t.Helper()
|
||||||
|
|
||||||
|
for i := 0; i < n; i++ {
|
||||||
|
retryDuration = getRetryDuration(retryAfterDuration, retryDuration, time.Minute)
|
||||||
|
}
|
||||||
|
|
||||||
|
expectMaxDuration := helper(expectMinDuration)
|
||||||
|
expectMinDuration = expectMinDuration - (1000 * time.Millisecond) // Avoid edge case when calculating time.Until(now)
|
||||||
|
|
||||||
|
if retryDuration < expectMinDuration || retryDuration > expectMaxDuration {
|
||||||
|
t.Fatalf(
|
||||||
|
"incorrect retry duration, want (ms): [%d, %d], got (ms): %d",
|
||||||
|
expectMinDuration.Milliseconds(), expectMaxDuration.Milliseconds(),
|
||||||
|
retryDuration.Milliseconds(),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Call calculateRetryDuration for 1 time.
|
||||||
|
{
|
||||||
|
// default backoff policy
|
||||||
|
f(0, time.Second, 1, 2*time.Second)
|
||||||
|
// default backoff policy exceed max limit"
|
||||||
|
f(0, 10*time.Minute, 1, time.Minute)
|
||||||
|
|
||||||
|
// retry after > default backoff policy
|
||||||
|
f(10*time.Second, 1*time.Second, 1, 10*time.Second)
|
||||||
|
// retry after < default backoff policy
|
||||||
|
f(1*time.Second, 10*time.Second, 1, 1*time.Second)
|
||||||
|
// retry after invalid and < default backoff policy
|
||||||
|
f(0, time.Second, 1, 2*time.Second)
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// Call calculateRetryDuration for multiple times.
|
||||||
|
{
|
||||||
|
// default backoff policy 2 times
|
||||||
|
f(0, time.Second, 2, 4*time.Second)
|
||||||
|
// default backoff policy 3 times
|
||||||
|
f(0, time.Second, 3, 8*time.Second)
|
||||||
|
// default backoff policy N times exceed max limit
|
||||||
|
f(0, time.Second, 10, time.Minute)
|
||||||
|
|
||||||
|
// retry after 120s 1 times
|
||||||
|
f(120*time.Second, time.Second, 1, 120*time.Second)
|
||||||
|
// retry after 120s 2 times
|
||||||
|
f(120*time.Second, time.Second, 2, 120*time.Second)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestParseRetryAfterHeader(t *testing.T) {
|
func TestParseRetryAfterHeader(t *testing.T) {
|
||||||
f := func(retryAfterString string, expectResult time.Duration) {
|
f := func(retryAfterString string, expectResult time.Duration) {
|
||||||
t.Helper()
|
t.Helper()
|
||||||
@@ -36,6 +91,16 @@ func TestParseRetryAfterHeader(t *testing.T) {
|
|||||||
f(time.Now().Add(10*time.Second).Format("Mon, 02 Jan 2006 15:04:05 FAKETZ"), 0)
|
f(time.Now().Add(10*time.Second).Format("Mon, 02 Jan 2006 15:04:05 FAKETZ"), 0)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// helper calculate the max possible time duration calculated by timeutil.AddJitterToDuration.
|
||||||
|
func helper(d time.Duration) time.Duration {
|
||||||
|
dv := d / 10
|
||||||
|
if dv > 10*time.Second {
|
||||||
|
dv = 10 * time.Second
|
||||||
|
}
|
||||||
|
|
||||||
|
return d + dv
|
||||||
|
}
|
||||||
|
|
||||||
func TestRepackBlockFromZstdToSnappy(t *testing.T) {
|
func TestRepackBlockFromZstdToSnappy(t *testing.T) {
|
||||||
expectedPlainBlock := []byte(`foobar`)
|
expectedPlainBlock := []byte(`foobar`)
|
||||||
|
|
||||||
|
|||||||
@@ -48,7 +48,11 @@ func newPendingSeries(fq *persistentqueue.FastQueue, isVMRemoteWrite *atomic.Boo
|
|||||||
ps.wr.significantFigures = significantFigures
|
ps.wr.significantFigures = significantFigures
|
||||||
ps.wr.roundDigits = roundDigits
|
ps.wr.roundDigits = roundDigits
|
||||||
ps.stopCh = make(chan struct{})
|
ps.stopCh = make(chan struct{})
|
||||||
ps.periodicFlusherWG.Go(ps.periodicFlusher)
|
ps.periodicFlusherWG.Add(1)
|
||||||
|
go func() {
|
||||||
|
defer ps.periodicFlusherWG.Done()
|
||||||
|
ps.periodicFlusher()
|
||||||
|
}()
|
||||||
return &ps
|
return &ps
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -51,9 +51,9 @@ func testPushWriteRequest(t *testing.T, rowsCount, expectedBlockLenProm, expecte
|
|||||||
|
|
||||||
func newTestWriteRequest(seriesCount, labelsCount int) *prompb.WriteRequest {
|
func newTestWriteRequest(seriesCount, labelsCount int) *prompb.WriteRequest {
|
||||||
var wr prompb.WriteRequest
|
var wr prompb.WriteRequest
|
||||||
for i := range seriesCount {
|
for i := 0; i < seriesCount; i++ {
|
||||||
var labels []prompb.Label
|
var labels []prompb.Label
|
||||||
for j := range labelsCount {
|
for j := 0; j < labelsCount; j++ {
|
||||||
labels = append(labels, prompb.Label{
|
labels = append(labels, prompb.Label{
|
||||||
Name: fmt.Sprintf("label_%d_%d", i, j),
|
Name: fmt.Sprintf("label_%d_%d", i, j),
|
||||||
Value: fmt.Sprintf("value_%d_%d", i, j),
|
Value: fmt.Sprintf("value_%d_%d", i, j),
|
||||||
|
|||||||
@@ -3,24 +3,22 @@ package remotewrite
|
|||||||
import (
|
import (
|
||||||
"flag"
|
"flag"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
"sync/atomic"
|
|
||||||
|
|
||||||
"github.com/VictoriaMetrics/metrics"
|
|
||||||
"gopkg.in/yaml.v2"
|
|
||||||
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
|
||||||
|
|
||||||
|
"github.com/VictoriaMetrics/metrics"
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
unparsedLabelsGlobal = flagutil.NewArrayString("remoteWrite.label", "Optional label in the form 'name=value' to add to all the metrics before sending them to all -remoteWrite.url.")
|
unparsedLabelsGlobal = flagutil.NewArrayString("remoteWrite.label", "Optional label in the form 'name=value' to add to all the metrics before sending them to -remoteWrite.url. "+
|
||||||
|
"Pass multiple -remoteWrite.label flags in order to add multiple labels to metrics before sending them to remote storage")
|
||||||
relabelConfigPathGlobal = flag.String("remoteWrite.relabelConfig", "", "Optional path to file with relabeling configs, which are applied "+
|
relabelConfigPathGlobal = flag.String("remoteWrite.relabelConfig", "", "Optional path to file with relabeling configs, which are applied "+
|
||||||
"to all the metrics before sending them to -remoteWrite.url. See also -remoteWrite.urlRelabelConfig. "+
|
"to all the metrics before sending them to -remoteWrite.url. See also -remoteWrite.urlRelabelConfig. "+
|
||||||
"The path can point either to local file or to http url. "+
|
"The path can point either to local file or to http url. "+
|
||||||
@@ -34,12 +32,9 @@ var (
|
|||||||
"See https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels")
|
"See https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels")
|
||||||
)
|
)
|
||||||
|
|
||||||
|
var labelsGlobal []prompb.Label
|
||||||
|
|
||||||
var (
|
var (
|
||||||
labelsGlobal []prompb.Label
|
|
||||||
|
|
||||||
remoteWriteRelabelConfigData atomic.Pointer[[]byte]
|
|
||||||
remoteWriteURLRelabelConfigData atomic.Pointer[[]any]
|
|
||||||
|
|
||||||
relabelConfigReloads *metrics.Counter
|
relabelConfigReloads *metrics.Counter
|
||||||
relabelConfigReloadErrors *metrics.Counter
|
relabelConfigReloadErrors *metrics.Counter
|
||||||
relabelConfigSuccess *metrics.Gauge
|
relabelConfigSuccess *metrics.Gauge
|
||||||
@@ -72,42 +67,6 @@ func initRelabelConfigs() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// WriteRelabelConfigData writes -remoteWrite.relabelConfig contents to w
|
|
||||||
func WriteRelabelConfigData(w io.Writer) {
|
|
||||||
p := remoteWriteRelabelConfigData.Load()
|
|
||||||
if p == nil {
|
|
||||||
// Nothing to write to w
|
|
||||||
return
|
|
||||||
}
|
|
||||||
_, _ = w.Write(*p)
|
|
||||||
}
|
|
||||||
|
|
||||||
// WriteURLRelabelConfigData writes -remoteWrite.urlRelabelConfig contents to w
|
|
||||||
func WriteURLRelabelConfigData(w io.Writer) {
|
|
||||||
p := remoteWriteURLRelabelConfigData.Load()
|
|
||||||
if p == nil {
|
|
||||||
// Nothing to write to w
|
|
||||||
return
|
|
||||||
}
|
|
||||||
type urlRelabelCfg struct {
|
|
||||||
Url string `yaml:"url"`
|
|
||||||
RelabelConfig any `yaml:"relabel_config"`
|
|
||||||
}
|
|
||||||
var cs []urlRelabelCfg
|
|
||||||
for i, url := range *remoteWriteURLs {
|
|
||||||
cfgData := (*p)[i]
|
|
||||||
if !*showRemoteWriteURL {
|
|
||||||
url = fmt.Sprintf("%d:secret-url", i+1)
|
|
||||||
}
|
|
||||||
cs = append(cs, urlRelabelCfg{
|
|
||||||
Url: url,
|
|
||||||
RelabelConfig: cfgData,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
d, _ := yaml.Marshal(cs)
|
|
||||||
_, _ = w.Write(d)
|
|
||||||
}
|
|
||||||
|
|
||||||
func reloadRelabelConfigs() {
|
func reloadRelabelConfigs() {
|
||||||
rcs := allRelabelConfigs.Load()
|
rcs := allRelabelConfigs.Load()
|
||||||
if !rcs.isSet() {
|
if !rcs.isSet() {
|
||||||
@@ -131,43 +90,28 @@ func reloadRelabelConfigs() {
|
|||||||
func loadRelabelConfigs() (*relabelConfigs, error) {
|
func loadRelabelConfigs() (*relabelConfigs, error) {
|
||||||
var rcs relabelConfigs
|
var rcs relabelConfigs
|
||||||
if *relabelConfigPathGlobal != "" {
|
if *relabelConfigPathGlobal != "" {
|
||||||
global, rawCfg, err := promrelabel.LoadRelabelConfigs(*relabelConfigPathGlobal)
|
global, err := promrelabel.LoadRelabelConfigs(*relabelConfigPathGlobal)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("cannot load -remoteWrite.relabelConfig=%q: %w", *relabelConfigPathGlobal, err)
|
return nil, fmt.Errorf("cannot load -remoteWrite.relabelConfig=%q: %w", *relabelConfigPathGlobal, err)
|
||||||
}
|
}
|
||||||
remoteWriteRelabelConfigData.Store(&rawCfg)
|
|
||||||
rcs.global = global
|
rcs.global = global
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(*relabelConfigPaths) > len(*remoteWriteURLs) {
|
if len(*relabelConfigPaths) > len(*remoteWriteURLs) {
|
||||||
return nil, fmt.Errorf("too many -remoteWrite.urlRelabelConfig args: %d; it mustn't exceed the number of -remoteWrite.url args: %d",
|
return nil, fmt.Errorf("too many -remoteWrite.urlRelabelConfig args: %d; it mustn't exceed the number of -remoteWrite.url args: %d",
|
||||||
len(*relabelConfigPaths), (len(*remoteWriteURLs)))
|
len(*relabelConfigPaths), (len(*remoteWriteURLs)))
|
||||||
}
|
}
|
||||||
|
|
||||||
var urlRelabelCfgs []any
|
|
||||||
rcs.perURL = make([]*promrelabel.ParsedConfigs, len(*remoteWriteURLs))
|
rcs.perURL = make([]*promrelabel.ParsedConfigs, len(*remoteWriteURLs))
|
||||||
for i, path := range *relabelConfigPaths {
|
for i, path := range *relabelConfigPaths {
|
||||||
if len(path) == 0 {
|
if len(path) == 0 {
|
||||||
urlRelabelCfgs = append(urlRelabelCfgs, nil)
|
// Skip empty relabel config.
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
prc, rawCfg, err := promrelabel.LoadRelabelConfigs(path)
|
prc, err := promrelabel.LoadRelabelConfigs(path)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("cannot load relabel configs from -remoteWrite.urlRelabelConfig=%q: %w", path, err)
|
return nil, fmt.Errorf("cannot load relabel configs from -remoteWrite.urlRelabelConfig=%q: %w", path, err)
|
||||||
}
|
}
|
||||||
rcs.perURL[i] = prc
|
rcs.perURL[i] = prc
|
||||||
|
|
||||||
var parsedCfg any
|
|
||||||
_ = yaml.Unmarshal(rawCfg, &parsedCfg)
|
|
||||||
urlRelabelCfgs = append(urlRelabelCfgs, parsedCfg)
|
|
||||||
}
|
}
|
||||||
if len(*remoteWriteURLs) > len(*relabelConfigPaths) {
|
|
||||||
// fill the urlRelabelCfgs with empty relabel configs if not set
|
|
||||||
for i := len(*relabelConfigPaths); i < len(*remoteWriteURLs); i++ {
|
|
||||||
urlRelabelCfgs = append(urlRelabelCfgs, nil)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
remoteWriteURLRelabelConfigData.Store(&urlRelabelCfgs)
|
|
||||||
return &rcs, nil
|
return &rcs, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -176,9 +120,19 @@ type relabelConfigs struct {
|
|||||||
perURL []*promrelabel.ParsedConfigs
|
perURL []*promrelabel.ParsedConfigs
|
||||||
}
|
}
|
||||||
|
|
||||||
// isSet indicates whether (global or per-URL) command-line flags is set
|
|
||||||
func (rcs *relabelConfigs) isSet() bool {
|
func (rcs *relabelConfigs) isSet() bool {
|
||||||
return *relabelConfigPathGlobal != "" || len(*relabelConfigPaths) > 0
|
if rcs == nil {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
if rcs.global.Len() > 0 {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
for _, pc := range rcs.perURL {
|
||||||
|
if pc.Len() > 0 {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
// initLabelsGlobal must be called after parsing command-line flags.
|
// initLabelsGlobal must be called after parsing command-line flags.
|
||||||
|
|||||||
@@ -3,7 +3,6 @@ package remotewrite
|
|||||||
import (
|
import (
|
||||||
"flag"
|
"flag"
|
||||||
"fmt"
|
"fmt"
|
||||||
"math"
|
|
||||||
"net/http"
|
"net/http"
|
||||||
"net/url"
|
"net/url"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
@@ -12,10 +11,6 @@ import (
|
|||||||
"sync/atomic"
|
"sync/atomic"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/cespare/xxhash/v2"
|
|
||||||
|
|
||||||
"github.com/VictoriaMetrics/metrics"
|
|
||||||
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bloomfilter"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bloomfilter"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||||
@@ -28,14 +23,14 @@ import (
|
|||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/memory"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/memory"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/persistentqueue"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/persistentqueue"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prommetadata"
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutil"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutil"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/ratelimiter"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/ratelimiter"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/slicesutil"
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/streamaggr"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/streamaggr"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timeserieslimits"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timeserieslimits"
|
||||||
|
"github.com/VictoriaMetrics/metrics"
|
||||||
|
"github.com/cespare/xxhash/v2"
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
@@ -63,7 +58,7 @@ var (
|
|||||||
"See also -remoteWrite.maxDiskUsagePerURL and -remoteWrite.disableOnDiskQueue")
|
"See also -remoteWrite.maxDiskUsagePerURL and -remoteWrite.disableOnDiskQueue")
|
||||||
keepDanglingQueues = flag.Bool("remoteWrite.keepDanglingQueues", false, "Keep persistent queues contents at -remoteWrite.tmpDataPath in case there are no matching -remoteWrite.url. "+
|
keepDanglingQueues = flag.Bool("remoteWrite.keepDanglingQueues", false, "Keep persistent queues contents at -remoteWrite.tmpDataPath in case there are no matching -remoteWrite.url. "+
|
||||||
"Useful when -remoteWrite.url is changed temporarily and persistent queue files will be needed later on.")
|
"Useful when -remoteWrite.url is changed temporarily and persistent queue files will be needed later on.")
|
||||||
queues = flagutil.NewArrayInt("remoteWrite.queues", cgroup.AvailableCPUs()*2, "The number of concurrent queues to each -remoteWrite.url. Set more queues if default number of queues "+
|
queues = flag.Int("remoteWrite.queues", cgroup.AvailableCPUs()*2, "The number of concurrent queues to each -remoteWrite.url. Set more queues if default number of queues "+
|
||||||
"isn't enough for sending high volume of collected data to remote storage. "+
|
"isn't enough for sending high volume of collected data to remote storage. "+
|
||||||
"Default value depends on the number of available CPU cores. It should work fine in most cases since it minimizes resource usage")
|
"Default value depends on the number of available CPU cores. It should work fine in most cases since it minimizes resource usage")
|
||||||
showRemoteWriteURL = flag.Bool("remoteWrite.showURL", false, "Whether to show -remoteWrite.url in the exported metrics. "+
|
showRemoteWriteURL = flag.Bool("remoteWrite.showURL", false, "Whether to show -remoteWrite.url in the exported metrics. "+
|
||||||
@@ -84,14 +79,10 @@ var (
|
|||||||
`This may be needed for reducing memory usage at remote storage when the order of labels in incoming samples is random. `+
|
`This may be needed for reducing memory usage at remote storage when the order of labels in incoming samples is random. `+
|
||||||
`For example, if m{k1="v1",k2="v2"} may be sent as m{k2="v2",k1="v1"}`+
|
`For example, if m{k1="v1",k2="v2"} may be sent as m{k2="v2",k1="v1"}`+
|
||||||
`Enabled sorting for labels can slow down ingestion performance a bit`)
|
`Enabled sorting for labels can slow down ingestion performance a bit`)
|
||||||
maxHourlySeries = flag.Int64("remoteWrite.maxHourlySeries", 0, "The maximum number of unique series vmagent can send to remote storage systems during the last hour. "+
|
maxHourlySeries = flag.Int("remoteWrite.maxHourlySeries", 0, "The maximum number of unique series vmagent can send to remote storage systems during the last hour. "+
|
||||||
"Excess series are logged and dropped. This can be useful for limiting series cardinality. "+
|
"Excess series are logged and dropped. This can be useful for limiting series cardinality. See https://docs.victoriametrics.com/victoriametrics/vmagent/#cardinality-limiter")
|
||||||
fmt.Sprintf("Setting this flag to '-1' sets limit to maximum possible value (%d) which is useful in order to enable series tracking without enforcing limits. ", math.MaxInt32)+
|
maxDailySeries = flag.Int("remoteWrite.maxDailySeries", 0, "The maximum number of unique series vmagent can send to remote storage systems during the last 24 hours. "+
|
||||||
"See https://docs.victoriametrics.com/victoriametrics/vmagent/#cardinality-limiter")
|
"Excess series are logged and dropped. This can be useful for limiting series churn rate. See https://docs.victoriametrics.com/victoriametrics/vmagent/#cardinality-limiter")
|
||||||
maxDailySeries = flag.Int64("remoteWrite.maxDailySeries", 0, "The maximum number of unique series vmagent can send to remote storage systems during the last 24 hours. "+
|
|
||||||
"Excess series are logged and dropped. This can be useful for limiting series churn rate. "+
|
|
||||||
fmt.Sprintf("Setting this flag to '-1' sets limit to maximum possible value (%d) which is useful in order to enable series tracking without enforcing limits. ", math.MaxInt32)+
|
|
||||||
"See https://docs.victoriametrics.com/victoriametrics/vmagent/#cardinality-limiter")
|
|
||||||
maxIngestionRate = flag.Int("maxIngestionRate", 0, "The maximum number of samples vmagent can receive per second. Data ingestion is paused when the limit is exceeded. "+
|
maxIngestionRate = flag.Int("maxIngestionRate", 0, "The maximum number of samples vmagent can receive per second. Data ingestion is paused when the limit is exceeded. "+
|
||||||
"By default there are no limits on samples ingestion rate. See also -remoteWrite.rateLimit")
|
"By default there are no limits on samples ingestion rate. See also -remoteWrite.rateLimit")
|
||||||
|
|
||||||
@@ -100,8 +91,6 @@ var (
|
|||||||
"See https://docs.victoriametrics.com/victoriametrics/vmagent/#disabling-on-disk-persistence . See also -remoteWrite.dropSamplesOnOverload")
|
"See https://docs.victoriametrics.com/victoriametrics/vmagent/#disabling-on-disk-persistence . See also -remoteWrite.dropSamplesOnOverload")
|
||||||
dropSamplesOnOverload = flag.Bool("remoteWrite.dropSamplesOnOverload", false, "Whether to drop samples when -remoteWrite.disableOnDiskQueue is set and if the samples "+
|
dropSamplesOnOverload = flag.Bool("remoteWrite.dropSamplesOnOverload", false, "Whether to drop samples when -remoteWrite.disableOnDiskQueue is set and if the samples "+
|
||||||
"cannot be pushed into the configured -remoteWrite.url systems in a timely manner. See https://docs.victoriametrics.com/victoriametrics/vmagent/#disabling-on-disk-persistence")
|
"cannot be pushed into the configured -remoteWrite.url systems in a timely manner. See https://docs.victoriametrics.com/victoriametrics/vmagent/#disabling-on-disk-persistence")
|
||||||
disableMetadataPerURL = flagutil.NewArrayBool("remoteWrite.disableMetadata", "Whether to disable sending metadata to the corresponding -remoteWrite.url. "+
|
|
||||||
"By default, metadata sending is controlled by the global -enableMetadata flag")
|
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
@@ -167,8 +156,8 @@ func Init() {
|
|||||||
if len(*remoteWriteURLs) == 0 {
|
if len(*remoteWriteURLs) == 0 {
|
||||||
logger.Fatalf("at least one `-remoteWrite.url` command-line flag must be set")
|
logger.Fatalf("at least one `-remoteWrite.url` command-line flag must be set")
|
||||||
}
|
}
|
||||||
if limit := getMaxHourlySeries(); limit > 0 {
|
if *maxHourlySeries > 0 {
|
||||||
hourlySeriesLimiter = bloomfilter.NewLimiter(limit, time.Hour)
|
hourlySeriesLimiter = bloomfilter.NewLimiter(*maxHourlySeries, time.Hour)
|
||||||
_ = metrics.NewGauge(`vmagent_hourly_series_limit_max_series`, func() float64 {
|
_ = metrics.NewGauge(`vmagent_hourly_series_limit_max_series`, func() float64 {
|
||||||
return float64(hourlySeriesLimiter.MaxItems())
|
return float64(hourlySeriesLimiter.MaxItems())
|
||||||
})
|
})
|
||||||
@@ -176,8 +165,8 @@ func Init() {
|
|||||||
return float64(hourlySeriesLimiter.CurrentItems())
|
return float64(hourlySeriesLimiter.CurrentItems())
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
if limit := getMaxDailySeries(); limit > 0 {
|
if *maxDailySeries > 0 {
|
||||||
dailySeriesLimiter = bloomfilter.NewLimiter(limit, 24*time.Hour)
|
dailySeriesLimiter = bloomfilter.NewLimiter(*maxDailySeries, 24*time.Hour)
|
||||||
_ = metrics.NewGauge(`vmagent_daily_series_limit_max_series`, func() float64 {
|
_ = metrics.NewGauge(`vmagent_daily_series_limit_max_series`, func() float64 {
|
||||||
return float64(dailySeriesLimiter.MaxItems())
|
return float64(dailySeriesLimiter.MaxItems())
|
||||||
})
|
})
|
||||||
@@ -186,6 +175,13 @@ func Init() {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if *queues > maxQueues {
|
||||||
|
*queues = maxQueues
|
||||||
|
}
|
||||||
|
if *queues <= 0 {
|
||||||
|
*queues = 1
|
||||||
|
}
|
||||||
|
|
||||||
if len(*shardByURLLabels) > 0 && len(*shardByURLIgnoreLabels) > 0 {
|
if len(*shardByURLLabels) > 0 && len(*shardByURLIgnoreLabels) > 0 {
|
||||||
logger.Fatalf("-remoteWrite.shardByURL.labels and -remoteWrite.shardByURL.ignoreLabels cannot be set simultaneously; " +
|
logger.Fatalf("-remoteWrite.shardByURL.labels and -remoteWrite.shardByURL.ignoreLabels cannot be set simultaneously; " +
|
||||||
"see https://docs.victoriametrics.com/victoriametrics/vmagent/#sharding-among-remote-storages")
|
"see https://docs.victoriametrics.com/victoriametrics/vmagent/#sharding-among-remote-storages")
|
||||||
@@ -218,7 +214,9 @@ func Init() {
|
|||||||
dropDanglingQueues()
|
dropDanglingQueues()
|
||||||
|
|
||||||
// Start config reloader.
|
// Start config reloader.
|
||||||
configReloaderWG.Go(func() {
|
configReloaderWG.Add(1)
|
||||||
|
go func() {
|
||||||
|
defer configReloaderWG.Done()
|
||||||
for {
|
for {
|
||||||
select {
|
select {
|
||||||
case <-configReloaderStopCh:
|
case <-configReloaderStopCh:
|
||||||
@@ -228,7 +226,7 @@ func Init() {
|
|||||||
reloadRelabelConfigs()
|
reloadRelabelConfigs()
|
||||||
reloadStreamAggrConfigs()
|
reloadStreamAggrConfigs()
|
||||||
}
|
}
|
||||||
})
|
}()
|
||||||
}
|
}
|
||||||
|
|
||||||
func dropDanglingQueues() {
|
func dropDanglingQueues() {
|
||||||
@@ -268,6 +266,17 @@ func initRemoteWriteCtxs(urls []string) {
|
|||||||
if len(urls) == 0 {
|
if len(urls) == 0 {
|
||||||
logger.Panicf("BUG: urls must be non-empty")
|
logger.Panicf("BUG: urls must be non-empty")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
maxInmemoryBlocks := memory.Allowed() / len(urls) / *maxRowsPerBlock / 100
|
||||||
|
if maxInmemoryBlocks / *queues > 100 {
|
||||||
|
// There is no much sense in keeping higher number of blocks in memory,
|
||||||
|
// since this means that the producer outperforms consumer and the queue
|
||||||
|
// will continue growing. It is better storing the queue to file.
|
||||||
|
maxInmemoryBlocks = 100 * *queues
|
||||||
|
}
|
||||||
|
if maxInmemoryBlocks < 2 {
|
||||||
|
maxInmemoryBlocks = 2
|
||||||
|
}
|
||||||
rwctxs := make([]*remoteWriteCtx, len(urls))
|
rwctxs := make([]*remoteWriteCtx, len(urls))
|
||||||
rwctxIdx := make([]int, len(urls))
|
rwctxIdx := make([]int, len(urls))
|
||||||
if retryMaxTime.String() != "" {
|
if retryMaxTime.String() != "" {
|
||||||
@@ -282,7 +291,7 @@ func initRemoteWriteCtxs(urls []string) {
|
|||||||
if *showRemoteWriteURL {
|
if *showRemoteWriteURL {
|
||||||
sanitizedURL = fmt.Sprintf("%d:%s", i+1, remoteWriteURL)
|
sanitizedURL = fmt.Sprintf("%d:%s", i+1, remoteWriteURL)
|
||||||
}
|
}
|
||||||
rwctxs[i] = newRemoteWriteCtx(i, remoteWriteURL, sanitizedURL)
|
rwctxs[i] = newRemoteWriteCtx(i, remoteWriteURL, maxInmemoryBlocks, sanitizedURL)
|
||||||
rwctxIdx[i] = i
|
rwctxIdx[i] = i
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -476,9 +485,6 @@ func tryPush(at *auth.Token, wr *prompb.WriteRequest, forceDropSamplesOnFailure
|
|||||||
matchIdxs.B = sas.Push(tssBlock, matchIdxs.B)
|
matchIdxs.B = sas.Push(tssBlock, matchIdxs.B)
|
||||||
if !*streamAggrGlobalKeepInput {
|
if !*streamAggrGlobalKeepInput {
|
||||||
tssBlock = dropAggregatedSeries(tssBlock, matchIdxs.B, *streamAggrGlobalDropInput)
|
tssBlock = dropAggregatedSeries(tssBlock, matchIdxs.B, *streamAggrGlobalDropInput)
|
||||||
} else if *streamAggrGlobalDropInput {
|
|
||||||
// if both keep_input and drop_input are true, we keep only the aggregated series
|
|
||||||
tssBlock = dropUnaggregatedSeries(tssBlock, matchIdxs.B)
|
|
||||||
}
|
}
|
||||||
matchIdxsPool.Put(matchIdxs)
|
matchIdxsPool.Put(matchIdxs)
|
||||||
}
|
}
|
||||||
@@ -548,13 +554,11 @@ func tryPushMetadataToRemoteStorages(rwctxs []*remoteWriteCtx, mms []prompb.Metr
|
|||||||
// Push metadata to remote storage systems in parallel to reduce
|
// Push metadata to remote storage systems in parallel to reduce
|
||||||
// the time needed for sending the data to multiple remote storage systems.
|
// the time needed for sending the data to multiple remote storage systems.
|
||||||
var wg sync.WaitGroup
|
var wg sync.WaitGroup
|
||||||
|
wg.Add(len(rwctxs))
|
||||||
var anyPushFailed atomic.Bool
|
var anyPushFailed atomic.Bool
|
||||||
for _, rwctx := range rwctxs {
|
for _, rwctx := range rwctxs {
|
||||||
if !rwctx.enableMetadata {
|
go func(rwctx *remoteWriteCtx) {
|
||||||
// Skip remote storage with disabled metadata
|
defer wg.Done()
|
||||||
continue
|
|
||||||
}
|
|
||||||
wg.Go(func() {
|
|
||||||
if !rwctx.tryPushMetadataInternal(mms) {
|
if !rwctx.tryPushMetadataInternal(mms) {
|
||||||
rwctx.pushFailures.Inc()
|
rwctx.pushFailures.Inc()
|
||||||
if forceDropSamplesOnFailure {
|
if forceDropSamplesOnFailure {
|
||||||
@@ -563,7 +567,7 @@ func tryPushMetadataToRemoteStorages(rwctxs []*remoteWriteCtx, mms []prompb.Metr
|
|||||||
}
|
}
|
||||||
anyPushFailed.Store(true)
|
anyPushFailed.Store(true)
|
||||||
}
|
}
|
||||||
})
|
}(rwctx)
|
||||||
}
|
}
|
||||||
wg.Wait()
|
wg.Wait()
|
||||||
return !anyPushFailed.Load()
|
return !anyPushFailed.Load()
|
||||||
@@ -595,13 +599,15 @@ func tryPushTimeSeriesToRemoteStorages(rwctxs []*remoteWriteCtx, tssBlock []prom
|
|||||||
// Push tssBlock to remote storage systems in parallel to reduce
|
// Push tssBlock to remote storage systems in parallel to reduce
|
||||||
// the time needed for sending the data to multiple remote storage systems.
|
// the time needed for sending the data to multiple remote storage systems.
|
||||||
var wg sync.WaitGroup
|
var wg sync.WaitGroup
|
||||||
|
wg.Add(len(rwctxs))
|
||||||
var anyPushFailed atomic.Bool
|
var anyPushFailed atomic.Bool
|
||||||
for _, rwctx := range rwctxs {
|
for _, rwctx := range rwctxs {
|
||||||
wg.Go(func() {
|
go func(rwctx *remoteWriteCtx) {
|
||||||
|
defer wg.Done()
|
||||||
if !rwctx.TryPushTimeSeries(tssBlock, forceDropSamplesOnFailure) {
|
if !rwctx.TryPushTimeSeries(tssBlock, forceDropSamplesOnFailure) {
|
||||||
anyPushFailed.Store(true)
|
anyPushFailed.Store(true)
|
||||||
}
|
}
|
||||||
})
|
}(rwctx)
|
||||||
}
|
}
|
||||||
wg.Wait()
|
wg.Wait()
|
||||||
return !anyPushFailed.Load()
|
return !anyPushFailed.Load()
|
||||||
@@ -623,11 +629,13 @@ func tryShardingTimeSeriesAmongRemoteStorages(rwctxs []*remoteWriteCtx, tssBlock
|
|||||||
if len(shard) == 0 {
|
if len(shard) == 0 {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
wg.Go(func() {
|
wg.Add(1)
|
||||||
if !rwctx.TryPushTimeSeries(shard, forceDropSamplesOnFailure) {
|
go func(rwctx *remoteWriteCtx, tss []prompb.TimeSeries) {
|
||||||
|
defer wg.Done()
|
||||||
|
if !rwctx.TryPushTimeSeries(tss, forceDropSamplesOnFailure) {
|
||||||
anyPushFailed.Store(true)
|
anyPushFailed.Store(true)
|
||||||
}
|
}
|
||||||
})
|
}(rwctx, shard)
|
||||||
}
|
}
|
||||||
wg.Wait()
|
wg.Wait()
|
||||||
return !anyPushFailed.Load()
|
return !anyPushFailed.Load()
|
||||||
@@ -825,11 +833,6 @@ type remoteWriteCtx struct {
|
|||||||
streamAggrKeepInput bool
|
streamAggrKeepInput bool
|
||||||
streamAggrDropInput bool
|
streamAggrDropInput bool
|
||||||
|
|
||||||
// enableMetadata indicates whether metadata should be sent to this remote storage.
|
|
||||||
// It is determined by -remoteWrite.enableMetadata per-URL flag if set,
|
|
||||||
// otherwise by the global -enableMetadata flag.
|
|
||||||
enableMetadata bool
|
|
||||||
|
|
||||||
pss []*pendingSeries
|
pss []*pendingSeries
|
||||||
pssNextIdx atomic.Uint64
|
pssNextIdx atomic.Uint64
|
||||||
|
|
||||||
@@ -841,19 +844,7 @@ type remoteWriteCtx struct {
|
|||||||
rowsDroppedOnPushFailure *metrics.Counter
|
rowsDroppedOnPushFailure *metrics.Counter
|
||||||
}
|
}
|
||||||
|
|
||||||
// isMetadataEnabledForURL returns true if metadata should be sent to the remote storage at argIdx.
|
func newRemoteWriteCtx(argIdx int, remoteWriteURL *url.URL, maxInmemoryBlocks int, sanitizedURL string) *remoteWriteCtx {
|
||||||
// It checks the per-URL -remoteWrite.disableMetadata flag first.
|
|
||||||
// If not set, it falls back to the global -enableMetadata flag.
|
|
||||||
func isMetadataEnabledForURL(argIdx int) bool {
|
|
||||||
if disableMetadataPerURL.GetOptionalArg(argIdx) {
|
|
||||||
// Metadata is explicitly disabled for this URL
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
// Use global -enableMetadata value
|
|
||||||
return prommetadata.IsEnabled()
|
|
||||||
}
|
|
||||||
|
|
||||||
func newRemoteWriteCtx(argIdx int, remoteWriteURL *url.URL, sanitizedURL string) *remoteWriteCtx {
|
|
||||||
// strip query params, otherwise changing params resets pq
|
// strip query params, otherwise changing params resets pq
|
||||||
pqURL := *remoteWriteURL
|
pqURL := *remoteWriteURL
|
||||||
pqURL.RawQuery = ""
|
pqURL.RawQuery = ""
|
||||||
@@ -868,23 +859,6 @@ func newRemoteWriteCtx(argIdx int, remoteWriteURL *url.URL, sanitizedURL string)
|
|||||||
}
|
}
|
||||||
|
|
||||||
isPQDisabled := disableOnDiskQueue.GetOptionalArg(argIdx)
|
isPQDisabled := disableOnDiskQueue.GetOptionalArg(argIdx)
|
||||||
queuesSize := queues.GetOptionalArg(argIdx)
|
|
||||||
if queuesSize > maxQueues {
|
|
||||||
queuesSize = maxQueues
|
|
||||||
} else if queuesSize <= 0 {
|
|
||||||
queuesSize = 1
|
|
||||||
}
|
|
||||||
|
|
||||||
maxInmemoryBlocks := memory.Allowed() / len(*remoteWriteURLs) / *maxRowsPerBlock / 100
|
|
||||||
if maxInmemoryBlocks/queuesSize > 100 {
|
|
||||||
// There is no much sense in keeping higher number of blocks in memory,
|
|
||||||
// since this means that the producer outperforms consumer and the queue
|
|
||||||
// will continue growing. It is better storing the queue to file.
|
|
||||||
maxInmemoryBlocks = 100 * queuesSize
|
|
||||||
}
|
|
||||||
if maxInmemoryBlocks < 2 {
|
|
||||||
maxInmemoryBlocks = 2
|
|
||||||
}
|
|
||||||
fq := persistentqueue.MustOpenFastQueue(queuePath, sanitizedURL, maxInmemoryBlocks, maxPendingBytes, isPQDisabled)
|
fq := persistentqueue.MustOpenFastQueue(queuePath, sanitizedURL, maxInmemoryBlocks, maxPendingBytes, isPQDisabled)
|
||||||
_ = metrics.GetOrCreateGauge(fmt.Sprintf(`vmagent_remotewrite_pending_data_bytes{path=%q, url=%q}`, queuePath, sanitizedURL), func() float64 {
|
_ = metrics.GetOrCreateGauge(fmt.Sprintf(`vmagent_remotewrite_pending_data_bytes{path=%q, url=%q}`, queuePath, sanitizedURL), func() float64 {
|
||||||
return float64(fq.GetPendingBytes())
|
return float64(fq.GetPendingBytes())
|
||||||
@@ -902,16 +876,16 @@ func newRemoteWriteCtx(argIdx int, remoteWriteURL *url.URL, sanitizedURL string)
|
|||||||
var c *client
|
var c *client
|
||||||
switch remoteWriteURL.Scheme {
|
switch remoteWriteURL.Scheme {
|
||||||
case "http", "https":
|
case "http", "https":
|
||||||
c = newHTTPClient(argIdx, remoteWriteURL.String(), sanitizedURL, fq, queuesSize)
|
c = newHTTPClient(argIdx, remoteWriteURL.String(), sanitizedURL, fq, *queues)
|
||||||
default:
|
default:
|
||||||
logger.Fatalf("unsupported scheme: %s for remoteWriteURL: %s, want `http`, `https`", remoteWriteURL.Scheme, sanitizedURL)
|
logger.Fatalf("unsupported scheme: %s for remoteWriteURL: %s, want `http`, `https`", remoteWriteURL.Scheme, sanitizedURL)
|
||||||
}
|
}
|
||||||
c.init(argIdx, queuesSize, sanitizedURL)
|
c.init(argIdx, *queues, sanitizedURL)
|
||||||
|
|
||||||
// Initialize pss
|
// Initialize pss
|
||||||
sf := significantFigures.GetOptionalArg(argIdx)
|
sf := significantFigures.GetOptionalArg(argIdx)
|
||||||
rd := roundDigits.GetOptionalArg(argIdx)
|
rd := roundDigits.GetOptionalArg(argIdx)
|
||||||
pssLen := queuesSize
|
pssLen := *queues
|
||||||
if n := cgroup.AvailableCPUs(); pssLen > n {
|
if n := cgroup.AvailableCPUs(); pssLen > n {
|
||||||
// There is no sense in running more than availableCPUs concurrent pendingSeries,
|
// There is no sense in running more than availableCPUs concurrent pendingSeries,
|
||||||
// since every pendingSeries can saturate up to a single CPU.
|
// since every pendingSeries can saturate up to a single CPU.
|
||||||
@@ -923,11 +897,10 @@ func newRemoteWriteCtx(argIdx int, remoteWriteURL *url.URL, sanitizedURL string)
|
|||||||
}
|
}
|
||||||
|
|
||||||
rwctx := &remoteWriteCtx{
|
rwctx := &remoteWriteCtx{
|
||||||
idx: argIdx,
|
idx: argIdx,
|
||||||
fq: fq,
|
fq: fq,
|
||||||
c: c,
|
c: c,
|
||||||
pss: pss,
|
pss: pss,
|
||||||
enableMetadata: isMetadataEnabledForURL(argIdx),
|
|
||||||
|
|
||||||
rowsPushedAfterRelabel: metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_rows_pushed_after_relabel_total{path=%q,url=%q}`, queuePath, sanitizedURL)),
|
rowsPushedAfterRelabel: metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_rows_pushed_after_relabel_total{path=%q,url=%q}`, queuePath, sanitizedURL)),
|
||||||
rowsDroppedByRelabel: metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_relabel_metrics_dropped_total{path=%q,url=%q}`, queuePath, sanitizedURL)),
|
rowsDroppedByRelabel: metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_relabel_metrics_dropped_total{path=%q,url=%q}`, queuePath, sanitizedURL)),
|
||||||
@@ -1015,17 +988,7 @@ func (rwctx *remoteWriteCtx) TryPushTimeSeries(tss []prompb.TimeSeries, forceDro
|
|||||||
tss = append(*v, tss...)
|
tss = append(*v, tss...)
|
||||||
}
|
}
|
||||||
tss = dropAggregatedSeries(tss, matchIdxs.B, rwctx.streamAggrDropInput)
|
tss = dropAggregatedSeries(tss, matchIdxs.B, rwctx.streamAggrDropInput)
|
||||||
} else if rwctx.streamAggrDropInput {
|
|
||||||
// if both keep_input and drop_input are true, we keep only the aggregated series
|
|
||||||
if rctx == nil {
|
|
||||||
rctx = getRelabelCtx()
|
|
||||||
// Make a copy of tss before dropping aggregated series
|
|
||||||
v = tssPool.Get().(*[]prompb.TimeSeries)
|
|
||||||
tss = append(*v, tss...)
|
|
||||||
}
|
|
||||||
tss = dropUnaggregatedSeries(tss, matchIdxs.B)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
matchIdxsPool.Put(matchIdxs)
|
matchIdxsPool.Put(matchIdxs)
|
||||||
}
|
}
|
||||||
if rwctx.deduplicator != nil {
|
if rwctx.deduplicator != nil {
|
||||||
@@ -1048,10 +1011,9 @@ func (rwctx *remoteWriteCtx) TryPushTimeSeries(tss []prompb.TimeSeries, forceDro
|
|||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
var matchIdxsPool slicesutil.BufferPool[uint32]
|
var matchIdxsPool bytesutil.ByteBufferPool
|
||||||
|
|
||||||
// dropAggregatedSeries drops matched series, also the unmatched if dropInput is true.
|
func dropAggregatedSeries(src []prompb.TimeSeries, matchIdxs []byte, dropInput bool) []prompb.TimeSeries {
|
||||||
func dropAggregatedSeries(src []prompb.TimeSeries, matchIdxs []uint32, dropInput bool) []prompb.TimeSeries {
|
|
||||||
dst := src[:0]
|
dst := src[:0]
|
||||||
if !dropInput {
|
if !dropInput {
|
||||||
for i, match := range matchIdxs {
|
for i, match := range matchIdxs {
|
||||||
@@ -1066,20 +1028,6 @@ func dropAggregatedSeries(src []prompb.TimeSeries, matchIdxs []uint32, dropInput
|
|||||||
return dst
|
return dst
|
||||||
}
|
}
|
||||||
|
|
||||||
// dropUnaggregatedSeries drops unmatched series.
|
|
||||||
func dropUnaggregatedSeries(src []prompb.TimeSeries, matchIdxs []uint32) []prompb.TimeSeries {
|
|
||||||
dst := src[:0]
|
|
||||||
for i, match := range matchIdxs {
|
|
||||||
if match == 0 {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
dst = append(dst, src[i])
|
|
||||||
}
|
|
||||||
tail := src[len(dst):]
|
|
||||||
clear(tail)
|
|
||||||
return dst
|
|
||||||
}
|
|
||||||
|
|
||||||
func (rwctx *remoteWriteCtx) pushInternalTrackDropped(tss []prompb.TimeSeries) {
|
func (rwctx *remoteWriteCtx) pushInternalTrackDropped(tss []prompb.TimeSeries) {
|
||||||
if rwctx.tryPushTimeSeriesInternal(tss) {
|
if rwctx.tryPushTimeSeriesInternal(tss) {
|
||||||
return
|
return
|
||||||
@@ -1112,7 +1060,7 @@ func (rwctx *remoteWriteCtx) tryPushTimeSeriesInternal(tss []prompb.TimeSeries)
|
|||||||
}()
|
}()
|
||||||
|
|
||||||
if len(labelsGlobal) > 0 {
|
if len(labelsGlobal) > 0 {
|
||||||
// Make a copy of tss before adding extra labels to prevent
|
// Make a copy of tss before adding extra labels in order to prevent
|
||||||
// from affecting time series for other remoteWrite.url configs.
|
// from affecting time series for other remoteWrite.url configs.
|
||||||
rctx = getRelabelCtx()
|
rctx = getRelabelCtx()
|
||||||
v = tssPool.Get().(*[]prompb.TimeSeries)
|
v = tssPool.Get().(*[]prompb.TimeSeries)
|
||||||
@@ -1148,21 +1096,3 @@ func newMapFromStrings(a []string) map[string]struct{} {
|
|||||||
}
|
}
|
||||||
return m
|
return m
|
||||||
}
|
}
|
||||||
|
|
||||||
func getMaxHourlySeries() int {
|
|
||||||
limit := *maxHourlySeries
|
|
||||||
if limit == -1 || limit > math.MaxInt32 {
|
|
||||||
return math.MaxInt32
|
|
||||||
}
|
|
||||||
|
|
||||||
return int(limit)
|
|
||||||
}
|
|
||||||
|
|
||||||
func getMaxDailySeries() int {
|
|
||||||
limit := *maxDailySeries
|
|
||||||
if limit == -1 || limit > math.MaxInt32 {
|
|
||||||
return math.MaxInt32
|
|
||||||
}
|
|
||||||
|
|
||||||
return int(limit)
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -10,8 +10,6 @@ import (
|
|||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/consistenthash"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/consistenthash"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/persistentqueue"
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/prometheus"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/prometheus"
|
||||||
@@ -28,12 +26,12 @@ func TestGetLabelsHash_Distribution(t *testing.T) {
|
|||||||
itemsCount := 1_000 * bucketsCount
|
itemsCount := 1_000 * bucketsCount
|
||||||
m := make([]int, bucketsCount)
|
m := make([]int, bucketsCount)
|
||||||
var labels []prompb.Label
|
var labels []prompb.Label
|
||||||
for i := range itemsCount {
|
for i := 0; i < itemsCount; i++ {
|
||||||
labels = append(labels[:0], prompb.Label{
|
labels = append(labels[:0], prompb.Label{
|
||||||
Name: "__name__",
|
Name: "__name__",
|
||||||
Value: fmt.Sprintf("some_name_%d", i),
|
Value: fmt.Sprintf("some_name_%d", i),
|
||||||
})
|
})
|
||||||
for j := range 10 {
|
for j := 0; j < 10; j++ {
|
||||||
labels = append(labels, prompb.Label{
|
labels = append(labels, prompb.Label{
|
||||||
Name: fmt.Sprintf("label_%d", j),
|
Name: fmt.Sprintf("label_%d", j),
|
||||||
Value: fmt.Sprintf("value_%d_%d", i, j),
|
Value: fmt.Sprintf("value_%d_%d", i, j),
|
||||||
@@ -59,8 +57,8 @@ func TestGetLabelsHash_Distribution(t *testing.T) {
|
|||||||
f(10)
|
f(10)
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestRemoteWriteContext_TryPushTimeSeries(t *testing.T) {
|
func TestRemoteWriteContext_TryPush_ImmutableTimeseries(t *testing.T) {
|
||||||
f := func(streamAggrConfig, relabelConfig string, enableWindows bool, dedupInterval time.Duration, keepInput, dropInput bool, input string, expectedRowsPushedAfterRelabel, expectedPushedSample int) {
|
f := func(streamAggrConfig, relabelConfig string, enableWindows bool, dedupInterval time.Duration, keepInput, dropInput bool, input string) {
|
||||||
t.Helper()
|
t.Helper()
|
||||||
perURLRelabel, err := promrelabel.ParseRelabelConfigsData([]byte(relabelConfig))
|
perURLRelabel, err := promrelabel.ParseRelabelConfigsData([]byte(relabelConfig))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -73,16 +71,10 @@ func TestRemoteWriteContext_TryPushTimeSeries(t *testing.T) {
|
|||||||
}
|
}
|
||||||
allRelabelConfigs.Store(rcs)
|
allRelabelConfigs.Store(rcs)
|
||||||
|
|
||||||
path := "fast-queue-write-test"
|
|
||||||
fs.MustRemoveDir(path)
|
|
||||||
fq := persistentqueue.MustOpenFastQueue(path, "test", 100, 0, false)
|
|
||||||
defer fs.MustRemoveDir(path)
|
|
||||||
defer fq.MustClose()
|
|
||||||
|
|
||||||
pss := make([]*pendingSeries, 1)
|
pss := make([]*pendingSeries, 1)
|
||||||
isVMProto := &atomic.Bool{}
|
isVMProto := &atomic.Bool{}
|
||||||
isVMProto.Store(true)
|
isVMProto.Store(true)
|
||||||
pss[0] = newPendingSeries(fq, isVMProto, 0, 100)
|
pss[0] = newPendingSeries(nil, isVMProto, 0, 100)
|
||||||
rwctx := &remoteWriteCtx{
|
rwctx := &remoteWriteCtx{
|
||||||
idx: 0,
|
idx: 0,
|
||||||
streamAggrKeepInput: keepInput,
|
streamAggrKeepInput: keepInput,
|
||||||
@@ -91,8 +83,6 @@ func TestRemoteWriteContext_TryPushTimeSeries(t *testing.T) {
|
|||||||
rowsPushedAfterRelabel: metrics.GetOrCreateCounter(`foo`),
|
rowsPushedAfterRelabel: metrics.GetOrCreateCounter(`foo`),
|
||||||
rowsDroppedByRelabel: metrics.GetOrCreateCounter(`bar`),
|
rowsDroppedByRelabel: metrics.GetOrCreateCounter(`bar`),
|
||||||
}
|
}
|
||||||
defer metrics.UnregisterAllMetrics()
|
|
||||||
|
|
||||||
if dedupInterval > 0 {
|
if dedupInterval > 0 {
|
||||||
rwctx.deduplicator = streamaggr.NewDeduplicator(nil, enableWindows, dedupInterval, nil, "dedup-global")
|
rwctx.deduplicator = streamaggr.NewDeduplicator(nil, enableWindows, dedupInterval, nil, "dedup-global")
|
||||||
}
|
}
|
||||||
@@ -114,27 +104,23 @@ func TestRemoteWriteContext_TryPushTimeSeries(t *testing.T) {
|
|||||||
inputTss := prometheus.MustParsePromMetrics(input, offsetMsecs)
|
inputTss := prometheus.MustParsePromMetrics(input, offsetMsecs)
|
||||||
expectedTss := make([]prompb.TimeSeries, len(inputTss))
|
expectedTss := make([]prompb.TimeSeries, len(inputTss))
|
||||||
|
|
||||||
// check inputTss is not modified after TryPushTimeSeries
|
// copy inputTss to make sure it is not mutated during TryPush call
|
||||||
copy(expectedTss, inputTss)
|
copy(expectedTss, inputTss)
|
||||||
if !rwctx.TryPushTimeSeries(inputTss, false) {
|
if !rwctx.TryPushTimeSeries(inputTss, false) {
|
||||||
t.Fatalf("cannot push samples to rwctx")
|
t.Fatalf("cannot push samples to rwctx")
|
||||||
}
|
}
|
||||||
|
|
||||||
if int(rwctx.rowsPushedAfterRelabel.Get()) != expectedRowsPushedAfterRelabel {
|
|
||||||
t.Fatalf("unexpected number of rows after relabel; got %d; want %d", rwctx.rowsPushedAfterRelabel.Get(), expectedRowsPushedAfterRelabel)
|
|
||||||
}
|
|
||||||
|
|
||||||
if len(pss[0].wr.tss) != expectedPushedSample {
|
|
||||||
t.Fatalf("unexpected number of pushed samples; got %d; want %d", len(pss[0].wr.tss), expectedPushedSample)
|
|
||||||
}
|
|
||||||
|
|
||||||
if !reflect.DeepEqual(expectedTss, inputTss) {
|
if !reflect.DeepEqual(expectedTss, inputTss) {
|
||||||
t.Fatalf("unexpected samples;\ngot\n%v\nwant\n%v", inputTss, expectedTss)
|
t.Fatalf("unexpected samples;\ngot\n%v\nwant\n%v", inputTss, expectedTss)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// relabeling
|
f(`
|
||||||
f(``, `
|
- interval: 1m
|
||||||
|
outputs: [sum_samples]
|
||||||
|
- interval: 2m
|
||||||
|
outputs: [count_series]
|
||||||
|
`, `
|
||||||
- action: keep
|
- action: keep
|
||||||
source_labels: [env]
|
source_labels: [env]
|
||||||
regex: "dev"
|
regex: "dev"
|
||||||
@@ -143,66 +129,53 @@ metric{env="dev"} 10
|
|||||||
metric{env="bar"} 20
|
metric{env="bar"} 20
|
||||||
metric{env="dev"} 15
|
metric{env="dev"} 15
|
||||||
metric{env="bar"} 25
|
metric{env="bar"} 25
|
||||||
`, 2, 2)
|
`)
|
||||||
|
|
||||||
// relabeling + aggregation
|
|
||||||
f(`
|
|
||||||
- match: '{env="dev"}'
|
|
||||||
interval: 1m
|
|
||||||
outputs: [sum_samples]
|
|
||||||
`, `
|
|
||||||
- action: keep
|
|
||||||
source_labels: [env]
|
|
||||||
regex: ".*"
|
|
||||||
`, false, 0, false, false, `
|
|
||||||
metric{env="dev"} 10
|
|
||||||
metric{env="bar"} 20
|
|
||||||
metric{env="dev"} 15
|
|
||||||
metric{env="bar"} 25
|
|
||||||
`, 4, 2)
|
|
||||||
|
|
||||||
// aggregation + keepInput
|
|
||||||
f(`
|
|
||||||
- match: '{env="dev"}'
|
|
||||||
interval: 1m
|
|
||||||
outputs: [sum_samples]
|
|
||||||
`, ``, false, 0, true, false, `
|
|
||||||
metric{env="dev"} 10
|
|
||||||
metric{env="bar"} 20
|
|
||||||
metric{env="dev"} 15
|
|
||||||
metric{env="bar"} 25
|
|
||||||
`, 4, 4)
|
|
||||||
|
|
||||||
// aggregation + dropInput
|
|
||||||
f(`
|
|
||||||
- match: '{env="dev"}'
|
|
||||||
interval: 1m
|
|
||||||
outputs: [sum_samples]
|
|
||||||
`, ``, false, 0, false, true, `
|
|
||||||
metric{env="dev"} 10
|
|
||||||
metric{env="bar"} 20
|
|
||||||
metric{env="dev"} 15
|
|
||||||
metric{env="bar"} 25
|
|
||||||
`, 4, 0)
|
|
||||||
|
|
||||||
// aggregation + keepInput + dropInput
|
|
||||||
f(`
|
|
||||||
- match: '{env="dev"}'
|
|
||||||
interval: 1m
|
|
||||||
outputs: [sum_samples]
|
|
||||||
`, ``, false, 0, true, true, `
|
|
||||||
metric{env="dev"} 10
|
|
||||||
metric{env="bar"} 20
|
|
||||||
metric{env="bar"} 25
|
|
||||||
`, 3, 1)
|
|
||||||
|
|
||||||
// aggregation + deduplication
|
|
||||||
f(``, ``, true, time.Hour, false, false, `
|
f(``, ``, true, time.Hour, false, false, `
|
||||||
metric{env="dev"} 10
|
metric{env="dev"} 10
|
||||||
metric{env="foo"} 20
|
metric{env="foo"} 20
|
||||||
metric{env="dev"} 15
|
metric{env="dev"} 15
|
||||||
metric{env="foo"} 25
|
metric{env="foo"} 25
|
||||||
`, 4, 0)
|
`)
|
||||||
|
f(``, `
|
||||||
|
- action: keep
|
||||||
|
source_labels: [env]
|
||||||
|
regex: "dev"
|
||||||
|
`, true, time.Hour, false, false, `
|
||||||
|
metric{env="dev"} 10
|
||||||
|
metric{env="bar"} 20
|
||||||
|
metric{env="dev"} 15
|
||||||
|
metric{env="bar"} 25
|
||||||
|
`)
|
||||||
|
f(``, `
|
||||||
|
- action: keep
|
||||||
|
source_labels: [env]
|
||||||
|
regex: "dev"
|
||||||
|
`, true, time.Hour, true, false, `
|
||||||
|
metric{env="test"} 10
|
||||||
|
metric{env="dev"} 20
|
||||||
|
metric{env="foo"} 15
|
||||||
|
metric{env="dev"} 25
|
||||||
|
`)
|
||||||
|
f(``, `
|
||||||
|
- action: keep
|
||||||
|
source_labels: [env]
|
||||||
|
regex: "dev"
|
||||||
|
`, true, time.Hour, false, true, `
|
||||||
|
metric{env="foo"} 10
|
||||||
|
metric{env="dev"} 20
|
||||||
|
metric{env="foo"} 15
|
||||||
|
metric{env="dev"} 25
|
||||||
|
`)
|
||||||
|
f(``, `
|
||||||
|
- action: keep
|
||||||
|
source_labels: [env]
|
||||||
|
regex: "dev"
|
||||||
|
`, true, time.Hour, true, true, `
|
||||||
|
metric{env="dev"} 10
|
||||||
|
metric{env="test"} 20
|
||||||
|
metric{env="dev"} 15
|
||||||
|
metric{env="bar"} 25
|
||||||
|
`)
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestShardAmountRemoteWriteCtx(t *testing.T) {
|
func TestShardAmountRemoteWriteCtx(t *testing.T) {
|
||||||
@@ -248,7 +221,7 @@ func TestShardAmountRemoteWriteCtx(t *testing.T) {
|
|||||||
seriesCount := 100000
|
seriesCount := 100000
|
||||||
// build 1000000 series
|
// build 1000000 series
|
||||||
tssBlock := make([]prompb.TimeSeries, 0, seriesCount)
|
tssBlock := make([]prompb.TimeSeries, 0, seriesCount)
|
||||||
for i := range seriesCount {
|
for i := 0; i < seriesCount; i++ {
|
||||||
tssBlock = append(tssBlock, prompb.TimeSeries{
|
tssBlock = append(tssBlock, prompb.TimeSeries{
|
||||||
Labels: []prompb.Label{
|
Labels: []prompb.Label{
|
||||||
{
|
{
|
||||||
@@ -269,7 +242,7 @@ func TestShardAmountRemoteWriteCtx(t *testing.T) {
|
|||||||
// build active time series set
|
// build active time series set
|
||||||
nodes := make([]string, 0, remoteWriteCount)
|
nodes := make([]string, 0, remoteWriteCount)
|
||||||
activeTimeSeriesByNodes := make([]map[string]struct{}, remoteWriteCount)
|
activeTimeSeriesByNodes := make([]map[string]struct{}, remoteWriteCount)
|
||||||
for i := range remoteWriteCount {
|
for i := 0; i < remoteWriteCount; i++ {
|
||||||
nodes = append(nodes, fmt.Sprintf("node%d", i))
|
nodes = append(nodes, fmt.Sprintf("node%d", i))
|
||||||
activeTimeSeriesByNodes[i] = make(map[string]struct{})
|
activeTimeSeriesByNodes[i] = make(map[string]struct{})
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -18,12 +18,12 @@ var (
|
|||||||
streamAggrGlobalConfig = flag.String("streamAggr.config", "", "Optional path to file with stream aggregation config. "+
|
streamAggrGlobalConfig = flag.String("streamAggr.config", "", "Optional path to file with stream aggregation config. "+
|
||||||
"See https://docs.victoriametrics.com/victoriametrics/stream-aggregation/ . "+
|
"See https://docs.victoriametrics.com/victoriametrics/stream-aggregation/ . "+
|
||||||
"See also -streamAggr.keepInput, -streamAggr.dropInput and -streamAggr.dedupInterval")
|
"See also -streamAggr.keepInput, -streamAggr.dropInput and -streamAggr.dedupInterval")
|
||||||
streamAggrGlobalKeepInput = flag.Bool("streamAggr.keepInput", false, "Whether to keep input samples that match any rule in "+
|
streamAggrGlobalKeepInput = flag.Bool("streamAggr.keepInput", false, "Whether to keep all the input samples after the aggregation "+
|
||||||
"-streamAggr.config. By default, matched raw samples are aggregated and dropped, while unmatched samples "+
|
"with -streamAggr.config. By default, only aggregates samples are dropped, while the remaining samples "+
|
||||||
"are written to the remote storage. See also -streamAggr.dropInput and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/")
|
"are written to remote storages write. See also -streamAggr.dropInput and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/")
|
||||||
streamAggrGlobalDropInput = flag.Bool("streamAggr.dropInput", false, "Whether to drop input samples that not matching any rule in "+
|
streamAggrGlobalDropInput = flag.Bool("streamAggr.dropInput", false, "Whether to drop all the input samples after the aggregation "+
|
||||||
"-streamAggr.config. By default, only matched raw samples are dropped, while unmatched samples "+
|
"with -remoteWrite.streamAggr.config. By default, only aggregates samples are dropped, while the remaining samples "+
|
||||||
"are written to the remote storage. See also -streamAggr.keepInput and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/")
|
"are written to remote storages write. See also -streamAggr.keepInput and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/")
|
||||||
streamAggrGlobalDedupInterval = flag.Duration("streamAggr.dedupInterval", 0, "Input samples are de-duplicated with this interval on "+
|
streamAggrGlobalDedupInterval = flag.Duration("streamAggr.dedupInterval", 0, "Input samples are de-duplicated with this interval on "+
|
||||||
"aggregator before optional aggregation with -streamAggr.config . "+
|
"aggregator before optional aggregation with -streamAggr.config . "+
|
||||||
"See also -dedup.minScrapeInterval and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/#deduplication")
|
"See also -dedup.minScrapeInterval and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/#deduplication")
|
||||||
@@ -43,11 +43,11 @@ var (
|
|||||||
streamAggrConfig = flagutil.NewArrayString("remoteWrite.streamAggr.config", "Optional path to file with stream aggregation config for the corresponding -remoteWrite.url. "+
|
streamAggrConfig = flagutil.NewArrayString("remoteWrite.streamAggr.config", "Optional path to file with stream aggregation config for the corresponding -remoteWrite.url. "+
|
||||||
"See https://docs.victoriametrics.com/victoriametrics/stream-aggregation/ . "+
|
"See https://docs.victoriametrics.com/victoriametrics/stream-aggregation/ . "+
|
||||||
"See also -remoteWrite.streamAggr.keepInput, -remoteWrite.streamAggr.dropInput and -remoteWrite.streamAggr.dedupInterval")
|
"See also -remoteWrite.streamAggr.keepInput, -remoteWrite.streamAggr.dropInput and -remoteWrite.streamAggr.dedupInterval")
|
||||||
streamAggrDropInput = flagutil.NewArrayBool("remoteWrite.streamAggr.dropInput", "Whether to drop input samples that not matching any rule in "+
|
streamAggrDropInput = flagutil.NewArrayBool("remoteWrite.streamAggr.dropInput", "Whether to drop all the input samples after the aggregation "+
|
||||||
"the corresponding -remoteWrite.streamAggr.config. By default, only matched raw samples are dropped, while unmatched samples "+
|
"with -remoteWrite.streamAggr.config at the corresponding -remoteWrite.url. By default, only aggregates samples are dropped, while the remaining samples "+
|
||||||
"are written to the corresponding -remoteWrite.url . See also -remoteWrite.streamAggr.keepInput and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/")
|
"are written to the corresponding -remoteWrite.url . See also -remoteWrite.streamAggr.keepInput and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/")
|
||||||
streamAggrKeepInput = flagutil.NewArrayBool("remoteWrite.streamAggr.keepInput", "Whether to keep input samples that match any rule in "+
|
streamAggrKeepInput = flagutil.NewArrayBool("remoteWrite.streamAggr.keepInput", "Whether to keep all the input samples after the aggregation "+
|
||||||
"the corresponding -remoteWrite.streamAggr.config. By default, matched raw samples are aggregated and dropped, while unmatched samples "+
|
"with -remoteWrite.streamAggr.config at the corresponding -remoteWrite.url. By default, only aggregates samples are dropped, while the remaining samples "+
|
||||||
"are written to the corresponding -remoteWrite.url . See also -remoteWrite.streamAggr.dropInput and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/")
|
"are written to the corresponding -remoteWrite.url . See also -remoteWrite.streamAggr.dropInput and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/")
|
||||||
streamAggrDedupInterval = flagutil.NewArrayDuration("remoteWrite.streamAggr.dedupInterval", 0, "Input samples are de-duplicated with this interval before optional aggregation "+
|
streamAggrDedupInterval = flagutil.NewArrayDuration("remoteWrite.streamAggr.dedupInterval", 0, "Input samples are de-duplicated with this interval before optional aggregation "+
|
||||||
"with -remoteWrite.streamAggr.config at the corresponding -remoteWrite.url. See also -dedup.minScrapeInterval and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/#deduplication")
|
"with -remoteWrite.streamAggr.config at the corresponding -remoteWrite.url. See also -dedup.minScrapeInterval and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/#deduplication")
|
||||||
|
|||||||
@@ -1,80 +0,0 @@
|
|||||||
package zabbixconnector
|
|
||||||
|
|
||||||
import (
|
|
||||||
"net/http"
|
|
||||||
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/protoparserutil"
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/zabbixconnector"
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/zabbixconnector/stream"
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
|
|
||||||
"github.com/VictoriaMetrics/metrics"
|
|
||||||
)
|
|
||||||
|
|
||||||
// Metrics for the zabbixconnector insert path.
// All three are updated by insertRows, and only after remotewrite.TryPush has accepted the batch.
var (
|
|
||||||
// rowsInserted is increased by the number of rows in every pushed batch.
rowsInserted = metrics.NewCounter(`vmagent_rows_inserted_total{type="zabbixconnector"}`)
|
|
||||||
// rowsTenantInserted is increased by the same amount, but only when an auth token is present (at != nil).
rowsTenantInserted = tenantmetrics.NewCounterMap(`vmagent_tenant_inserted_rows_total{type="zabbixconnector"}`)
|
|
||||||
// rowsPerInsert records the number of rows of every pushed batch.
rowsPerInsert = metrics.NewHistogram(`vmagent_rows_per_insert{type="zabbixconnector"}`)
|
|
||||||
)
|
|
||||||
|
|
||||||
// InsertHandlerForHTTP processes remote write for ZabbixConnector POST /zabbixconnector/v1/history request.
|
|
||||||
func InsertHandlerForHTTP(at *auth.Token, req *http.Request) error {
|
|
||||||
extraLabels, err := protoparserutil.GetExtraLabels(req)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
encoding := req.Header.Get("Content-Encoding")
|
|
||||||
return stream.Parse(req.Body, encoding, func(rows []zabbixconnector.Row) error {
|
|
||||||
return insertRows(at, rows, extraLabels)
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
func insertRows(at *auth.Token, rows []zabbixconnector.Row, extraLabels []prompb.Label) error {
|
|
||||||
ctx := common.GetPushCtx()
|
|
||||||
defer common.PutPushCtx(ctx)
|
|
||||||
|
|
||||||
rowsTotal := len(rows)
|
|
||||||
tssDst := ctx.WriteRequest.Timeseries[:0]
|
|
||||||
labels := ctx.Labels[:0]
|
|
||||||
samples := ctx.Samples[:0]
|
|
||||||
for i := range rows {
|
|
||||||
r := &rows[i]
|
|
||||||
|
|
||||||
labelsLen := len(labels)
|
|
||||||
for j := range r.Tags {
|
|
||||||
tag := &r.Tags[j]
|
|
||||||
labels = append(labels, prompb.Label{
|
|
||||||
Name: bytesutil.ToUnsafeString(tag.Key),
|
|
||||||
Value: bytesutil.ToUnsafeString(tag.Value),
|
|
||||||
})
|
|
||||||
}
|
|
||||||
labels = append(labels, extraLabels...)
|
|
||||||
|
|
||||||
samplesLen := len(samples)
|
|
||||||
samples = append(samples, prompb.Sample{
|
|
||||||
Value: r.Value,
|
|
||||||
Timestamp: r.Timestamp,
|
|
||||||
})
|
|
||||||
|
|
||||||
tssDst = append(tssDst, prompb.TimeSeries{
|
|
||||||
Labels: labels[labelsLen:],
|
|
||||||
Samples: samples[samplesLen:],
|
|
||||||
})
|
|
||||||
}
|
|
||||||
ctx.WriteRequest.Timeseries = tssDst
|
|
||||||
ctx.Labels = labels
|
|
||||||
ctx.Samples = samples
|
|
||||||
if !remotewrite.TryPush(at, &ctx.WriteRequest) {
|
|
||||||
return remotewrite.ErrQueueFullHTTPRetry
|
|
||||||
}
|
|
||||||
rowsInserted.Add(rowsTotal)
|
|
||||||
if at != nil {
|
|
||||||
rowsTenantInserted.Get(at).Add(rowsTotal)
|
|
||||||
}
|
|
||||||
rowsPerInsert.Update(float64(rowsTotal))
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
@@ -27,9 +27,6 @@ vmalert-tool-linux-ppc64le-prod:
|
|||||||
vmalert-tool-linux-386-prod:
|
vmalert-tool-linux-386-prod:
|
||||||
APP_NAME=vmalert-tool $(MAKE) app-via-docker-linux-386
|
APP_NAME=vmalert-tool $(MAKE) app-via-docker-linux-386
|
||||||
|
|
||||||
vmalert-tool-linux-s390x-prod:
|
|
||||||
APP_NAME=vmalert-tool $(MAKE) app-via-docker-linux-s390x
|
|
||||||
|
|
||||||
vmalert-tool-darwin-amd64-prod:
|
vmalert-tool-darwin-amd64-prod:
|
||||||
APP_NAME=vmalert-tool $(MAKE) app-via-docker-darwin-amd64
|
APP_NAME=vmalert-tool $(MAKE) app-via-docker-darwin-amd64
|
||||||
|
|
||||||
|
|||||||
@@ -41,7 +41,7 @@ func TestParseInputValue_Success(t *testing.T) {
|
|||||||
if len(outputExpected) != len(output) {
|
if len(outputExpected) != len(output) {
|
||||||
t.Fatalf("unexpected output length; got %d; want %d", len(outputExpected), len(output))
|
t.Fatalf("unexpected output length; got %d; want %d", len(outputExpected), len(output))
|
||||||
}
|
}
|
||||||
for i := range outputExpected {
|
for i := 0; i < len(outputExpected); i++ {
|
||||||
if outputExpected[i].Omitted != output[i].Omitted {
|
if outputExpected[i].Omitted != output[i].Omitted {
|
||||||
t.Fatalf("unexpected Omitted field in the output\ngot\n%v\nwant\n%v", output, outputExpected)
|
t.Fatalf("unexpected Omitted field in the output\ngot\n%v\nwant\n%v", output, outputExpected)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -4,7 +4,6 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"flag"
|
"flag"
|
||||||
"fmt"
|
"fmt"
|
||||||
"maps"
|
|
||||||
"net"
|
"net"
|
||||||
"net/http"
|
"net/http"
|
||||||
"net/http/httptest"
|
"net/http/httptest"
|
||||||
@@ -13,7 +12,6 @@ import (
|
|||||||
"os/signal"
|
"os/signal"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"reflect"
|
"reflect"
|
||||||
"slices"
|
|
||||||
"sort"
|
"sort"
|
||||||
"strings"
|
"strings"
|
||||||
"syscall"
|
"syscall"
|
||||||
@@ -36,7 +34,6 @@ import (
|
|||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil"
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutil"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutil"
|
||||||
"github.com/VictoriaMetrics/metrics"
|
"github.com/VictoriaMetrics/metrics"
|
||||||
)
|
)
|
||||||
@@ -87,8 +84,7 @@ func UnitTest(files []string, disableGroupLabel bool, externalLabels []string, e
|
|||||||
defer server.Close()
|
defer server.Close()
|
||||||
} else {
|
} else {
|
||||||
httpListenAddr = httpListenPort
|
httpListenAddr = httpListenPort
|
||||||
|
ln, err := net.Listen("tcp", fmt.Sprintf(":%s", httpListenPort))
|
||||||
ln, err := net.Listen(netutil.GetTCPNetwork(), fmt.Sprintf(":%s", httpListenPort))
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logger.Fatalf("cannot listen on port %s: %v", httpListenPort, err)
|
logger.Fatalf("cannot listen on port %s: %v", httpListenPort, err)
|
||||||
}
|
}
|
||||||
@@ -134,7 +130,7 @@ func UnitTest(files []string, disableGroupLabel bool, externalLabels []string, e
|
|||||||
}
|
}
|
||||||
labels[s[:n]] = s[n+1:]
|
labels[s[:n]] = s[n+1:]
|
||||||
}
|
}
|
||||||
err = notifier.Init(labels, externalURL)
|
_, err = notifier.Init(nil, labels, externalURL)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logger.Fatalf("failed to init notifier: %v", err)
|
logger.Fatalf("failed to init notifier: %v", err)
|
||||||
}
|
}
|
||||||
@@ -350,7 +346,9 @@ func (tg *testGroup) test(evalInterval time.Duration, groupOrderMap map[string]i
|
|||||||
for k := range alertEvalTimesMap {
|
for k := range alertEvalTimesMap {
|
||||||
alertEvalTimes = append(alertEvalTimes, k)
|
alertEvalTimes = append(alertEvalTimes, k)
|
||||||
}
|
}
|
||||||
slices.Sort(alertEvalTimes)
|
sort.Slice(alertEvalTimes, func(i, j int) bool {
|
||||||
|
return alertEvalTimes[i] < alertEvalTimes[j]
|
||||||
|
})
|
||||||
|
|
||||||
// sort group eval order according to the given "group_eval_order".
|
// sort group eval order according to the given "group_eval_order".
|
||||||
sort.Slice(testGroups, func(i, j int) bool {
|
sort.Slice(testGroups, func(i, j int) bool {
|
||||||
@@ -361,8 +359,12 @@ func (tg *testGroup) test(evalInterval time.Duration, groupOrderMap map[string]i
|
|||||||
var groups []*rule.Group
|
var groups []*rule.Group
|
||||||
for _, group := range testGroups {
|
for _, group := range testGroups {
|
||||||
mergedExternalLabels := make(map[string]string)
|
mergedExternalLabels := make(map[string]string)
|
||||||
maps.Copy(mergedExternalLabels, tg.ExternalLabels)
|
for k, v := range tg.ExternalLabels {
|
||||||
maps.Copy(mergedExternalLabels, externalLabels)
|
mergedExternalLabels[k] = v
|
||||||
|
}
|
||||||
|
for k, v := range externalLabels {
|
||||||
|
mergedExternalLabels[k] = v
|
||||||
|
}
|
||||||
ng := rule.NewGroup(group, q, time.Minute, mergedExternalLabels)
|
ng := rule.NewGroup(group, q, time.Minute, mergedExternalLabels)
|
||||||
ng.Init()
|
ng.Init()
|
||||||
groups = append(groups, ng)
|
groups = append(groups, ng)
|
||||||
@@ -375,7 +377,7 @@ func (tg *testGroup) test(evalInterval time.Duration, groupOrderMap map[string]i
|
|||||||
if len(g.Rules) == 0 {
|
if len(g.Rules) == 0 {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
errs := g.ExecOnce(context.Background(), rw, ts)
|
errs := g.ExecOnce(context.Background(), func() []notifier.Notifier { return nil }, rw, ts)
|
||||||
for err := range errs {
|
for err := range errs {
|
||||||
if err != nil {
|
if err != nil {
|
||||||
checkErrs = append(checkErrs, fmt.Errorf("\nfailed to exec group: %q, time: %s, err: %w", g.Name,
|
checkErrs = append(checkErrs, fmt.Errorf("\nfailed to exec group: %q, time: %s, err: %w", g.Name,
|
||||||
|
|||||||
@@ -27,9 +27,6 @@ vmalert-linux-ppc64le-prod:
|
|||||||
vmalert-linux-386-prod:
|
vmalert-linux-386-prod:
|
||||||
APP_NAME=vmalert $(MAKE) app-via-docker-linux-386
|
APP_NAME=vmalert $(MAKE) app-via-docker-linux-386
|
||||||
|
|
||||||
vmalert-linux-s390x-prod:
|
|
||||||
APP_NAME=vmalert $(MAKE) app-via-docker-linux-s390x
|
|
||||||
|
|
||||||
vmalert-darwin-amd64-prod:
|
vmalert-darwin-amd64-prod:
|
||||||
APP_NAME=vmalert $(MAKE) app-via-docker-darwin-amd64
|
APP_NAME=vmalert $(MAKE) app-via-docker-darwin-amd64
|
||||||
|
|
||||||
|
|||||||
@@ -31,7 +31,7 @@ type Group struct {
|
|||||||
// EvalDelay will adjust the `time` parameter of rule evaluation requests to compensate intentional query delay from datasource.
|
// EvalDelay will adjust the `time` parameter of rule evaluation requests to compensate intentional query delay from datasource.
|
||||||
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5155
|
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5155
|
||||||
EvalDelay *promutil.Duration `yaml:"eval_delay,omitempty"`
|
EvalDelay *promutil.Duration `yaml:"eval_delay,omitempty"`
|
||||||
Limit *int `yaml:"limit,omitempty"`
|
Limit int `yaml:"limit,omitempty"`
|
||||||
Rules []Rule `yaml:"rules"`
|
Rules []Rule `yaml:"rules"`
|
||||||
Concurrency int `yaml:"concurrency"`
|
Concurrency int `yaml:"concurrency"`
|
||||||
// Labels is a set of label value pairs, that will be added to every rule.
|
// Labels is a set of label value pairs, that will be added to every rule.
|
||||||
@@ -81,15 +81,18 @@ func (g *Group) Validate(validateTplFn ValidateTplFn, validateExpressions bool)
|
|||||||
if g.Interval.Duration() < 0 {
|
if g.Interval.Duration() < 0 {
|
||||||
return fmt.Errorf("interval shouldn't be lower than 0")
|
return fmt.Errorf("interval shouldn't be lower than 0")
|
||||||
}
|
}
|
||||||
// if `eval_offset` is set, the group interval must be specified explicitly (instead of being inherited from the global evaluationInterval flag) and must not be smaller than the absolute value of the offset.
|
if g.EvalOffset.Duration() < 0 {
|
||||||
if g.EvalOffset.Duration().Abs() > g.Interval.Duration() {
|
return fmt.Errorf("eval_offset shouldn't be lower than 0")
|
||||||
return fmt.Errorf("the abs value of eval_offset should be smaller than interval; now eval_offset: %v, interval: %v", g.EvalOffset.Duration(), g.Interval.Duration())
|
}
|
||||||
|
// if `eval_offset` is set, interval won't use the global evaluationInterval flag and must not be smaller than the offset.
|
||||||
|
if g.EvalOffset.Duration() > g.Interval.Duration() {
|
||||||
|
return fmt.Errorf("eval_offset should be smaller than interval; now eval_offset: %v, interval: %v", g.EvalOffset.Duration(), g.Interval.Duration())
|
||||||
}
|
}
|
||||||
if g.EvalOffset != nil && g.EvalDelay != nil {
|
if g.EvalOffset != nil && g.EvalDelay != nil {
|
||||||
return fmt.Errorf("eval_offset cannot be used with eval_delay")
|
return fmt.Errorf("eval_offset cannot be used with eval_delay")
|
||||||
}
|
}
|
||||||
if g.Limit != nil && *g.Limit < 0 {
|
if g.Limit < 0 {
|
||||||
return fmt.Errorf("invalid limit %d, shouldn't be less than 0", *g.Limit)
|
return fmt.Errorf("invalid limit %d, shouldn't be less than 0", g.Limit)
|
||||||
}
|
}
|
||||||
if g.Concurrency < 0 {
|
if g.Concurrency < 0 {
|
||||||
return fmt.Errorf("invalid concurrency %d, shouldn't be less than 0", g.Concurrency)
|
return fmt.Errorf("invalid concurrency %d, shouldn't be less than 0", g.Concurrency)
|
||||||
|
|||||||
@@ -116,7 +116,7 @@ func TestParse_Failure(t *testing.T) {
|
|||||||
|
|
||||||
f([]string{"testdata/rules/rules_interval_bad.rules"}, "eval_offset should be smaller than interval")
|
f([]string{"testdata/rules/rules_interval_bad.rules"}, "eval_offset should be smaller than interval")
|
||||||
f([]string{"testdata/rules/rules0-bad.rules"}, "unexpected token")
|
f([]string{"testdata/rules/rules0-bad.rules"}, "unexpected token")
|
||||||
f([]string{"testdata/dir/rules0-bad.rules"}, "invalid annotations")
|
f([]string{"testdata/dir/rules0-bad.rules"}, "error parsing annotation")
|
||||||
f([]string{"testdata/dir/rules1-bad.rules"}, "duplicate in file")
|
f([]string{"testdata/dir/rules1-bad.rules"}, "duplicate in file")
|
||||||
f([]string{"testdata/dir/rules2-bad.rules"}, "function \"unknown\" not defined")
|
f([]string{"testdata/dir/rules2-bad.rules"}, "function \"unknown\" not defined")
|
||||||
f([]string{"testdata/dir/rules3-bad.rules"}, "either `record` or `alert` must be set")
|
f([]string{"testdata/dir/rules3-bad.rules"}, "either `record` or `alert` must be set")
|
||||||
@@ -176,21 +176,14 @@ func TestGroupValidate_Failure(t *testing.T) {
|
|||||||
}, false, "interval shouldn't be lower than 0")
|
}, false, "interval shouldn't be lower than 0")
|
||||||
|
|
||||||
f(&Group{
|
f(&Group{
|
||||||
Name: "too big eval_offset",
|
Name: "wrong eval_offset",
|
||||||
Interval: promutil.NewDuration(time.Minute),
|
Interval: promutil.NewDuration(time.Minute),
|
||||||
EvalOffset: promutil.NewDuration(2 * time.Minute),
|
EvalOffset: promutil.NewDuration(2 * time.Minute),
|
||||||
}, false, "eval_offset should be smaller than interval")
|
}, false, "eval_offset should be smaller than interval")
|
||||||
|
|
||||||
f(&Group{
|
|
||||||
Name: "too big negative eval_offset",
|
|
||||||
Interval: promutil.NewDuration(time.Minute),
|
|
||||||
EvalOffset: promutil.NewDuration(-2 * time.Minute),
|
|
||||||
}, false, "eval_offset should be smaller than interval")
|
|
||||||
|
|
||||||
limit := -1
|
|
||||||
f(&Group{
|
f(&Group{
|
||||||
Name: "wrong limit",
|
Name: "wrong limit",
|
||||||
Limit: &limit,
|
Limit: -1,
|
||||||
}, false, "invalid limit")
|
}, false, "invalid limit")
|
||||||
|
|
||||||
f(&Group{
|
f(&Group{
|
||||||
@@ -349,6 +342,7 @@ func TestGroupValidate_Failure(t *testing.T) {
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
}, true, "bad prometheus expr")
|
}, true, "bad prometheus expr")
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestGroupValidate_Success(t *testing.T) {
|
func TestGroupValidate_Success(t *testing.T) {
|
||||||
|
|||||||
@@ -2,7 +2,6 @@ package config
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"slices"
|
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"github.com/VictoriaMetrics/VictoriaLogs/lib/logstorage"
|
"github.com/VictoriaMetrics/VictoriaLogs/lib/logstorage"
|
||||||
@@ -77,12 +76,13 @@ func (t *Type) ValidateExpr(expr string) error {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("bad LogsQL expr: %q, err: %w", expr, err)
|
return fmt.Errorf("bad LogsQL expr: %q, err: %w", expr, err)
|
||||||
}
|
}
|
||||||
labels, err := q.GetStatsLabels()
|
fields, _ := q.GetStatsByFields()
|
||||||
if err != nil {
|
for i := range fields {
|
||||||
return fmt.Errorf("cannot obtain labels from LogsQL expr: %q, err: %w", expr, err)
|
// VictoriaLogs inserts `_time` field as a label in result when query with `stats by (_time:step)`,
|
||||||
}
|
// making the result meaningless and may lead to cardinality issues.
|
||||||
if slices.Contains(labels, "_time") {
|
if fields[i] == "_time" {
|
||||||
return fmt.Errorf("bad LogsQL expr: %q, err: cannot contain time buckets stats pipe `stats by (_time:step)`", expr)
|
return fmt.Errorf("bad LogsQL expr: %q, err: cannot contain time buckets stats pipe `stats by (_time:step)`", expr)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
return fmt.Errorf("unknown datasource type=%q", t.Name)
|
return fmt.Errorf("unknown datasource type=%q", t.Name)
|
||||||
|
|||||||
@@ -5,7 +5,6 @@ import (
|
|||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"maps"
|
|
||||||
"net/http"
|
"net/http"
|
||||||
"net/url"
|
"net/url"
|
||||||
"strings"
|
"strings"
|
||||||
@@ -92,7 +91,9 @@ func (c *Client) Clone() *Client {
|
|||||||
ns.extraHeaders = make([]keyValue, len(c.extraHeaders))
|
ns.extraHeaders = make([]keyValue, len(c.extraHeaders))
|
||||||
copy(ns.extraHeaders, c.extraHeaders)
|
copy(ns.extraHeaders, c.extraHeaders)
|
||||||
}
|
}
|
||||||
maps.Copy(ns.extraParams, c.extraParams)
|
for k, v := range c.extraParams {
|
||||||
|
ns.extraParams[k] = v
|
||||||
|
}
|
||||||
|
|
||||||
return ns
|
return ns
|
||||||
}
|
}
|
||||||
@@ -172,26 +173,22 @@ func (c *Client) Query(ctx context.Context, query string, ts time.Time) (Result,
|
|||||||
return Result{}, nil, fmt.Errorf("second attempt: %w", err)
|
return Result{}, nil, fmt.Errorf("second attempt: %w", err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
defer func() { _ = resp.Body.Close() }()
|
|
||||||
|
|
||||||
// Process the received response.
|
// Process the received response.
|
||||||
var parseFn func(resp *http.Response) (Result, error)
|
var parseFn func(req *http.Request, resp *http.Response) (Result, error)
|
||||||
switch c.dataSourceType {
|
switch c.dataSourceType {
|
||||||
case datasourcePrometheus:
|
case datasourcePrometheus:
|
||||||
parseFn = parsePrometheusInstantResponse
|
parseFn = parsePrometheusResponse
|
||||||
case datasourceGraphite:
|
case datasourceGraphite:
|
||||||
parseFn = parseGraphiteResponse
|
parseFn = parseGraphiteResponse
|
||||||
case datasourceVLogs:
|
case datasourceVLogs:
|
||||||
parseFn = parseVLogsInstantResponse
|
parseFn = parseVLogsResponse
|
||||||
default:
|
default:
|
||||||
logger.Panicf("BUG: unsupported datasource type %q to parse query response", c.dataSourceType)
|
logger.Panicf("BUG: unsupported datasource type %q to parse query response", c.dataSourceType)
|
||||||
}
|
}
|
||||||
|
result, err := parseFn(req, resp)
|
||||||
result, err := parseFn(resp)
|
_ = resp.Body.Close()
|
||||||
if err != nil {
|
return result, req, err
|
||||||
return Result{}, nil, fmt.Errorf("error parsing response from %q: %w", req.URL.Redacted(), err)
|
|
||||||
}
|
|
||||||
return result, req, nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// QueryRange executes the given query on the given time range.
|
// QueryRange executes the given query on the given time range.
|
||||||
@@ -232,23 +229,19 @@ func (c *Client) QueryRange(ctx context.Context, query string, start, end time.T
|
|||||||
return res, fmt.Errorf("second attempt: %w", err)
|
return res, fmt.Errorf("second attempt: %w", err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
defer func() { _ = resp.Body.Close() }()
|
|
||||||
|
|
||||||
// Process the received response.
|
// Process the received response.
|
||||||
var parseFn func(resp *http.Response) (Result, error)
|
var parseFn func(req *http.Request, resp *http.Response) (Result, error)
|
||||||
switch c.dataSourceType {
|
switch c.dataSourceType {
|
||||||
case datasourcePrometheus:
|
case datasourcePrometheus:
|
||||||
parseFn = parsePrometheusRangeResponse
|
parseFn = parsePrometheusResponse
|
||||||
case datasourceVLogs:
|
case datasourceVLogs:
|
||||||
parseFn = parseVLogsRangeResponse
|
parseFn = parseVLogsResponse
|
||||||
default:
|
default:
|
||||||
logger.Panicf("BUG: unsupported datasource type %q to parse query range response", c.dataSourceType)
|
logger.Panicf("BUG: unsupported datasource type %q to parse query range response", c.dataSourceType)
|
||||||
}
|
}
|
||||||
|
res, err = parseFn(req, resp)
|
||||||
res, err = parseFn(resp)
|
_ = resp.Body.Close()
|
||||||
if err != nil {
|
|
||||||
return Result{}, fmt.Errorf("error parsing response from %q: %w", req.URL.Redacted(), err)
|
|
||||||
}
|
|
||||||
return res, err
|
return res, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -33,10 +33,10 @@ func (r graphiteResponse) metrics() []Metric {
|
|||||||
return ms
|
return ms
|
||||||
}
|
}
|
||||||
|
|
||||||
// parseGraphiteResponse decodes the JSON body of resp into a graphiteResponse
// and returns its metrics as Result.Data. A decoding failure is returned as a wrapped error.
// NOTE(review): two revisions are shown side by side here; they differ only in the extra
// req parameter, which is used to add the redacted request URL to the error message.
func parseGraphiteResponse(resp *http.Response) (Result, error) {
|
func parseGraphiteResponse(req *http.Request, resp *http.Response) (Result, error) {
|
||||||
r := &graphiteResponse{}
|
r := &graphiteResponse{}
|
||||||
if err := json.NewDecoder(resp.Body).Decode(r); err != nil {
|
if err := json.NewDecoder(resp.Body).Decode(r); err != nil {
|
||||||
return Result{}, fmt.Errorf("error parsing graphite metrics: %w", err)
|
return Result{}, fmt.Errorf("error parsing graphite metrics for %s: %w", req.URL.Redacted(), err)
|
||||||
}
|
}
|
||||||
return Result{Data: r.metrics()}, nil
|
return Result{Data: r.metrics()}, nil
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -34,7 +34,7 @@ type promResponse struct {
|
|||||||
// Stats supported by VictoriaMetrics since v1.90
|
// Stats supported by VictoriaMetrics since v1.90
|
||||||
Stats struct {
|
Stats struct {
|
||||||
SeriesFetched *string `json:"seriesFetched,omitempty"`
|
SeriesFetched *string `json:"seriesFetched,omitempty"`
|
||||||
} `json:"stats"`
|
} `json:"stats,omitempty"`
|
||||||
// IsPartial supported by VictoriaMetrics
|
// IsPartial supported by VictoriaMetrics
|
||||||
IsPartial *bool `json:"isPartial,omitempty"`
|
IsPartial *bool `json:"isPartial,omitempty"`
|
||||||
}
|
}
|
||||||
@@ -172,26 +172,17 @@ const (
|
|||||||
rtVector, rtMatrix, rScalar = "vector", "matrix", "scalar"
|
rtVector, rtMatrix, rScalar = "vector", "matrix", "scalar"
|
||||||
)
|
)
|
||||||
|
|
||||||
func parsePromResponse(resp *http.Response) (*promResponse, error) {
|
func parsePrometheusResponse(req *http.Request, resp *http.Response) (res Result, err error) {
|
||||||
r := &promResponse{}
|
r := &promResponse{}
|
||||||
if err := json.NewDecoder(resp.Body).Decode(r); err != nil {
|
if err = json.NewDecoder(resp.Body).Decode(r); err != nil {
|
||||||
return nil, fmt.Errorf("failed to decode response: %w", err)
|
return res, fmt.Errorf("error parsing response from %s: %w", req.URL.Redacted(), err)
|
||||||
}
|
}
|
||||||
if r.Status == statusError {
|
if r.Status == statusError {
|
||||||
return nil, fmt.Errorf("response error %q: %s", r.ErrorType, r.Error)
|
return res, fmt.Errorf("response error, query: %s, errorType: %s, error: %s", req.URL.Redacted(), r.ErrorType, r.Error)
|
||||||
}
|
}
|
||||||
if r.Status != statusSuccess {
|
if r.Status != statusSuccess {
|
||||||
return nil, fmt.Errorf("unknown response status %q", r.Status)
|
return res, fmt.Errorf("unknown status: %s, Expected success or error", r.Status)
|
||||||
}
|
}
|
||||||
return r, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func parsePrometheusInstantResponse(resp *http.Response) (res Result, err error) {
|
|
||||||
r, err := parsePromResponse(resp)
|
|
||||||
if err != nil {
|
|
||||||
return res, fmt.Errorf("failed to parse response: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
var parseFn func() ([]Metric, error)
|
var parseFn func() ([]Metric, error)
|
||||||
switch r.Data.ResultType {
|
switch r.Data.ResultType {
|
||||||
case rtVector:
|
case rtVector:
|
||||||
@@ -200,6 +191,12 @@ func parsePrometheusInstantResponse(resp *http.Response) (res Result, err error)
|
|||||||
return res, fmt.Errorf("unmarshal err %w; \n %#v", err, string(r.Data.Result))
|
return res, fmt.Errorf("unmarshal err %w; \n %#v", err, string(r.Data.Result))
|
||||||
}
|
}
|
||||||
parseFn = pi.metrics
|
parseFn = pi.metrics
|
||||||
|
case rtMatrix:
|
||||||
|
var pr promRange
|
||||||
|
if err := json.Unmarshal(r.Data.Result, &pr.Result); err != nil {
|
||||||
|
return res, err
|
||||||
|
}
|
||||||
|
parseFn = pr.metrics
|
||||||
case rScalar:
|
case rScalar:
|
||||||
var ps promScalar
|
var ps promScalar
|
||||||
if err := json.Unmarshal(r.Data.Result, &ps); err != nil {
|
if err := json.Unmarshal(r.Data.Result, &ps); err != nil {
|
||||||
@@ -209,6 +206,7 @@ func parsePrometheusInstantResponse(resp *http.Response) (res Result, err error)
|
|||||||
default:
|
default:
|
||||||
return res, fmt.Errorf("unknown result type %q", r.Data.ResultType)
|
return res, fmt.Errorf("unknown result type %q", r.Data.ResultType)
|
||||||
}
|
}
|
||||||
|
|
||||||
ms, err := parseFn()
|
ms, err := parseFn()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return res, err
|
return res, err
|
||||||
@@ -224,34 +222,6 @@ func parsePrometheusInstantResponse(resp *http.Response) (res Result, err error)
|
|||||||
return res, nil
|
return res, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func parsePrometheusRangeResponse(resp *http.Response) (res Result, err error) {
|
|
||||||
r, err := parsePromResponse(resp)
|
|
||||||
if err != nil {
|
|
||||||
return res, fmt.Errorf("failed to parse response: %w", err)
|
|
||||||
}
|
|
||||||
if r.Data.ResultType != rtMatrix {
|
|
||||||
return res, fmt.Errorf("unexpected result type %q; expected result type %q", r.Data.ResultType, rtMatrix)
|
|
||||||
}
|
|
||||||
|
|
||||||
var pr promRange
|
|
||||||
if err := json.Unmarshal(r.Data.Result, &pr.Result); err != nil {
|
|
||||||
return res, err
|
|
||||||
}
|
|
||||||
ms, err := pr.metrics()
|
|
||||||
if err != nil {
|
|
||||||
return res, err
|
|
||||||
}
|
|
||||||
res = Result{Data: ms, IsPartial: r.IsPartial}
|
|
||||||
if r.Stats.SeriesFetched != nil {
|
|
||||||
intV, err := strconv.Atoi(*r.Stats.SeriesFetched)
|
|
||||||
if err != nil {
|
|
||||||
return res, fmt.Errorf("failed to convert stats.seriesFetched to int: %w", err)
|
|
||||||
}
|
|
||||||
res.SeriesFetched = &intV
|
|
||||||
}
|
|
||||||
return res, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (c *Client) setPrometheusInstantReqParams(r *http.Request, query string, timestamp time.Time) {
|
func (c *Client) setPrometheusInstantReqParams(r *http.Request, query string, timestamp time.Time) {
|
||||||
if c.appendTypePrefix {
|
if c.appendTypePrefix {
|
||||||
r.URL.Path += "/prometheus"
|
r.URL.Path += "/prometheus"
|
||||||
|
|||||||
@@ -65,23 +65,21 @@ func TestVMInstantQuery(t *testing.T) {
|
|||||||
case 3:
|
case 3:
|
||||||
w.Write([]byte(`{"status":"unknown"}`))
|
w.Write([]byte(`{"status":"unknown"}`))
|
||||||
case 4:
|
case 4:
|
||||||
w.Write([]byte(`{"status":"success","data":{"resultType":"vector"}}`))
|
w.Write([]byte(`{"status":"success","data":{"resultType":"matrix"}}`))
|
||||||
case 5:
|
case 5:
|
||||||
w.Write([]byte(`{"status":"success","data":{"resultType":"matrix","result":[{"metric":{"__name__":"vm_rows"},"values":[[1583786142,"13763"]]}]}}`))
|
|
||||||
case 6:
|
|
||||||
w.Write([]byte(`{"status":"success","data":{"resultType":"vector","result":[{"metric":{"__name__":"vm_rows","foo":"bar"},"value":[1583786142,"13763"]},{"metric":{"__name__":"vm_requests","foo":"baz"},"value":[1583786140,"2000"]}]}}`))
|
w.Write([]byte(`{"status":"success","data":{"resultType":"vector","result":[{"metric":{"__name__":"vm_rows","foo":"bar"},"value":[1583786142,"13763"]},{"metric":{"__name__":"vm_requests","foo":"baz"},"value":[1583786140,"2000"]}]}}`))
|
||||||
case 7:
|
case 6:
|
||||||
w.Write([]byte(`{"status":"success","data":{"resultType":"scalar","result":[1583786142, "1"]}}`))
|
w.Write([]byte(`{"status":"success","data":{"resultType":"scalar","result":[1583786142, "1"]}}`))
|
||||||
case 8:
|
case 7:
|
||||||
w.Write([]byte(`{"status":"success","data":{"resultType":"scalar","result":[1583786142, "1"]},"stats":{"seriesFetched": "42"}}`))
|
w.Write([]byte(`{"status":"success","data":{"resultType":"scalar","result":[1583786142, "1"]},"stats":{"seriesFetched": "42"}}`))
|
||||||
case 9:
|
case 8:
|
||||||
w.Write([]byte(`{"status":"success", "isPartial":true, "data":{"resultType":"scalar","result":[1583786142, "1"]}}`))
|
w.Write([]byte(`{"status":"success", "isPartial":true, "data":{"resultType":"scalar","result":[1583786142, "1"]}}`))
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
mux.HandleFunc("/render", func(w http.ResponseWriter, _ *http.Request) {
|
mux.HandleFunc("/render", func(w http.ResponseWriter, _ *http.Request) {
|
||||||
c++
|
c++
|
||||||
switch c {
|
switch c {
|
||||||
case 10:
|
case 9:
|
||||||
w.Write([]byte(`[{"target":"constantLine(10)","tags":{"name":"constantLine(10)"},"datapoints":[[10,1611758343],[10,1611758373],[10,1611758403]]}]`))
|
w.Write([]byte(`[{"target":"constantLine(10)","tags":{"name":"constantLine(10)"},"datapoints":[[10,1611758343],[10,1611758373],[10,1611758403]]}]`))
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
@@ -104,9 +102,9 @@ func TestVMInstantQuery(t *testing.T) {
|
|||||||
t.Fatalf("failed to parse 'time' query param %q: %s", timeParam, err)
|
t.Fatalf("failed to parse 'time' query param %q: %s", timeParam, err)
|
||||||
}
|
}
|
||||||
switch c {
|
switch c {
|
||||||
case 11:
|
case 10:
|
||||||
w.Write([]byte("[]"))
|
w.Write([]byte("[]"))
|
||||||
case 12:
|
case 11:
|
||||||
w.Write([]byte(`{"status":"success","data":{"resultType":"vector","result":[{"metric":{"__name__":"total","foo":"bar"},"value":[1583786142,"13763"]},{"metric":{"__name__":"total","foo":"baz"},"value":[1583786140,"2000"]}]}}`))
|
w.Write([]byte(`{"status":"success","data":{"resultType":"vector","result":[{"metric":{"__name__":"total","foo":"bar"},"value":[1583786142,"13763"]},{"metric":{"__name__":"total","foo":"baz"},"value":[1583786140,"2000"]}]}}`))
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
@@ -125,7 +123,6 @@ func TestVMInstantQuery(t *testing.T) {
|
|||||||
ts := time.Now()
|
ts := time.Now()
|
||||||
|
|
||||||
expErr := func(query, err string) {
|
expErr := func(query, err string) {
|
||||||
t.Helper()
|
|
||||||
_, _, gotErr := pq.Query(ctx, query, ts)
|
_, _, gotErr := pq.Query(ctx, query, ts)
|
||||||
if gotErr == nil {
|
if gotErr == nil {
|
||||||
t.Fatalf("expected %q got nil", err)
|
t.Fatalf("expected %q got nil", err)
|
||||||
@@ -138,11 +135,10 @@ func TestVMInstantQuery(t *testing.T) {
|
|||||||
expErr(vmQuery, "500") // 0
|
expErr(vmQuery, "500") // 0
|
||||||
expErr(vmQuery, "error parsing response") // 1
|
expErr(vmQuery, "error parsing response") // 1
|
||||||
expErr(vmQuery, "response error") // 2
|
expErr(vmQuery, "response error") // 2
|
||||||
expErr(vmQuery, "unknown response status") // 3
|
expErr(vmQuery, "unknown status") // 3
|
||||||
expErr(vmQuery, "unexpected end of JSON input") // 4
|
expErr(vmQuery, "unexpected end of JSON input") // 4
|
||||||
expErr(vmQuery, "unknown result type") // 5
|
|
||||||
|
|
||||||
res, _, err := pq.Query(ctx, vmQuery, ts) // 6 - vector
|
res, _, err := pq.Query(ctx, vmQuery, ts) // 5 - vector
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("unexpected %s", err)
|
t.Fatalf("unexpected %s", err)
|
||||||
}
|
}
|
||||||
@@ -163,7 +159,7 @@ func TestVMInstantQuery(t *testing.T) {
|
|||||||
}
|
}
|
||||||
metricsEqual(t, res.Data, expected)
|
metricsEqual(t, res.Data, expected)
|
||||||
|
|
||||||
res, req, err := pq.Query(ctx, vmQuery, ts) // 7 - scalar
|
res, req, err := pq.Query(ctx, vmQuery, ts) // 6 - scalar
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("unexpected %s", err)
|
t.Fatalf("unexpected %s", err)
|
||||||
}
|
}
|
||||||
@@ -188,7 +184,7 @@ func TestVMInstantQuery(t *testing.T) {
|
|||||||
res.SeriesFetched)
|
res.SeriesFetched)
|
||||||
}
|
}
|
||||||
|
|
||||||
res, _, err = pq.Query(ctx, vmQuery, ts) // 8 - scalar with stats
|
res, _, err = pq.Query(ctx, vmQuery, ts) // 7 - scalar with stats
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("unexpected %s", err)
|
t.Fatalf("unexpected %s", err)
|
||||||
}
|
}
|
||||||
@@ -209,7 +205,7 @@ func TestVMInstantQuery(t *testing.T) {
|
|||||||
*res.SeriesFetched)
|
*res.SeriesFetched)
|
||||||
}
|
}
|
||||||
|
|
||||||
res, _, err = pq.Query(ctx, vmQuery, ts) // 9
|
res, _, err = pq.Query(ctx, vmQuery, ts) // 8
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("unexpected %s", err)
|
t.Fatalf("unexpected %s", err)
|
||||||
}
|
}
|
||||||
@@ -220,7 +216,7 @@ func TestVMInstantQuery(t *testing.T) {
|
|||||||
// test graphite
|
// test graphite
|
||||||
gq := s.BuildWithParams(QuerierParams{DataSourceType: string(datasourceGraphite)})
|
gq := s.BuildWithParams(QuerierParams{DataSourceType: string(datasourceGraphite)})
|
||||||
|
|
||||||
res, _, err = gq.Query(ctx, queryRender, ts) // 10 - graphite
|
res, _, err = gq.Query(ctx, queryRender, ts) // 9 - graphite
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("unexpected %s", err)
|
t.Fatalf("unexpected %s", err)
|
||||||
}
|
}
|
||||||
@@ -240,9 +236,9 @@ func TestVMInstantQuery(t *testing.T) {
|
|||||||
vlogs := datasourceVLogs
|
vlogs := datasourceVLogs
|
||||||
pq = s.BuildWithParams(QuerierParams{DataSourceType: string(vlogs), EvaluationInterval: 15 * time.Second})
|
pq = s.BuildWithParams(QuerierParams{DataSourceType: string(vlogs), EvaluationInterval: 15 * time.Second})
|
||||||
|
|
||||||
expErr(vlogsQuery, "error parsing response") // 11
|
expErr(vlogsQuery, "error parsing response") // 10
|
||||||
|
|
||||||
res, _, err = pq.Query(ctx, vlogsQuery, ts) // 12
|
res, _, err = pq.Query(ctx, vlogsQuery, ts) // 11
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("unexpected %s", err)
|
t.Fatalf("unexpected %s", err)
|
||||||
}
|
}
|
||||||
@@ -394,8 +390,6 @@ func TestVMRangeQuery(t *testing.T) {
|
|||||||
switch c {
|
switch c {
|
||||||
case 0:
|
case 0:
|
||||||
w.Write([]byte(`{"status":"success","data":{"resultType":"matrix","result":[{"metric":{"__name__":"vm_rows"},"values":[[1583786142,"13763"]]}]}}`))
|
w.Write([]byte(`{"status":"success","data":{"resultType":"matrix","result":[{"metric":{"__name__":"vm_rows"},"values":[[1583786142,"13763"]]}]}}`))
|
||||||
case 1:
|
|
||||||
w.Write([]byte(`{"status":"success","data":{"resultType":"vector","result":[1583786142, "1"]}}`))
|
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
mux.HandleFunc("/select/logsql/stats_query_range", func(w http.ResponseWriter, r *http.Request) {
|
mux.HandleFunc("/select/logsql/stats_query_range", func(w http.ResponseWriter, r *http.Request) {
|
||||||
@@ -428,7 +422,7 @@ func TestVMRangeQuery(t *testing.T) {
|
|||||||
t.Fatalf("expected 'step' query param to be 60s; got %q instead", step)
|
t.Fatalf("expected 'step' query param to be 60s; got %q instead", step)
|
||||||
}
|
}
|
||||||
switch c {
|
switch c {
|
||||||
case 2:
|
case 1:
|
||||||
w.Write([]byte(`{"status":"success","data":{"resultType":"matrix","result":[{"metric":{"__name__":"total"},"values":[[1583786142,"10"]]}]}}`))
|
w.Write([]byte(`{"status":"success","data":{"resultType":"matrix","result":[{"metric":{"__name__":"total"},"values":[[1583786142,"10"]]}]}}`))
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
@@ -452,13 +446,13 @@ func TestVMRangeQuery(t *testing.T) {
|
|||||||
|
|
||||||
start, end := time.Now().Add(-time.Minute), time.Now()
|
start, end := time.Now().Add(-time.Minute), time.Now()
|
||||||
|
|
||||||
res, err := pq.QueryRange(ctx, vmQuery, start, end) // case 0
|
res, err := pq.QueryRange(ctx, vmQuery, start, end)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("unexpected %s", err)
|
t.Fatalf("unexpected %s", err)
|
||||||
}
|
}
|
||||||
m := res.Data
|
m := res.Data
|
||||||
if len(m) != 1 {
|
if len(m) != 1 {
|
||||||
t.Fatalf("expected 1 metric got %d in %+v", len(m), m)
|
t.Fatalf("expected 1 metric got %d in %+v", len(m), m)
|
||||||
}
|
}
|
||||||
expected := Metric{
|
expected := Metric{
|
||||||
Labels: []prompb.Label{{Value: "vm_rows", Name: "__name__"}},
|
Labels: []prompb.Label{{Value: "vm_rows", Name: "__name__"}},
|
||||||
@@ -469,9 +463,6 @@ func TestVMRangeQuery(t *testing.T) {
|
|||||||
t.Fatalf("unexpected metric %+v want %+v", m[0], expected)
|
t.Fatalf("unexpected metric %+v want %+v", m[0], expected)
|
||||||
}
|
}
|
||||||
|
|
||||||
_, err = pq.QueryRange(ctx, vmQuery, start, end) // case 1
|
|
||||||
expectError(t, err, "unexpected result type")
|
|
||||||
|
|
||||||
// test unsupported graphite
|
// test unsupported graphite
|
||||||
gq := s.BuildWithParams(QuerierParams{DataSourceType: string(datasourceGraphite)})
|
gq := s.BuildWithParams(QuerierParams{DataSourceType: string(datasourceGraphite)})
|
||||||
|
|
||||||
|
|||||||
@@ -40,28 +40,8 @@ func (c *Client) setVLogsRangeReqParams(r *http.Request, query string, start, en
|
|||||||
c.setReqParams(r, query)
|
c.setReqParams(r, query)
|
||||||
}
|
}
|
||||||
|
|
||||||
func parseVLogsInstantResponse(resp *http.Response) (res Result, err error) {
|
func parseVLogsResponse(req *http.Request, resp *http.Response) (res Result, err error) {
|
||||||
res, err = parsePrometheusInstantResponse(resp)
|
res, err = parsePrometheusResponse(req, resp)
|
||||||
if err != nil {
|
|
||||||
return Result{}, err
|
|
||||||
}
|
|
||||||
for i := range res.Data {
|
|
||||||
m := &res.Data[i]
|
|
||||||
for j := range m.Labels {
|
|
||||||
// reserve the stats func result name with a new label `stats_result` instead of dropping it,
|
|
||||||
// since there could be multiple stats results in a single query, for instance:
|
|
||||||
// _time:5m | stats quantile(0.5, request_duration_seconds) p50, quantile(0.9, request_duration_seconds) p90
|
|
||||||
if m.Labels[j].Name == "__name__" {
|
|
||||||
m.Labels[j].Name = "stats_result"
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
func parseVLogsRangeResponse(resp *http.Response) (res Result, err error) {
|
|
||||||
res, err = parsePrometheusRangeResponse(resp)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return Result{}, err
|
return Result{}, err
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -132,9 +132,12 @@ func (ls Labels) String() string {
|
|||||||
// a=[]Label{{Name: "a", Value: "2"}},b=[]Label{{Name: "a", Value: "1"}}, return 1
|
// a=[]Label{{Name: "a", Value: "2"}},b=[]Label{{Name: "a", Value: "1"}}, return 1
|
||||||
// a=[]Label{{Name: "a", Value: "1"}},b=[]Label{{Name: "a", Value: "1"}}, return 0
|
// a=[]Label{{Name: "a", Value: "1"}},b=[]Label{{Name: "a", Value: "1"}}, return 0
|
||||||
func LabelCompare(a, b Labels) int {
|
func LabelCompare(a, b Labels) int {
|
||||||
l := min(len(b), len(a))
|
l := len(a)
|
||||||
|
if len(b) < l {
|
||||||
|
l = len(b)
|
||||||
|
}
|
||||||
|
|
||||||
for i := range l {
|
for i := 0; i < l; i++ {
|
||||||
if a[i].Name != b[i].Name {
|
if a[i].Name != b[i].Name {
|
||||||
if a[i].Name < b[i].Name {
|
if a[i].Name < b[i].Name {
|
||||||
return -1
|
return -1
|
||||||
|
|||||||
@@ -13,7 +13,7 @@ func BenchmarkPromInstantUnmarshal(b *testing.B) {
|
|||||||
|
|
||||||
// BenchmarkParsePrometheusResponse/Instant_std+fastjson-10 1760 668959 ns/op 280147 B/op 5781 allocs/op
|
// BenchmarkParsePrometheusResponse/Instant_std+fastjson-10 1760 668959 ns/op 280147 B/op 5781 allocs/op
|
||||||
b.Run("Instant std+fastjson", func(b *testing.B) {
|
b.Run("Instant std+fastjson", func(b *testing.B) {
|
||||||
for range b.N {
|
for i := 0; i < b.N; i++ {
|
||||||
var pi promInstant
|
var pi promInstant
|
||||||
err = pi.Unmarshal(data)
|
err = pi.Unmarshal(data)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|||||||
@@ -7,6 +7,7 @@ import (
|
|||||||
"net/url"
|
"net/url"
|
||||||
"os"
|
"os"
|
||||||
"sort"
|
"sort"
|
||||||
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
@@ -56,7 +57,7 @@ absolute path to all .tpl files in root.
|
|||||||
-rule.templates="dir/**/*.tpl". Includes all the .tpl files in "dir" subfolders recursively.
|
-rule.templates="dir/**/*.tpl". Includes all the .tpl files in "dir" subfolders recursively.
|
||||||
`)
|
`)
|
||||||
|
|
||||||
configCheckInterval = flag.Duration("configCheckInterval", 0, "Interval for checking for changes in '-rule', '-rule.templates' and '-notifier.config' files. "+
|
configCheckInterval = flag.Duration("configCheckInterval", 0, "Interval for checking for changes in '-rule' or '-notifier.config' files. "+
|
||||||
"By default, the checking is disabled. Send SIGHUP signal in order to force config check for changes.")
|
"By default, the checking is disabled. Send SIGHUP signal in order to force config check for changes.")
|
||||||
|
|
||||||
httpListenAddrs = flagutil.NewArrayString("httpListenAddr", "Address to listen for incoming http requests. See also -tls and -httpListenAddr.useProxyProtocol")
|
httpListenAddrs = flagutil.NewArrayString("httpListenAddr", "Address to listen for incoming http requests. See also -tls and -httpListenAddr.useProxyProtocol")
|
||||||
@@ -76,12 +77,15 @@ absolute path to all .tpl files in root.
|
|||||||
`Link to VMUI: -external.alert.source='vmui/#/?g0.expr={{.Expr|queryEscape}}'. `+
|
`Link to VMUI: -external.alert.source='vmui/#/?g0.expr={{.Expr|queryEscape}}'. `+
|
||||||
`If empty 'vmalert/alert?group_id={{.GroupID}}&alert_id={{.AlertID}}' is used.`)
|
`If empty 'vmalert/alert?group_id={{.GroupID}}&alert_id={{.AlertID}}' is used.`)
|
||||||
externalLabels = flagutil.NewArrayString("external.label", "Optional label in the form 'Name=value' to add to all generated recording rules and alerts. "+
|
externalLabels = flagutil.NewArrayString("external.label", "Optional label in the form 'Name=value' to add to all generated recording rules and alerts. "+
|
||||||
"In case of conflicts, original labels are kept with prefix 'exported_'.")
|
"In case of conflicts, original labels are kept with prefix `exported_`.")
|
||||||
|
|
||||||
dryRun = flag.Bool("dryRun", false, "Whether to check only config files without running vmalert. The rules file are validated. The -rule flag must be specified.")
|
dryRun = flag.Bool("dryRun", false, "Whether to check only config files without running vmalert. The rules file are validated. The -rule flag must be specified.")
|
||||||
)
|
)
|
||||||
|
|
||||||
var extURL *url.URL
|
var (
|
||||||
|
alertURLGeneratorFn notifier.AlertURLGenerator
|
||||||
|
extURL *url.URL
|
||||||
|
)
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
// Write flags and help message to stdout, since it is easier to grep or pipe.
|
// Write flags and help message to stdout, since it is easier to grep or pipe.
|
||||||
@@ -117,7 +121,7 @@ func main() {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
err = notifier.InitAlertURLGeneratorFn(extURL, *externalAlertSource, *validateTemplates)
|
alertURLGeneratorFn, err = getAlertURLGenerator(extURL, *externalAlertSource, *validateTemplates)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logger.Fatalf("failed to init `external.alert.source`: %s", err)
|
logger.Fatalf("failed to init `external.alert.source`: %s", err)
|
||||||
}
|
}
|
||||||
@@ -159,7 +163,7 @@ func main() {
|
|||||||
ctx, cancel := context.WithCancel(context.Background())
|
ctx, cancel := context.WithCancel(context.Background())
|
||||||
manager, err := newManager(ctx)
|
manager, err := newManager(ctx)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logger.Fatalf("failed to create manager: %s", err)
|
logger.Fatalf("failed to init: %s", err)
|
||||||
}
|
}
|
||||||
logger.Infof("reading rules configuration file from %q", strings.Join(*rulePath, ";"))
|
logger.Infof("reading rules configuration file from %q", strings.Join(*rulePath, ";"))
|
||||||
groupsCfg, err := config.Parse(*rulePath, validateTplFn, *validateExpressions)
|
groupsCfg, err := config.Parse(*rulePath, validateTplFn, *validateExpressions)
|
||||||
@@ -224,13 +228,14 @@ func newManager(ctx context.Context) (*manager, error) {
|
|||||||
labels[s[:n]] = s[n+1:]
|
labels[s[:n]] = s[n+1:]
|
||||||
}
|
}
|
||||||
|
|
||||||
err = notifier.Init(labels, *externalURL)
|
nts, err := notifier.Init(alertURLGeneratorFn, labels, *externalURL)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("failed to init notifier: %w", err)
|
return nil, fmt.Errorf("failed to init notifier: %w", err)
|
||||||
}
|
}
|
||||||
manager := &manager{
|
manager := &manager{
|
||||||
groups: make(map[uint64]*rule.Group),
|
groups: make(map[uint64]*rule.Group),
|
||||||
querierBuilder: q,
|
querierBuilder: q,
|
||||||
|
notifiers: nts,
|
||||||
labels: labels,
|
labels: labels,
|
||||||
}
|
}
|
||||||
rw, err := remotewrite.Init(ctx)
|
rw, err := remotewrite.Init(ctx)
|
||||||
@@ -287,6 +292,35 @@ func getHostnameAsExternalURL(addr string, isSecure bool) (*url.URL, error) {
|
|||||||
return url.Parse(fmt.Sprintf("%s%s%s", schema, hname, port))
|
return url.Parse(fmt.Sprintf("%s%s%s", schema, hname, port))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func getAlertURLGenerator(externalURL *url.URL, externalAlertSource string, validateTemplate bool) (notifier.AlertURLGenerator, error) {
|
||||||
|
if externalAlertSource == "" {
|
||||||
|
return func(a notifier.Alert) string {
|
||||||
|
gID, aID := strconv.FormatUint(a.GroupID, 10), strconv.FormatUint(a.ID, 10)
|
||||||
|
return fmt.Sprintf("%s/vmalert/alert?%s=%s&%s=%s", externalURL, paramGroupID, gID, paramAlertID, aID)
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
if validateTemplate {
|
||||||
|
if err := notifier.ValidateTemplates(map[string]string{
|
||||||
|
"tpl": externalAlertSource,
|
||||||
|
}); err != nil {
|
||||||
|
return nil, fmt.Errorf("error validating source template %s: %w", externalAlertSource, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
m := map[string]string{
|
||||||
|
"tpl": externalAlertSource,
|
||||||
|
}
|
||||||
|
return func(alert notifier.Alert) string {
|
||||||
|
qFn := func(_ string) ([]datasource.Metric, error) {
|
||||||
|
return nil, fmt.Errorf("`query` template isn't supported for alert source template")
|
||||||
|
}
|
||||||
|
templated, err := alert.ExecTemplate(qFn, alert.Labels, m)
|
||||||
|
if err != nil {
|
||||||
|
logger.Errorf("cannot template alert source: %s", err)
|
||||||
|
}
|
||||||
|
return fmt.Sprintf("%s/%s", externalURL, templated["tpl"])
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
func usage() {
|
func usage() {
|
||||||
const s = `
|
const s = `
|
||||||
vmalert processes alerts and recording rules.
|
vmalert processes alerts and recording rules.
|
||||||
|
|||||||
@@ -49,6 +49,30 @@ func TestGetExternalURL(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestGetAlertURLGenerator(t *testing.T) {
|
||||||
|
testAlert := notifier.Alert{GroupID: 42, ID: 2, Value: 4, Labels: map[string]string{"tenant": "baz"}}
|
||||||
|
u, _ := url.Parse("https://victoriametrics.com/path")
|
||||||
|
fn, err := getAlertURLGenerator(u, "", false)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("unexpected error %s", err)
|
||||||
|
}
|
||||||
|
exp := fmt.Sprintf("https://victoriametrics.com/path/vmalert/alert?%s=42&%s=2", paramGroupID, paramAlertID)
|
||||||
|
if exp != fn(testAlert) {
|
||||||
|
t.Fatalf("unexpected url want %s, got %s", exp, fn(testAlert))
|
||||||
|
}
|
||||||
|
_, err = getAlertURLGenerator(nil, "foo?{{invalid}}", true)
|
||||||
|
if err == nil {
|
||||||
|
t.Fatalf("expected template validation error got nil")
|
||||||
|
}
|
||||||
|
fn, err = getAlertURLGenerator(u, "foo?query={{$value}}&ds={{ $labels.tenant }}", true)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("unexpected error %s", err)
|
||||||
|
}
|
||||||
|
if exp := "https://victoriametrics.com/path/foo?query=4&ds=baz"; exp != fn(testAlert) {
|
||||||
|
t.Fatalf("unexpected url want %s, got %s", exp, fn(testAlert))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestConfigReload(t *testing.T) {
|
func TestConfigReload(t *testing.T) {
|
||||||
originalRulePath := *rulePath
|
originalRulePath := *rulePath
|
||||||
originalExternalURL := extURL
|
originalExternalURL := extURL
|
||||||
@@ -96,10 +120,9 @@ groups:
|
|||||||
querierBuilder: &datasource.FakeQuerier{},
|
querierBuilder: &datasource.FakeQuerier{},
|
||||||
groups: make(map[uint64]*rule.Group),
|
groups: make(map[uint64]*rule.Group),
|
||||||
labels: map[string]string{},
|
labels: map[string]string{},
|
||||||
|
notifiers: func() []notifier.Notifier { return []notifier.Notifier{¬ifier.FakeNotifier{}} },
|
||||||
rw: &remotewrite.Client{},
|
rw: &remotewrite.Client{},
|
||||||
}
|
}
|
||||||
_, cleanup := notifier.InitFakeNotifier()
|
|
||||||
defer cleanup()
|
|
||||||
|
|
||||||
syncCh := make(chan struct{})
|
syncCh := make(chan struct{})
|
||||||
sighupCh := procutil.NewSighupChan()
|
sighupCh := procutil.NewSighupChan()
|
||||||
|
|||||||
@@ -3,7 +3,6 @@ package main
|
|||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
"strconv"
|
|
||||||
"sync"
|
"sync"
|
||||||
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config"
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config"
|
||||||
@@ -17,6 +16,7 @@ import (
|
|||||||
// manager controls group states
|
// manager controls group states
|
||||||
type manager struct {
|
type manager struct {
|
||||||
querierBuilder datasource.QuerierBuilder
|
querierBuilder datasource.QuerierBuilder
|
||||||
|
notifiers func() []notifier.Notifier
|
||||||
|
|
||||||
rw remotewrite.RWClient
|
rw remotewrite.RWClient
|
||||||
// remote read builder.
|
// remote read builder.
|
||||||
@@ -29,8 +29,25 @@ type manager struct {
|
|||||||
groups map[uint64]*rule.Group
|
groups map[uint64]*rule.Group
|
||||||
}
|
}
|
||||||
|
|
||||||
// groupAPI generates apiGroup object from group by its ID(hash)
|
// ruleAPI generates apiRule object from alert by its ID(hash)
|
||||||
func (m *manager) groupAPI(gID uint64) (*rule.ApiGroup, error) {
|
func (m *manager) ruleAPI(gID, rID uint64) (apiRule, error) {
|
||||||
|
m.groupsMu.RLock()
|
||||||
|
defer m.groupsMu.RUnlock()
|
||||||
|
|
||||||
|
g, ok := m.groups[gID]
|
||||||
|
if !ok {
|
||||||
|
return apiRule{}, fmt.Errorf("can't find group with id %d", gID)
|
||||||
|
}
|
||||||
|
for _, rule := range g.Rules {
|
||||||
|
if rule.ID() == rID {
|
||||||
|
return ruleToAPI(rule), nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return apiRule{}, fmt.Errorf("can't find rule with id %d in group %q", rID, g.Name)
|
||||||
|
}
|
||||||
|
|
||||||
|
// alertAPI generates apiAlert object from alert by its ID(hash)
|
||||||
|
func (m *manager) alertAPI(gID, aID uint64) (*apiAlert, error) {
|
||||||
m.groupsMu.RLock()
|
m.groupsMu.RLock()
|
||||||
defer m.groupsMu.RUnlock()
|
defer m.groupsMu.RUnlock()
|
||||||
|
|
||||||
@@ -38,47 +55,13 @@ func (m *manager) groupAPI(gID uint64) (*rule.ApiGroup, error) {
|
|||||||
if !ok {
|
if !ok {
|
||||||
return nil, fmt.Errorf("can't find group with id %d", gID)
|
return nil, fmt.Errorf("can't find group with id %d", gID)
|
||||||
}
|
}
|
||||||
return g.ToAPI(), nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// ruleAPI generates apiRule object from alert by its ID(hash)
|
|
||||||
func (m *manager) ruleAPI(gID, rID uint64) (rule.ApiRule, error) {
|
|
||||||
m.groupsMu.RLock()
|
|
||||||
defer m.groupsMu.RUnlock()
|
|
||||||
|
|
||||||
group, ok := m.groups[gID]
|
|
||||||
if !ok {
|
|
||||||
return rule.ApiRule{}, fmt.Errorf("can't find group with id %d", gID)
|
|
||||||
}
|
|
||||||
g := group.ToAPI()
|
|
||||||
ruleID := strconv.FormatUint(rID, 10)
|
|
||||||
for _, r := range g.Rules {
|
for _, r := range g.Rules {
|
||||||
if r.ID == ruleID {
|
ar, ok := r.(*rule.AlertingRule)
|
||||||
return r, nil
|
if !ok {
|
||||||
}
|
|
||||||
}
|
|
||||||
return rule.ApiRule{}, fmt.Errorf("can't find rule with id %d in group %q", rID, g.Name)
|
|
||||||
}
|
|
||||||
|
|
||||||
// alertAPI generates apiAlert object from alert by its ID(hash)
|
|
||||||
func (m *manager) alertAPI(gID, aID uint64) (*rule.ApiAlert, error) {
|
|
||||||
m.groupsMu.RLock()
|
|
||||||
defer m.groupsMu.RUnlock()
|
|
||||||
|
|
||||||
group, ok := m.groups[gID]
|
|
||||||
if !ok {
|
|
||||||
return nil, fmt.Errorf("can't find group with id %d", gID)
|
|
||||||
}
|
|
||||||
g := group.ToAPI()
|
|
||||||
for _, r := range g.Rules {
|
|
||||||
if r.Type != rule.TypeAlerting {
|
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
alertID := strconv.FormatUint(aID, 10)
|
if apiAlert := alertToAPI(ar, aID); apiAlert != nil {
|
||||||
for _, a := range r.Alerts {
|
return apiAlert, nil
|
||||||
if a.ID == alertID {
|
|
||||||
return a, nil
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return nil, fmt.Errorf("can't find alert with id %d in group %q", aID, g.Name)
|
return nil, fmt.Errorf("can't find alert with id %d in group %q", aID, g.Name)
|
||||||
@@ -98,18 +81,20 @@ func (m *manager) close() {
|
|||||||
m.wg.Wait()
|
m.wg.Wait()
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *manager) startGroup(ctx context.Context, g *rule.Group, restore bool) {
|
func (m *manager) startGroup(ctx context.Context, g *rule.Group, restore bool) error {
|
||||||
|
m.wg.Add(1)
|
||||||
id := g.GetID()
|
id := g.GetID()
|
||||||
g.Init()
|
g.Init()
|
||||||
m.wg.Go(func() {
|
go func() {
|
||||||
|
defer m.wg.Done()
|
||||||
if restore {
|
if restore {
|
||||||
g.Start(ctx, m.rw, m.rr)
|
g.Start(ctx, m.notifiers, m.rw, m.rr)
|
||||||
} else {
|
} else {
|
||||||
g.Start(ctx, m.rw, nil)
|
g.Start(ctx, m.notifiers, m.rw, nil)
|
||||||
}
|
}
|
||||||
})
|
}()
|
||||||
|
|
||||||
m.groups[id] = g
|
m.groups[id] = g
|
||||||
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *manager) update(ctx context.Context, groupsCfg []config.Group, restore bool) error {
|
func (m *manager) update(ctx context.Context, groupsCfg []config.Group, restore bool) error {
|
||||||
@@ -118,7 +103,7 @@ func (m *manager) update(ctx context.Context, groupsCfg []config.Group, restore
|
|||||||
for _, cfg := range groupsCfg {
|
for _, cfg := range groupsCfg {
|
||||||
for _, r := range cfg.Rules {
|
for _, r := range cfg.Rules {
|
||||||
if rrPresent && arPresent {
|
if rrPresent && arPresent {
|
||||||
break
|
continue
|
||||||
}
|
}
|
||||||
if r.Record != "" {
|
if r.Record != "" {
|
||||||
rrPresent = true
|
rrPresent = true
|
||||||
@@ -134,7 +119,7 @@ func (m *manager) update(ctx context.Context, groupsCfg []config.Group, restore
|
|||||||
if rrPresent && m.rw == nil {
|
if rrPresent && m.rw == nil {
|
||||||
return fmt.Errorf("config contains recording rules but `-remoteWrite.url` isn't set")
|
return fmt.Errorf("config contains recording rules but `-remoteWrite.url` isn't set")
|
||||||
}
|
}
|
||||||
if arPresent && notifier.GetTargets() == nil {
|
if arPresent && m.notifiers == nil {
|
||||||
return fmt.Errorf("config contains alerting rules but neither `-notifier.url` nor `-notifier.config` nor `-notifier.blackhole` aren't set")
|
return fmt.Errorf("config contains alerting rules but neither `-notifier.url` nor `-notifier.config` nor `-notifier.blackhole` aren't set")
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -161,22 +146,25 @@ func (m *manager) update(ctx context.Context, groupsCfg []config.Group, restore
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
for _, ng := range groupsRegistry {
|
for _, ng := range groupsRegistry {
|
||||||
m.startGroup(ctx, ng, restore)
|
if err := m.startGroup(ctx, ng, restore); err != nil {
|
||||||
|
m.groupsMu.Unlock()
|
||||||
|
return err
|
||||||
|
}
|
||||||
}
|
}
|
||||||
m.groupsMu.Unlock()
|
m.groupsMu.Unlock()
|
||||||
|
|
||||||
if len(toUpdate) > 0 {
|
if len(toUpdate) > 0 {
|
||||||
var wg sync.WaitGroup
|
var wg sync.WaitGroup
|
||||||
for _, item := range toUpdate {
|
for _, item := range toUpdate {
|
||||||
oldG := item.old
|
wg.Add(1)
|
||||||
newG := item.new
|
// cancel evaluation so the Update will be applied as fast as possible.
|
||||||
wg.Go(func() {
|
// it is important to call InterruptEval before the update, because cancel fn
|
||||||
// cancel evaluation so the Update will be applied as fast as possible.
|
// can be re-assigned during the update.
|
||||||
// it is important to call InterruptEval before the update, because cancel fn
|
item.old.InterruptEval()
|
||||||
// can be re-assigned during the update.
|
go func(oldGroup *rule.Group, newGroup *rule.Group) {
|
||||||
oldG.InterruptEval()
|
oldGroup.UpdateWith(newGroup)
|
||||||
oldG.UpdateWith(newG)
|
wg.Done()
|
||||||
})
|
}(item.old, item.new)
|
||||||
}
|
}
|
||||||
wg.Wait()
|
wg.Wait()
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -40,11 +40,10 @@ func TestManagerEmptyRulesDir(t *testing.T) {
|
|||||||
// execution of configuration update.
|
// execution of configuration update.
|
||||||
// Should be executed with -race flag
|
// Should be executed with -race flag
|
||||||
func TestManagerUpdateConcurrent(t *testing.T) {
|
func TestManagerUpdateConcurrent(t *testing.T) {
|
||||||
_, cleanup := notifier.InitFakeNotifier()
|
|
||||||
defer cleanup()
|
|
||||||
m := &manager{
|
m := &manager{
|
||||||
groups: make(map[uint64]*rule.Group),
|
groups: make(map[uint64]*rule.Group),
|
||||||
querierBuilder: &datasource.FakeQuerier{},
|
querierBuilder: &datasource.FakeQuerier{},
|
||||||
|
notifiers: func() []notifier.Notifier { return []notifier.Notifier{&notifier.FakeNotifier{}} },
|
||||||
}
|
}
|
||||||
paths := []string{
|
paths := []string{
|
||||||
"config/testdata/dir/rules0-good.rules",
|
"config/testdata/dir/rules0-good.rules",
|
||||||
@@ -65,11 +64,13 @@ func TestManagerUpdateConcurrent(t *testing.T) {
|
|||||||
|
|
||||||
const workers = 500
|
const workers = 500
|
||||||
const iterations = 10
|
const iterations = 10
|
||||||
var wg sync.WaitGroup
|
wg := sync.WaitGroup{}
|
||||||
for n := range workers {
|
wg.Add(workers)
|
||||||
wg.Go(func() {
|
for i := 0; i < workers; i++ {
|
||||||
|
go func(n int) {
|
||||||
|
defer wg.Done()
|
||||||
r := rand.New(rand.NewSource(int64(n)))
|
r := rand.New(rand.NewSource(int64(n)))
|
||||||
for range iterations {
|
for i := 0; i < iterations; i++ {
|
||||||
rnd := r.Intn(len(paths))
|
rnd := r.Intn(len(paths))
|
||||||
cfg, err := config.Parse([]string{paths[rnd]}, notifier.ValidateTemplates, true)
|
cfg, err := config.Parse([]string{paths[rnd]}, notifier.ValidateTemplates, true)
|
||||||
if err != nil { // update can fail and this is expected
|
if err != nil { // update can fail and this is expected
|
||||||
@@ -77,7 +78,7 @@ func TestManagerUpdateConcurrent(t *testing.T) {
|
|||||||
}
|
}
|
||||||
_ = m.update(context.Background(), cfg, false)
|
_ = m.update(context.Background(), cfg, false)
|
||||||
}
|
}
|
||||||
})
|
}(i)
|
||||||
}
|
}
|
||||||
wg.Wait()
|
wg.Wait()
|
||||||
}
|
}
|
||||||
@@ -126,9 +127,8 @@ func TestManagerUpdate_Success(t *testing.T) {
|
|||||||
m := &manager{
|
m := &manager{
|
||||||
groups: make(map[uint64]*rule.Group),
|
groups: make(map[uint64]*rule.Group),
|
||||||
querierBuilder: &datasource.FakeQuerier{},
|
querierBuilder: &datasource.FakeQuerier{},
|
||||||
|
notifiers: func() []notifier.Notifier { return []notifier.Notifier{&notifier.FakeNotifier{}} },
|
||||||
}
|
}
|
||||||
_, cleanup := notifier.InitFakeNotifier()
|
|
||||||
defer cleanup()
|
|
||||||
|
|
||||||
cfgInit := loadCfg(t, []string{initPath}, true, true)
|
cfgInit := loadCfg(t, []string{initPath}, true, true)
|
||||||
if err := m.update(ctx, cfgInit, false); err != nil {
|
if err := m.update(ctx, cfgInit, false); err != nil {
|
||||||
@@ -259,7 +259,7 @@ func compareGroups(t *testing.T, a, b *rule.Group) {
|
|||||||
for i, r := range a.Rules {
|
for i, r := range a.Rules {
|
||||||
got, want := r, b.Rules[i]
|
got, want := r, b.Rules[i]
|
||||||
if a.CreateID() != b.CreateID() {
|
if a.CreateID() != b.CreateID() {
|
||||||
t.Fatalf("expected to have rule %d; got %d", want.ID(), got.ID())
|
t.Fatalf("expected to have rule %q; got %q", want.ID(), got.ID())
|
||||||
}
|
}
|
||||||
if err := rule.CompareRules(t, want, got); err != nil {
|
if err := rule.CompareRules(t, want, got); err != nil {
|
||||||
t.Fatalf("comparison error: %s", err)
|
t.Fatalf("comparison error: %s", err)
|
||||||
@@ -277,8 +277,7 @@ func TestManagerUpdate_Failure(t *testing.T) {
|
|||||||
rw: rw,
|
rw: rw,
|
||||||
}
|
}
|
||||||
if notifiers != nil {
|
if notifiers != nil {
|
||||||
_, cleanup := notifier.InitFakeNotifier()
|
m.notifiers = func() []notifier.Notifier { return notifiers }
|
||||||
defer cleanup()
|
|
||||||
}
|
}
|
||||||
err := m.update(context.Background(), []config.Group{cfg}, false)
|
err := m.update(context.Background(), []config.Group{cfg}, false)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
|
|||||||
@@ -80,15 +80,14 @@ func (as AlertState) String() string {
|
|||||||
|
|
||||||
// AlertTplData is used to execute templating
|
// AlertTplData is used to execute templating
|
||||||
type AlertTplData struct {
|
type AlertTplData struct {
|
||||||
Type string
|
Type string
|
||||||
Labels map[string]string
|
Labels map[string]string
|
||||||
Value float64
|
Value float64
|
||||||
Expr string
|
Expr string
|
||||||
AlertID uint64
|
AlertID uint64
|
||||||
GroupID uint64
|
GroupID uint64
|
||||||
ActiveAt time.Time
|
ActiveAt time.Time
|
||||||
For time.Duration
|
For time.Duration
|
||||||
IsPartial bool
|
|
||||||
}
|
}
|
||||||
|
|
||||||
var tplHeaders = []string{
|
var tplHeaders = []string{
|
||||||
@@ -102,7 +101,6 @@ var tplHeaders = []string{
|
|||||||
"{{ $groupID := .GroupID }}",
|
"{{ $groupID := .GroupID }}",
|
||||||
"{{ $activeAt := .ActiveAt }}",
|
"{{ $activeAt := .ActiveAt }}",
|
||||||
"{{ $for := .For }}",
|
"{{ $for := .For }}",
|
||||||
"{{ $isPartial := .IsPartial }}",
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// ExecTemplate executes the Alert template for given
|
// ExecTemplate executes the Alert template for given
|
||||||
@@ -168,8 +166,8 @@ func templateAnnotations(annotations map[string]string, data AlertTplData, tmpl
|
|||||||
ctmpl, _ := tmpl.Clone()
|
ctmpl, _ := tmpl.Clone()
|
||||||
ctmpl = ctmpl.Option("missingkey=zero")
|
ctmpl = ctmpl.Option("missingkey=zero")
|
||||||
if err := templateAnnotation(&buf, builder.String(), tData, ctmpl, execute); err != nil {
|
if err := templateAnnotation(&buf, builder.String(), tData, ctmpl, execute); err != nil {
|
||||||
r[key] = err.Error()
|
r[key] = text
|
||||||
eg.Add(fmt.Errorf("(key: %q, value: %q): %w", key, text, err))
|
eg.Add(fmt.Errorf("key %q, template %q: %w", key, text, err))
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
r[key] = buf.String()
|
r[key] = buf.String()
|
||||||
@@ -186,13 +184,13 @@ type tplData struct {
|
|||||||
func templateAnnotation(dst io.Writer, text string, data tplData, tpl *textTpl.Template, execute bool) error {
|
func templateAnnotation(dst io.Writer, text string, data tplData, tpl *textTpl.Template, execute bool) error {
|
||||||
tpl, err := tpl.Parse(text)
|
tpl, err := tpl.Parse(text)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("error parsing template: %w", err)
|
return fmt.Errorf("error parsing annotation template: %w", err)
|
||||||
}
|
}
|
||||||
if !execute {
|
if !execute {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
if err = tpl.Execute(dst, data); err != nil {
|
if err = tpl.Execute(dst, data); err != nil {
|
||||||
return fmt.Errorf("error evaluating template: %w", err)
|
return fmt.Errorf("error evaluating annotation template: %w", err)
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -20,7 +20,7 @@ func TestAlertExecTemplate(t *testing.T) {
|
|||||||
)
|
)
|
||||||
extLabels["cluster"] = extCluster
|
extLabels["cluster"] = extCluster
|
||||||
extLabels["dc"] = extDC
|
extLabels["dc"] = extDC
|
||||||
err := Init(extLabels, extURL)
|
_, err := Init(nil, extLabels, extURL)
|
||||||
checkErr(t, err)
|
checkErr(t, err)
|
||||||
|
|
||||||
f := func(alert *Alert, annotations map[string]string, tplExpected map[string]string) {
|
f := func(alert *Alert, annotations map[string]string, tplExpected map[string]string) {
|
||||||
|
|||||||
@@ -3,7 +3,6 @@ package notifier
|
|||||||
import (
|
import (
|
||||||
"bytes"
|
"bytes"
|
||||||
"context"
|
"context"
|
||||||
"errors"
|
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"net/http"
|
"net/http"
|
||||||
@@ -14,6 +13,7 @@ import (
|
|||||||
"github.com/VictoriaMetrics/metrics"
|
"github.com/VictoriaMetrics/metrics"
|
||||||
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/vmalertutil"
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/vmalertutil"
|
||||||
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httputil"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
|
||||||
@@ -22,11 +22,10 @@ import (
|
|||||||
// AlertManager represents integration provider with Prometheus alert manager
|
// AlertManager represents integration provider with Prometheus alert manager
|
||||||
// https://github.com/prometheus/alertmanager
|
// https://github.com/prometheus/alertmanager
|
||||||
type AlertManager struct {
|
type AlertManager struct {
|
||||||
addr *url.URL
|
addr *url.URL
|
||||||
argFunc AlertURLGenerator
|
argFunc AlertURLGenerator
|
||||||
client *http.Client
|
client *http.Client
|
||||||
timeout time.Duration
|
timeout time.Duration
|
||||||
lastError string
|
|
||||||
|
|
||||||
authCfg *promauth.Config
|
authCfg *promauth.Config
|
||||||
// stores already parsed RelabelConfigs object
|
// stores already parsed RelabelConfigs object
|
||||||
@@ -72,42 +71,24 @@ func (am AlertManager) Addr() string {
|
|||||||
return am.addr.Redacted()
|
return am.addr.Redacted()
|
||||||
}
|
}
|
||||||
|
|
||||||
func (am *AlertManager) LastError() string {
|
|
||||||
return am.lastError
|
|
||||||
}
|
|
||||||
|
|
||||||
// Send an alert or resolve message
|
// Send an alert or resolve message
|
||||||
func (am *AlertManager) Send(ctx context.Context, alerts []Alert, alertLabels [][]prompb.Label, headers map[string]string) error {
|
func (am *AlertManager) Send(ctx context.Context, alerts []Alert, headers map[string]string) error {
|
||||||
if len(alerts) != len(alertLabels) {
|
|
||||||
return fmt.Errorf("mismatched number of alerts and label sets after global alert relabeling")
|
|
||||||
}
|
|
||||||
am.metrics.alertsSent.Add(len(alerts))
|
am.metrics.alertsSent.Add(len(alerts))
|
||||||
startTime := time.Now()
|
startTime := time.Now()
|
||||||
err := am.send(ctx, alerts, alertLabels, headers)
|
err := am.send(ctx, alerts, headers)
|
||||||
am.metrics.alertsSendDuration.UpdateDuration(startTime)
|
am.metrics.alertsSendDuration.UpdateDuration(startTime)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
// the context can be cancelled on graceful shutdown
|
|
||||||
// or on group update. So no need to handle the error as usual.
|
|
||||||
if errors.Is(err, context.Canceled) {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
am.metrics.alertsSendErrors.Add(len(alerts))
|
am.metrics.alertsSendErrors.Add(len(alerts))
|
||||||
am.lastError = err.Error()
|
|
||||||
} else {
|
|
||||||
am.lastError = ""
|
|
||||||
}
|
}
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
func (am *AlertManager) send(ctx context.Context, alerts []Alert, alertLabels [][]prompb.Label, headers map[string]string) error {
|
func (am *AlertManager) send(ctx context.Context, alerts []Alert, headers map[string]string) error {
|
||||||
b := &bytes.Buffer{}
|
b := &bytes.Buffer{}
|
||||||
alertsToSend := make([]Alert, 0, len(alerts))
|
alertsToSend := make([]Alert, 0, len(alerts))
|
||||||
lblss := make([][]prompb.Label, 0, len(alerts))
|
lblss := make([][]prompb.Label, 0, len(alerts))
|
||||||
for i, a := range alerts {
|
for _, a := range alerts {
|
||||||
lbls := alertLabels[i]
|
lbls := a.applyRelabelingIfNeeded(am.relabelConfigs)
|
||||||
if am.relabelConfigs != nil {
|
|
||||||
lbls = am.relabelConfigs.Apply(lbls, 0)
|
|
||||||
}
|
|
||||||
if len(lbls) == 0 {
|
if len(lbls) == 0 {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
@@ -171,6 +152,11 @@ const alertManagerPath = "/api/v2/alerts"
|
|||||||
func NewAlertManager(alertManagerURL string, fn AlertURLGenerator, authCfg promauth.HTTPClientConfig,
|
func NewAlertManager(alertManagerURL string, fn AlertURLGenerator, authCfg promauth.HTTPClientConfig,
|
||||||
relabelCfg *promrelabel.ParsedConfigs, timeout time.Duration,
|
relabelCfg *promrelabel.ParsedConfigs, timeout time.Duration,
|
||||||
) (*AlertManager, error) {
|
) (*AlertManager, error) {
|
||||||
|
|
||||||
|
if err := httputil.CheckURL(alertManagerURL); err != nil {
|
||||||
|
return nil, fmt.Errorf("invalid alertmanager URL: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
tls := &promauth.TLSConfig{}
|
tls := &promauth.TLSConfig{}
|
||||||
if authCfg.TLSConfig != nil {
|
if authCfg.TLSConfig != nil {
|
||||||
tls = authCfg.TLSConfig
|
tls = authCfg.TLSConfig
|
||||||
|
|||||||
@@ -11,7 +11,6 @@ import (
|
|||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -146,11 +145,11 @@ func TestAlertManager_Send(t *testing.T) {
|
|||||||
t.Fatalf("unexpected error: %s", err)
|
t.Fatalf("unexpected error: %s", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := am.Send(context.Background(), []Alert{{Labels: map[string]string{"a": "b"}}}, [][]prompb.Label{{{Name: "a", Value: "b"}}}, nil); err == nil {
|
if err := am.Send(context.Background(), []Alert{{Labels: map[string]string{"a": "b"}}}, nil); err == nil {
|
||||||
t.Fatalf("expected connection error got nil")
|
t.Fatalf("expected connection error got nil")
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := am.Send(context.Background(), []Alert{{Labels: map[string]string{"a": "b"}}}, [][]prompb.Label{{{Name: "a", Value: "b"}}}, nil); err == nil {
|
if err := am.Send(context.Background(), []Alert{{Labels: map[string]string{"a": "b"}}}, nil); err == nil {
|
||||||
t.Fatalf("expected wrong http code error got nil")
|
t.Fatalf("expected wrong http code error got nil")
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -161,7 +160,7 @@ func TestAlertManager_Send(t *testing.T) {
|
|||||||
End: time.Now().UTC(),
|
End: time.Now().UTC(),
|
||||||
Labels: map[string]string{"alertname": "alert0"},
|
Labels: map[string]string{"alertname": "alert0"},
|
||||||
Annotations: map[string]string{"a": "b", "c": "d"},
|
Annotations: map[string]string{"a": "b", "c": "d"},
|
||||||
}}, [][]prompb.Label{{{Name: "alertname", Value: "alert0"}}}, map[string]string{headerKey: "bar"}); err != nil {
|
}}, map[string]string{headerKey: "bar"}); err != nil {
|
||||||
t.Fatalf("unexpected error %s", err)
|
t.Fatalf("unexpected error %s", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -175,7 +174,7 @@ func TestAlertManager_Send(t *testing.T) {
|
|||||||
Name: "alert2",
|
Name: "alert2",
|
||||||
Labels: map[string]string{"rule": "test", "tenant": "1"},
|
Labels: map[string]string{"rule": "test", "tenant": "1"},
|
||||||
},
|
},
|
||||||
}, [][]prompb.Label{{{Name: "rule", Value: "test"}, {Name: "tenant", Value: "0"}}, {{Name: "rule", Value: "test"}, {Name: "tenant", Value: "1"}}}, map[string]string{headerKey: "bar"}); err != nil {
|
}, map[string]string{headerKey: "bar"}); err != nil {
|
||||||
t.Fatalf("unexpected error %s", err)
|
t.Fatalf("unexpected error %s", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -188,7 +187,7 @@ func TestAlertManager_Send(t *testing.T) {
|
|||||||
Name: "alert2",
|
Name: "alert2",
|
||||||
Labels: map[string]string{},
|
Labels: map[string]string{},
|
||||||
},
|
},
|
||||||
}, [][]prompb.Label{{{Name: "rule", Value: "test"}}, {{}}}, map[string]string{}); err != nil {
|
}, map[string]string{}); err != nil {
|
||||||
t.Fatalf("unexpected error %s", err)
|
t.Fatalf("unexpected error %s", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -27,9 +27,15 @@ type Config struct {
|
|||||||
// PathPrefix is added to URL path before adding alertManagerPath value
|
// PathPrefix is added to URL path before adding alertManagerPath value
|
||||||
PathPrefix string `yaml:"path_prefix,omitempty"`
|
PathPrefix string `yaml:"path_prefix,omitempty"`
|
||||||
|
|
||||||
ConsulSDConfigs []ConsulSDConfigs `yaml:"consul_sd_configs,omitempty"`
|
// ConsulSDConfigs contains list of settings for service discovery via Consul
|
||||||
DNSSDConfigs []DNSSDConfigs `yaml:"dns_sd_configs,omitempty"`
|
// see https://prometheus.io/docs/prometheus/latest/configuration/configuration/#consul_sd_config
|
||||||
StaticConfigs []StaticConfig `yaml:"static_configs,omitempty"`
|
ConsulSDConfigs []consul.SDConfig `yaml:"consul_sd_configs,omitempty"`
|
||||||
|
// DNSSDConfigs contains list of settings for service discovery via DNS.
|
||||||
|
// See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#dns_sd_config
|
||||||
|
DNSSDConfigs []dns.SDConfig `yaml:"dns_sd_configs,omitempty"`
|
||||||
|
|
||||||
|
// StaticConfigs contains list of static targets
|
||||||
|
StaticConfigs []StaticConfig `yaml:"static_configs,omitempty"`
|
||||||
|
|
||||||
// HTTPClientConfig contains HTTP configuration for Notifier clients
|
// HTTPClientConfig contains HTTP configuration for Notifier clients
|
||||||
HTTPClientConfig promauth.HTTPClientConfig `yaml:",inline"`
|
HTTPClientConfig promauth.HTTPClientConfig `yaml:",inline"`
|
||||||
@@ -56,29 +62,14 @@ type Config struct {
|
|||||||
parsedAlertRelabelConfigs *promrelabel.ParsedConfigs
|
parsedAlertRelabelConfigs *promrelabel.ParsedConfigs
|
||||||
}
|
}
|
||||||
|
|
||||||
// staticConfig contains list of static targets in the following form:
|
// StaticConfig contains list of static targets in the following form:
|
||||||
//
|
//
|
||||||
// targets:
|
// targets:
|
||||||
// [ - '<host>' ]
|
// [ - '<host>' ]
|
||||||
type StaticConfig struct {
|
type StaticConfig struct {
|
||||||
Targets []string `yaml:"targets"`
|
Targets []string `yaml:"targets"`
|
||||||
// HTTPClientConfig contains HTTP configuration for the Targets
|
// HTTPClientConfig contains HTTP configuration for the Targets
|
||||||
HTTPClientConfig promauth.HTTPClientConfig `yaml:",inline"`
|
HTTPClientConfig promauth.HTTPClientConfig `yaml:",inline"`
|
||||||
AlertRelabelConfigs []promrelabel.RelabelConfig `yaml:"alert_relabel_configs,omitempty"`
|
|
||||||
}
|
|
||||||
|
|
||||||
// ConsulSDConfigs contains list of settings for service discovery via Consul,
|
|
||||||
// see https://prometheus.io/docs/prometheus/latest/configuration/configuration/#consul_sd_config
|
|
||||||
type ConsulSDConfigs struct {
|
|
||||||
consul.SDConfig `yaml:",inline"`
|
|
||||||
AlertRelabelConfigs []promrelabel.RelabelConfig `yaml:"alert_relabel_configs,omitempty"`
|
|
||||||
}
|
|
||||||
|
|
||||||
// DNSSDConfigs contains list of settings for service discovery via DNS,
|
|
||||||
// See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#dns_sd_config
|
|
||||||
type DNSSDConfigs struct {
|
|
||||||
dns.SDConfig `yaml:",inline"`
|
|
||||||
AlertRelabelConfigs []promrelabel.RelabelConfig `yaml:"alert_relabel_configs,omitempty"`
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// UnmarshalYAML implements the yaml.Unmarshaler interface.
|
// UnmarshalYAML implements the yaml.Unmarshaler interface.
|
||||||
@@ -104,31 +95,6 @@ func (cfg *Config) UnmarshalYAML(unmarshal func(any) error) error {
|
|||||||
}
|
}
|
||||||
cfg.parsedAlertRelabelConfigs = arCfg
|
cfg.parsedAlertRelabelConfigs = arCfg
|
||||||
|
|
||||||
for _, s := range cfg.StaticConfigs {
|
|
||||||
if len(s.AlertRelabelConfigs) > 0 {
|
|
||||||
_, err := promrelabel.ParseRelabelConfigs(s.AlertRelabelConfigs)
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("failed to parse alert_relabel_configs in static_config: %w", err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for _, s := range cfg.ConsulSDConfigs {
|
|
||||||
if len(s.AlertRelabelConfigs) > 0 {
|
|
||||||
_, err := promrelabel.ParseRelabelConfigs(s.AlertRelabelConfigs)
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("failed to parse alert_relabel_configs in consul_sd_config: %w", err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for _, s := range cfg.DNSSDConfigs {
|
|
||||||
if len(s.AlertRelabelConfigs) > 0 {
|
|
||||||
_, err := promrelabel.ParseRelabelConfigs(s.AlertRelabelConfigs)
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("failed to parse alert_relabel_configs in dns_sd_config: %w", err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
b, err := yaml.Marshal(cfg)
|
b, err := yaml.Marshal(cfg)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("failed to marshal configuration for checksum: %w", err)
|
return fmt.Errorf("failed to marshal configuration for checksum: %w", err)
|
||||||
|
|||||||
@@ -35,6 +35,4 @@ func TestParseConfig_Failure(t *testing.T) {
|
|||||||
|
|
||||||
f("testdata/unknownFields.bad.yaml", "unknown field")
|
f("testdata/unknownFields.bad.yaml", "unknown field")
|
||||||
f("non-existing-file", "error reading")
|
f("non-existing-file", "error reading")
|
||||||
f("testdata/consul.bad.yaml", "failed to parse alert_relabel_configs in consul_sd_config")
|
|
||||||
f("testdata/dns.bad.yaml", "failed to parse alert relabeling config")
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -8,7 +8,6 @@ import (
|
|||||||
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discovery/consul"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discovery/consul"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discovery/dns"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discovery/dns"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutil"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutil"
|
||||||
@@ -29,7 +28,11 @@ type configWatcher struct {
|
|||||||
targets map[TargetType][]Target
|
targets map[TargetType][]Target
|
||||||
}
|
}
|
||||||
|
|
||||||
func newWatcher(cfg *Config, gen AlertURLGenerator) (*configWatcher, error) {
|
func newWatcher(path string, gen AlertURLGenerator) (*configWatcher, error) {
|
||||||
|
cfg, err := parseConfig(path)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
cw := &configWatcher{
|
cw := &configWatcher{
|
||||||
cfg: cfg,
|
cfg: cfg,
|
||||||
wg: sync.WaitGroup{},
|
wg: sync.WaitGroup{},
|
||||||
@@ -85,15 +88,18 @@ func (cw *configWatcher) reload(path string) error {
|
|||||||
return cw.start()
|
return cw.start()
|
||||||
}
|
}
|
||||||
|
|
||||||
func (cw *configWatcher) add(typeK TargetType, interval time.Duration, targetsFn getTargets) error {
|
func (cw *configWatcher) add(typeK TargetType, interval time.Duration, labelsFn getLabels) error {
|
||||||
targetMetadata, errors := getTargetMetadata(targetsFn, cw.cfg)
|
targetMetadata, errors := getTargetMetadata(labelsFn, cw.cfg)
|
||||||
for _, err := range errors {
|
for _, err := range errors {
|
||||||
return fmt.Errorf("failed to init notifier for %q: %w", typeK, err)
|
return fmt.Errorf("failed to init notifier for %q: %w", typeK, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
cw.updateTargets(typeK, targetMetadata, cw.cfg, cw.genFn)
|
cw.updateTargets(typeK, targetMetadata, cw.cfg, cw.genFn)
|
||||||
|
|
||||||
cw.wg.Go(func() {
|
cw.wg.Add(1)
|
||||||
|
go func() {
|
||||||
|
defer cw.wg.Done()
|
||||||
|
|
||||||
ticker := time.NewTicker(interval)
|
ticker := time.NewTicker(interval)
|
||||||
defer ticker.Stop()
|
defer ticker.Stop()
|
||||||
|
|
||||||
@@ -103,77 +109,62 @@ func (cw *configWatcher) add(typeK TargetType, interval time.Duration, targetsFn
|
|||||||
return
|
return
|
||||||
case <-ticker.C:
|
case <-ticker.C:
|
||||||
}
|
}
|
||||||
targetMetadata, errors := getTargetMetadata(targetsFn, cw.cfg)
|
targetMetadata, errors := getTargetMetadata(labelsFn, cw.cfg)
|
||||||
for _, err := range errors {
|
for _, err := range errors {
|
||||||
logger.Errorf("failed to init notifier for %q: %w", typeK, err)
|
logger.Errorf("failed to init notifier for %q: %w", typeK, err)
|
||||||
}
|
}
|
||||||
cw.updateTargets(typeK, targetMetadata, cw.cfg, cw.genFn)
|
cw.updateTargets(typeK, targetMetadata, cw.cfg, cw.genFn)
|
||||||
}
|
}
|
||||||
})
|
}()
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
type targetMetadata struct {
|
func getTargetMetadata(labelsFn getLabels, cfg *Config) (map[string]*promutil.Labels, []error) {
|
||||||
*promutil.Labels
|
metaLabels, err := labelsFn()
|
||||||
alertRelabelConfigs *promrelabel.ParsedConfigs
|
|
||||||
}
|
|
||||||
|
|
||||||
func getTargetMetadata(targetsFn getTargets, cfg *Config) (map[string]targetMetadata, []error) {
|
|
||||||
metaLabelsList, alertRelabelCfgs, err := targetsFn()
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, []error{fmt.Errorf("failed to get labels: %w", err)}
|
return nil, []error{fmt.Errorf("failed to get labels: %w", err)}
|
||||||
}
|
}
|
||||||
targetMts := make(map[string]targetMetadata, len(metaLabelsList))
|
targetMetadata := make(map[string]*promutil.Labels, len(metaLabels))
|
||||||
var errors []error
|
var errors []error
|
||||||
duplicates := make(map[string]struct{})
|
duplicates := make(map[string]struct{})
|
||||||
for i := range metaLabelsList {
|
for _, labels := range metaLabels {
|
||||||
metaLabels := metaLabelsList[i]
|
target := labels.Get("__address__")
|
||||||
alertRelabelCfg := alertRelabelCfgs[i]
|
u, processedLabels, err := parseLabels(target, labels, cfg)
|
||||||
for _, labels := range metaLabels {
|
if err != nil {
|
||||||
target := labels.Get("__address__")
|
errors = append(errors, err)
|
||||||
u, processedLabels, err := parseLabels(target, labels, cfg)
|
continue
|
||||||
if err != nil {
|
|
||||||
errors = append(errors, err)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
if len(u) == 0 {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
// check for duplicated targets
|
|
||||||
// targets with same address but different alert_relabel_configs are still considered duplicates since it's mostly due to misconfiguration and could cause duplicated notifications.
|
|
||||||
if _, ok := duplicates[u]; ok {
|
|
||||||
if !*suppressDuplicateTargetErrors {
|
|
||||||
logger.Errorf("skipping duplicate target with identical address %q; "+
|
|
||||||
"make sure service discovery and relabeling is set up properly; "+
|
|
||||||
"original labels: %s; resulting labels: %s",
|
|
||||||
u, labels, processedLabels)
|
|
||||||
}
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
duplicates[u] = struct{}{}
|
|
||||||
targetMts[u] = targetMetadata{
|
|
||||||
Labels: processedLabels,
|
|
||||||
alertRelabelConfigs: alertRelabelCfg,
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
if len(u) == 0 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if _, ok := duplicates[u]; ok { // check for duplicates
|
||||||
|
if !*suppressDuplicateTargetErrors {
|
||||||
|
logger.Errorf("skipping duplicate target with identical address %q; "+
|
||||||
|
"make sure service discovery and relabeling is set up properly; "+
|
||||||
|
"original labels: %s; resulting labels: %s",
|
||||||
|
u, labels, processedLabels)
|
||||||
|
}
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
duplicates[u] = struct{}{}
|
||||||
|
targetMetadata[u] = processedLabels
|
||||||
}
|
}
|
||||||
return targetMts, errors
|
return targetMetadata, errors
|
||||||
}
|
}
|
||||||
|
|
||||||
type getTargets func() ([][]*promutil.Labels, []*promrelabel.ParsedConfigs, error)
|
type getLabels func() ([]*promutil.Labels, error)
|
||||||
|
|
||||||
func (cw *configWatcher) start() error {
|
func (cw *configWatcher) start() error {
|
||||||
if len(cw.cfg.StaticConfigs) > 0 {
|
if len(cw.cfg.StaticConfigs) > 0 {
|
||||||
var targets []Target
|
var targets []Target
|
||||||
for i, cfg := range cw.cfg.StaticConfigs {
|
for _, cfg := range cw.cfg.StaticConfigs {
|
||||||
alertRelabelConfig, _ := promrelabel.ParseRelabelConfigs(cw.cfg.StaticConfigs[i].AlertRelabelConfigs)
|
|
||||||
httpCfg := mergeHTTPClientConfigs(cw.cfg.HTTPClientConfig, cfg.HTTPClientConfig)
|
httpCfg := mergeHTTPClientConfigs(cw.cfg.HTTPClientConfig, cfg.HTTPClientConfig)
|
||||||
for _, target := range cfg.Targets {
|
for _, target := range cfg.Targets {
|
||||||
address, labels, err := parseLabels(target, nil, cw.cfg)
|
address, labels, err := parseLabels(target, nil, cw.cfg)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("failed to parse labels for target %q: %w", target, err)
|
return fmt.Errorf("failed to parse labels for target %q: %w", target, err)
|
||||||
}
|
}
|
||||||
notifier, err := NewAlertManager(address, cw.genFn, httpCfg, alertRelabelConfig, cw.cfg.Timeout.Duration())
|
notifier, err := NewAlertManager(address, cw.genFn, httpCfg, cw.cfg.parsedAlertRelabelConfigs, cw.cfg.Timeout.Duration())
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("failed to init alertmanager for addr %q: %w", address, err)
|
return fmt.Errorf("failed to init alertmanager for addr %q: %w", address, err)
|
||||||
}
|
}
|
||||||
@@ -187,20 +178,17 @@ func (cw *configWatcher) start() error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if len(cw.cfg.ConsulSDConfigs) > 0 {
|
if len(cw.cfg.ConsulSDConfigs) > 0 {
|
||||||
err := cw.add(TargetConsul, *consul.SDCheckInterval, func() ([][]*promutil.Labels, []*promrelabel.ParsedConfigs, error) {
|
err := cw.add(TargetConsul, *consul.SDCheckInterval, func() ([]*promutil.Labels, error) {
|
||||||
var labels [][]*promutil.Labels
|
var labels []*promutil.Labels
|
||||||
var alertRelabelConfigs []*promrelabel.ParsedConfigs
|
|
||||||
for i := range cw.cfg.ConsulSDConfigs {
|
for i := range cw.cfg.ConsulSDConfigs {
|
||||||
alertRelabelConfig, _ := promrelabel.ParseRelabelConfigs(cw.cfg.ConsulSDConfigs[i].AlertRelabelConfigs)
|
|
||||||
sdc := &cw.cfg.ConsulSDConfigs[i]
|
sdc := &cw.cfg.ConsulSDConfigs[i]
|
||||||
targetLabels, err := sdc.GetLabels(cw.cfg.baseDir)
|
targetLabels, err := sdc.GetLabels(cw.cfg.baseDir)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, nil, fmt.Errorf("got labels err: %w", err)
|
return nil, fmt.Errorf("got labels err: %w", err)
|
||||||
}
|
}
|
||||||
labels = append(labels, targetLabels)
|
labels = append(labels, targetLabels...)
|
||||||
alertRelabelConfigs = append(alertRelabelConfigs, alertRelabelConfig)
|
|
||||||
}
|
}
|
||||||
return labels, alertRelabelConfigs, nil
|
return labels, nil
|
||||||
})
|
})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("failed to start consulSD discovery: %w", err)
|
return fmt.Errorf("failed to start consulSD discovery: %w", err)
|
||||||
@@ -208,21 +196,17 @@ func (cw *configWatcher) start() error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if len(cw.cfg.DNSSDConfigs) > 0 {
|
if len(cw.cfg.DNSSDConfigs) > 0 {
|
||||||
err := cw.add(TargetDNS, *dns.SDCheckInterval, func() ([][]*promutil.Labels, []*promrelabel.ParsedConfigs, error) {
|
err := cw.add(TargetDNS, *dns.SDCheckInterval, func() ([]*promutil.Labels, error) {
|
||||||
var labels [][]*promutil.Labels
|
var labels []*promutil.Labels
|
||||||
var alertRelabelConfigs []*promrelabel.ParsedConfigs
|
|
||||||
for i := range cw.cfg.DNSSDConfigs {
|
for i := range cw.cfg.DNSSDConfigs {
|
||||||
alertRelabelConfig, _ := promrelabel.ParseRelabelConfigs(cw.cfg.DNSSDConfigs[i].AlertRelabelConfigs)
|
|
||||||
sdc := &cw.cfg.DNSSDConfigs[i]
|
sdc := &cw.cfg.DNSSDConfigs[i]
|
||||||
targetLabels, err := sdc.GetLabels(cw.cfg.baseDir)
|
targetLabels, err := sdc.GetLabels(cw.cfg.baseDir)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, nil, fmt.Errorf("got labels err: %w", err)
|
return nil, fmt.Errorf("got labels err: %w", err)
|
||||||
}
|
}
|
||||||
labels = append(labels, targetLabels)
|
labels = append(labels, targetLabels...)
|
||||||
alertRelabelConfigs = append(alertRelabelConfigs, alertRelabelConfig)
|
|
||||||
|
|
||||||
}
|
}
|
||||||
return labels, alertRelabelConfigs, nil
|
return labels, nil
|
||||||
})
|
})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("failed to start DNSSD discovery: %w", err)
|
return fmt.Errorf("failed to start DNSSD discovery: %w", err)
|
||||||
@@ -256,30 +240,30 @@ func (cw *configWatcher) setTargets(key TargetType, targets []Target) {
|
|||||||
cw.targetsMu.Unlock()
|
cw.targetsMu.Unlock()
|
||||||
}
|
}
|
||||||
|
|
||||||
func (cw *configWatcher) updateTargets(key TargetType, targetMts map[string]targetMetadata, cfg *Config, genFn AlertURLGenerator) {
|
func (cw *configWatcher) updateTargets(key TargetType, targetMetadata map[string]*promutil.Labels, cfg *Config, genFn AlertURLGenerator) {
|
||||||
cw.targetsMu.Lock()
|
cw.targetsMu.Lock()
|
||||||
defer cw.targetsMu.Unlock()
|
defer cw.targetsMu.Unlock()
|
||||||
oldTargets := cw.targets[key]
|
oldTargets := cw.targets[key]
|
||||||
var updatedTargets []Target
|
var updatedTargets []Target
|
||||||
for _, ot := range oldTargets {
|
for _, ot := range oldTargets {
|
||||||
if _, ok := targetMts[ot.Addr()]; !ok {
|
if _, ok := targetMetadata[ot.Addr()]; !ok {
|
||||||
// if target not exists in currentTargets, close it
|
// if target not exists in currentTargets, close it
|
||||||
ot.Close()
|
ot.Close()
|
||||||
} else {
|
} else {
|
||||||
updatedTargets = append(updatedTargets, ot)
|
updatedTargets = append(updatedTargets, ot)
|
||||||
delete(targetMts, ot.Addr())
|
delete(targetMetadata, ot.Addr())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// create new resources for the new targets
|
// create new resources for the new targets
|
||||||
for addr, metadata := range targetMts {
|
for addr, labels := range targetMetadata {
|
||||||
am, err := NewAlertManager(addr, genFn, cfg.HTTPClientConfig, metadata.alertRelabelConfigs, cfg.Timeout.Duration())
|
am, err := NewAlertManager(addr, genFn, cfg.HTTPClientConfig, cfg.parsedAlertRelabelConfigs, cfg.Timeout.Duration())
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logger.Errorf("failed to init %s notifier with addr %q: %w", key, addr, err)
|
logger.Errorf("failed to init %s notifier with addr %q: %w", key, addr, err)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
updatedTargets = append(updatedTargets, Target{
|
updatedTargets = append(updatedTargets, Target{
|
||||||
Notifier: am,
|
Notifier: am,
|
||||||
Labels: metadata.Labels,
|
Labels: labels,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -7,7 +7,6 @@ import (
|
|||||||
"net/http/httptest"
|
"net/http/httptest"
|
||||||
"os"
|
"os"
|
||||||
"sync"
|
"sync"
|
||||||
"sync/atomic"
|
|
||||||
"testing"
|
"testing"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
@@ -29,11 +28,7 @@ static_configs:
|
|||||||
- localhost:9093
|
- localhost:9093
|
||||||
- localhost:9094
|
- localhost:9094
|
||||||
`)
|
`)
|
||||||
cfg, err := parseConfig(f.Name())
|
cw, err := newWatcher(f.Name(), nil)
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("failed to parse config: %s", err)
|
|
||||||
}
|
|
||||||
cw, err := newWatcher(cfg, nil)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("failed to start config watcher: %s", err)
|
t.Fatalf("failed to start config watcher: %s", err)
|
||||||
}
|
}
|
||||||
@@ -88,64 +83,33 @@ consul_sd_configs:
|
|||||||
- server: %s
|
- server: %s
|
||||||
services:
|
services:
|
||||||
- alertmanager
|
- alertmanager
|
||||||
- server: %s
|
`, consulSDServer.URL))
|
||||||
services:
|
|
||||||
- alertmanager
|
|
||||||
alert_relabel_configs:
|
|
||||||
- target_label: "foo"
|
|
||||||
replacement: "tar"
|
|
||||||
`, consulSDServer.URL, consulSDServer.URL))
|
|
||||||
|
|
||||||
cfg, err := parseConfig(consulSDFile.Name())
|
cw, err := newWatcher(consulSDFile.Name(), nil)
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("failed to parse config: %s", err)
|
|
||||||
}
|
|
||||||
cw, err := newWatcher(cfg, nil)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("failed to start config watcher: %s", err)
|
t.Fatalf("failed to start config watcher: %s", err)
|
||||||
}
|
}
|
||||||
defer cw.mustStop()
|
defer cw.mustStop()
|
||||||
|
|
||||||
if len(cw.notifiers()) != 3 {
|
if len(cw.notifiers()) != 2 {
|
||||||
t.Fatalf("expected to get 3 notifiers; got %d", len(cw.notifiers()))
|
t.Fatalf("expected to get 2 notifiers; got %d", len(cw.notifiers()))
|
||||||
}
|
}
|
||||||
|
|
||||||
expAddr1 := fmt.Sprintf("https://%s/proxy/api/v2/alerts", fakeConsulService1)
|
expAddr1 := fmt.Sprintf("https://%s/proxy/api/v2/alerts", fakeConsulService1)
|
||||||
expAddr2 := fmt.Sprintf("https://%s/proxy/api/v2/alerts", fakeConsulService2)
|
expAddr2 := fmt.Sprintf("https://%s/proxy/api/v2/alerts", fakeConsulService2)
|
||||||
expAddr3 := fmt.Sprintf("https://%s/proxy/api/v2/alerts", fakeConsulService3)
|
|
||||||
|
|
||||||
n1, n2, n3 := cw.notifiers()[0], cw.notifiers()[1], cw.notifiers()[2]
|
n1, n2 := cw.notifiers()[0], cw.notifiers()[1]
|
||||||
if n1.Addr() != expAddr1 {
|
if n1.Addr() != expAddr1 {
|
||||||
t.Fatalf("exp address %q; got %q", expAddr1, n1.Addr())
|
t.Fatalf("exp address %q; got %q", expAddr1, n1.Addr())
|
||||||
}
|
}
|
||||||
if n2.Addr() != expAddr2 {
|
if n2.Addr() != expAddr2 {
|
||||||
t.Fatalf("exp address %q; got %q", expAddr2, n2.Addr())
|
t.Fatalf("exp address %q; got %q", expAddr2, n2.Addr())
|
||||||
}
|
}
|
||||||
if n3.Addr() != expAddr3 {
|
|
||||||
t.Fatalf("exp address %q; got %q", expAddr3, n3.Addr())
|
|
||||||
}
|
|
||||||
|
|
||||||
if n1.(*AlertManager).relabelConfigs.String() != "" {
|
|
||||||
t.Fatalf("unexpected relabel configs: %q", n1.(*AlertManager).relabelConfigs.String())
|
|
||||||
}
|
|
||||||
if n2.(*AlertManager).relabelConfigs.String() != "" {
|
|
||||||
t.Fatalf("unexpected relabel configs: %q", n2.(*AlertManager).relabelConfigs.String())
|
|
||||||
}
|
|
||||||
if n3.(*AlertManager).relabelConfigs.String() != "- target_label: foo\n replacement: tar\n" {
|
|
||||||
t.Fatalf("unexpected relabel configs: %q", n3.(*AlertManager).relabelConfigs.String())
|
|
||||||
}
|
|
||||||
|
|
||||||
f := func() bool { return len(cw.notifiers()) == 1 }
|
f := func() bool { return len(cw.notifiers()) == 1 }
|
||||||
if !waitFor(f, time.Second) {
|
if !waitFor(f, time.Second) {
|
||||||
t.Fatalf("expected to get 1 notifiers; got %d", len(cw.notifiers()))
|
t.Fatalf("expected to get 1 notifiers; got %d", len(cw.notifiers()))
|
||||||
}
|
}
|
||||||
n3 = cw.notifiers()[0]
|
|
||||||
if n3.Addr() != expAddr3 {
|
|
||||||
t.Fatalf("exp address %q; got %q", expAddr3, n3.Addr())
|
|
||||||
}
|
|
||||||
if n3.(*AlertManager).relabelConfigs.String() != "- target_label: foo\n replacement: tar\n" {
|
|
||||||
t.Fatalf("unexpected relabel configs: %q", n3.(*AlertManager).relabelConfigs.String())
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// TestConfigWatcherReloadConcurrent supposed to test concurrent
|
// TestConfigWatcherReloadConcurrent supposed to test concurrent
|
||||||
@@ -200,11 +164,7 @@ consul_sd_configs:
|
|||||||
"unknownFields.bad.yaml",
|
"unknownFields.bad.yaml",
|
||||||
}
|
}
|
||||||
|
|
||||||
cfg, err := parseConfig(paths[0])
|
cw, err := newWatcher(paths[0], nil)
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("failed to parse config: %s", err)
|
|
||||||
}
|
|
||||||
cw, err := newWatcher(cfg, nil)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("failed to start config watcher: %s", err)
|
t.Fatalf("failed to start config watcher: %s", err)
|
||||||
}
|
}
|
||||||
@@ -212,16 +172,18 @@ consul_sd_configs:
|
|||||||
|
|
||||||
const workers = 500
|
const workers = 500
|
||||||
const iterations = 10
|
const iterations = 10
|
||||||
var wg sync.WaitGroup
|
wg := sync.WaitGroup{}
|
||||||
for n := range workers {
|
wg.Add(workers)
|
||||||
wg.Go(func() {
|
for i := 0; i < workers; i++ {
|
||||||
|
go func(n int) {
|
||||||
|
defer wg.Done()
|
||||||
r := rand.New(rand.NewSource(int64(n)))
|
r := rand.New(rand.NewSource(int64(n)))
|
||||||
for range iterations {
|
for i := 0; i < iterations; i++ {
|
||||||
rnd := r.Intn(len(paths))
|
rnd := r.Intn(len(paths))
|
||||||
_ = cw.reload(paths[rnd]) // update can fail and this is expected
|
_ = cw.reload(paths[rnd]) // update can fail and this is expected
|
||||||
_ = cw.notifiers()
|
_ = cw.notifiers()
|
||||||
}
|
}
|
||||||
})
|
}(i)
|
||||||
}
|
}
|
||||||
wg.Wait()
|
wg.Wait()
|
||||||
}
|
}
|
||||||
@@ -240,11 +202,10 @@ func checkErr(t *testing.T, err error) {
|
|||||||
const (
|
const (
|
||||||
fakeConsulService1 = "127.0.0.1:9093"
|
fakeConsulService1 = "127.0.0.1:9093"
|
||||||
fakeConsulService2 = "127.0.0.1:9095"
|
fakeConsulService2 = "127.0.0.1:9095"
|
||||||
fakeConsulService3 = "127.0.0.1:9097"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
func newFakeConsulServer() *httptest.Server {
|
func newFakeConsulServer() *httptest.Server {
|
||||||
var requestCount atomic.Int32
|
requestCount := 0
|
||||||
mux := http.NewServeMux()
|
mux := http.NewServeMux()
|
||||||
mux.HandleFunc("/v1/agent/self", func(rw http.ResponseWriter, _ *http.Request) {
|
mux.HandleFunc("/v1/agent/self", func(rw http.ResponseWriter, _ *http.Request) {
|
||||||
rw.Write([]byte(`{"Config": {"Datacenter": "dc1"}}`))
|
rw.Write([]byte(`{"Config": {"Datacenter": "dc1"}}`))
|
||||||
@@ -259,7 +220,7 @@ func newFakeConsulServer() *httptest.Server {
|
|||||||
}`))
|
}`))
|
||||||
})
|
})
|
||||||
mux.HandleFunc("/v1/health/service/alertmanager", func(rw http.ResponseWriter, _ *http.Request) {
|
mux.HandleFunc("/v1/health/service/alertmanager", func(rw http.ResponseWriter, _ *http.Request) {
|
||||||
if requestCount.Load() == 0 {
|
if requestCount == 0 {
|
||||||
rw.Header().Set("X-Consul-Index", "1")
|
rw.Header().Set("X-Consul-Index", "1")
|
||||||
rw.Write([]byte(`
|
rw.Write([]byte(`
|
||||||
[
|
[
|
||||||
@@ -399,7 +360,7 @@ func newFakeConsulServer() *httptest.Server {
|
|||||||
}
|
}
|
||||||
]`))
|
]`))
|
||||||
}
|
}
|
||||||
requestCount.Add(1)
|
requestCount++
|
||||||
})
|
})
|
||||||
|
|
||||||
return httptest.NewServer(mux)
|
return httptest.NewServer(mux)
|
||||||
|
|||||||
@@ -5,8 +5,6 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
// FakeNotifier is a mock notifier
|
// FakeNotifier is a mock notifier
|
||||||
@@ -17,32 +15,14 @@ type FakeNotifier struct {
|
|||||||
counter int
|
counter int
|
||||||
}
|
}
|
||||||
|
|
||||||
// InitFakeNotifier initializes global notifier to FakeNotifier,
|
|
||||||
// and returns a cleanup function to restore the original getActiveNotifiers.
|
|
||||||
func InitFakeNotifier() (*FakeNotifier, func()) {
|
|
||||||
originalGetActiveNotifiers := getActiveNotifiers
|
|
||||||
fn := &FakeNotifier{}
|
|
||||||
getActiveNotifiers = func() []Notifier {
|
|
||||||
return []Notifier{fn}
|
|
||||||
}
|
|
||||||
return fn, func() {
|
|
||||||
getActiveNotifiers = originalGetActiveNotifiers
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Close does nothing
|
// Close does nothing
|
||||||
func (*FakeNotifier) Close() {}
|
func (*FakeNotifier) Close() {}
|
||||||
|
|
||||||
// LastError returns last error message
|
|
||||||
func (*FakeNotifier) LastError() string {
|
|
||||||
return ""
|
|
||||||
}
|
|
||||||
|
|
||||||
// Addr returns ""
|
// Addr returns ""
|
||||||
func (*FakeNotifier) Addr() string { return "" }
|
func (*FakeNotifier) Addr() string { return "" }
|
||||||
|
|
||||||
// Send sets alerts and increases counter
|
// Send sets alerts and increases counter
|
||||||
func (fn *FakeNotifier) Send(_ context.Context, alerts []Alert, _ [][]prompb.Label, _ map[string]string) error {
|
func (fn *FakeNotifier) Send(_ context.Context, alerts []Alert, _ map[string]string) error {
|
||||||
fn.Lock()
|
fn.Lock()
|
||||||
defer fn.Unlock()
|
defer fn.Unlock()
|
||||||
fn.counter += len(alerts)
|
fn.counter += len(alerts)
|
||||||
|
|||||||
@@ -1,22 +1,14 @@
|
|||||||
package notifier
|
package notifier
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
|
||||||
"flag"
|
"flag"
|
||||||
"fmt"
|
"fmt"
|
||||||
"net/url"
|
"net/url"
|
||||||
"strconv"
|
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httputil"
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutil"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutil"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -65,61 +57,11 @@ var (
|
|||||||
sendTimeout = flagutil.NewArrayDuration("notifier.sendTimeout", 10*time.Second, "Timeout when sending alerts to the corresponding -notifier.url")
|
sendTimeout = flagutil.NewArrayDuration("notifier.sendTimeout", 10*time.Second, "Timeout when sending alerts to the corresponding -notifier.url")
|
||||||
)
|
)
|
||||||
|
|
||||||
// AlertURLGeneratorFn returns a URL to the passed alert object.
|
// cw holds a configWatcher for configPath configuration file
|
||||||
// Call InitAlertURLGeneratorFn before using this function.
|
// configWatcher provides a list of Notifier objects discovered
|
||||||
var AlertURLGeneratorFn AlertURLGenerator
|
// from static config or via service discovery.
|
||||||
|
// cw is not nil only if configPath is provided.
|
||||||
// InitAlertURLGeneratorFn populates AlertURLGeneratorFn
|
var cw *configWatcher
|
||||||
func InitAlertURLGeneratorFn(externalURL *url.URL, externalAlertSource string, validateTemplate bool) error {
|
|
||||||
if externalAlertSource == "" {
|
|
||||||
AlertURLGeneratorFn = func(a Alert) string {
|
|
||||||
gID, aID := strconv.FormatUint(a.GroupID, 10), strconv.FormatUint(a.ID, 10)
|
|
||||||
return fmt.Sprintf("%s/vmalert/alert?%s=%s&%s=%s", externalURL, "group_id", gID, "alert_id", aID)
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
if validateTemplate {
|
|
||||||
if err := ValidateTemplates(map[string]string{
|
|
||||||
"tpl": externalAlertSource,
|
|
||||||
}); err != nil {
|
|
||||||
return fmt.Errorf("error validating source template %s: %w", externalAlertSource, err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
m := map[string]string{
|
|
||||||
"tpl": externalAlertSource,
|
|
||||||
}
|
|
||||||
AlertURLGeneratorFn = func(alert Alert) string {
|
|
||||||
qFn := func(_ string) ([]datasource.Metric, error) {
|
|
||||||
return nil, fmt.Errorf("`query` template isn't supported for alert source template")
|
|
||||||
}
|
|
||||||
templated, err := alert.ExecTemplate(qFn, alert.Labels, m)
|
|
||||||
if err != nil {
|
|
||||||
logger.Errorf("cannot template alert source: %s", err)
|
|
||||||
}
|
|
||||||
return fmt.Sprintf("%s/%s", externalURL, templated["tpl"])
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
var (
|
|
||||||
// getActiveNotifiers returns the current list of Notifier objects.
|
|
||||||
getActiveNotifiers func() []Notifier
|
|
||||||
// globalRelabelCfg stores the parsed alert relabeling config from the config file if there is
|
|
||||||
globalRelabelCfg *promrelabel.ParsedConfigs
|
|
||||||
|
|
||||||
// cw holds a configWatcher for configPath configuration file
|
|
||||||
// configWatcher provides a list of Notifier objects discovered
|
|
||||||
// from static config or via service discovery.
|
|
||||||
// cw is not nil only if configPath is provided.
|
|
||||||
cw *configWatcher
|
|
||||||
|
|
||||||
// externalLabels is a global variable for holding external labels configured via flags
|
|
||||||
// It is supposed to be inited via Init function only.
|
|
||||||
externalLabels map[string]string
|
|
||||||
// externalURL is a global variable for holding external URL value configured via flag
|
|
||||||
// It is supposed to be inited via Init function only.
|
|
||||||
externalURL string
|
|
||||||
)
|
|
||||||
|
|
||||||
// Reload checks the changes in configPath configuration file
|
// Reload checks the changes in configPath configuration file
|
||||||
// and applies changes if any.
|
// and applies changes if any.
|
||||||
@@ -130,62 +72,66 @@ func Reload() error {
|
|||||||
return cw.reload(*configPath)
|
return cw.reload(*configPath)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var staticNotifiersFn func() []Notifier
|
||||||
|
|
||||||
|
var (
|
||||||
|
// externalLabels is a global variable for holding external labels configured via flags
|
||||||
|
// It is supposed to be inited via Init function only.
|
||||||
|
externalLabels map[string]string
|
||||||
|
// externalURL is a global variable for holding external URL value configured via flag
|
||||||
|
// It is supposed to be inited via Init function only.
|
||||||
|
externalURL string
|
||||||
|
)
|
||||||
|
|
||||||
|
// Init returns a function for retrieving actual list of Notifier objects.
|
||||||
// Init works in two mods:
|
// Init works in two mods:
|
||||||
// - configuration via flags (for backward compatibility). Is always static
|
// - configuration via flags (for backward compatibility). Is always static
|
||||||
// and don't support live reloads.
|
// and don't support live reloads.
|
||||||
// - configuration via file. Supports live reloads and service discovery.
|
// - configuration via file. Supports live reloads and service discovery.
|
||||||
//
|
//
|
||||||
// Init returns an error if both mods are used.
|
// Init returns an error if both mods are used.
|
||||||
func Init(extLabels map[string]string, extURL string) error {
|
func Init(gen AlertURLGenerator, extLabels map[string]string, extURL string) (func() []Notifier, error) {
|
||||||
externalURL = extURL
|
externalURL = extURL
|
||||||
externalLabels = extLabels
|
externalLabels = extLabels
|
||||||
_, err := url.Parse(externalURL)
|
_, err := url.Parse(externalURL)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("failed to parse external URL: %w", err)
|
return nil, fmt.Errorf("failed to parse external URL: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
if *blackHole {
|
if *blackHole {
|
||||||
if len(*addrs) > 0 || *configPath != "" {
|
if len(*addrs) > 0 || *configPath != "" {
|
||||||
return fmt.Errorf("only one of -notifier.blackhole, -notifier.url and -notifier.config flags must be specified")
|
return nil, fmt.Errorf("only one of -notifier.blackhole, -notifier.url and -notifier.config flags must be specified")
|
||||||
}
|
}
|
||||||
notifier := newBlackHoleNotifier()
|
notifier := newBlackHoleNotifier()
|
||||||
getActiveNotifiers = func() []Notifier {
|
staticNotifiersFn = func() []Notifier {
|
||||||
return []Notifier{notifier}
|
return []Notifier{notifier}
|
||||||
}
|
}
|
||||||
return nil
|
return staticNotifiersFn, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
if *configPath == "" && len(*addrs) == 0 {
|
if *configPath == "" && len(*addrs) == 0 {
|
||||||
return nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
if *configPath != "" && len(*addrs) > 0 {
|
if *configPath != "" && len(*addrs) > 0 {
|
||||||
return fmt.Errorf("only one of -notifier.config or -notifier.url flags must be specified")
|
return nil, fmt.Errorf("only one of -notifier.config or -notifier.url flags must be specified")
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(*addrs) > 0 {
|
if len(*addrs) > 0 {
|
||||||
notifiers, err := notifiersFromFlags(AlertURLGeneratorFn)
|
notifiers, err := notifiersFromFlags(gen)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("failed to create notifier from flag values: %w", err)
|
return nil, fmt.Errorf("failed to create notifier from flag values: %w", err)
|
||||||
}
|
}
|
||||||
getActiveNotifiers = func() []Notifier {
|
staticNotifiersFn = func() []Notifier {
|
||||||
return notifiers
|
return notifiers
|
||||||
}
|
}
|
||||||
return nil
|
return staticNotifiersFn, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
cfg, err := parseConfig(*configPath)
|
cw, err = newWatcher(*configPath, gen)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return nil, fmt.Errorf("failed to init config watcher: %w", err)
|
||||||
}
|
}
|
||||||
if cfg.AlertRelabelConfigs != nil {
|
return cw.notifiers, nil
|
||||||
globalRelabelCfg = cfg.parsedAlertRelabelConfigs
|
|
||||||
}
|
|
||||||
cw, err = newWatcher(cfg, AlertURLGeneratorFn)
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("failed to init config watcher: %w", err)
|
|
||||||
}
|
|
||||||
getActiveNotifiers = cw.notifiers
|
|
||||||
return nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// InitSecretFlags must be called after flag.Parse and before any logging
|
// InitSecretFlags must be called after flag.Parse and before any logging
|
||||||
@@ -229,9 +175,6 @@ func notifiersFromFlags(gen AlertURLGenerator) ([]Notifier, error) {
|
|||||||
Headers: []string{headers.GetOptionalArg(i)},
|
Headers: []string{headers.GetOptionalArg(i)},
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := httputil.CheckURL(addr); err != nil {
|
|
||||||
return nil, fmt.Errorf("invalid notifier.url %q: %w", addr, err)
|
|
||||||
}
|
|
||||||
addr = strings.TrimSuffix(addr, "/")
|
addr = strings.TrimSuffix(addr, "/")
|
||||||
am, err := NewAlertManager(addr+alertManagerPath, gen, authCfg, nil, sendTimeout.GetOptionalArg(i))
|
am, err := NewAlertManager(addr+alertManagerPath, gen, authCfg, nil, sendTimeout.GetOptionalArg(i))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -263,58 +206,23 @@ const (
|
|||||||
|
|
||||||
// GetTargets returns list of static or discovered targets
|
// GetTargets returns list of static or discovered targets
|
||||||
// via notifier configuration.
|
// via notifier configuration.
|
||||||
//
|
|
||||||
// Must be called after Init.
|
|
||||||
func GetTargets() map[TargetType][]Target {
|
func GetTargets() map[TargetType][]Target {
|
||||||
if getActiveNotifiers == nil {
|
var targets = make(map[TargetType][]Target)
|
||||||
return nil
|
|
||||||
|
if staticNotifiersFn != nil {
|
||||||
|
for _, ns := range staticNotifiersFn() {
|
||||||
|
targets[TargetStatic] = append(targets[TargetStatic], Target{
|
||||||
|
Notifier: ns,
|
||||||
|
})
|
||||||
|
}
|
||||||
}
|
}
|
||||||
targets := make(map[TargetType][]Target)
|
|
||||||
// use cached targets from configWatcher instead of getActiveNotifiers for the extra target labels
|
|
||||||
if cw != nil {
|
if cw != nil {
|
||||||
cw.targetsMu.RLock()
|
cw.targetsMu.RLock()
|
||||||
for key, ns := range cw.targets {
|
for key, ns := range cw.targets {
|
||||||
targets[key] = append(targets[key], ns...)
|
targets[key] = append(targets[key], ns...)
|
||||||
}
|
}
|
||||||
cw.targetsMu.RUnlock()
|
cw.targetsMu.RUnlock()
|
||||||
return targets
|
|
||||||
}
|
|
||||||
|
|
||||||
// static notifiers don't have labels
|
|
||||||
for _, ns := range getActiveNotifiers() {
|
|
||||||
targets[TargetStatic] = append(targets[TargetStatic], Target{
|
|
||||||
Notifier: ns,
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
return targets
|
return targets
|
||||||
}
|
}
|
||||||
|
|
||||||
// Send sends alerts to all active notifiers
|
|
||||||
func Send(ctx context.Context, alerts []Alert, notifierHeaders map[string]string) chan error {
|
|
||||||
alertsToSend := make([]Alert, 0, len(alerts))
|
|
||||||
lblss := make([][]prompb.Label, 0, len(alerts))
|
|
||||||
// apply global relabel config first without modifying original alerts in alerts
|
|
||||||
for _, a := range alerts {
|
|
||||||
lbls := a.applyRelabelingIfNeeded(globalRelabelCfg)
|
|
||||||
if len(lbls) == 0 {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
alertsToSend = append(alertsToSend, a)
|
|
||||||
lblss = append(lblss, lbls)
|
|
||||||
}
|
|
||||||
|
|
||||||
wg := sync.WaitGroup{}
|
|
||||||
activeNotifiers := getActiveNotifiers()
|
|
||||||
errCh := make(chan error, len(activeNotifiers))
|
|
||||||
defer close(errCh)
|
|
||||||
for i := range activeNotifiers {
|
|
||||||
nt := activeNotifiers[i]
|
|
||||||
wg.Go(func() {
|
|
||||||
if err := nt.Send(ctx, alertsToSend, lblss, notifierHeaders); err != nil {
|
|
||||||
errCh <- fmt.Errorf("failed to send alerts to addr %q: %w", nt.Addr(), err)
|
|
||||||
}
|
|
||||||
})
|
|
||||||
}
|
|
||||||
wg.Wait()
|
|
||||||
return errCh
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -1,17 +1,9 @@
|
|||||||
package notifier
|
package notifier
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
|
||||||
"encoding/json"
|
|
||||||
"fmt"
|
|
||||||
"net/http"
|
|
||||||
"net/http/httptest"
|
|
||||||
"net/url"
|
|
||||||
"os"
|
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestInit(t *testing.T) {
|
func TestInit(t *testing.T) {
|
||||||
@@ -20,13 +12,14 @@ func TestInit(t *testing.T) {
|
|||||||
|
|
||||||
*addrs = flagutil.ArrayString{"127.0.0.1", "127.0.0.2"}
|
*addrs = flagutil.ArrayString{"127.0.0.1", "127.0.0.2"}
|
||||||
|
|
||||||
err := Init(nil, "")
|
fn, err := Init(nil, nil, "")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("%s", err)
|
t.Fatalf("%s", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(getActiveNotifiers()) != 2 {
|
nfs := fn()
|
||||||
t.Fatalf("expected to get 2 notifiers; got %d", len(getActiveNotifiers()))
|
if len(nfs) != 2 {
|
||||||
|
t.Fatalf("expected to get 2 notifiers; got %d", len(nfs))
|
||||||
}
|
}
|
||||||
|
|
||||||
targets := GetTargets()
|
targets := GetTargets()
|
||||||
@@ -55,22 +48,19 @@ func TestInitNegative(t *testing.T) {
|
|||||||
*blackHole = oldBlackHole
|
*blackHole = oldBlackHole
|
||||||
}()
|
}()
|
||||||
|
|
||||||
f := func(path string, addr []string, bh bool) {
|
f := func(path, addr string, bh bool) {
|
||||||
*configPath = path
|
*configPath = path
|
||||||
*addrs = flagutil.ArrayString(addr)
|
*addrs = flagutil.ArrayString{addr}
|
||||||
*blackHole = bh
|
*blackHole = bh
|
||||||
if err := Init(nil, ""); err == nil {
|
if _, err := Init(nil, nil, ""); err == nil {
|
||||||
t.Fatalf("expected to get error; got nil instead")
|
t.Fatalf("expected to get error; got nil instead")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// *configPath, *addrs and *blackhole are mutually exclusive
|
// *configPath, *addrs and *blackhole are mutually exclusive
|
||||||
f("/dummy/path", []string{"127.0.0.1"}, false)
|
f("/dummy/path", "127.0.0.1", false)
|
||||||
f("/dummy/path", []string{}, true)
|
f("/dummy/path", "", true)
|
||||||
f("", []string{"127.0.0.1"}, true)
|
f("", "127.0.0.1", true)
|
||||||
// addr cannot be ""
|
|
||||||
f("", []string{""}, false)
|
|
||||||
f("", []string{"127.0.0.1", ""}, false)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestBlackHole(t *testing.T) {
|
func TestBlackHole(t *testing.T) {
|
||||||
@@ -79,13 +69,14 @@ func TestBlackHole(t *testing.T) {
|
|||||||
|
|
||||||
*blackHole = true
|
*blackHole = true
|
||||||
|
|
||||||
err := Init(nil, "")
|
fn, err := Init(nil, nil, "")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("%s", err)
|
t.Fatalf("%s", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(getActiveNotifiers()) != 1 {
|
nfs := fn()
|
||||||
t.Fatalf("expected to get 1 notifier; got %d", len(getActiveNotifiers()))
|
if len(nfs) != 1 {
|
||||||
|
t.Fatalf("expected to get 1 notifier; got %d", len(nfs))
|
||||||
}
|
}
|
||||||
|
|
||||||
targets := GetTargets()
|
targets := GetTargets()
|
||||||
@@ -100,114 +91,3 @@ func TestBlackHole(t *testing.T) {
|
|||||||
t.Fatalf("expected to get \"blackhole\"; got %q instead", nf1.Addr())
|
t.Fatalf("expected to get \"blackhole\"; got %q instead", nf1.Addr())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestGetAlertURLGenerator(t *testing.T) {
|
|
||||||
oldAlertURLGeneratorFn := AlertURLGeneratorFn
|
|
||||||
defer func() { AlertURLGeneratorFn = oldAlertURLGeneratorFn }()
|
|
||||||
|
|
||||||
testAlert := Alert{GroupID: 42, ID: 2, Value: 4, Labels: map[string]string{"tenant": "baz"}}
|
|
||||||
u, _ := url.Parse("https://victoriametrics.com/path")
|
|
||||||
err := InitAlertURLGeneratorFn(u, "", false)
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("unexpected error %s", err)
|
|
||||||
}
|
|
||||||
exp := fmt.Sprintf("https://victoriametrics.com/path/vmalert/alert?%s=42&%s=2", "group_id", "alert_id")
|
|
||||||
if exp != AlertURLGeneratorFn(testAlert) {
|
|
||||||
t.Fatalf("unexpected url want %s, got %s", exp, AlertURLGeneratorFn(testAlert))
|
|
||||||
}
|
|
||||||
err = InitAlertURLGeneratorFn(nil, "foo?{{invalid}}", true)
|
|
||||||
if err == nil {
|
|
||||||
t.Fatalf("expected template validation error got nil")
|
|
||||||
}
|
|
||||||
err = InitAlertURLGeneratorFn(u, "foo?query={{$value}}&ds={{ $labels.tenant }}", true)
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("unexpected error %s", err)
|
|
||||||
}
|
|
||||||
if exp := "https://victoriametrics.com/path/foo?query=4&ds=baz"; exp != AlertURLGeneratorFn(testAlert) {
|
|
||||||
t.Fatalf("unexpected url want %s, got %s", exp, AlertURLGeneratorFn(testAlert))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestSendAlerts(t *testing.T) {
|
|
||||||
oldAlertURLGeneratorFn := AlertURLGeneratorFn
|
|
||||||
defer func() { AlertURLGeneratorFn = oldAlertURLGeneratorFn }()
|
|
||||||
AlertURLGeneratorFn = func(alert Alert) string {
|
|
||||||
return ""
|
|
||||||
}
|
|
||||||
mux := http.NewServeMux()
|
|
||||||
mux.HandleFunc("/", func(_ http.ResponseWriter, _ *http.Request) {
|
|
||||||
t.Fatalf("should not be called")
|
|
||||||
})
|
|
||||||
mux.HandleFunc(alertManagerPath, func(w http.ResponseWriter, r *http.Request) {
|
|
||||||
var a []struct {
|
|
||||||
Labels map[string]string `json:"labels"`
|
|
||||||
}
|
|
||||||
if err := json.NewDecoder(r.Body).Decode(&a); err != nil {
|
|
||||||
t.Fatalf("can not unmarshal data into alert %s", err)
|
|
||||||
}
|
|
||||||
if len(a) != 2 {
|
|
||||||
t.Fatalf("expected 2 alert in array got %d", len(a))
|
|
||||||
}
|
|
||||||
if len(a[0].Labels) != 4 {
|
|
||||||
t.Fatalf("expected 4 labels got %d", len(a[0].Labels))
|
|
||||||
}
|
|
||||||
if a[0].Labels["env"] != "prod" {
|
|
||||||
t.Fatalf("expected env label to be prod during relabeling, got %s", a[0].Labels["env"])
|
|
||||||
}
|
|
||||||
if a[0].Labels["c"] != "baz" {
|
|
||||||
t.Fatalf("expected c label to be baz during relabeling, got %s", a[0].Labels["c"])
|
|
||||||
}
|
|
||||||
if len(a[1].Labels) != 1 {
|
|
||||||
t.Fatalf("expected 1 labels got %d", len(a[1].Labels))
|
|
||||||
}
|
|
||||||
})
|
|
||||||
srv := httptest.NewServer(mux)
|
|
||||||
defer srv.Close()
|
|
||||||
|
|
||||||
f, err := os.CreateTemp("", "")
|
|
||||||
if err != nil {
|
|
||||||
t.Fatal(err)
|
|
||||||
}
|
|
||||||
defer fs.MustRemovePath(f.Name())
|
|
||||||
|
|
||||||
rawConfig := `
|
|
||||||
static_configs:
|
|
||||||
- targets:
|
|
||||||
- %s
|
|
||||||
alert_relabel_configs:
|
|
||||||
- source_labels: [b]
|
|
||||||
target_label: "c"
|
|
||||||
alert_relabel_configs:
|
|
||||||
- source_labels: [a]
|
|
||||||
target_label: "b"
|
|
||||||
- target_label: "env"
|
|
||||||
replacement: "prod"
|
|
||||||
`
|
|
||||||
config := fmt.Sprintf(rawConfig, srv.URL+alertManagerPath)
|
|
||||||
writeToFile(f.Name(), config)
|
|
||||||
|
|
||||||
oldConfigPath := configPath
|
|
||||||
defer func() { configPath = oldConfigPath }()
|
|
||||||
*configPath = f.Name()
|
|
||||||
err = Init(nil, "")
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("unexpected error when parse notifier config: %s", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
firingAlerts := []Alert{
|
|
||||||
{
|
|
||||||
Name: "alert1",
|
|
||||||
Labels: map[string]string{"a": "baz"},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
Name: "alert2",
|
|
||||||
Labels: map[string]string{},
|
|
||||||
},
|
|
||||||
}
|
|
||||||
errG := Send(context.Background(), firingAlerts, nil)
|
|
||||||
for err := range errG {
|
|
||||||
if err != nil {
|
|
||||||
t.Errorf("unexpected error when sending alerts: %s", err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -1,21 +1,15 @@
|
|||||||
package notifier
|
package notifier
|
||||||
|
|
||||||
import (
|
import "context"
|
||||||
"context"
|
|
||||||
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
|
||||||
)
|
|
||||||
|
|
||||||
// Notifier is a common interface for alert manager provider
|
// Notifier is a common interface for alert manager provider
|
||||||
type Notifier interface {
|
type Notifier interface {
|
||||||
// Send sends the given list of alerts.
|
// Send sends the given list of alerts.
|
||||||
// Returns an error if fails to send the alerts.
|
// Returns an error if fails to send the alerts.
|
||||||
// Must unblock if the given ctx is cancelled.
|
// Must unblock if the given ctx is cancelled.
|
||||||
Send(ctx context.Context, alerts []Alert, alertLabels [][]prompb.Label, notifierHeaders map[string]string) error
|
Send(ctx context.Context, alerts []Alert, notifierHeaders map[string]string) error
|
||||||
// Addr returns address where alerts are sent.
|
// Addr returns address where alerts are sent.
|
||||||
Addr() string
|
Addr() string
|
||||||
// LastError returns the error that occurred during the last attempt to send data
|
|
||||||
LastError() string
|
|
||||||
// Close is a destructor for the Notifier
|
// Close is a destructor for the Notifier
|
||||||
Close()
|
Close()
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,10 +1,6 @@
|
|||||||
package notifier
|
package notifier
|
||||||
|
|
||||||
import (
|
import "context"
|
||||||
"context"
|
|
||||||
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
|
||||||
)
|
|
||||||
|
|
||||||
// blackHoleNotifier is a Notifier stub, used when no notifications need
|
// blackHoleNotifier is a Notifier stub, used when no notifications need
|
||||||
// to be sent.
|
// to be sent.
|
||||||
@@ -14,7 +10,7 @@ type blackHoleNotifier struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Send will send no notifications, but increase the metric.
|
// Send will send no notifications, but increase the metric.
|
||||||
func (bh *blackHoleNotifier) Send(_ context.Context, alerts []Alert, _ [][]prompb.Label, _ map[string]string) error { //nolint:revive
|
func (bh *blackHoleNotifier) Send(_ context.Context, alerts []Alert, _ map[string]string) error { //nolint:revive
|
||||||
bh.metrics.alertsSent.Add(len(alerts))
|
bh.metrics.alertsSent.Add(len(alerts))
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
@@ -29,11 +25,6 @@ func (bh *blackHoleNotifier) Close() {
|
|||||||
bh.metrics.close()
|
bh.metrics.close()
|
||||||
}
|
}
|
||||||
|
|
||||||
// LastError return last notifier's error
|
|
||||||
func (bh *blackHoleNotifier) LastError() string {
|
|
||||||
return ""
|
|
||||||
}
|
|
||||||
|
|
||||||
// newBlackHoleNotifier creates a new blackHoleNotifier
|
// newBlackHoleNotifier creates a new blackHoleNotifier
|
||||||
func newBlackHoleNotifier() *blackHoleNotifier {
|
func newBlackHoleNotifier() *blackHoleNotifier {
|
||||||
address := "blackhole"
|
address := "blackhole"
|
||||||
|
|||||||
@@ -5,7 +5,6 @@ import (
|
|||||||
"testing"
|
"testing"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
|
||||||
metricset "github.com/VictoriaMetrics/metrics"
|
metricset "github.com/VictoriaMetrics/metrics"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -17,7 +16,7 @@ func TestBlackHoleNotifier_Send(t *testing.T) {
|
|||||||
Start: time.Now().UTC(),
|
Start: time.Now().UTC(),
|
||||||
End: time.Now().UTC(),
|
End: time.Now().UTC(),
|
||||||
Annotations: map[string]string{"a": "b", "c": "d", "e": "f"},
|
Annotations: map[string]string{"a": "b", "c": "d", "e": "f"},
|
||||||
}}, [][]prompb.Label{{}}, nil); err != nil {
|
}}, nil); err != nil {
|
||||||
t.Fatalf("unexpected error %s", err)
|
t.Fatalf("unexpected error %s", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -35,7 +34,7 @@ func TestBlackHoleNotifier_Close(t *testing.T) {
|
|||||||
Start: time.Now().UTC(),
|
Start: time.Now().UTC(),
|
||||||
End: time.Now().UTC(),
|
End: time.Now().UTC(),
|
||||||
Annotations: map[string]string{"a": "b", "c": "d", "e": "f"},
|
Annotations: map[string]string{"a": "b", "c": "d", "e": "f"},
|
||||||
}}, [][]prompb.Label{{}}, nil); err != nil {
|
}}, nil); err != nil {
|
||||||
t.Fatalf("unexpected error %s", err)
|
t.Fatalf("unexpected error %s", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
19
app/vmalert/notifier/testdata/consul.bad.yaml
vendored
19
app/vmalert/notifier/testdata/consul.bad.yaml
vendored
@@ -1,19 +0,0 @@
|
|||||||
consul_sd_configs:
|
|
||||||
- server: localhost:8500
|
|
||||||
scheme: http
|
|
||||||
services:
|
|
||||||
- alertmanager
|
|
||||||
alert_relabel_configs:
|
|
||||||
- action: keep
|
|
||||||
source_labels: [env]
|
|
||||||
regex: "prod"
|
|
||||||
- server: localhost:8500
|
|
||||||
services:
|
|
||||||
- consul
|
|
||||||
alert_relabel_configs:
|
|
||||||
- action: keep
|
|
||||||
source_labels: [env]
|
|
||||||
regex: "(abc"
|
|
||||||
alert_relabel_configs:
|
|
||||||
- target_label: "foo"
|
|
||||||
replacement: "aaa"
|
|
||||||
13
app/vmalert/notifier/testdata/dns.bad.yaml
vendored
13
app/vmalert/notifier/testdata/dns.bad.yaml
vendored
@@ -1,13 +0,0 @@
|
|||||||
dns_sd_configs:
|
|
||||||
- names:
|
|
||||||
- cloudflare.com
|
|
||||||
type: 'A'
|
|
||||||
port: 9093
|
|
||||||
relabel_configs:
|
|
||||||
- source_labels: [__meta_dns_name]
|
|
||||||
replacement: '${1}'
|
|
||||||
target_label: dns_name
|
|
||||||
alert_relabel_configs:
|
|
||||||
- action: keep
|
|
||||||
source_labels: [env]
|
|
||||||
regex: "(abc"
|
|
||||||
15
app/vmalert/notifier/testdata/mixed.good.yaml
vendored
15
app/vmalert/notifier/testdata/mixed.good.yaml
vendored
@@ -2,19 +2,12 @@ static_configs:
|
|||||||
- targets:
|
- targets:
|
||||||
- localhost:9093
|
- localhost:9093
|
||||||
- localhost:9095
|
- localhost:9095
|
||||||
alert_relabel_configs:
|
|
||||||
- action: keep
|
|
||||||
source_labels: [env]
|
|
||||||
regex: "static"
|
|
||||||
consul_sd_configs:
|
consul_sd_configs:
|
||||||
- server: localhost:8500
|
- server: localhost:8500
|
||||||
scheme: http
|
scheme: http
|
||||||
services:
|
services:
|
||||||
- alertmanager
|
- alertmanager
|
||||||
alert_relabel_configs:
|
|
||||||
- action: keep
|
|
||||||
source_labels: [env]
|
|
||||||
regex: "consul"
|
|
||||||
- server: localhost:8500
|
- server: localhost:8500
|
||||||
services:
|
services:
|
||||||
- consul
|
- consul
|
||||||
@@ -24,10 +17,6 @@ dns_sd_configs:
|
|||||||
- cloudflare.com
|
- cloudflare.com
|
||||||
type: 'A'
|
type: 'A'
|
||||||
port: 9093
|
port: 9093
|
||||||
alert_relabel_configs:
|
|
||||||
- action: keep
|
|
||||||
source_labels: [env]
|
|
||||||
regex: "dns"
|
|
||||||
|
|
||||||
relabel_configs:
|
relabel_configs:
|
||||||
- source_labels: [__meta_consul_tags]
|
- source_labels: [__meta_consul_tags]
|
||||||
@@ -36,4 +25,4 @@ relabel_configs:
|
|||||||
target_label: __scheme__
|
target_label: __scheme__
|
||||||
- source_labels: [__meta_dns_name]
|
- source_labels: [__meta_dns_name]
|
||||||
replacement: '${1}'
|
replacement: '${1}'
|
||||||
target_label: dns_name
|
target_label: dns_name
|
||||||
26
app/vmalert/notifier/testdata/static.good.yaml
vendored
26
app/vmalert/notifier/testdata/static.good.yaml
vendored
@@ -1,14 +1,22 @@
|
|||||||
|
headers:
|
||||||
|
- 'CustomHeader: foo'
|
||||||
|
|
||||||
static_configs:
|
static_configs:
|
||||||
- targets:
|
- targets:
|
||||||
- http://192.168.0.101:9093
|
- localhost:9093
|
||||||
alert_relabel_configs:
|
- localhost:9095
|
||||||
- target_label: "foo"
|
- https://localhost:9093/test/api/v2/alerts
|
||||||
replacement: "aaa"
|
basic_auth:
|
||||||
|
username: foo
|
||||||
|
password: bar
|
||||||
|
|
||||||
- targets:
|
- targets:
|
||||||
- http://192.168.0.101:9093
|
- localhost:9096
|
||||||
alert_relabel_configs:
|
- localhost:9097
|
||||||
- target_label: "foo"
|
basic_auth:
|
||||||
replacement: "ccc"
|
username: foo
|
||||||
|
password: baz
|
||||||
|
|
||||||
|
alert_relabel_configs:
|
||||||
|
- target_label: "foo"
|
||||||
|
replacement: "aaa"
|
||||||
|
|||||||
@@ -1,19 +0,0 @@
|
|||||||
package notifier
|
|
||||||
|
|
||||||
// ApiNotifier represents a Notifier configuration for WEB view
|
|
||||||
type ApiNotifier struct {
|
|
||||||
// Kind is a Notifier type
|
|
||||||
Kind TargetType `json:"kind"`
|
|
||||||
// Targets is a list of Notifier targets
|
|
||||||
Targets []*ApiTarget `json:"targets"`
|
|
||||||
}
|
|
||||||
|
|
||||||
// ApiTarget represents a specific Notifier target for WEB view
|
|
||||||
type ApiTarget struct {
|
|
||||||
// Address is a URL for sending notifications
|
|
||||||
Address string `json:"address"`
|
|
||||||
// Labels is a list of labels to add to each sent notification
|
|
||||||
Labels map[string]string `json:"labels"`
|
|
||||||
// LastError contains the error faced while sending to notifier.
|
|
||||||
LastError string `json:"lastError"`
|
|
||||||
}
|
|
||||||
@@ -14,9 +14,9 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
addr = flag.String("remoteRead.url", "", "Optional URL to datasource compatible with MetricsQL. It can be single node VictoriaMetrics or vmselect. "+
|
addr = flag.String("remoteRead.url", "", "Optional URL to datasource compatible with MetricsQL. It can be single node VictoriaMetrics or vmselect."+
|
||||||
"Remote read is used to restore alerts state. "+
|
"Remote read is used to restore alerts state."+
|
||||||
"This configuration makes sense only if vmalert was configured with '-remoteWrite.url' before and has been successfully persisted its state. "+
|
"This configuration makes sense only if `vmalert` was configured with `remoteWrite.url` before and has been successfully persisted its state. "+
|
||||||
"Supports address in the form of IP address with a port (e.g., http://127.0.0.1:8428) or DNS SRV record. "+
|
"Supports address in the form of IP address with a port (e.g., http://127.0.0.1:8428) or DNS SRV record. "+
|
||||||
"See also '-remoteRead.disablePathAppend', '-remoteRead.showURL'.")
|
"See also '-remoteRead.disablePathAppend', '-remoteRead.showURL'.")
|
||||||
|
|
||||||
|
|||||||
@@ -13,18 +13,14 @@ import (
|
|||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/cespare/xxhash/v2"
|
|
||||||
"github.com/golang/snappy"
|
"github.com/golang/snappy"
|
||||||
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httputil"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httputil"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timeutil"
|
|
||||||
|
|
||||||
"github.com/VictoriaMetrics/metrics"
|
"github.com/VictoriaMetrics/metrics"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -118,9 +114,7 @@ func NewClient(ctx context.Context, cfg Config) (*Client, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
for i := 0; i < cc; i++ {
|
for i := 0; i < cc; i++ {
|
||||||
c.wg.Go(func() {
|
c.run(ctx)
|
||||||
c.run(ctx, i)
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
return c, nil
|
return c, nil
|
||||||
}
|
}
|
||||||
@@ -162,7 +156,8 @@ func (c *Client) Close() error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *Client) run(ctx context.Context, id int) {
|
func (c *Client) run(ctx context.Context) {
|
||||||
|
ticker := time.NewTicker(c.flushInterval)
|
||||||
wr := &prompb.WriteRequest{}
|
wr := &prompb.WriteRequest{}
|
||||||
shutdown := func() {
|
shutdown := func() {
|
||||||
lastCtx, cancel := context.WithTimeout(context.Background(), defaultWriteTimeout)
|
lastCtx, cancel := context.WithTimeout(context.Background(), defaultWriteTimeout)
|
||||||
@@ -178,73 +173,42 @@ func (c *Client) run(ctx context.Context, id int) {
|
|||||||
|
|
||||||
cancel()
|
cancel()
|
||||||
}
|
}
|
||||||
|
c.wg.Add(1)
|
||||||
// add jitter to spread remote write flushes over the flush interval to avoid congestion at the remote write destination
|
go func() {
|
||||||
h := xxhash.Sum64(bytesutil.ToUnsafeBytes(fmt.Sprintf("%d", id)))
|
defer c.wg.Done()
|
||||||
randJitter := uint64(float64(c.flushInterval) * (float64(h) / (1 << 64)))
|
defer ticker.Stop()
|
||||||
timer := time.NewTimer(time.Duration(randJitter))
|
for {
|
||||||
addJitter:
|
|
||||||
for {
|
|
||||||
select {
|
|
||||||
case <-c.doneCh:
|
|
||||||
timer.Stop()
|
|
||||||
shutdown()
|
|
||||||
return
|
|
||||||
case <-ctx.Done():
|
|
||||||
timer.Stop()
|
|
||||||
shutdown()
|
|
||||||
return
|
|
||||||
case <-timer.C:
|
|
||||||
break addJitter
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
ticker := time.NewTicker(c.flushInterval)
|
|
||||||
defer ticker.Stop()
|
|
||||||
for {
|
|
||||||
select {
|
|
||||||
case <-c.doneCh:
|
|
||||||
shutdown()
|
|
||||||
return
|
|
||||||
case <-ctx.Done():
|
|
||||||
shutdown()
|
|
||||||
return
|
|
||||||
case <-ticker.C:
|
|
||||||
c.flush(ctx, wr)
|
|
||||||
// drain the potential stale tick to avoid small or empty flushes after a slow flush.
|
|
||||||
select {
|
select {
|
||||||
|
case <-c.doneCh:
|
||||||
|
shutdown()
|
||||||
|
return
|
||||||
|
case <-ctx.Done():
|
||||||
|
shutdown()
|
||||||
|
return
|
||||||
case <-ticker.C:
|
case <-ticker.C:
|
||||||
default:
|
|
||||||
}
|
|
||||||
case ts, ok := <-c.input:
|
|
||||||
if !ok {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
wr.Timeseries = append(wr.Timeseries, ts)
|
|
||||||
if len(wr.Timeseries) >= c.maxBatchSize {
|
|
||||||
c.flush(ctx, wr)
|
c.flush(ctx, wr)
|
||||||
|
case ts, ok := <-c.input:
|
||||||
|
if !ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
wr.Timeseries = append(wr.Timeseries, ts)
|
||||||
|
if len(wr.Timeseries) >= c.maxBatchSize {
|
||||||
|
c.flush(ctx, wr)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}()
|
||||||
}
|
}
|
||||||
|
|
||||||
var (
|
var (
|
||||||
rwErrors = metrics.NewCounter(`vmalert_remotewrite_errors_total`)
|
rwErrors = metrics.NewCounter(`vmalert_remotewrite_errors_total`)
|
||||||
rwTotal = metrics.NewCounter(`vmalert_remotewrite_total`)
|
rwTotal = metrics.NewCounter(`vmalert_remotewrite_total`)
|
||||||
|
|
||||||
// sentRows and sentBytes are historical counters that can now be replaced by flushedRows and flushedBytes histograms. They may be deprecated in the future after the new histograms have been adopted for some time.
|
sentRows = metrics.NewCounter(`vmalert_remotewrite_sent_rows_total`)
|
||||||
sentRows = metrics.NewCounter(`vmalert_remotewrite_sent_rows_total`)
|
sentBytes = metrics.NewCounter(`vmalert_remotewrite_sent_bytes_total`)
|
||||||
sentBytes = metrics.NewCounter(`vmalert_remotewrite_sent_bytes_total`)
|
droppedRows = metrics.NewCounter(`vmalert_remotewrite_dropped_rows_total`)
|
||||||
flushedRows = metrics.NewHistogram(`vmalert_remotewrite_sent_rows`)
|
sendDuration = metrics.NewFloatCounter(`vmalert_remotewrite_send_duration_seconds_total`)
|
||||||
flushedBytes = metrics.NewHistogram(`vmalert_remotewrite_sent_bytes`)
|
bufferFlushDuration = metrics.NewHistogram(`vmalert_remotewrite_flush_duration_seconds`)
|
||||||
droppedRows = metrics.NewCounter(`vmalert_remotewrite_dropped_rows_total`)
|
|
||||||
sendDuration = metrics.NewFloatCounter(`vmalert_remotewrite_send_duration_seconds_total`)
|
|
||||||
bufferFlushDuration = metrics.NewHistogram(`vmalert_remotewrite_flush_duration_seconds`)
|
|
||||||
remoteWriteQueueSize = metrics.NewHistogram(`vmalert_remotewrite_queue_size`)
|
|
||||||
|
|
||||||
_ = metrics.NewGauge(`vmalert_remotewrite_queue_capacity`, func() float64 {
|
|
||||||
return float64(*maxQueueSize)
|
|
||||||
})
|
|
||||||
|
|
||||||
_ = metrics.NewGauge(`vmalert_remotewrite_concurrency`, func() float64 {
|
_ = metrics.NewGauge(`vmalert_remotewrite_concurrency`, func() float64 {
|
||||||
return float64(*concurrency)
|
return float64(*concurrency)
|
||||||
@@ -258,7 +222,6 @@ func GetDroppedRows() int { return int(droppedRows.Get()) }
|
|||||||
// it to remote-write endpoint. Flush performs limited amount of retries
|
// it to remote-write endpoint. Flush performs limited amount of retries
|
||||||
// if request fails.
|
// if request fails.
|
||||||
func (c *Client) flush(ctx context.Context, wr *prompb.WriteRequest) {
|
func (c *Client) flush(ctx context.Context, wr *prompb.WriteRequest) {
|
||||||
remoteWriteQueueSize.Update(float64(len(c.input)))
|
|
||||||
if len(wr.Timeseries) < 1 {
|
if len(wr.Timeseries) < 1 {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
@@ -268,16 +231,16 @@ func (c *Client) flush(ctx context.Context, wr *prompb.WriteRequest) {
|
|||||||
data := wr.MarshalProtobuf(nil)
|
data := wr.MarshalProtobuf(nil)
|
||||||
b := snappy.Encode(nil, data)
|
b := snappy.Encode(nil, data)
|
||||||
|
|
||||||
maxRetryInterval := *retryMaxTime
|
retryInterval, maxRetryInterval := *retryMinInterval, *retryMaxTime
|
||||||
bt := timeutil.NewBackoffTimer(*retryMinInterval, maxRetryInterval)
|
if retryInterval > maxRetryInterval {
|
||||||
|
retryInterval = maxRetryInterval
|
||||||
|
}
|
||||||
timeStart := time.Now()
|
timeStart := time.Now()
|
||||||
defer func() {
|
defer func() {
|
||||||
sendDuration.Add(time.Since(timeStart).Seconds())
|
sendDuration.Add(time.Since(timeStart).Seconds())
|
||||||
}()
|
}()
|
||||||
|
|
||||||
attempts := 0
|
|
||||||
L:
|
L:
|
||||||
for {
|
for attempts := 0; ; attempts++ {
|
||||||
err := c.send(ctx, b)
|
err := c.send(ctx, b)
|
||||||
if err != nil && (errors.Is(err, io.EOF) || netutil.IsTrivialNetworkError(err)) {
|
if err != nil && (errors.Is(err, io.EOF) || netutil.IsTrivialNetworkError(err)) {
|
||||||
// Something in the middle between client and destination might be closing
|
// Something in the middle between client and destination might be closing
|
||||||
@@ -287,8 +250,6 @@ L:
|
|||||||
if err == nil {
|
if err == nil {
|
||||||
sentRows.Add(len(wr.Timeseries))
|
sentRows.Add(len(wr.Timeseries))
|
||||||
sentBytes.Add(len(b))
|
sentBytes.Add(len(b))
|
||||||
flushedRows.Update(float64(len(wr.Timeseries)))
|
|
||||||
flushedBytes.Update(float64(len(b)))
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -314,13 +275,13 @@ L:
|
|||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
|
||||||
if bt.CurrentDelay() > timeLeftForRetries {
|
if retryInterval > timeLeftForRetries {
|
||||||
bt.SetDelay(timeLeftForRetries)
|
retryInterval = timeLeftForRetries
|
||||||
}
|
}
|
||||||
// sleeping to prevent remote db hammering
|
// sleeping to prevent remote db hammering
|
||||||
bt.Wait(ctx.Done())
|
time.Sleep(retryInterval)
|
||||||
|
retryInterval *= 2
|
||||||
|
|
||||||
attempts++
|
|
||||||
}
|
}
|
||||||
|
|
||||||
rwErrors.Inc()
|
rwErrors.Inc()
|
||||||
|
|||||||
@@ -44,7 +44,7 @@ func TestClient_Push(t *testing.T) {
|
|||||||
|
|
||||||
r := rand.New(rand.NewSource(1))
|
r := rand.New(rand.NewSource(1))
|
||||||
const rowsN = int(1e4)
|
const rowsN = int(1e4)
|
||||||
for range rowsN {
|
for i := 0; i < rowsN; i++ {
|
||||||
s := prompb.TimeSeries{
|
s := prompb.TimeSeries{
|
||||||
Samples: []prompb.Sample{{
|
Samples: []prompb.Sample{{
|
||||||
Value: r.Float64(),
|
Value: r.Float64(),
|
||||||
@@ -102,7 +102,7 @@ func TestClient_run_maxBatchSizeDuringShutdown(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// push time series to the client.
|
// push time series to the client.
|
||||||
for range pushCnt {
|
for i := 0; i < pushCnt; i++ {
|
||||||
if err = rwClient.Push(prompb.TimeSeries{}); err != nil {
|
if err = rwClient.Push(prompb.TimeSeries{}); err != nil {
|
||||||
t.Fatalf("cannot time series to the client: %s", err)
|
t.Fatalf("cannot time series to the client: %s", err)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -22,7 +22,7 @@ func TestDebugClient_Push(t *testing.T) {
|
|||||||
|
|
||||||
const rowsN = 100
|
const rowsN = 100
|
||||||
var sent int
|
var sent int
|
||||||
for i := range rowsN {
|
for i := 0; i < rowsN; i++ {
|
||||||
s := prompb.TimeSeries{
|
s := prompb.TimeSeries{
|
||||||
Samples: []prompb.Sample{{
|
Samples: []prompb.Sample{{
|
||||||
Value: float64(i),
|
Value: float64(i),
|
||||||
|
|||||||
@@ -2,7 +2,6 @@ package rule
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"errors"
|
|
||||||
"fmt"
|
"fmt"
|
||||||
"hash/fnv"
|
"hash/fnv"
|
||||||
"math"
|
"math"
|
||||||
@@ -188,54 +187,6 @@ func (ar *AlertingRule) ID() uint64 {
|
|||||||
return ar.RuleID
|
return ar.RuleID
|
||||||
}
|
}
|
||||||
|
|
||||||
// ToAPI returns ApiRule representation of ar
|
|
||||||
func (ar *AlertingRule) ToAPI() ApiRule {
|
|
||||||
state := ar.state
|
|
||||||
lastState := state.getLast()
|
|
||||||
r := ApiRule{
|
|
||||||
Type: TypeAlerting,
|
|
||||||
DatasourceType: ar.Type.String(),
|
|
||||||
Name: ar.Name,
|
|
||||||
Query: ar.Expr,
|
|
||||||
Duration: ar.For.Seconds(),
|
|
||||||
KeepFiringFor: ar.KeepFiringFor.Seconds(),
|
|
||||||
Labels: ar.Labels,
|
|
||||||
Annotations: ar.Annotations,
|
|
||||||
LastEvaluation: lastState.Time,
|
|
||||||
EvaluationTime: lastState.Duration.Seconds(),
|
|
||||||
Health: "ok",
|
|
||||||
State: "inactive",
|
|
||||||
Alerts: ar.AlertsToAPI(),
|
|
||||||
LastSamples: lastState.Samples,
|
|
||||||
LastSeriesFetched: lastState.SeriesFetched,
|
|
||||||
MaxUpdates: state.size(),
|
|
||||||
Updates: state.getAll(),
|
|
||||||
Debug: ar.Debug,
|
|
||||||
|
|
||||||
// encode as strings to avoid rounding in JSON
|
|
||||||
ID: fmt.Sprintf("%d", ar.ID()),
|
|
||||||
GroupID: fmt.Sprintf("%d", ar.GroupID),
|
|
||||||
GroupName: ar.GroupName,
|
|
||||||
File: ar.File,
|
|
||||||
}
|
|
||||||
if lastState.Err != nil {
|
|
||||||
r.LastError = lastState.Err.Error()
|
|
||||||
r.Health = "err"
|
|
||||||
}
|
|
||||||
// satisfy apiRule.State logic
|
|
||||||
if len(r.Alerts) > 0 {
|
|
||||||
r.State = notifier.StatePending.String()
|
|
||||||
stateFiring := notifier.StateFiring.String()
|
|
||||||
for _, a := range r.Alerts {
|
|
||||||
if a.State == stateFiring {
|
|
||||||
r.State = stateFiring
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return r
|
|
||||||
}
|
|
||||||
|
|
||||||
// GetAlerts returns active alerts of rule
|
// GetAlerts returns active alerts of rule
|
||||||
func (ar *AlertingRule) GetAlerts() []*notifier.Alert {
|
func (ar *AlertingRule) GetAlerts() []*notifier.Alert {
|
||||||
ar.alertsMu.RLock()
|
ar.alertsMu.RLock()
|
||||||
@@ -247,6 +198,16 @@ func (ar *AlertingRule) GetAlerts() []*notifier.Alert {
|
|||||||
return alerts
|
return alerts
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// GetAlert returns alert if id exists
|
||||||
|
func (ar *AlertingRule) GetAlert(id uint64) *notifier.Alert {
|
||||||
|
ar.alertsMu.RLock()
|
||||||
|
defer ar.alertsMu.RUnlock()
|
||||||
|
if ar.alerts == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return ar.alerts[id]
|
||||||
|
}
|
||||||
|
|
||||||
func (ar *AlertingRule) logDebugf(at time.Time, a *notifier.Alert, format string, args ...any) {
|
func (ar *AlertingRule) logDebugf(at time.Time, a *notifier.Alert, format string, args ...any) {
|
||||||
if !ar.Debug {
|
if !ar.Debug {
|
||||||
return
|
return
|
||||||
@@ -312,11 +273,6 @@ type labelSet struct {
|
|||||||
// On k conflicts in origin set, the original value is preferred and copied
|
// On k conflicts in origin set, the original value is preferred and copied
|
||||||
// to processed with `exported_%k` key. The copy happens only if passed v isn't equal to origin[k] value.
|
// to processed with `exported_%k` key. The copy happens only if passed v isn't equal to origin[k] value.
|
||||||
func (ls *labelSet) add(k, v string) {
|
func (ls *labelSet) add(k, v string) {
|
||||||
// do not add label with empty value, since it has no meaning.
|
|
||||||
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9984
|
|
||||||
if v == "" {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
ls.processed[k] = v
|
ls.processed[k] = v
|
||||||
ov, ok := ls.origin[k]
|
ov, ok := ls.origin[k]
|
||||||
if !ok {
|
if !ok {
|
||||||
@@ -346,13 +302,14 @@ func (ar *AlertingRule) toLabels(m datasource.Metric, qFn templates.QueryFn) (*l
|
|||||||
ls.processed[l.Name] = l.Value
|
ls.processed[l.Name] = l.Value
|
||||||
}
|
}
|
||||||
|
|
||||||
// labels only support limited templating variables,
|
|
||||||
// including `labels`, `value` and `expr`, to avoid breaking alert states or causing cardinality issues with results
|
|
||||||
extraLabels, err := notifier.ExecTemplate(qFn, ar.Labels, notifier.AlertTplData{
|
extraLabels, err := notifier.ExecTemplate(qFn, ar.Labels, notifier.AlertTplData{
|
||||||
Labels: ls.origin,
|
Labels: ls.origin,
|
||||||
Value: m.Values[0],
|
Value: m.Values[0],
|
||||||
Expr: ar.Expr,
|
Expr: ar.Expr,
|
||||||
})
|
})
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to expand labels: %w", err)
|
||||||
|
}
|
||||||
for k, v := range extraLabels {
|
for k, v := range extraLabels {
|
||||||
ls.add(k, v)
|
ls.add(k, v)
|
||||||
}
|
}
|
||||||
@@ -363,7 +320,7 @@ func (ar *AlertingRule) toLabels(m datasource.Metric, qFn templates.QueryFn) (*l
|
|||||||
if !*disableAlertGroupLabel && ar.GroupName != "" {
|
if !*disableAlertGroupLabel && ar.GroupName != "" {
|
||||||
ls.add(alertGroupNameLabel, ar.GroupName)
|
ls.add(alertGroupNameLabel, ar.GroupName)
|
||||||
}
|
}
|
||||||
return ls, err
|
return ls, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// execRange executes alerting rule on the given time range similarly to exec.
|
// execRange executes alerting rule on the given time range similarly to exec.
|
||||||
@@ -384,12 +341,16 @@ func (ar *AlertingRule) execRange(ctx context.Context, start, end time.Time) ([]
|
|||||||
return []datasource.Metric{{Timestamps: []int64{0}, Values: []float64{math.NaN()}}}, nil
|
return []datasource.Metric{{Timestamps: []int64{0}, Values: []float64{math.NaN()}}}, nil
|
||||||
}
|
}
|
||||||
for _, s := range res.Data {
|
for _, s := range res.Data {
|
||||||
ls, err := ar.expandLabelTemplates(s, qFn)
|
ls, err := ar.expandLabelTemplates(s)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
alertID := hash(ls.processed)
|
alertID := hash(ls.processed)
|
||||||
a := ar.newAlert(s, time.Time{}, ls.processed, nil) // initial alert
|
as, err := ar.expandAnnotationTemplates(s, qFn, time.Time{}, ls)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
a := ar.newAlert(s, time.Time{}, ls.processed, as) // initial alert
|
||||||
|
|
||||||
prevT := time.Time{}
|
prevT := time.Time{}
|
||||||
for i := range s.Values {
|
for i := range s.Values {
|
||||||
@@ -405,6 +366,8 @@ func (ar *AlertingRule) execRange(ctx context.Context, start, end time.Time) ([]
|
|||||||
// reset to Pending if there are gaps > EvalInterval between DPs
|
// reset to Pending if there are gaps > EvalInterval between DPs
|
||||||
a.State = notifier.StatePending
|
a.State = notifier.StatePending
|
||||||
a.ActiveAt = at
|
a.ActiveAt = at
|
||||||
|
// re-template the annotations as active timestamp is changed
|
||||||
|
a.Annotations, _ = ar.expandAnnotationTemplates(s, qFn, at, ls)
|
||||||
a.Start = time.Time{}
|
a.Start = time.Time{}
|
||||||
} else if at.Sub(a.ActiveAt) >= ar.For && a.State != notifier.StateFiring {
|
} else if at.Sub(a.ActiveAt) >= ar.For && a.State != notifier.StateFiring {
|
||||||
a.State = notifier.StateFiring
|
a.State = notifier.StateFiring
|
||||||
@@ -450,7 +413,7 @@ func (ar *AlertingRule) exec(ctx context.Context, ts time.Time, limit int) ([]pr
|
|||||||
|
|
||||||
defer func() {
|
defer func() {
|
||||||
ar.state.add(curState)
|
ar.state.add(curState)
|
||||||
if curState.Err != nil && !errors.Is(curState.Err, context.Canceled) {
|
if curState.Err != nil {
|
||||||
ar.metrics.errors.Inc()
|
ar.metrics.errors.Inc()
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
@@ -459,8 +422,7 @@ func (ar *AlertingRule) exec(ctx context.Context, ts time.Time, limit int) ([]pr
|
|||||||
return nil, fmt.Errorf("failed to execute query %q: %w", ar.Expr, err)
|
return nil, fmt.Errorf("failed to execute query %q: %w", ar.Expr, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
isPartial := isPartialResponse(res)
|
ar.logDebugf(ts, nil, "query returned %d series (elapsed: %s, isPartial: %t)", curState.Samples, curState.Duration, isPartialResponse(res))
|
||||||
ar.logDebugf(ts, nil, "query returned %d series (elapsed: %s, isPartial: %t)", curState.Samples, curState.Duration, isPartial)
|
|
||||||
qFn := func(query string) ([]datasource.Metric, error) {
|
qFn := func(query string) ([]datasource.Metric, error) {
|
||||||
res, _, err := ar.q.Query(ctx, query, ts)
|
res, _, err := ar.q.Query(ctx, query, ts)
|
||||||
return res.Data, err
|
return res.Data, err
|
||||||
@@ -472,11 +434,10 @@ func (ar *AlertingRule) exec(ctx context.Context, ts time.Time, limit int) ([]pr
|
|||||||
expandedLabels := make([]*labelSet, len(res.Data))
|
expandedLabels := make([]*labelSet, len(res.Data))
|
||||||
expandedAnnotations := make([]map[string]string, len(res.Data))
|
expandedAnnotations := make([]map[string]string, len(res.Data))
|
||||||
for i, m := range res.Data {
|
for i, m := range res.Data {
|
||||||
ls, err := ar.expandLabelTemplates(m, qFn)
|
ls, err := ar.expandLabelTemplates(m)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
// only set error in current state, but do not break alert processing
|
|
||||||
curState.Err = err
|
curState.Err = err
|
||||||
logger.Errorf("got templating error in rule %s: %q", ar.Name, err)
|
return nil, curState.Err
|
||||||
}
|
}
|
||||||
at := ts
|
at := ts
|
||||||
alertID := hash(ls.processed)
|
alertID := hash(ls.processed)
|
||||||
@@ -486,11 +447,10 @@ func (ar *AlertingRule) exec(ctx context.Context, ts time.Time, limit int) ([]pr
|
|||||||
at = a.ActiveAt
|
at = a.ActiveAt
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
as, err := ar.expandAnnotationTemplates(m, qFn, at, ls, isPartial)
|
as, err := ar.expandAnnotationTemplates(m, qFn, at, ls)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
// only set error in current state, but do not break alert processing
|
|
||||||
curState.Err = err
|
curState.Err = err
|
||||||
logger.Errorf("got templating error in rule %s: %q", ar.Name, err)
|
return nil, curState.Err
|
||||||
}
|
}
|
||||||
expandedLabels[i] = ls
|
expandedLabels[i] = ls
|
||||||
expandedAnnotations[i] = as
|
expandedAnnotations[i] = as
|
||||||
@@ -596,29 +556,31 @@ func (ar *AlertingRule) exec(ctx context.Context, ts time.Time, limit int) ([]pr
|
|||||||
return append(tss, ar.toTimeSeries(ts.Unix())...), nil
|
return append(tss, ar.toTimeSeries(ts.Unix())...), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (ar *AlertingRule) expandLabelTemplates(m datasource.Metric, qFn templates.QueryFn) (*labelSet, error) {
|
func (ar *AlertingRule) expandLabelTemplates(m datasource.Metric) (*labelSet, error) {
|
||||||
|
qFn := func(_ string) ([]datasource.Metric, error) {
|
||||||
|
return nil, fmt.Errorf("`query` template isn't supported in rule label")
|
||||||
|
}
|
||||||
ls, err := ar.toLabels(m, qFn)
|
ls, err := ar.toLabels(m, qFn)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return ls, fmt.Errorf("failed to expand label templates: %s", err)
|
return nil, fmt.Errorf("failed to expand label templates: %s", err)
|
||||||
}
|
}
|
||||||
return ls, nil
|
return ls, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (ar *AlertingRule) expandAnnotationTemplates(m datasource.Metric, qFn templates.QueryFn, activeAt time.Time, ls *labelSet, isPartial bool) (map[string]string, error) {
|
func (ar *AlertingRule) expandAnnotationTemplates(m datasource.Metric, qFn templates.QueryFn, activeAt time.Time, ls *labelSet) (map[string]string, error) {
|
||||||
tplData := notifier.AlertTplData{
|
tplData := notifier.AlertTplData{
|
||||||
Value: m.Values[0],
|
Value: m.Values[0],
|
||||||
Type: ar.Type.String(),
|
Type: ar.Type.String(),
|
||||||
Labels: ls.origin,
|
Labels: ls.origin,
|
||||||
Expr: ar.Expr,
|
Expr: ar.Expr,
|
||||||
AlertID: hash(ls.processed),
|
AlertID: hash(ls.processed),
|
||||||
GroupID: ar.GroupID,
|
GroupID: ar.GroupID,
|
||||||
ActiveAt: activeAt,
|
ActiveAt: activeAt,
|
||||||
For: ar.For,
|
For: ar.For,
|
||||||
IsPartial: isPartial,
|
|
||||||
}
|
}
|
||||||
as, err := notifier.ExecTemplate(qFn, ar.Annotations, tplData)
|
as, err := notifier.ExecTemplate(qFn, ar.Annotations, tplData)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return as, fmt.Errorf("failed to expand annotation templates: %s", err)
|
return nil, fmt.Errorf("failed to expand annotation templates: %s", err)
|
||||||
}
|
}
|
||||||
return as, nil
|
return as, nil
|
||||||
}
|
}
|
||||||
@@ -818,9 +780,7 @@ func (ar *AlertingRule) restore(ctx context.Context, q datasource.Querier, ts ti
|
|||||||
expr := fmt.Sprintf("default_rollup(%s{%s%s}[%ds])",
|
expr := fmt.Sprintf("default_rollup(%s{%s%s}[%ds])",
|
||||||
alertForStateMetricName, nameStr, labelsFilter, int(lookback.Seconds()))
|
alertForStateMetricName, nameStr, labelsFilter, int(lookback.Seconds()))
|
||||||
|
|
||||||
// query ALERTS_FOR_STATE at `ts-1s` instead `ts` to avoid retrieving data written in the current run,
|
res, _, err := q.Query(ctx, expr, ts)
|
||||||
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10335
|
|
||||||
res, _, err := q.Query(ctx, expr, ts.Add(-1*time.Second))
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("failed to execute restore query %q: %w ", expr, err)
|
return fmt.Errorf("failed to execute restore query %q: %w ", expr, err)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,106 +0,0 @@
|
|||||||
//go:build synctest
|
|
||||||
|
|
||||||
package rule
|
|
||||||
|
|
||||||
import (
|
|
||||||
"context"
|
|
||||||
"strings"
|
|
||||||
"testing"
|
|
||||||
"testing/synctest"
|
|
||||||
"time"
|
|
||||||
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
|
|
||||||
)
|
|
||||||
|
|
||||||
// TestAlertingRule_ActiveAtPreservedInAnnotations ensures that the fix for
|
|
||||||
// https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9543 is preserved
|
|
||||||
// while allowing query templates in labels (https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9783)
|
|
||||||
func TestAlertingRule_ActiveAtPreservedInAnnotations(t *testing.T) {
|
|
||||||
// wrap into synctest because of time manipulations
|
|
||||||
synctest.Test(t, func(t *testing.T) {
|
|
||||||
fq := &datasource.FakeQuerier{}
|
|
||||||
|
|
||||||
ar := &AlertingRule{
|
|
||||||
Name: "TestActiveAtPreservation",
|
|
||||||
Labels: map[string]string{
|
|
||||||
"test_query_in_label": `{{ "static_value" }}`,
|
|
||||||
},
|
|
||||||
Annotations: map[string]string{
|
|
||||||
"description": "Alert active since {{ $activeAt }}",
|
|
||||||
},
|
|
||||||
alerts: make(map[uint64]*notifier.Alert),
|
|
||||||
q: fq,
|
|
||||||
state: &ruleState{
|
|
||||||
entries: make([]StateEntry, 10),
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
// Mock query result - return empty result to make suppress_for_mass_alert = false
|
|
||||||
// (no need to add anything to fq for empty result)
|
|
||||||
|
|
||||||
// Add a metric that should trigger the alert
|
|
||||||
fq.Add(metricWithValueAndLabels(t, 1, "instance", "server1"))
|
|
||||||
|
|
||||||
// First execution - creates new alert
|
|
||||||
ts1 := time.Now()
|
|
||||||
_, err := ar.exec(context.TODO(), ts1, 0)
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("unexpected error on first exec: %s", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
if len(ar.alerts) != 1 {
|
|
||||||
t.Fatalf("expected 1 alert, got %d", len(ar.alerts))
|
|
||||||
}
|
|
||||||
|
|
||||||
firstAlert := ar.GetAlerts()[0]
|
|
||||||
// Verify first execution: activeAt should be ts1 and annotation should reflect it
|
|
||||||
if !firstAlert.ActiveAt.Equal(ts1) {
|
|
||||||
t.Fatalf("expected activeAt to be %v, got %v", ts1, firstAlert.ActiveAt)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Extract time from annotation (format will be like "Alert active since 2025-09-30 08:55:13.638551611 -0400 EDT m=+0.002928464")
|
|
||||||
expectedTimeStr := ts1.Format("2006-01-02 15:04:05")
|
|
||||||
if !strings.Contains(firstAlert.Annotations["description"], expectedTimeStr) {
|
|
||||||
t.Fatalf("first exec annotation should contain time %s, got: %s", expectedTimeStr, firstAlert.Annotations["description"])
|
|
||||||
}
|
|
||||||
|
|
||||||
// Second execution - should preserve activeAt in annotation
|
|
||||||
|
|
||||||
// Ensure different timestamp with different seconds
|
|
||||||
// sleep is non-blocking thanks to synctest
|
|
||||||
time.Sleep(2 * time.Second)
|
|
||||||
ts2 := time.Now()
|
|
||||||
_, err = ar.exec(context.TODO(), ts2, 0)
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("unexpected error on second exec: %s", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Get the alert again (should be the same alert)
|
|
||||||
if len(ar.alerts) != 1 {
|
|
||||||
t.Fatalf("expected 1 alert, got %d", len(ar.alerts))
|
|
||||||
}
|
|
||||||
secondAlert := ar.GetAlerts()[0]
|
|
||||||
|
|
||||||
// Critical test: activeAt should still be ts1, not ts2
|
|
||||||
if !secondAlert.ActiveAt.Equal(ts1) {
|
|
||||||
t.Fatalf("activeAt should be preserved as %v, but got %v", ts1, secondAlert.ActiveAt)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Critical test: annotation should still contain ts1 time, not ts2
|
|
||||||
if !strings.Contains(secondAlert.Annotations["description"], expectedTimeStr) {
|
|
||||||
t.Fatalf("second exec annotation should still contain original time %s, got: %s", expectedTimeStr, secondAlert.Annotations["description"])
|
|
||||||
}
|
|
||||||
|
|
||||||
// Additional verification: annotation should NOT contain ts2 time
|
|
||||||
ts2TimeStr := ts2.Format("2006-01-02 15:04:05")
|
|
||||||
if strings.Contains(secondAlert.Annotations["description"], ts2TimeStr) {
|
|
||||||
t.Fatalf("annotation should NOT contain new eval time %s, got: %s", ts2TimeStr, secondAlert.Annotations["description"])
|
|
||||||
}
|
|
||||||
|
|
||||||
// Verify query template in labels still works (this would fail if query templates were broken)
|
|
||||||
if firstAlert.Labels["test_query_in_label"] != "static_value" {
|
|
||||||
t.Fatalf("expected test_query_in_label=static_value, got %s", firstAlert.Labels["test_query_in_label"])
|
|
||||||
}
|
|
||||||
})
|
|
||||||
}
|
|
||||||
@@ -663,7 +663,7 @@ func TestAlertingRuleExecRange(t *testing.T) {
|
|||||||
Name: "for-pending",
|
Name: "for-pending",
|
||||||
Type: config.NewPrometheusType().String(),
|
Type: config.NewPrometheusType().String(),
|
||||||
Labels: map[string]string{"alertname": "for-pending"},
|
Labels: map[string]string{"alertname": "for-pending"},
|
||||||
Annotations: map[string]string{},
|
Annotations: map[string]string{"activeAt": "5000"},
|
||||||
State: notifier.StatePending,
|
State: notifier.StatePending,
|
||||||
ActiveAt: time.Unix(5, 0),
|
ActiveAt: time.Unix(5, 0),
|
||||||
Value: 1,
|
Value: 1,
|
||||||
@@ -683,7 +683,7 @@ func TestAlertingRuleExecRange(t *testing.T) {
|
|||||||
Name: "for-firing",
|
Name: "for-firing",
|
||||||
Type: config.NewPrometheusType().String(),
|
Type: config.NewPrometheusType().String(),
|
||||||
Labels: map[string]string{"alertname": "for-firing"},
|
Labels: map[string]string{"alertname": "for-firing"},
|
||||||
Annotations: map[string]string{},
|
Annotations: map[string]string{"activeAt": "1000"},
|
||||||
State: notifier.StateFiring,
|
State: notifier.StateFiring,
|
||||||
ActiveAt: time.Unix(1, 0),
|
ActiveAt: time.Unix(1, 0),
|
||||||
Start: time.Unix(5, 0),
|
Start: time.Unix(5, 0),
|
||||||
@@ -704,7 +704,7 @@ func TestAlertingRuleExecRange(t *testing.T) {
|
|||||||
Name: "for-hold-pending",
|
Name: "for-hold-pending",
|
||||||
Type: config.NewPrometheusType().String(),
|
Type: config.NewPrometheusType().String(),
|
||||||
Labels: map[string]string{"alertname": "for-hold-pending"},
|
Labels: map[string]string{"alertname": "for-hold-pending"},
|
||||||
Annotations: map[string]string{},
|
Annotations: map[string]string{"activeAt": "5000"},
|
||||||
State: notifier.StatePending,
|
State: notifier.StatePending,
|
||||||
ActiveAt: time.Unix(5, 0),
|
ActiveAt: time.Unix(5, 0),
|
||||||
Value: 1,
|
Value: 1,
|
||||||
@@ -826,9 +826,12 @@ func TestGroup_Restore(t *testing.T) {
|
|||||||
fg := NewGroup(config.Group{Name: "TestRestore", Rules: rules}, fqr, time.Second, nil)
|
fg := NewGroup(config.Group{Name: "TestRestore", Rules: rules}, fqr, time.Second, nil)
|
||||||
fg.Init()
|
fg.Init()
|
||||||
wg := sync.WaitGroup{}
|
wg := sync.WaitGroup{}
|
||||||
wg.Go(func() {
|
wg.Add(1)
|
||||||
fg.Start(context.Background(), nil, fqr)
|
go func() {
|
||||||
})
|
nts := func() []notifier.Notifier { return []notifier.Notifier{&notifier.FakeNotifier{}} }
|
||||||
|
fg.Start(context.Background(), nts, nil, fqr)
|
||||||
|
wg.Done()
|
||||||
|
}()
|
||||||
fg.Close()
|
fg.Close()
|
||||||
wg.Wait()
|
wg.Wait()
|
||||||
|
|
||||||
@@ -1119,7 +1122,7 @@ func TestAlertingRuleLimit_Success(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func TestAlertingRule_Template(t *testing.T) {
|
func TestAlertingRule_Template(t *testing.T) {
|
||||||
f := func(rule *AlertingRule, metrics []datasource.Metric, isResponsePartial bool, alertsExpected map[uint64]*notifier.Alert) {
|
f := func(rule *AlertingRule, metrics []datasource.Metric, alertsExpected map[uint64]*notifier.Alert) {
|
||||||
t.Helper()
|
t.Helper()
|
||||||
|
|
||||||
fakeGroup := Group{
|
fakeGroup := Group{
|
||||||
@@ -1132,7 +1135,6 @@ func TestAlertingRule_Template(t *testing.T) {
|
|||||||
entries: make([]StateEntry, 10),
|
entries: make([]StateEntry, 10),
|
||||||
}
|
}
|
||||||
fq.Add(metrics...)
|
fq.Add(metrics...)
|
||||||
fq.SetPartialResponse(isResponsePartial)
|
|
||||||
|
|
||||||
if _, err := rule.exec(context.TODO(), time.Now(), 0); err != nil {
|
if _, err := rule.exec(context.TODO(), time.Now(), 0); err != nil {
|
||||||
t.Fatalf("unexpected error: %s", err)
|
t.Fatalf("unexpected error: %s", err)
|
||||||
@@ -1163,7 +1165,7 @@ func TestAlertingRule_Template(t *testing.T) {
|
|||||||
}, []datasource.Metric{
|
}, []datasource.Metric{
|
||||||
metricWithValueAndLabels(t, 1, "instance", "foo"),
|
metricWithValueAndLabels(t, 1, "instance", "foo"),
|
||||||
metricWithValueAndLabels(t, 1, "instance", "bar"),
|
metricWithValueAndLabels(t, 1, "instance", "bar"),
|
||||||
}, false, map[uint64]*notifier.Alert{
|
}, map[uint64]*notifier.Alert{
|
||||||
hash(map[string]string{alertNameLabel: "common", "region": "east", "instance": "foo"}): {
|
hash(map[string]string{alertNameLabel: "common", "region": "east", "instance": "foo"}): {
|
||||||
Annotations: map[string]string{
|
Annotations: map[string]string{
|
||||||
"summary": `common: Too high connection number for "foo"`,
|
"summary": `common: Too high connection number for "foo"`,
|
||||||
@@ -1192,14 +1194,14 @@ func TestAlertingRule_Template(t *testing.T) {
|
|||||||
"instance": "{{ $labels.instance }}",
|
"instance": "{{ $labels.instance }}",
|
||||||
},
|
},
|
||||||
Annotations: map[string]string{
|
Annotations: map[string]string{
|
||||||
"summary": `{{ $labels.__name__ }}: Too high connection number for "{{ $labels.instance }}".{{ if $isPartial }} WARNING: Partial response detected - this alert may be incomplete. Please verify the results manually.{{ end }}`,
|
"summary": `{{ $labels.__name__ }}: Too high connection number for "{{ $labels.instance }}"`,
|
||||||
"description": `{{ $labels.alertname}}: It is {{ $value }} connections for "{{ $labels.instance }}"`,
|
"description": `{{ $labels.alertname}}: It is {{ $value }} connections for "{{ $labels.instance }}"`,
|
||||||
},
|
},
|
||||||
alerts: make(map[uint64]*notifier.Alert),
|
alerts: make(map[uint64]*notifier.Alert),
|
||||||
}, []datasource.Metric{
|
}, []datasource.Metric{
|
||||||
metricWithValueAndLabels(t, 2, "__name__", "first", "instance", "foo", alertNameLabel, "override"),
|
metricWithValueAndLabels(t, 2, "__name__", "first", "instance", "foo", alertNameLabel, "override"),
|
||||||
metricWithValueAndLabels(t, 10, "__name__", "second", "instance", "bar", alertNameLabel, "override"),
|
metricWithValueAndLabels(t, 10, "__name__", "second", "instance", "bar", alertNameLabel, "override"),
|
||||||
}, false, map[uint64]*notifier.Alert{
|
}, map[uint64]*notifier.Alert{
|
||||||
hash(map[string]string{alertNameLabel: "override label", "exported_alertname": "override", "instance": "foo"}): {
|
hash(map[string]string{alertNameLabel: "override label", "exported_alertname": "override", "instance": "foo"}): {
|
||||||
Labels: map[string]string{
|
Labels: map[string]string{
|
||||||
alertNameLabel: "override label",
|
alertNameLabel: "override label",
|
||||||
@@ -1207,7 +1209,7 @@ func TestAlertingRule_Template(t *testing.T) {
|
|||||||
"instance": "foo",
|
"instance": "foo",
|
||||||
},
|
},
|
||||||
Annotations: map[string]string{
|
Annotations: map[string]string{
|
||||||
"summary": `first: Too high connection number for "foo".`,
|
"summary": `first: Too high connection number for "foo"`,
|
||||||
"description": `override: It is 2 connections for "foo"`,
|
"description": `override: It is 2 connections for "foo"`,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
@@ -1218,7 +1220,7 @@ func TestAlertingRule_Template(t *testing.T) {
|
|||||||
"instance": "bar",
|
"instance": "bar",
|
||||||
},
|
},
|
||||||
Annotations: map[string]string{
|
Annotations: map[string]string{
|
||||||
"summary": `second: Too high connection number for "bar".`,
|
"summary": `second: Too high connection number for "bar"`,
|
||||||
"description": `override: It is 10 connections for "bar"`,
|
"description": `override: It is 10 connections for "bar"`,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
@@ -1231,7 +1233,7 @@ func TestAlertingRule_Template(t *testing.T) {
|
|||||||
"instance": "{{ $labels.instance }}",
|
"instance": "{{ $labels.instance }}",
|
||||||
},
|
},
|
||||||
Annotations: map[string]string{
|
Annotations: map[string]string{
|
||||||
"summary": `Alert "{{ $labels.alertname }}({{ $labels.alertgroup }})" for instance {{ $labels.instance }}.{{ if $isPartial }} WARNING: Partial response detected - this alert may be incomplete. Please verify the results manually.{{ end }}`,
|
"summary": `Alert "{{ $labels.alertname }}({{ $labels.alertgroup }})" for instance {{ $labels.instance }}`,
|
||||||
},
|
},
|
||||||
alerts: make(map[uint64]*notifier.Alert),
|
alerts: make(map[uint64]*notifier.Alert),
|
||||||
}, []datasource.Metric{
|
}, []datasource.Metric{
|
||||||
@@ -1239,7 +1241,7 @@ func TestAlertingRule_Template(t *testing.T) {
|
|||||||
alertNameLabel, "originAlertname",
|
alertNameLabel, "originAlertname",
|
||||||
alertGroupNameLabel, "originGroupname",
|
alertGroupNameLabel, "originGroupname",
|
||||||
"instance", "foo"),
|
"instance", "foo"),
|
||||||
}, true, map[uint64]*notifier.Alert{
|
}, map[uint64]*notifier.Alert{
|
||||||
hash(map[string]string{
|
hash(map[string]string{
|
||||||
alertNameLabel: "OriginLabels",
|
alertNameLabel: "OriginLabels",
|
||||||
"exported_alertname": "originAlertname",
|
"exported_alertname": "originAlertname",
|
||||||
@@ -1255,7 +1257,7 @@ func TestAlertingRule_Template(t *testing.T) {
|
|||||||
"instance": "foo",
|
"instance": "foo",
|
||||||
},
|
},
|
||||||
Annotations: map[string]string{
|
Annotations: map[string]string{
|
||||||
"summary": `Alert "originAlertname(originGroupname)" for instance foo. WARNING: Partial response detected - this alert may be incomplete. Please verify the results manually.`,
|
"summary": `Alert "originAlertname(originGroupname)" for instance foo`,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
})
|
})
|
||||||
@@ -1370,10 +1372,8 @@ func TestAlertingRule_ToLabels(t *testing.T) {
|
|||||||
|
|
||||||
ar := &AlertingRule{
|
ar := &AlertingRule{
|
||||||
Labels: map[string]string{
|
Labels: map[string]string{
|
||||||
"instance": "override", // this should override instance with new value
|
"instance": "override", // this should override instance with new value
|
||||||
"group": "vmalert", // this shouldn't have effect since value in metric is equal
|
"group": "vmalert", // this shouldn't have effect since value in metric is equal
|
||||||
"invalid_label": "{{ .Values.mustRuntimeFail }}",
|
|
||||||
"empty_label": "", // this should be dropped
|
|
||||||
},
|
},
|
||||||
Expr: "sum(vmalert_alerting_rules_error) by(instance, group, alertname) > 0",
|
Expr: "sum(vmalert_alerting_rules_error) by(instance, group, alertname) > 0",
|
||||||
Name: "AlertingRulesError",
|
Name: "AlertingRulesError",
|
||||||
@@ -1381,11 +1381,10 @@ func TestAlertingRule_ToLabels(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
expectedOriginLabels := map[string]string{
|
expectedOriginLabels := map[string]string{
|
||||||
"instance": "0.0.0.0:8800",
|
"instance": "0.0.0.0:8800",
|
||||||
"group": "vmalert",
|
"group": "vmalert",
|
||||||
"alertname": "ConfigurationReloadFailure",
|
"alertname": "ConfigurationReloadFailure",
|
||||||
"alertgroup": "vmalert",
|
"alertgroup": "vmalert",
|
||||||
"invalid_label": `error evaluating template: template: :1:298: executing "" at <.Values.mustRuntimeFail>: can't evaluate field Values in type notifier.tplData`,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
expectedProcessedLabels := map[string]string{
|
expectedProcessedLabels := map[string]string{
|
||||||
@@ -1395,12 +1394,11 @@ func TestAlertingRule_ToLabels(t *testing.T) {
|
|||||||
"exported_alertname": "ConfigurationReloadFailure",
|
"exported_alertname": "ConfigurationReloadFailure",
|
||||||
"group": "vmalert",
|
"group": "vmalert",
|
||||||
"alertgroup": "vmalert",
|
"alertgroup": "vmalert",
|
||||||
"invalid_label": `error evaluating template: template: :1:298: executing "" at <.Values.mustRuntimeFail>: can't evaluate field Values in type notifier.tplData`,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
ls, err := ar.toLabels(metric, nil)
|
ls, err := ar.toLabels(metric, nil)
|
||||||
if err == nil || !strings.Contains(err.Error(), "error evaluating template") {
|
if err != nil {
|
||||||
t.Fatalf("unexpected error %q", err.Error())
|
t.Fatalf("unexpected error: %s", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
if !reflect.DeepEqual(ls.origin, expectedOriginLabels) {
|
if !reflect.DeepEqual(ls.origin, expectedOriginLabels) {
|
||||||
@@ -1431,50 +1429,3 @@ func TestAlertingRuleExec_Partial(t *testing.T) {
|
|||||||
t.Fatalf("unexpected error: %s", err)
|
t.Fatalf("unexpected error: %s", err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestAlertingRule_QueryTemplateInLabels(t *testing.T) {
|
|
||||||
fq := &datasource.FakeQuerier{}
|
|
||||||
fakeGroup := Group{
|
|
||||||
Name: "TestQueryTemplateInLabels",
|
|
||||||
}
|
|
||||||
|
|
||||||
ar := &AlertingRule{
|
|
||||||
Name: "test_alert",
|
|
||||||
Labels: map[string]string{
|
|
||||||
"suppress_for_mass_alert": `{{ if (printf "ALERTS{alertname='SomeAlert', alertstate='firing', device='%s'} == 1" $labels.device | query) }}true{{ else }}false{{ end }}`,
|
|
||||||
},
|
|
||||||
Annotations: map[string]string{
|
|
||||||
"summary": "Test alert with query template in labels",
|
|
||||||
},
|
|
||||||
alerts: make(map[uint64]*notifier.Alert),
|
|
||||||
}
|
|
||||||
ar.GroupID = fakeGroup.GetID()
|
|
||||||
ar.q = fq
|
|
||||||
ar.state = &ruleState{
|
|
||||||
entries: make([]StateEntry, 10),
|
|
||||||
}
|
|
||||||
|
|
||||||
// Add a metric that should trigger the alert
|
|
||||||
fq.Add(metricWithValueAndLabels(t, 1, "device", "sda1"))
|
|
||||||
|
|
||||||
ts := time.Now()
|
|
||||||
_, err := ar.exec(context.TODO(), ts, 0)
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("unexpected error with query template in labels: %s", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Verify that the alert was created and the query template was executed
|
|
||||||
if len(ar.alerts) != 1 {
|
|
||||||
t.Fatalf("expected 1 alert, got %d", len(ar.alerts))
|
|
||||||
}
|
|
||||||
|
|
||||||
alert := ar.GetAlerts()[0]
|
|
||||||
suppressLabel, exists := alert.Labels["suppress_for_mass_alert"]
|
|
||||||
if !exists {
|
|
||||||
t.Fatalf("expected 'suppress_for_mass_alert' label to exist")
|
|
||||||
}
|
|
||||||
// The query template should have been executed (even if it returns false due to mock data)
|
|
||||||
if suppressLabel != "true" && suppressLabel != "false" {
|
|
||||||
t.Fatalf("expected 'suppress_for_mass_alert' label to be 'true' or 'false', got '%s'", suppressLabel)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -2,11 +2,11 @@ package rule
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
|
"encoding/json"
|
||||||
"errors"
|
"errors"
|
||||||
"flag"
|
"flag"
|
||||||
"fmt"
|
"fmt"
|
||||||
"hash/fnv"
|
"hash/fnv"
|
||||||
"maps"
|
|
||||||
"net/url"
|
"net/url"
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
@@ -25,14 +25,10 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
ruleResultsLimit = flag.Int("rule.resultsLimit", 0, "Limits the number of alerts or recording results a single rule can produce. "+
|
|
||||||
"Can be overridden by the limit option under group if specified. "+
|
|
||||||
"If exceeded, the rule will be marked with an error and all its results will be discarded. "+
|
|
||||||
"0 means no limit.")
|
|
||||||
ruleUpdateEntriesLimit = flag.Int("rule.updateEntriesLimit", 20, "Defines the max number of rule's state updates stored in-memory. "+
|
ruleUpdateEntriesLimit = flag.Int("rule.updateEntriesLimit", 20, "Defines the max number of rule's state updates stored in-memory. "+
|
||||||
"Rule's updates are available on rule's Details page and are used for debugging purposes. The number of stored updates can be overridden per rule via update_entries_limit param.")
|
"Rule's updates are available on rule's Details page and are used for debugging purposes. The number of stored updates can be overridden per rule via update_entries_limit param.")
|
||||||
resendDelay = flag.Duration("rule.resendDelay", 0, "Minimum amount of time to wait before resending an alert to notifier.")
|
resendDelay = flag.Duration("rule.resendDelay", 0, "MiniMum amount of time to wait before resending an alert to notifier.")
|
||||||
maxResolveDuration = flag.Duration("rule.maxResolveDuration", 0, "Limits the maximum duration for automatic alert expiration, "+
|
maxResolveDuration = flag.Duration("rule.maxResolveDuration", 0, "Limits the maxiMum duration for automatic alert expiration, "+
|
||||||
"which by default is 4 times evaluationInterval of the parent group")
|
"which by default is 4 times evaluationInterval of the parent group")
|
||||||
evalDelay = flag.Duration("rule.evalDelay", 30*time.Second, "Adjustment of the 'time' parameter for rule evaluation requests to compensate intentional data delay from the datasource. "+
|
evalDelay = flag.Duration("rule.evalDelay", 30*time.Second, "Adjustment of the 'time' parameter for rule evaluation requests to compensate intentional data delay from the datasource. "+
|
||||||
"Normally, should be equal to '-search.latencyOffset' (cmd-line flag configured for VictoriaMetrics single-node or vmselect). "+
|
"Normally, should be equal to '-search.latencyOffset' (cmd-line flag configured for VictoriaMetrics single-node or vmselect). "+
|
||||||
@@ -40,8 +36,6 @@ var (
|
|||||||
disableAlertGroupLabel = flag.Bool("disableAlertgroupLabel", false, "Whether to disable adding group's Name as label to generated alerts and time series.")
|
disableAlertGroupLabel = flag.Bool("disableAlertgroupLabel", false, "Whether to disable adding group's Name as label to generated alerts and time series.")
|
||||||
remoteReadLookBack = flag.Duration("remoteRead.lookback", time.Hour, "Lookback defines how far to look into past for alerts timeseries. "+
|
remoteReadLookBack = flag.Duration("remoteRead.lookback", time.Hour, "Lookback defines how far to look into past for alerts timeseries. "+
|
||||||
"For example, if lookback=1h then range from now() to now()-1h will be scanned.")
|
"For example, if lookback=1h then range from now() to now()-1h will be scanned.")
|
||||||
maxStartDelay = flag.Duration("group.maxStartDelay", 5*time.Minute, "Defines the max delay before starting the group evaluation. Group's start is artificially delayed for random duration on interval"+
|
|
||||||
" [0..min(--group.maxStartDelay, group.interval)]. This helps smoothing out the load on the configured datasource, so evaluations aren't executed too close to each other.")
|
|
||||||
)
|
)
|
||||||
|
|
||||||
// Group is an entity for grouping rules
|
// Group is an entity for grouping rules
|
||||||
@@ -98,7 +92,9 @@ type groupMetrics struct {
|
|||||||
// set2 has priority over set1.
|
// set2 has priority over set1.
|
||||||
func mergeLabels(groupName, ruleName string, set1, set2 map[string]string) map[string]string {
|
func mergeLabels(groupName, ruleName string, set1, set2 map[string]string) map[string]string {
|
||||||
r := map[string]string{}
|
r := map[string]string{}
|
||||||
maps.Copy(r, set1)
|
for k, v := range set1 {
|
||||||
|
r[k] = v
|
||||||
|
}
|
||||||
for k, v := range set2 {
|
for k, v := range set2 {
|
||||||
if prevV, ok := r[k]; ok {
|
if prevV, ok := r[k]; ok {
|
||||||
logger.Infof("label %q=%q for rule %q.%q overwritten with external label %q=%q",
|
logger.Infof("label %q=%q for rule %q.%q overwritten with external label %q=%q",
|
||||||
@@ -116,6 +112,7 @@ func NewGroup(cfg config.Group, qb datasource.QuerierBuilder, defaultInterval ti
|
|||||||
Name: cfg.Name,
|
Name: cfg.Name,
|
||||||
File: cfg.File,
|
File: cfg.File,
|
||||||
Interval: cfg.Interval.Duration(),
|
Interval: cfg.Interval.Duration(),
|
||||||
|
Limit: cfg.Limit,
|
||||||
Concurrency: cfg.Concurrency,
|
Concurrency: cfg.Concurrency,
|
||||||
checksum: cfg.Checksum,
|
checksum: cfg.Checksum,
|
||||||
Params: cfg.Params,
|
Params: cfg.Params,
|
||||||
@@ -132,11 +129,6 @@ func NewGroup(cfg config.Group, qb datasource.QuerierBuilder, defaultInterval ti
|
|||||||
if g.Interval == 0 {
|
if g.Interval == 0 {
|
||||||
g.Interval = defaultInterval
|
g.Interval = defaultInterval
|
||||||
}
|
}
|
||||||
if cfg.Limit != nil {
|
|
||||||
g.Limit = *cfg.Limit
|
|
||||||
} else {
|
|
||||||
g.Limit = *ruleResultsLimit
|
|
||||||
}
|
|
||||||
if g.Concurrency < 1 {
|
if g.Concurrency < 1 {
|
||||||
g.Concurrency = 1
|
g.Concurrency = 1
|
||||||
}
|
}
|
||||||
@@ -297,7 +289,7 @@ func (g *Group) InterruptEval() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Close stops the group and its rules, unregisters group metrics
|
// Close stops the group and it's rules, unregisters group metrics
|
||||||
func (g *Group) Close() {
|
func (g *Group) Close() {
|
||||||
if g.doneCh == nil {
|
if g.doneCh == nil {
|
||||||
return
|
return
|
||||||
@@ -306,6 +298,10 @@ func (g *Group) Close() {
|
|||||||
g.InterruptEval()
|
g.InterruptEval()
|
||||||
<-g.finishedCh
|
<-g.finishedCh
|
||||||
|
|
||||||
|
g.closeGroupMetrics()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (g *Group) closeGroupMetrics() {
|
||||||
metrics.UnregisterSet(g.metrics.set, true)
|
metrics.UnregisterSet(g.metrics.set, true)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -331,13 +327,13 @@ func (g *Group) Init() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Start starts group's evaluation
|
// Start starts group's evaluation
|
||||||
func (g *Group) Start(ctx context.Context, rw remotewrite.RWClient, rr datasource.QuerierBuilder) {
|
func (g *Group) Start(ctx context.Context, nts func() []notifier.Notifier, rw remotewrite.RWClient, rr datasource.QuerierBuilder) {
|
||||||
defer func() { close(g.finishedCh) }()
|
defer func() { close(g.finishedCh) }()
|
||||||
evalTS := time.Now()
|
evalTS := time.Now()
|
||||||
// sleep random duration to spread group rules evaluation
|
// sleep random duration to spread group rules evaluation
|
||||||
// over maxStartDelay to reduce the load on datasource.
|
// over time in order to reduce load on datasource.
|
||||||
if !SkipRandSleepOnGroupStart {
|
if !SkipRandSleepOnGroupStart {
|
||||||
sleepBeforeStart := g.delayBeforeStart(evalTS, *maxStartDelay)
|
sleepBeforeStart := delayBeforeStart(evalTS, g.GetID(), g.Interval, g.EvalOffset)
|
||||||
g.infof("will start in %v", sleepBeforeStart)
|
g.infof("will start in %v", sleepBeforeStart)
|
||||||
|
|
||||||
sleepTimer := time.NewTimer(sleepBeforeStart)
|
sleepTimer := time.NewTimer(sleepBeforeStart)
|
||||||
@@ -369,22 +365,21 @@ func (g *Group) Start(ctx context.Context, rw remotewrite.RWClient, rr datasourc
|
|||||||
|
|
||||||
e := &executor{
|
e := &executor{
|
||||||
Rw: rw,
|
Rw: rw,
|
||||||
|
Notifiers: nts,
|
||||||
notifierHeaders: g.NotifierHeaders,
|
notifierHeaders: g.NotifierHeaders,
|
||||||
}
|
}
|
||||||
|
|
||||||
g.infof("started")
|
g.infof("started")
|
||||||
|
|
||||||
eval := func(ctx context.Context, ts time.Time) time.Time {
|
eval := func(ctx context.Context, ts time.Time) {
|
||||||
g.metrics.iterationTotal.Inc()
|
g.metrics.iterationTotal.Inc()
|
||||||
|
|
||||||
start := time.Now()
|
start := time.Now()
|
||||||
|
|
||||||
if len(g.Rules) < 1 {
|
if len(g.Rules) < 1 {
|
||||||
g.metrics.iterationDuration.UpdateDuration(start)
|
g.metrics.iterationDuration.UpdateDuration(start)
|
||||||
g.mu.Lock()
|
|
||||||
g.LastEvaluation = start
|
g.LastEvaluation = start
|
||||||
g.mu.Unlock()
|
return
|
||||||
return ts
|
|
||||||
}
|
}
|
||||||
|
|
||||||
resolveDuration := getResolveDuration(g.Interval, *resendDelay, *maxResolveDuration)
|
resolveDuration := getResolveDuration(g.Interval, *resendDelay, *maxResolveDuration)
|
||||||
@@ -397,10 +392,7 @@ func (g *Group) Start(ctx context.Context, rw remotewrite.RWClient, rr datasourc
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
g.metrics.iterationDuration.UpdateDuration(start)
|
g.metrics.iterationDuration.UpdateDuration(start)
|
||||||
g.mu.Lock()
|
|
||||||
g.LastEvaluation = start
|
g.LastEvaluation = start
|
||||||
g.mu.Unlock()
|
|
||||||
return ts
|
|
||||||
}
|
}
|
||||||
|
|
||||||
evalCtx, cancel := context.WithCancel(ctx)
|
evalCtx, cancel := context.WithCancel(ctx)
|
||||||
@@ -409,15 +401,15 @@ func (g *Group) Start(ctx context.Context, rw remotewrite.RWClient, rr datasourc
|
|||||||
g.mu.Unlock()
|
g.mu.Unlock()
|
||||||
defer g.evalCancel()
|
defer g.evalCancel()
|
||||||
|
|
||||||
|
eval(evalCtx, evalTS)
|
||||||
|
|
||||||
t := time.NewTicker(g.Interval)
|
t := time.NewTicker(g.Interval)
|
||||||
defer t.Stop()
|
defer t.Stop()
|
||||||
|
|
||||||
realEvalTS := eval(evalCtx, evalTS)
|
|
||||||
|
|
||||||
// restore the rules state after the first evaluation
|
// restore the rules state after the first evaluation
|
||||||
// so only active alerts can be restored.
|
// so only active alerts can be restored.
|
||||||
if rr != nil {
|
if rr != nil {
|
||||||
err := g.restore(ctx, rr, realEvalTS, *remoteReadLookBack)
|
err := g.restore(ctx, rr, evalTS, *remoteReadLookBack)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logger.Errorf("error while restoring ruleState for group %q: %s", g.Name, err)
|
logger.Errorf("error while restoring ruleState for group %q: %s", g.Name, err)
|
||||||
}
|
}
|
||||||
@@ -480,35 +472,32 @@ func (g *Group) UpdateWith(newGroup *Group) {
|
|||||||
g.updateCh <- newGroup
|
g.updateCh <- newGroup
|
||||||
}
|
}
|
||||||
|
|
||||||
// delayBeforeStart returns duration for delaying the evaluation start
|
// DeepCopy returns a deep copy of group
|
||||||
// based on given ts and Group settings. The delay can't exceed maxDelay.
|
func (g *Group) DeepCopy() *Group {
|
||||||
// maxDelay is ignored if g.EvalOffset != nil.
|
g.mu.RLock()
|
||||||
//
|
data, _ := json.Marshal(g)
|
||||||
// Delaying is important to smooth out the load on the datasource when all groups start at the same time.
|
g.mu.RUnlock()
|
||||||
// delayBeforeStart calculates delay based on Group ID, so all groups will start at different moments of time.
|
newG := Group{}
|
||||||
func (g *Group) delayBeforeStart(ts time.Time, maxDelay time.Duration) time.Duration {
|
_ = json.Unmarshal(data, &newG)
|
||||||
if g.EvalOffset != nil {
|
newG.Rules = g.Rules
|
||||||
offset := *g.EvalOffset
|
newG.id = g.id
|
||||||
// adjust the offset for negative evalOffset, the rule is:
|
return &newG
|
||||||
// `eval_offset: -x` is equivalent to `eval_offset: y` for `interval: x+y`.
|
}
|
||||||
// For example, `eval_offset: -6m` is equivalent to `eval_offset: 4m` for `interval: 10m`.
|
|
||||||
if offset < 0 {
|
// if offset is specified, delayBeforeStart returns a duration to help aligning timestamp with offset;
|
||||||
offset += g.Interval
|
// otherwise, it returns a random duration between [0..interval] based on group key.
|
||||||
}
|
func delayBeforeStart(ts time.Time, key uint64, interval time.Duration, offset *time.Duration) time.Duration {
|
||||||
// if offset is specified, ignore the maxDelay and return a duration aligned with offset
|
if offset != nil {
|
||||||
currentOffsetPoint := ts.Truncate(g.Interval).Add(offset)
|
currentOffsetPoint := ts.Truncate(interval).Add(*offset)
|
||||||
if currentOffsetPoint.Before(ts) {
|
if currentOffsetPoint.Before(ts) {
|
||||||
// wait until the next offset point
|
// wait until the next offset point
|
||||||
return currentOffsetPoint.Add(g.Interval).Sub(ts)
|
return currentOffsetPoint.Add(interval).Sub(ts)
|
||||||
}
|
}
|
||||||
return currentOffsetPoint.Sub(ts)
|
return currentOffsetPoint.Sub(ts)
|
||||||
}
|
}
|
||||||
|
|
||||||
// otherwise, return a random duration between [0..min(interval, maxDelay)] based on group ID
|
|
||||||
// artificially limit interval, so groups with big intervals could start sooner.
|
|
||||||
interval := min(g.Interval, maxDelay)
|
|
||||||
var randSleep time.Duration
|
var randSleep time.Duration
|
||||||
randSleep = time.Duration(float64(interval) * (float64(g.GetID()) / (1 << 64)))
|
randSleep = time.Duration(float64(interval) * (float64(key) / (1 << 64)))
|
||||||
sleepOffset := time.Duration(ts.UnixNano() % interval.Nanoseconds())
|
sleepOffset := time.Duration(ts.UnixNano() % interval.Nanoseconds())
|
||||||
if randSleep < sleepOffset {
|
if randSleep < sleepOffset {
|
||||||
randSleep += interval
|
randSleep += interval
|
||||||
@@ -570,13 +559,15 @@ func (g *Group) Replay(start, end time.Time, rw remotewrite.RWClient, maxDataPoi
|
|||||||
if !disableProgressBar {
|
if !disableProgressBar {
|
||||||
bar = pb.StartNew(iterations * len(g.Rules))
|
bar = pb.StartNew(iterations * len(g.Rules))
|
||||||
}
|
}
|
||||||
for i := range g.Rules {
|
for _, r := range g.Rules {
|
||||||
rule := g.Rules[i]
|
|
||||||
sem <- struct{}{}
|
sem <- struct{}{}
|
||||||
wg.Go(func() {
|
wg.Add(1)
|
||||||
res <- replayRuleRange(rule, ri, bar, rw, replayRuleRetryAttempts, ruleEvaluationConcurrency)
|
go func(r Rule, ri rangeIterator) {
|
||||||
|
// pass ri as a copy, so it can be modified within the replayRuleRange
|
||||||
|
res <- replayRuleRange(r, ri, bar, rw, replayRuleRetryAttempts, ruleEvaluationConcurrency)
|
||||||
<-sem
|
<-sem
|
||||||
})
|
wg.Done()
|
||||||
|
}(r, ri)
|
||||||
}
|
}
|
||||||
|
|
||||||
wg.Wait()
|
wg.Wait()
|
||||||
@@ -606,10 +597,10 @@ func replayRuleRange(r Rule, ri rangeIterator, bar *pb.ProgressBar, rw remotewri
|
|||||||
res := make(chan int, int(ri.end.Sub(ri.start)/ri.step)+1)
|
res := make(chan int, int(ri.end.Sub(ri.start)/ri.step)+1)
|
||||||
for ri.next() {
|
for ri.next() {
|
||||||
sem <- struct{}{}
|
sem <- struct{}{}
|
||||||
start := ri.s
|
wg.Add(1)
|
||||||
end := ri.e
|
|
||||||
wg.Go(func() {
|
go func(s, e time.Time) {
|
||||||
n, err := replayRule(r, start, end, rw, replayRuleRetryAttempts)
|
n, err := replayRule(r, s, e, rw, replayRuleRetryAttempts)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logger.Fatalf("rule %q: %s", r, err)
|
logger.Fatalf("rule %q: %s", r, err)
|
||||||
}
|
}
|
||||||
@@ -618,7 +609,8 @@ func replayRuleRange(r Rule, ri rangeIterator, bar *pb.ProgressBar, rw remotewri
|
|||||||
}
|
}
|
||||||
res <- n
|
res <- n
|
||||||
<-sem
|
<-sem
|
||||||
})
|
wg.Done()
|
||||||
|
}(ri.s, ri.e)
|
||||||
}
|
}
|
||||||
wg.Wait()
|
wg.Wait()
|
||||||
close(res)
|
close(res)
|
||||||
@@ -632,9 +624,10 @@ func replayRuleRange(r Rule, ri rangeIterator, bar *pb.ProgressBar, rw remotewri
|
|||||||
}
|
}
|
||||||
|
|
||||||
// ExecOnce evaluates all the rules under group for once with given timestamp.
|
// ExecOnce evaluates all the rules under group for once with given timestamp.
|
||||||
func (g *Group) ExecOnce(ctx context.Context, rw remotewrite.RWClient, evalTS time.Time) chan error {
|
func (g *Group) ExecOnce(ctx context.Context, nts func() []notifier.Notifier, rw remotewrite.RWClient, evalTS time.Time) chan error {
|
||||||
e := &executor{
|
e := &executor{
|
||||||
Rw: rw,
|
Rw: rw,
|
||||||
|
Notifiers: nts,
|
||||||
notifierHeaders: g.NotifierHeaders,
|
notifierHeaders: g.NotifierHeaders,
|
||||||
}
|
}
|
||||||
if len(g.Rules) < 1 {
|
if len(g.Rules) < 1 {
|
||||||
@@ -709,6 +702,7 @@ func (g *Group) getEvalDelay() time.Duration {
|
|||||||
|
|
||||||
// executor contains group's notify and rw configs
|
// executor contains group's notify and rw configs
|
||||||
type executor struct {
|
type executor struct {
|
||||||
|
Notifiers func() []notifier.Notifier
|
||||||
notifierHeaders map[string]string
|
notifierHeaders map[string]string
|
||||||
|
|
||||||
Rw remotewrite.RWClient
|
Rw remotewrite.RWClient
|
||||||
@@ -729,13 +723,14 @@ func (e *executor) execConcurrently(ctx context.Context, rules []Rule, ts time.T
|
|||||||
sem := make(chan struct{}, concurrency)
|
sem := make(chan struct{}, concurrency)
|
||||||
go func() {
|
go func() {
|
||||||
wg := sync.WaitGroup{}
|
wg := sync.WaitGroup{}
|
||||||
for i := range rules {
|
for _, r := range rules {
|
||||||
rule := rules[i]
|
|
||||||
sem <- struct{}{}
|
sem <- struct{}{}
|
||||||
wg.Go(func() {
|
wg.Add(1)
|
||||||
res <- e.exec(ctx, rule, ts, resolveDuration, limit)
|
go func(r Rule) {
|
||||||
|
res <- e.exec(ctx, r, ts, resolveDuration, limit)
|
||||||
<-sem
|
<-sem
|
||||||
})
|
wg.Done()
|
||||||
|
}(r)
|
||||||
}
|
}
|
||||||
wg.Wait()
|
wg.Wait()
|
||||||
close(res)
|
close(res)
|
||||||
@@ -764,7 +759,6 @@ func (e *executor) exec(ctx context.Context, r Rule, ts time.Time, resolveDurati
|
|||||||
return fmt.Errorf("rule %q: failed to execute: %w", r, err)
|
return fmt.Errorf("rule %q: failed to execute: %w", r, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
var errG vmalertutil.ErrGroup
|
|
||||||
if e.Rw != nil {
|
if e.Rw != nil {
|
||||||
pushToRW := func(tss []prompb.TimeSeries) error {
|
pushToRW := func(tss []prompb.TimeSeries) error {
|
||||||
var lastErr error
|
var lastErr error
|
||||||
@@ -776,26 +770,31 @@ func (e *executor) exec(ctx context.Context, r Rule, ts time.Time, resolveDurati
|
|||||||
return lastErr
|
return lastErr
|
||||||
}
|
}
|
||||||
if err := pushToRW(tss); err != nil {
|
if err := pushToRW(tss); err != nil {
|
||||||
errG.Add(err)
|
return err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
ar, ok := r.(*AlertingRule)
|
ar, ok := r.(*AlertingRule)
|
||||||
if !ok {
|
if !ok {
|
||||||
return errG.Err()
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
alerts := ar.alertsToSend(resolveDuration, *resendDelay)
|
alerts := ar.alertsToSend(resolveDuration, *resendDelay)
|
||||||
if len(alerts) < 1 {
|
if len(alerts) < 1 {
|
||||||
return errG.Err()
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
notifierErr := notifier.Send(ctx, alerts, e.notifierHeaders)
|
wg := sync.WaitGroup{}
|
||||||
for err := range notifierErr {
|
errGr := new(vmalertutil.ErrGroup)
|
||||||
if err != nil {
|
for _, nt := range e.Notifiers() {
|
||||||
errG.Add(fmt.Errorf("rule %q: notifier failure: %w", r, err))
|
wg.Add(1)
|
||||||
}
|
go func(nt notifier.Notifier) {
|
||||||
|
if err := nt.Send(ctx, alerts, e.notifierHeaders); err != nil {
|
||||||
|
errGr.Add(fmt.Errorf("rule %q: failed to send alerts to addr %q: %w", r, nt.Addr(), err))
|
||||||
|
}
|
||||||
|
wg.Done()
|
||||||
|
}(nt)
|
||||||
}
|
}
|
||||||
|
wg.Wait()
|
||||||
return errG.Err()
|
return errGr.Err()
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -262,7 +262,7 @@ func TestUpdateDuringRandSleep(t *testing.T) {
|
|||||||
updateCh: make(chan *Group),
|
updateCh: make(chan *Group),
|
||||||
}
|
}
|
||||||
g.Init()
|
g.Init()
|
||||||
go g.Start(context.Background(), nil, nil)
|
go g.Start(context.Background(), nil, nil, nil)
|
||||||
|
|
||||||
rule1 := AlertingRule{
|
rule1 := AlertingRule{
|
||||||
Name: "jobDown",
|
Name: "jobDown",
|
||||||
@@ -346,8 +346,7 @@ func TestGroupStart(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fs := &datasource.FakeQuerier{}
|
fs := &datasource.FakeQuerier{}
|
||||||
fn, cleanup := notifier.InitFakeNotifier()
|
fn := ¬ifier.FakeNotifier{}
|
||||||
defer cleanup()
|
|
||||||
|
|
||||||
const evalInterval = time.Millisecond
|
const evalInterval = time.Millisecond
|
||||||
g := NewGroup(groups[0], fs, evalInterval, map[string]string{"cluster": "east-1"})
|
g := NewGroup(groups[0], fs, evalInterval, map[string]string{"cluster": "east-1"})
|
||||||
@@ -396,7 +395,7 @@ func TestGroupStart(t *testing.T) {
|
|||||||
fs.Add(m2)
|
fs.Add(m2)
|
||||||
g.Init()
|
g.Init()
|
||||||
go func() {
|
go func() {
|
||||||
g.Start(context.Background(), nil, fs)
|
g.Start(context.Background(), func() []notifier.Notifier { return []notifier.Notifier{fn} }, nil, fs)
|
||||||
close(finished)
|
close(finished)
|
||||||
}()
|
}()
|
||||||
|
|
||||||
@@ -405,8 +404,7 @@ func TestGroupStart(t *testing.T) {
|
|||||||
|
|
||||||
var cur uint64
|
var cur uint64
|
||||||
prev := g.metrics.iterationTotal.Get()
|
prev := g.metrics.iterationTotal.Get()
|
||||||
i := 0
|
for i := 0; ; i++ {
|
||||||
for {
|
|
||||||
if i > 40 {
|
if i > 40 {
|
||||||
t.Fatalf("group wasn't able to perform %d evaluations during %d eval intervals", n, i)
|
t.Fatalf("group wasn't able to perform %d evaluations during %d eval intervals", n, i)
|
||||||
}
|
}
|
||||||
@@ -415,7 +413,6 @@ func TestGroupStart(t *testing.T) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
time.Sleep(interval)
|
time.Sleep(interval)
|
||||||
i++
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -475,10 +472,15 @@ func TestFaultyNotifier(t *testing.T) {
|
|||||||
r := newTestAlertingRule("instant", 0)
|
r := newTestAlertingRule("instant", 0)
|
||||||
r.q = fq
|
r.q = fq
|
||||||
|
|
||||||
fn, cleanup := notifier.InitFakeNotifier()
|
fn := ¬ifier.FakeNotifier{}
|
||||||
defer cleanup()
|
e := &executor{
|
||||||
|
Notifiers: func() []notifier.Notifier {
|
||||||
e := &executor{}
|
return []notifier.Notifier{
|
||||||
|
¬ifier.FaultyNotifier{},
|
||||||
|
fn,
|
||||||
|
}
|
||||||
|
},
|
||||||
|
}
|
||||||
delay := 5 * time.Second
|
delay := 5 * time.Second
|
||||||
ctx, cancel := context.WithTimeout(context.Background(), delay)
|
ctx, cancel := context.WithTimeout(context.Background(), delay)
|
||||||
defer cancel()
|
defer cancel()
|
||||||
@@ -551,7 +553,7 @@ func TestCloseWithEvalInterruption(t *testing.T) {
|
|||||||
g := NewGroup(groups[0], fq, evalInterval, nil)
|
g := NewGroup(groups[0], fq, evalInterval, nil)
|
||||||
g.Init()
|
g.Init()
|
||||||
|
|
||||||
go g.Start(context.Background(), nil, nil)
|
go g.Start(context.Background(), nil, nil, nil)
|
||||||
|
|
||||||
time.Sleep(evalInterval * 20)
|
time.Sleep(evalInterval * 20)
|
||||||
|
|
||||||
@@ -569,10 +571,9 @@ func TestCloseWithEvalInterruption(t *testing.T) {
|
|||||||
|
|
||||||
func TestGroupStartDelay(t *testing.T) {
|
func TestGroupStartDelay(t *testing.T) {
|
||||||
g := &Group{}
|
g := &Group{}
|
||||||
g.id = uint64(math.MaxUint64 / 10)
|
|
||||||
// interval of 5min and key generate a static delay of 30s
|
// interval of 5min and key generate a static delay of 30s
|
||||||
g.Interval = time.Minute * 5
|
g.Interval = time.Minute * 5
|
||||||
maxDelay := time.Minute * 5
|
key := uint64(math.MaxUint64 / 10)
|
||||||
|
|
||||||
f := func(atS, expS string) {
|
f := func(atS, expS string) {
|
||||||
t.Helper()
|
t.Helper()
|
||||||
@@ -584,7 +585,7 @@ func TestGroupStartDelay(t *testing.T) {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
delay := g.delayBeforeStart(at, maxDelay)
|
delay := delayBeforeStart(at, key, g.Interval, g.EvalOffset)
|
||||||
gotStart := at.Add(delay)
|
gotStart := at.Add(delay)
|
||||||
if expTS != gotStart {
|
if expTS != gotStart {
|
||||||
t.Fatalf("expected to get %v; got %v instead", expTS, gotStart)
|
t.Fatalf("expected to get %v; got %v instead", expTS, gotStart)
|
||||||
@@ -605,24 +606,6 @@ func TestGroupStartDelay(t *testing.T) {
|
|||||||
f("2023-01-01T00:01:00.000+00:00", "2023-01-01T00:03:00.000+00:00")
|
f("2023-01-01T00:01:00.000+00:00", "2023-01-01T00:03:00.000+00:00")
|
||||||
f("2023-01-01T00:03:30.000+00:00", "2023-01-01T00:08:00.000+00:00")
|
f("2023-01-01T00:03:30.000+00:00", "2023-01-01T00:08:00.000+00:00")
|
||||||
f("2023-01-01T00:08:00.000+00:00", "2023-01-01T00:08:00.000+00:00")
|
f("2023-01-01T00:08:00.000+00:00", "2023-01-01T00:08:00.000+00:00")
|
||||||
|
|
||||||
// test group with negative offset -2min, which is equivalent to 3min offset for 5min interval
|
|
||||||
offset = -2 * time.Minute
|
|
||||||
g.EvalOffset = &offset
|
|
||||||
|
|
||||||
f("2023-01-01T00:00:15.000+00:00", "2023-01-01T00:03:00.000+00:00")
|
|
||||||
f("2023-01-01T00:01:00.000+00:00", "2023-01-01T00:03:00.000+00:00")
|
|
||||||
f("2023-01-01T00:03:30.000+00:00", "2023-01-01T00:08:00.000+00:00")
|
|
||||||
f("2023-01-01T00:08:00.000+00:00", "2023-01-01T00:08:00.000+00:00")
|
|
||||||
|
|
||||||
maxDelay = time.Minute * 1
|
|
||||||
g.EvalOffset = nil
|
|
||||||
|
|
||||||
// test group with maxDelay, and offset disabled
|
|
||||||
f("2023-01-01T00:00:00.000+00:00", "2023-01-01T00:00:06.000+00:00")
|
|
||||||
f("2023-01-01T00:00:01.000+00:00", "2023-01-01T00:00:06.000+00:00")
|
|
||||||
f("2023-01-01T00:00:06.100+00:00", "2023-01-01T00:01:06.000+00:00")
|
|
||||||
f("2023-01-01T00:00:11.000+00:00", "2023-01-01T00:01:06.000+00:00")
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestGetPrometheusReqTimestamp(t *testing.T) {
|
func TestGetPrometheusReqTimestamp(t *testing.T) {
|
||||||
|
|||||||
@@ -2,7 +2,6 @@ package rule
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"errors"
|
|
||||||
"fmt"
|
"fmt"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
@@ -82,37 +81,6 @@ func (rr *RecordingRule) ID() uint64 {
|
|||||||
return rr.RuleID
|
return rr.RuleID
|
||||||
}
|
}
|
||||||
|
|
||||||
// ToAPI returns ApiRule representation of rr
|
|
||||||
func (rr *RecordingRule) ToAPI() ApiRule {
|
|
||||||
state := rr.state
|
|
||||||
lastState := state.getLast()
|
|
||||||
r := ApiRule{
|
|
||||||
Type: TypeRecording,
|
|
||||||
DatasourceType: rr.Type.String(),
|
|
||||||
Name: rr.Name,
|
|
||||||
Query: rr.Expr,
|
|
||||||
Labels: rr.Labels,
|
|
||||||
LastEvaluation: lastState.Time,
|
|
||||||
EvaluationTime: lastState.Duration.Seconds(),
|
|
||||||
Health: "ok",
|
|
||||||
LastSamples: lastState.Samples,
|
|
||||||
LastSeriesFetched: lastState.SeriesFetched,
|
|
||||||
MaxUpdates: state.size(),
|
|
||||||
Updates: state.getAll(),
|
|
||||||
|
|
||||||
// encode as strings to avoid rounding
|
|
||||||
ID: fmt.Sprintf("%d", rr.ID()),
|
|
||||||
GroupID: fmt.Sprintf("%d", rr.GroupID),
|
|
||||||
GroupName: rr.GroupName,
|
|
||||||
File: rr.File,
|
|
||||||
}
|
|
||||||
if lastState.Err != nil {
|
|
||||||
r.LastError = lastState.Err.Error()
|
|
||||||
r.Health = "err"
|
|
||||||
}
|
|
||||||
return r
|
|
||||||
}
|
|
||||||
|
|
||||||
// NewRecordingRule creates a new RecordingRule
|
// NewRecordingRule creates a new RecordingRule
|
||||||
func NewRecordingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rule) *RecordingRule {
|
func NewRecordingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rule) *RecordingRule {
|
||||||
debug := group.Debug
|
debug := group.Debug
|
||||||
@@ -198,7 +166,7 @@ func (rr *RecordingRule) exec(ctx context.Context, ts time.Time, limit int) ([]p
|
|||||||
|
|
||||||
defer func() {
|
defer func() {
|
||||||
rr.state.add(curState)
|
rr.state.add(curState)
|
||||||
if curState.Err != nil && !errors.Is(curState.Err, context.Canceled) {
|
if curState.Err != nil {
|
||||||
rr.metrics.errors.Inc()
|
rr.metrics.errors.Inc()
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
@@ -237,8 +205,7 @@ func (rr *RecordingRule) exec(ctx context.Context, ts time.Time, limit int) ([]p
|
|||||||
Labels: stringToLabels(k),
|
Labels: stringToLabels(k),
|
||||||
Samples: []prompb.Sample{
|
Samples: []prompb.Sample{
|
||||||
{Value: decimal.StaleNaN, Timestamp: ts.UnixNano() / 1e6},
|
{Value: decimal.StaleNaN, Timestamp: ts.UnixNano() / 1e6},
|
||||||
},
|
}})
|
||||||
})
|
|
||||||
}
|
}
|
||||||
rr.lastEvaluation = curEvaluation
|
rr.lastEvaluation = curEvaluation
|
||||||
return tss, nil
|
return tss, nil
|
||||||
@@ -293,11 +260,6 @@ func (rr *RecordingRule) toTimeSeries(m datasource.Metric) prompb.TimeSeries {
|
|||||||
}
|
}
|
||||||
// add extra labels configured by user
|
// add extra labels configured by user
|
||||||
for k := range rr.Labels {
|
for k := range rr.Labels {
|
||||||
// do not add label with empty value, since it has no meaning.
|
|
||||||
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9984
|
|
||||||
if rr.Labels[k] == "" {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
existingLabel := promrelabel.GetLabelByName(m.Labels, k)
|
existingLabel := promrelabel.GetLabelByName(m.Labels, k)
|
||||||
if existingLabel != nil { // there is a conflict between extra and existing label
|
if existingLabel != nil { // there is a conflict between extra and existing label
|
||||||
if existingLabel.Value == rr.Labels[k] {
|
if existingLabel.Value == rr.Labels[k] {
|
||||||
|
|||||||
@@ -21,8 +21,6 @@ type Rule interface {
|
|||||||
// ID returns unique ID that may be used for
|
// ID returns unique ID that may be used for
|
||||||
// identifying this Rule among others.
|
// identifying this Rule among others.
|
||||||
ID() uint64
|
ID() uint64
|
||||||
// ToAPI returns ApiRule representation of Rule
|
|
||||||
ToAPI() ApiRule
|
|
||||||
// exec executes the rule with given context at the given timestamp and limit.
|
// exec executes the rule with given context at the given timestamp and limit.
|
||||||
// returns an err if number of resulting time series exceeds the limit.
|
// returns an err if number of resulting time series exceeds the limit.
|
||||||
exec(ctx context.Context, ts time.Time, limit int) ([]prompb.TimeSeries, error)
|
exec(ctx context.Context, ts time.Time, limit int) ([]prompb.TimeSeries, error)
|
||||||
@@ -70,6 +68,39 @@ type StateEntry struct {
|
|||||||
Curl string `json:"curl"`
|
Curl string `json:"curl"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// GetLastEntry returns latest stateEntry of rule
|
||||||
|
func GetLastEntry(r Rule) StateEntry {
|
||||||
|
if rule, ok := r.(*AlertingRule); ok {
|
||||||
|
return rule.state.getLast()
|
||||||
|
}
|
||||||
|
if rule, ok := r.(*RecordingRule); ok {
|
||||||
|
return rule.state.getLast()
|
||||||
|
}
|
||||||
|
return StateEntry{}
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetRuleStateSize returns size of rule stateEntry
|
||||||
|
func GetRuleStateSize(r Rule) int {
|
||||||
|
if rule, ok := r.(*AlertingRule); ok {
|
||||||
|
return rule.state.size()
|
||||||
|
}
|
||||||
|
if rule, ok := r.(*RecordingRule); ok {
|
||||||
|
return rule.state.size()
|
||||||
|
}
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetAllRuleState returns rule entire stateEntries
|
||||||
|
func GetAllRuleState(r Rule) []StateEntry {
|
||||||
|
if rule, ok := r.(*AlertingRule); ok {
|
||||||
|
return rule.state.getAll()
|
||||||
|
}
|
||||||
|
if rule, ok := r.(*RecordingRule); ok {
|
||||||
|
return rule.state.getAll()
|
||||||
|
}
|
||||||
|
return []StateEntry{}
|
||||||
|
}
|
||||||
|
|
||||||
func (s *ruleState) size() int {
|
func (s *ruleState) size() int {
|
||||||
s.RLock()
|
s.RLock()
|
||||||
defer s.RUnlock()
|
defer s.RUnlock()
|
||||||
@@ -121,7 +152,7 @@ func (s *ruleState) add(e StateEntry) {
|
|||||||
func replayRule(r Rule, start, end time.Time, rw remotewrite.RWClient, replayRuleRetryAttempts int) (int, error) {
|
func replayRule(r Rule, start, end time.Time, rw remotewrite.RWClient, replayRuleRetryAttempts int) (int, error) {
|
||||||
var err error
|
var err error
|
||||||
var tss []prompb.TimeSeries
|
var tss []prompb.TimeSeries
|
||||||
for i := range replayRuleRetryAttempts {
|
for i := 0; i < replayRuleRetryAttempts; i++ {
|
||||||
tss, err = r.execRange(context.Background(), start, end)
|
tss, err = r.execRange(context.Background(), start, end)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
break
|
break
|
||||||
|
|||||||
@@ -40,7 +40,7 @@ func TestRule_state(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
var last time.Time
|
var last time.Time
|
||||||
for range stateEntriesN * 2 {
|
for i := 0; i < stateEntriesN*2; i++ {
|
||||||
last = time.Now()
|
last = time.Now()
|
||||||
r.state.add(StateEntry{At: last})
|
r.state.add(StateEntry{At: last})
|
||||||
}
|
}
|
||||||
@@ -65,15 +65,17 @@ func TestRule_stateConcurrent(_ *testing.T) {
|
|||||||
r := &AlertingRule{state: &ruleState{entries: make([]StateEntry, 20)}}
|
r := &AlertingRule{state: &ruleState{entries: make([]StateEntry, 20)}}
|
||||||
const workers = 50
|
const workers = 50
|
||||||
const iterations = 100
|
const iterations = 100
|
||||||
var wg sync.WaitGroup
|
wg := sync.WaitGroup{}
|
||||||
for range workers {
|
wg.Add(workers)
|
||||||
wg.Go(func() {
|
for i := 0; i < workers; i++ {
|
||||||
for range iterations {
|
go func() {
|
||||||
|
defer wg.Done()
|
||||||
|
for i := 0; i < iterations; i++ {
|
||||||
r.state.add(StateEntry{At: time.Now()})
|
r.state.add(StateEntry{At: time.Now()})
|
||||||
r.state.getAll()
|
r.state.getAll()
|
||||||
r.state.getLast()
|
r.state.getLast()
|
||||||
}
|
}
|
||||||
})
|
}()
|
||||||
}
|
}
|
||||||
wg.Wait()
|
wg.Wait()
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -19,13 +19,13 @@ func CompareRules(t *testing.T, a, b Rule) error {
|
|||||||
case *AlertingRule:
|
case *AlertingRule:
|
||||||
br, ok := b.(*AlertingRule)
|
br, ok := b.(*AlertingRule)
|
||||||
if !ok {
|
if !ok {
|
||||||
return fmt.Errorf("rule %d supposed to be of type AlertingRule", b.ID())
|
return fmt.Errorf("rule %q supposed to be of type AlertingRule", b.ID())
|
||||||
}
|
}
|
||||||
return compareAlertingRules(t, v, br)
|
return compareAlertingRules(t, v, br)
|
||||||
case *RecordingRule:
|
case *RecordingRule:
|
||||||
br, ok := b.(*RecordingRule)
|
br, ok := b.(*RecordingRule)
|
||||||
if !ok {
|
if !ok {
|
||||||
return fmt.Errorf("rule %d supposed to be of type RecordingRule", b.ID())
|
return fmt.Errorf("rule %q supposed to be of type RecordingRule", b.ID())
|
||||||
}
|
}
|
||||||
return compareRecordingRules(t, v, br)
|
return compareRecordingRules(t, v, br)
|
||||||
default:
|
default:
|
||||||
|
|||||||
@@ -34,12 +34,11 @@ body {
|
|||||||
padding-top: 4.5rem;
|
padding-top: 4.5rem;
|
||||||
}
|
}
|
||||||
|
|
||||||
.vm-group {
|
.group-items {
|
||||||
cursor: pointer;
|
cursor: pointer;
|
||||||
padding: 5px;
|
padding: 5px;
|
||||||
margin-top: 5px;
|
margin-top: 5px;
|
||||||
position: relative;
|
position: relative;
|
||||||
display: none;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
.btn svg, .dropdown-item svg {
|
.btn svg, .dropdown-item svg {
|
||||||
@@ -56,22 +55,14 @@ body {
|
|||||||
height: 38px;
|
height: 38px;
|
||||||
}
|
}
|
||||||
|
|
||||||
.vm-item:not(.vm-found) {
|
.group-items:not(:has(.sub-item:not(.d-none))) {
|
||||||
display: none;
|
display: none !important;
|
||||||
}
|
}
|
||||||
|
|
||||||
.vm-group:has(.vm-item:is(.vm-found)), .vm-group:is(.vm-found) {
|
.group-items:hover {
|
||||||
display: flex;
|
|
||||||
}
|
|
||||||
|
|
||||||
.vm-group:hover {
|
|
||||||
background-color: #f8f9fa!important;
|
background-color: #f8f9fa!important;
|
||||||
}
|
}
|
||||||
|
|
||||||
.vm-group:is(.vm-found) .vm-item {
|
|
||||||
display: table-row;
|
|
||||||
}
|
|
||||||
|
|
||||||
.table {
|
.table {
|
||||||
table-layout: fixed;
|
table-layout: fixed;
|
||||||
}
|
}
|
||||||
@@ -120,9 +111,3 @@ textarea.curl-area {
|
|||||||
.w-60 {
|
.w-60 {
|
||||||
width: 60%;
|
width: 60%;
|
||||||
}
|
}
|
||||||
|
|
||||||
.annotations {
|
|
||||||
white-space: pre-wrap;
|
|
||||||
color: gray;
|
|
||||||
word-wrap: break-word;
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -11,7 +11,7 @@
|
|||||||
<path d="M224.163 175.27a1.9 1.9 0 0 0 2.8 0l6-5.9a2.1 2.1 0 0 0 .2-2.7 1.9 1.9 0 0 0-3-.2l-2.6 2.6v-5.2c0-1.54-1.667-2.502-3-1.732-.619.357-1 1.017-1 1.732v5.2l-2.6-2.6a1.9 1.9 0 0 0-3 .2 2.1 2.1 0 0 0 .2 2.7zm-16.459-23.297h36c1.54 0 2.502-1.667 1.732-3a2 2 0 0 0-1.732-1h-36c-1.54 0-2.502 1.667-1.732 3 .357.619 1.017 1 1.732 1m36 4h-36c-1.54 0-2.502 1.667-1.732 3 .357.619 1.017 1 1.732 1h36c1.54 0 2.502-1.667 1.732-3a2 2 0 0 0-1.732-1m-16.59-23.517a1.9 1.9 0 0 0-2.8 0l-6 5.9a2.1 2.1 0 0 0-.2 2.7 1.9 1.9 0 0 0 3 .2l2.6-2.6v5.2c0 1.54 1.667 2.502 3 1.732.619-.357 1-1.017 1-1.732v-5.2l2.6 2.6a1.9 1.9 0 0 0 3-.2 2.1 2.1 0 0 0-.2-2.7z"/>
|
<path d="M224.163 175.27a1.9 1.9 0 0 0 2.8 0l6-5.9a2.1 2.1 0 0 0 .2-2.7 1.9 1.9 0 0 0-3-.2l-2.6 2.6v-5.2c0-1.54-1.667-2.502-3-1.732-.619.357-1 1.017-1 1.732v5.2l-2.6-2.6a1.9 1.9 0 0 0-3 .2 2.1 2.1 0 0 0 .2 2.7zm-16.459-23.297h36c1.54 0 2.502-1.667 1.732-3a2 2 0 0 0-1.732-1h-36c-1.54 0-2.502 1.667-1.732 3 .357.619 1.017 1 1.732 1m36 4h-36c-1.54 0-2.502 1.667-1.732 3 .357.619 1.017 1 1.732 1h36c1.54 0 2.502-1.667 1.732-3a2 2 0 0 0-1.732-1m-16.59-23.517a1.9 1.9 0 0 0-2.8 0l-6 5.9a2.1 2.1 0 0 0-.2 2.7 1.9 1.9 0 0 0 3 .2l2.6-2.6v5.2c0 1.54 1.667 2.502 3 1.732.619-.357 1-1.017 1-1.732v-5.2l2.6 2.6a1.9 1.9 0 0 0 3-.2 2.1 2.1 0 0 0-.2-2.7z"/>
|
||||||
</symbol>
|
</symbol>
|
||||||
|
|
||||||
<symbol id="state" viewBox="-10 -10 320 310">
|
<symbol id="filter" viewBox="-10 -10 320 310">
|
||||||
<path d="M288.953 0h-277c-5.522 0-10 4.478-10 10v49.531c0 5.522 4.478 10 10 10h12.372l91.378 107.397v113.978a10 10 0 0 0 15.547 8.32l49.5-33a10 10 0 0 0 4.453-8.32v-80.978l91.378-107.397h12.372c5.522 0 10-4.478 10-10V10c0-5.522-4.477-10-10-10M167.587 166.77a10 10 0 0 0-2.384 6.48v79.305l-29.5 19.666V173.25a10 10 0 0 0-2.384-6.48L50.585 69.531h199.736zM278.953 49.531h-257V20h257z"/>
|
<path d="M288.953 0h-277c-5.522 0-10 4.478-10 10v49.531c0 5.522 4.478 10 10 10h12.372l91.378 107.397v113.978a10 10 0 0 0 15.547 8.32l49.5-33a10 10 0 0 0 4.453-8.32v-80.978l91.378-107.397h12.372c5.522 0 10-4.478 10-10V10c0-5.522-4.477-10-10-10M167.587 166.77a10 10 0 0 0-2.384 6.48v79.305l-29.5 19.666V173.25a10 10 0 0 0-2.384-6.48L50.585 69.531h199.736zM278.953 49.531h-257V20h257z"/>
|
||||||
</symbol>
|
</symbol>
|
||||||
|
|
||||||
|
|||||||
|
Before Width: | Height: | Size: 4.7 KiB After Width: | Height: | Size: 4.7 KiB |
@@ -8,9 +8,9 @@ function actionAll(isCollapse) {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
function groupForState(key) {
|
function groupFilter(key) {
|
||||||
if (key) {
|
if (key) {
|
||||||
location.href = `?state=${key}`;
|
location.href = `?filter=${key}`;
|
||||||
} else {
|
} else {
|
||||||
window.location = window.location.pathname;
|
window.location = window.location.pathname;
|
||||||
}
|
}
|
||||||
@@ -65,34 +65,32 @@ function getParamURL(key) {
|
|||||||
return url.searchParams.get(key)
|
return url.searchParams.get(key)
|
||||||
}
|
}
|
||||||
|
|
||||||
function matchText(search, item) {
|
|
||||||
const text = item.innerText.toLowerCase();
|
|
||||||
return text.indexOf(search) >= 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
function filterRules(searchPhrase) {
|
function filterRules(searchPhrase) {
|
||||||
document.querySelectorAll('.vm-group').forEach((group) => {
|
document.querySelectorAll('.sub-items').forEach((rules) => {
|
||||||
if (!searchPhrase) {
|
let found = false;
|
||||||
group.classList.add('vm-found');
|
rules.querySelectorAll('.sub-item').forEach((rule) => {
|
||||||
return;
|
if (searchPhrase) {
|
||||||
}
|
const ruleName = rule.innerText.toLowerCase();
|
||||||
for (const item of group.querySelectorAll('.vm-group-search')) {
|
const matches = []
|
||||||
if (matchText(searchPhrase, item)) {
|
const hasValue = ruleName.indexOf(searchPhrase) >= 0;
|
||||||
group.classList.add('vm-found');
|
rule.querySelectorAll('.label').forEach((label) => {
|
||||||
return;
|
const text = label.innerText.toLowerCase();
|
||||||
|
if (text.indexOf(searchPhrase) >= 0) {
|
||||||
|
matches.push(text);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
if (!matches.length && !hasValue) {
|
||||||
|
rule.classList.add('d-none');
|
||||||
|
return;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
rule.classList.remove('d-none');
|
||||||
group.classList.remove('vm-found');
|
found = true;
|
||||||
for (const item of group.querySelectorAll('.vm-item')) {
|
});
|
||||||
if (matchText(searchPhrase, item)) {
|
if (found && searchPhrase || !searchPhrase) {
|
||||||
item.classList.add('vm-found');
|
rules.classList.remove('d-none');
|
||||||
continue;
|
} else {
|
||||||
}
|
rules.classList.add('d-none');
|
||||||
if (Array.from(item.querySelectorAll('.label')).find(l => matchText(searchPhrase, l))) {
|
|
||||||
item.classList.add('vm-found');
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
item.classList.remove('vm-found');
|
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -485,12 +485,6 @@ func templateFuncs() textTpl.FuncMap {
|
|||||||
|
|
||||||
/* Helpers */
|
/* Helpers */
|
||||||
|
|
||||||
// now returns the Unix timestamp in seconds at the time of the template evaluation.
|
|
||||||
// For example: {{ (now | toTime).Sub $activeAt }} will return the duration the alert has been active.
|
|
||||||
"now": func() float64 {
|
|
||||||
return float64(time.Now().Unix())
|
|
||||||
},
|
|
||||||
|
|
||||||
// Converts a list of objects to a map with keys arg0, arg1 etc.
|
// Converts a list of objects to a map with keys arg0, arg1 etc.
|
||||||
// This is intended to allow multiple arguments to be passed to templates.
|
// This is intended to allow multiple arguments to be passed to templates.
|
||||||
"args": func(args ...any) map[string]any {
|
"args": func(args ...any) map[string]any {
|
||||||
|
|||||||
@@ -45,7 +45,7 @@ func (eg *ErrGroup) Error() string {
|
|||||||
return ""
|
return ""
|
||||||
}
|
}
|
||||||
var b strings.Builder
|
var b strings.Builder
|
||||||
fmt.Fprintf(&b, "errors(%d): \n", len(eg.errs))
|
fmt.Fprintf(&b, "errors(%d): ", len(eg.errs))
|
||||||
for i, err := range eg.errs {
|
for i, err := range eg.errs {
|
||||||
b.WriteString(err.Error())
|
b.WriteString(err.Error())
|
||||||
if i != len(eg.errs)-1 {
|
if i != len(eg.errs)-1 {
|
||||||
|
|||||||
@@ -30,8 +30,8 @@ func TestErrGroup(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
f(nil, "")
|
f(nil, "")
|
||||||
f([]error{errors.New("timeout")}, "errors(1): \ntimeout")
|
f([]error{errors.New("timeout")}, "errors(1): timeout")
|
||||||
f([]error{errors.New("timeout"), errors.New("deadline")}, "errors(2): \ntimeout\ndeadline")
|
f([]error{errors.New("timeout"), errors.New("deadline")}, "errors(2): timeout\ndeadline")
|
||||||
}
|
}
|
||||||
|
|
||||||
// TestErrGroupConcurrent supposed to test concurrent
|
// TestErrGroupConcurrent supposed to test concurrent
|
||||||
@@ -42,7 +42,7 @@ func TestErrGroupConcurrent(_ *testing.T) {
|
|||||||
|
|
||||||
const writersN = 4
|
const writersN = 4
|
||||||
payload := make(chan error, writersN)
|
payload := make(chan error, writersN)
|
||||||
for range writersN {
|
for i := 0; i < writersN; i++ {
|
||||||
go func() {
|
go func() {
|
||||||
for err := range payload {
|
for err := range payload {
|
||||||
eg.Add(err)
|
eg.Add(err)
|
||||||
@@ -51,7 +51,7 @@ func TestErrGroupConcurrent(_ *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
const iterations = 500
|
const iterations = 500
|
||||||
for i := range iterations {
|
for i := 0; i < iterations; i++ {
|
||||||
payload <- fmt.Errorf("error %d", i)
|
payload <- fmt.Errorf("error %d", i)
|
||||||
if i%10 == 0 {
|
if i%10 == 0 {
|
||||||
_ = eg.Err()
|
_ = eg.Err()
|
||||||
|
|||||||
@@ -1,11 +1,9 @@
|
|||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"cmp"
|
|
||||||
"embed"
|
"embed"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
"math"
|
|
||||||
"net/http"
|
"net/http"
|
||||||
"slices"
|
"slices"
|
||||||
"strconv"
|
"strconv"
|
||||||
@@ -31,9 +29,7 @@ var (
|
|||||||
{"api/v1/rules", "list all loaded groups and rules"},
|
{"api/v1/rules", "list all loaded groups and rules"},
|
||||||
{"api/v1/alerts", "list all active alerts"},
|
{"api/v1/alerts", "list all active alerts"},
|
||||||
{"api/v1/notifiers", "list all notifiers"},
|
{"api/v1/notifiers", "list all notifiers"},
|
||||||
{fmt.Sprintf("api/v1/alert?%s=<int>&%s=<int>", rule.ParamGroupID, rule.ParamAlertID), "get alert status by group and alert ID"},
|
{fmt.Sprintf("api/v1/alert?%s=<int>&%s=<int>", paramGroupID, paramAlertID), "get alert status by group and alert ID"},
|
||||||
{fmt.Sprintf("api/v1/rule?%s=<int>&%s=<int>", rule.ParamGroupID, rule.ParamRuleID), "get rule status by group and rule ID"},
|
|
||||||
{fmt.Sprintf("api/v1/group?%s=<int>", rule.ParamGroupID), "get group status by group ID"},
|
|
||||||
}
|
}
|
||||||
systemLinks = [][2]string{
|
systemLinks = [][2]string{
|
||||||
{"vmalert/groups", "UI"},
|
{"vmalert/groups", "UI"},
|
||||||
@@ -49,16 +45,9 @@ var (
|
|||||||
{Name: "Docs", URL: "https://docs.victoriametrics.com/victoriametrics/vmalert/"},
|
{Name: "Docs", URL: "https://docs.victoriametrics.com/victoriametrics/vmalert/"},
|
||||||
}
|
}
|
||||||
ruleTypeMap = map[string]string{
|
ruleTypeMap = map[string]string{
|
||||||
"alert": rule.TypeAlerting,
|
"alert": ruleTypeAlerting,
|
||||||
"record": rule.TypeRecording,
|
"record": ruleTypeRecording,
|
||||||
}
|
}
|
||||||
|
|
||||||
// The "recovering", "noData", "normal", "error" states are used by Grafana.
|
|
||||||
// Ignore "recovering" since it is not currently acknowledged by vmalert,
|
|
||||||
// treat "noData" as an alias for "nomatch",
|
|
||||||
// treat "normal" as an alias for "inactive",
|
|
||||||
// treat "error" as an alias for "unhealthy"
|
|
||||||
ruleStates = []string{"ok", "nomatch", "inactive", "firing", "pending", "unhealthy", "recovering", "noData", "normal", "error"}
|
|
||||||
)
|
)
|
||||||
|
|
||||||
type requestHandler struct {
|
type requestHandler struct {
|
||||||
@@ -72,14 +61,6 @@ var (
|
|||||||
staticServer = http.StripPrefix("/vmalert", staticHandler)
|
staticServer = http.StripPrefix("/vmalert", staticHandler)
|
||||||
)
|
)
|
||||||
|
|
||||||
func marshalJson(v any, kind string) ([]byte, *httpserver.ErrorWithStatusCode) {
|
|
||||||
data, err := json.Marshal(v)
|
|
||||||
if err != nil {
|
|
||||||
return nil, errResponse(fmt.Errorf("failed to marshal %s: %s", kind, err), http.StatusInternalServerError)
|
|
||||||
}
|
|
||||||
return data, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
|
func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
|
||||||
if strings.HasPrefix(r.URL.Path, "/vmalert/static") {
|
if strings.HasPrefix(r.URL.Path, "/vmalert/static") {
|
||||||
staticServer.ServeHTTP(w, r)
|
staticServer.ServeHTTP(w, r)
|
||||||
@@ -111,32 +92,40 @@ func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
|
|||||||
httpserver.Errorf(w, r, "%s", err)
|
httpserver.Errorf(w, r, "%s", err)
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
WriteRule(w, r, rule)
|
WriteRuleDetails(w, r, rule)
|
||||||
return true
|
return true
|
||||||
// current used by old vmalert UI and Grafana Alerts
|
case "/vmalert/groups":
|
||||||
case "/vmalert/groups", "/rules":
|
|
||||||
rf, err := newRulesFilter(r)
|
rf, err := newRulesFilter(r)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
httpserver.Errorf(w, r, "%s", err)
|
httpserver.Errorf(w, r, "%s", err)
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
// only support filtering by a single state
|
data := rh.groups(rf)
|
||||||
state := ""
|
WriteListGroups(w, r, data, rf.filter)
|
||||||
if len(rf.states) > 0 {
|
|
||||||
state = rf.states[0]
|
|
||||||
rf.states = rf.states[:1]
|
|
||||||
}
|
|
||||||
lr := rh.groups(rf)
|
|
||||||
WriteListGroups(w, r, lr.Data.Groups, state)
|
|
||||||
return true
|
return true
|
||||||
case "/vmalert/notifiers":
|
case "/vmalert/notifiers":
|
||||||
WriteListTargets(w, r, notifier.GetTargets())
|
WriteListTargets(w, r, notifier.GetTargets())
|
||||||
return true
|
return true
|
||||||
|
|
||||||
|
// special cases for Grafana requests,
|
||||||
|
// served without `vmalert` prefix:
|
||||||
|
case "/rules":
|
||||||
|
// Grafana makes an extra request to `/rules`
|
||||||
|
// handler in addition to `/api/v1/rules` calls in alerts UI
|
||||||
|
var data []*apiGroup
|
||||||
|
rf, err := newRulesFilter(r)
|
||||||
|
if err != nil {
|
||||||
|
httpserver.Errorf(w, r, "%s", err)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
data = rh.groups(rf)
|
||||||
|
WriteListGroups(w, r, data, rf.filter)
|
||||||
|
return true
|
||||||
|
|
||||||
case "/vmalert/api/v1/notifiers", "/api/v1/notifiers":
|
case "/vmalert/api/v1/notifiers", "/api/v1/notifiers":
|
||||||
data, err := rh.listNotifiers()
|
data, err := rh.listNotifiers()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
errJson(w, r, err)
|
httpserver.Errorf(w, r, "%s", err)
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
w.Header().Set("Content-Type", "application/json")
|
w.Header().Set("Content-Type", "application/json")
|
||||||
@@ -144,14 +133,15 @@ func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
|
|||||||
return true
|
return true
|
||||||
case "/vmalert/api/v1/rules", "/api/v1/rules":
|
case "/vmalert/api/v1/rules", "/api/v1/rules":
|
||||||
// path used by Grafana for ng alerting
|
// path used by Grafana for ng alerting
|
||||||
|
var data []byte
|
||||||
rf, err := newRulesFilter(r)
|
rf, err := newRulesFilter(r)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
errJson(w, r, err)
|
httpserver.Errorf(w, r, "%s", err)
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
data, err := rh.listGroups(rf)
|
data, err = rh.listGroups(rf)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
errJson(w, r, err)
|
httpserver.Errorf(w, r, "%s", err)
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
w.Header().Set("Content-Type", "application/json")
|
w.Header().Set("Content-Type", "application/json")
|
||||||
@@ -160,14 +150,14 @@ func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
|
|||||||
|
|
||||||
case "/vmalert/api/v1/alerts", "/api/v1/alerts":
|
case "/vmalert/api/v1/alerts", "/api/v1/alerts":
|
||||||
// path used by Grafana for ng alerting
|
// path used by Grafana for ng alerting
|
||||||
gf, err := newGroupsFilter(r)
|
rf, err := newRulesFilter(r)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
errJson(w, r, err)
|
httpserver.Errorf(w, r, "%s", err)
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
data, err := rh.listAlerts(gf)
|
data, err := rh.listAlerts(rf)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
errJson(w, r, err)
|
httpserver.Errorf(w, r, "%s", err)
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
w.Header().Set("Content-Type", "application/json")
|
w.Header().Set("Content-Type", "application/json")
|
||||||
@@ -176,44 +166,30 @@ func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
|
|||||||
case "/vmalert/api/v1/alert", "/api/v1/alert":
|
case "/vmalert/api/v1/alert", "/api/v1/alert":
|
||||||
alert, err := rh.getAlert(r)
|
alert, err := rh.getAlert(r)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
errJson(w, r, err)
|
httpserver.Errorf(w, r, "%s", err)
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
data, err := marshalJson(alert, "alert")
|
data, err := json.Marshal(alert)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
errJson(w, r, err)
|
httpserver.Errorf(w, r, "failed to marshal alert: %s", err)
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
w.Header().Set("Content-Type", "application/json")
|
w.Header().Set("Content-Type", "application/json")
|
||||||
w.Write(data)
|
w.Write(data)
|
||||||
return true
|
return true
|
||||||
case "/vmalert/api/v1/rule", "/api/v1/rule":
|
case "/vmalert/api/v1/rule", "/api/v1/rule":
|
||||||
apiRule, err := rh.getRule(r)
|
rule, err := rh.getRule(r)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
errJson(w, r, err)
|
httpserver.Errorf(w, r, "%s", err)
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
rwu := rule.ApiRuleWithUpdates{
|
rwu := apiRuleWithUpdates{
|
||||||
ApiRule: apiRule,
|
apiRule: rule,
|
||||||
StateUpdates: apiRule.Updates,
|
StateUpdates: rule.Updates,
|
||||||
}
|
}
|
||||||
data, err := marshalJson(rwu, "rule")
|
data, err := json.Marshal(rwu)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
errJson(w, r, err)
|
httpserver.Errorf(w, r, "failed to marshal rule: %s", err)
|
||||||
return true
|
|
||||||
}
|
|
||||||
w.Header().Set("Content-Type", "application/json")
|
|
||||||
w.Write(data)
|
|
||||||
return true
|
|
||||||
case "/vmalert/api/v1/group", "/api/v1/group":
|
|
||||||
group, err := rh.getGroup(r)
|
|
||||||
if err != nil {
|
|
||||||
errJson(w, r, err)
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
data, err := marshalJson(group, "group")
|
|
||||||
if err != nil {
|
|
||||||
errJson(w, r, err)
|
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
w.Header().Set("Content-Type", "application/json")
|
w.Header().Set("Content-Type", "application/json")
|
||||||
@@ -233,42 +209,30 @@ func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (rh *requestHandler) getGroup(r *http.Request) (*rule.ApiGroup, *httpserver.ErrorWithStatusCode) {
|
func (rh *requestHandler) getRule(r *http.Request) (apiRule, error) {
|
||||||
groupID, err := strconv.ParseUint(r.FormValue(rule.ParamGroupID), 10, 64)
|
groupID, err := strconv.ParseUint(r.FormValue(paramGroupID), 10, 64)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, errResponse(fmt.Errorf("failed to read %q param: %w", rule.ParamGroupID, err), http.StatusBadRequest)
|
return apiRule{}, fmt.Errorf("failed to read %q param: %w", paramGroupID, err)
|
||||||
}
|
}
|
||||||
obj, err := rh.m.groupAPI(groupID)
|
ruleID, err := strconv.ParseUint(r.FormValue(paramRuleID), 10, 64)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, errResponse(err, http.StatusNotFound)
|
return apiRule{}, fmt.Errorf("failed to read %q param: %w", paramRuleID, err)
|
||||||
}
|
|
||||||
return obj, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (rh *requestHandler) getRule(r *http.Request) (rule.ApiRule, *httpserver.ErrorWithStatusCode) {
|
|
||||||
groupID, err := strconv.ParseUint(r.FormValue(rule.ParamGroupID), 10, 64)
|
|
||||||
if err != nil {
|
|
||||||
return rule.ApiRule{}, errResponse(fmt.Errorf("failed to read %q param: %w", rule.ParamGroupID, err), http.StatusBadRequest)
|
|
||||||
}
|
|
||||||
ruleID, err := strconv.ParseUint(r.FormValue(rule.ParamRuleID), 10, 64)
|
|
||||||
if err != nil {
|
|
||||||
return rule.ApiRule{}, errResponse(fmt.Errorf("failed to read %q param: %w", rule.ParamRuleID, err), http.StatusBadRequest)
|
|
||||||
}
|
}
|
||||||
obj, err := rh.m.ruleAPI(groupID, ruleID)
|
obj, err := rh.m.ruleAPI(groupID, ruleID)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return rule.ApiRule{}, errResponse(err, http.StatusNotFound)
|
return apiRule{}, errResponse(err, http.StatusNotFound)
|
||||||
}
|
}
|
||||||
return obj, nil
|
return obj, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (rh *requestHandler) getAlert(r *http.Request) (*rule.ApiAlert, *httpserver.ErrorWithStatusCode) {
|
func (rh *requestHandler) getAlert(r *http.Request) (*apiAlert, error) {
|
||||||
groupID, err := strconv.ParseUint(r.FormValue(rule.ParamGroupID), 10, 64)
|
groupID, err := strconv.ParseUint(r.FormValue(paramGroupID), 10, 64)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, errResponse(fmt.Errorf("failed to read %q param: %w", rule.ParamGroupID, err), http.StatusBadRequest)
|
return nil, fmt.Errorf("failed to read %q param: %w", paramGroupID, err)
|
||||||
}
|
}
|
||||||
alertID, err := strconv.ParseUint(r.FormValue(rule.ParamAlertID), 10, 64)
|
alertID, err := strconv.ParseUint(r.FormValue(paramAlertID), 10, 64)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, errResponse(fmt.Errorf("failed to read %q param: %w", rule.ParamAlertID, err), http.StatusBadRequest)
|
return nil, fmt.Errorf("failed to read %q param: %w", paramAlertID, err)
|
||||||
}
|
}
|
||||||
a, err := rh.m.alertAPI(groupID, alertID)
|
a, err := rh.m.alertAPI(groupID, alertID)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -278,76 +242,28 @@ func (rh *requestHandler) getAlert(r *http.Request) (*rule.ApiAlert, *httpserver
|
|||||||
}
|
}
|
||||||
|
|
||||||
type listGroupsResponse struct {
|
type listGroupsResponse struct {
|
||||||
Status string `json:"status"`
|
Status string `json:"status"`
|
||||||
Page int `json:"page,omitempty"`
|
Data struct {
|
||||||
TotalPages int `json:"total_pages,omitempty"`
|
Groups []*apiGroup `json:"groups"`
|
||||||
TotalGroups int `json:"total_groups,omitempty"`
|
|
||||||
TotalRules int `json:"total_rules,omitempty"`
|
|
||||||
Data struct {
|
|
||||||
Groups []*rule.ApiGroup `json:"groups"`
|
|
||||||
} `json:"data"`
|
} `json:"data"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type groupsFilter struct {
|
|
||||||
groupNames []string
|
|
||||||
files []string
|
|
||||||
dsType config.Type
|
|
||||||
}
|
|
||||||
|
|
||||||
func newGroupsFilter(r *http.Request) (*groupsFilter, *httpserver.ErrorWithStatusCode) {
|
|
||||||
_ = r.ParseForm()
|
|
||||||
vs := r.Form
|
|
||||||
gf := &groupsFilter{
|
|
||||||
groupNames: vs["rule_group[]"],
|
|
||||||
files: vs["file[]"],
|
|
||||||
}
|
|
||||||
dsType := vs.Get("datasource_type")
|
|
||||||
if len(dsType) > 0 {
|
|
||||||
if config.SupportedType(dsType) {
|
|
||||||
gf.dsType = config.NewRawType(dsType)
|
|
||||||
} else {
|
|
||||||
return nil, errResponse(fmt.Errorf(`invalid parameter "datasource_type": not supported value %q`, dsType), http.StatusBadRequest)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return gf, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (gf *groupsFilter) matches(group *rule.Group) bool {
|
|
||||||
if len(gf.groupNames) > 0 && !slices.Contains(gf.groupNames, group.Name) {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
if len(gf.files) > 0 && !slices.Contains(gf.files, group.File) {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
if len(gf.dsType.Name) > 0 && gf.dsType.String() != group.Type.String() {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
|
|
||||||
// see https://prometheus.io/docs/prometheus/latest/querying/api/#rules
|
// see https://prometheus.io/docs/prometheus/latest/querying/api/#rules
|
||||||
type rulesFilter struct {
|
type rulesFilter struct {
|
||||||
gf *groupsFilter
|
files []string
|
||||||
ruleNames []string
|
groupNames []string
|
||||||
ruleType string
|
ruleNames []string
|
||||||
excludeAlerts bool
|
ruleType string
|
||||||
states []string
|
excludeAlerts bool
|
||||||
maxGroups int
|
filter string
|
||||||
pageNum int
|
dsType config.Type
|
||||||
search string
|
|
||||||
extendedStates bool
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func newRulesFilter(r *http.Request) (*rulesFilter, *httpserver.ErrorWithStatusCode) {
|
func newRulesFilter(r *http.Request) (*rulesFilter, error) {
|
||||||
gf, err := newGroupsFilter(r)
|
rf := &rulesFilter{}
|
||||||
if err != nil {
|
query := r.URL.Query()
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
var rf rulesFilter
|
ruleTypeParam := query.Get("type")
|
||||||
rf.gf = gf
|
|
||||||
vs := r.Form
|
|
||||||
ruleTypeParam := vs.Get("type")
|
|
||||||
if len(ruleTypeParam) > 0 {
|
if len(ruleTypeParam) > 0 {
|
||||||
if ruleType, ok := ruleTypeMap[ruleTypeParam]; ok {
|
if ruleType, ok := ruleTypeMap[ruleTypeParam]; ok {
|
||||||
rf.ruleType = ruleType
|
rf.ruleType = ruleType
|
||||||
@@ -356,155 +272,102 @@ func newRulesFilter(r *http.Request) (*rulesFilter, *httpserver.ErrorWithStatusC
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
states := vs["state"]
|
dsType := query.Get("datasource_type")
|
||||||
if len(states) == 0 {
|
if len(dsType) > 0 {
|
||||||
states = vs["filter"]
|
if config.SupportedType(dsType) {
|
||||||
|
rf.dsType = config.NewRawType(dsType)
|
||||||
|
} else {
|
||||||
|
return nil, errResponse(fmt.Errorf(`invalid parameter "datasource_type": not supported value %q`, dsType), http.StatusBadRequest)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
for _, s := range states {
|
|
||||||
values := strings.Split(s, ",")
|
filter := strings.ToLower(query.Get("filter"))
|
||||||
for _, v := range values {
|
if len(filter) > 0 {
|
||||||
if len(v) == 0 {
|
if filter == "nomatch" || filter == "unhealthy" {
|
||||||
continue
|
rf.filter = filter
|
||||||
}
|
} else {
|
||||||
if !slices.Contains(ruleStates, v) {
|
return nil, errResponse(fmt.Errorf(`invalid parameter "filter": not supported value %q`, filter), http.StatusBadRequest)
|
||||||
return nil, errResponse(fmt.Errorf(`invalid parameter "state": contains not supported value %q`, v), http.StatusBadRequest)
|
|
||||||
}
|
|
||||||
// Replace grafana states with supported internal states
|
|
||||||
switch v {
|
|
||||||
case "noData":
|
|
||||||
v = "nomatch"
|
|
||||||
case "normal":
|
|
||||||
v = "inactive"
|
|
||||||
case "error":
|
|
||||||
v = "unhealthy"
|
|
||||||
}
|
|
||||||
rf.states = append(rf.states, v)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
rf.excludeAlerts = httputil.GetBool(r, "exclude_alerts")
|
rf.excludeAlerts = httputil.GetBool(r, "exclude_alerts")
|
||||||
rf.extendedStates = httputil.GetBool(r, "extended_states")
|
rf.ruleNames = append([]string{}, r.Form["rule_name[]"]...)
|
||||||
rf.ruleNames = append([]string{}, vs["rule_name[]"]...)
|
rf.groupNames = append([]string{}, r.Form["rule_group[]"]...)
|
||||||
rf.search = strings.ToLower(vs.Get("search"))
|
rf.files = append([]string{}, r.Form["file[]"]...)
|
||||||
|
return rf, nil
|
||||||
pageNum := vs.Get("page_num")
|
|
||||||
maxGroups := vs.Get("group_limit")
|
|
||||||
if pageNum != "" {
|
|
||||||
if maxGroups == "" {
|
|
||||||
return nil, errResponse(fmt.Errorf(`"group_limit" needs to be present in order to paginate over the groups`), http.StatusBadRequest)
|
|
||||||
}
|
|
||||||
v, err := strconv.Atoi(pageNum)
|
|
||||||
if err != nil || v <= 0 {
|
|
||||||
return nil, errResponse(fmt.Errorf(`"page_num" is expected to be a positive number, found %q`, pageNum), http.StatusBadRequest)
|
|
||||||
}
|
|
||||||
rf.pageNum = v
|
|
||||||
}
|
|
||||||
if maxGroups != "" {
|
|
||||||
v, err := strconv.Atoi(maxGroups)
|
|
||||||
if err != nil || v <= 0 {
|
|
||||||
return nil, errResponse(fmt.Errorf(`"group_limit" is expected to be a positive number, found %q`, maxGroups), http.StatusBadRequest)
|
|
||||||
}
|
|
||||||
rf.maxGroups = v
|
|
||||||
}
|
|
||||||
return &rf, nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (rf *rulesFilter) matchesRule(r *rule.ApiRule) bool {
|
func (rf *rulesFilter) matchesGroup(group *rule.Group) bool {
|
||||||
if rf.ruleType != "" && rf.ruleType != r.Type {
|
if len(rf.groupNames) > 0 && !slices.Contains(rf.groupNames, group.Name) {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
if len(rf.ruleNames) > 0 && !slices.Contains(rf.ruleNames, r.Name) {
|
if len(rf.files) > 0 && !slices.Contains(rf.files, group.File) {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
if len(rf.states) == 0 {
|
if len(rf.dsType.Name) > 0 && rf.dsType.String() != group.Type.String() {
|
||||||
return true
|
return false
|
||||||
}
|
}
|
||||||
return slices.Contains(rf.states, r.State)
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
func (rh *requestHandler) groups(rf *rulesFilter) *listGroupsResponse {
|
func (rh *requestHandler) groups(rf *rulesFilter) []*apiGroup {
|
||||||
rh.m.groupsMu.RLock()
|
rh.m.groupsMu.RLock()
|
||||||
defer rh.m.groupsMu.RUnlock()
|
defer rh.m.groupsMu.RUnlock()
|
||||||
|
|
||||||
skipGroups := (rf.pageNum - 1) * rf.maxGroups
|
groups := make([]*apiGroup, 0)
|
||||||
lr := &listGroupsResponse{
|
|
||||||
Status: "success",
|
|
||||||
}
|
|
||||||
lr.Data.Groups = make([]*rule.ApiGroup, 0)
|
|
||||||
if skipGroups >= len(rh.m.groups) {
|
|
||||||
return lr
|
|
||||||
}
|
|
||||||
// sort list of groups for deterministic output
|
|
||||||
groups := make([]*rule.Group, 0, len(rh.m.groups))
|
|
||||||
for _, group := range rh.m.groups {
|
for _, group := range rh.m.groups {
|
||||||
groups = append(groups, group)
|
if !rf.matchesGroup(group) {
|
||||||
}
|
|
||||||
|
|
||||||
slices.SortFunc(groups, func(a, b *rule.Group) int {
|
|
||||||
nameCmp := cmp.Compare(a.Name, b.Name)
|
|
||||||
if nameCmp != 0 {
|
|
||||||
return nameCmp
|
|
||||||
}
|
|
||||||
return cmp.Compare(a.File, b.File)
|
|
||||||
})
|
|
||||||
for _, group := range groups {
|
|
||||||
if !rf.gf.matches(group) {
|
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
groupFound := len(rf.search) == 0 || strings.Contains(strings.ToLower(group.Name), rf.search) || strings.Contains(strings.ToLower(group.File), rf.search)
|
g := groupToAPI(group)
|
||||||
g := group.ToAPI()
|
|
||||||
// the returned list should always be non-nil
|
// the returned list should always be non-nil
|
||||||
// https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4221
|
// https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4221
|
||||||
filteredRules := make([]rule.ApiRule, 0)
|
filteredRules := make([]apiRule, 0)
|
||||||
for _, rule := range g.Rules {
|
for _, rule := range g.Rules {
|
||||||
if !groupFound && !strings.Contains(strings.ToLower(rule.Name), rf.search) {
|
if rf.ruleType != "" && rf.ruleType != rule.Type {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
if rf.extendedStates {
|
if len(rf.ruleNames) > 0 && !slices.Contains(rf.ruleNames, rule.Name) {
|
||||||
rule.ExtendState()
|
continue
|
||||||
}
|
}
|
||||||
if !rf.matchesRule(&rule) {
|
if (rule.LastError == "" && rf.filter == "unhealthy") || (!isNoMatch(rule) && rf.filter == "nomatch") {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
if rf.excludeAlerts {
|
if rf.excludeAlerts {
|
||||||
rule.Alerts = nil
|
rule.Alerts = nil
|
||||||
}
|
}
|
||||||
g.States[rule.State]++
|
if rule.LastError != "" {
|
||||||
|
g.Unhealthy++
|
||||||
|
} else {
|
||||||
|
g.Healthy++
|
||||||
|
}
|
||||||
|
if isNoMatch(rule) {
|
||||||
|
g.NoMatch++
|
||||||
|
}
|
||||||
filteredRules = append(filteredRules, rule)
|
filteredRules = append(filteredRules, rule)
|
||||||
}
|
}
|
||||||
if len(g.Rules) == 0 || len(filteredRules) > 0 {
|
g.Rules = filteredRules
|
||||||
if rf.maxGroups > 0 {
|
groups = append(groups, g)
|
||||||
lr.TotalGroups++
|
}
|
||||||
lr.TotalRules += len(filteredRules)
|
// sort list of groups for deterministic output
|
||||||
}
|
slices.SortFunc(groups, func(a, b *apiGroup) int {
|
||||||
if skipGroups > 0 {
|
if a.Name != b.Name {
|
||||||
skipGroups--
|
return strings.Compare(a.Name, b.Name)
|
||||||
continue
|
|
||||||
}
|
|
||||||
if rf.maxGroups == 0 || len(lr.Data.Groups) < rf.maxGroups {
|
|
||||||
g.Rules = filteredRules
|
|
||||||
lr.Data.Groups = append(lr.Data.Groups, g)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
return strings.Compare(a.File, b.File)
|
||||||
if rf.maxGroups > 0 {
|
})
|
||||||
lr.Page = rf.pageNum
|
return groups
|
||||||
lr.TotalPages = max(int(math.Ceil(float64(lr.TotalGroups)/float64(rf.maxGroups))), 1)
|
|
||||||
}
|
|
||||||
return lr
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (rh *requestHandler) listGroups(rf *rulesFilter) ([]byte, *httpserver.ErrorWithStatusCode) {
|
func (rh *requestHandler) listGroups(rf *rulesFilter) ([]byte, error) {
|
||||||
lr := rh.groups(rf)
|
lr := listGroupsResponse{Status: "success"}
|
||||||
if rf.pageNum > 1 && len(lr.Data.Groups) == 0 {
|
lr.Data.Groups = rh.groups(rf)
|
||||||
return nil, errResponse(fmt.Errorf(`page_num exceeds total amount of pages`), http.StatusBadRequest)
|
|
||||||
}
|
|
||||||
if lr.Page > lr.TotalPages {
|
|
||||||
return nil, errResponse(fmt.Errorf(`page_num=%d exceeds total amount of pages in result=%d`, lr.Page, lr.TotalPages), http.StatusBadRequest)
|
|
||||||
}
|
|
||||||
b, err := json.Marshal(lr)
|
b, err := json.Marshal(lr)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, errResponse(fmt.Errorf(`error encoding list of groups: %w`, err), http.StatusInternalServerError)
|
return nil, &httpserver.ErrorWithStatusCode{
|
||||||
|
Err: fmt.Errorf(`error encoding list of active alerts: %w`, err),
|
||||||
|
StatusCode: http.StatusInternalServerError,
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return b, nil
|
return b, nil
|
||||||
}
|
}
|
||||||
@@ -512,64 +375,67 @@ func (rh *requestHandler) listGroups(rf *rulesFilter) ([]byte, *httpserver.Error
|
|||||||
type listAlertsResponse struct {
|
type listAlertsResponse struct {
|
||||||
Status string `json:"status"`
|
Status string `json:"status"`
|
||||||
Data struct {
|
Data struct {
|
||||||
Alerts []*rule.ApiAlert `json:"alerts"`
|
Alerts []*apiAlert `json:"alerts"`
|
||||||
} `json:"data"`
|
} `json:"data"`
|
||||||
}
|
}
|
||||||
|
|
||||||
func (rh *requestHandler) groupAlerts() []rule.GroupAlerts {
|
func (rh *requestHandler) groupAlerts() []groupAlerts {
|
||||||
rh.m.groupsMu.RLock()
|
rh.m.groupsMu.RLock()
|
||||||
defer rh.m.groupsMu.RUnlock()
|
defer rh.m.groupsMu.RUnlock()
|
||||||
|
|
||||||
var gAlerts []rule.GroupAlerts
|
var gAlerts []groupAlerts
|
||||||
for _, group := range rh.m.groups {
|
for _, g := range rh.m.groups {
|
||||||
var alerts []*rule.ApiAlert
|
var alerts []*apiAlert
|
||||||
g := group.ToAPI()
|
|
||||||
for _, r := range g.Rules {
|
for _, r := range g.Rules {
|
||||||
if r.Type != rule.TypeAlerting {
|
a, ok := r.(*rule.AlertingRule)
|
||||||
|
if !ok {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
alerts = append(alerts, r.Alerts...)
|
alerts = append(alerts, ruleToAPIAlert(a)...)
|
||||||
}
|
}
|
||||||
if len(alerts) > 0 {
|
if len(alerts) > 0 {
|
||||||
gAlerts = append(gAlerts, rule.GroupAlerts{
|
gAlerts = append(gAlerts, groupAlerts{
|
||||||
Group: g,
|
Group: groupToAPI(g),
|
||||||
Alerts: alerts,
|
Alerts: alerts,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
slices.SortFunc(gAlerts, func(a, b rule.GroupAlerts) int {
|
slices.SortFunc(gAlerts, func(a, b groupAlerts) int {
|
||||||
return strings.Compare(a.Group.Name, b.Group.Name)
|
return strings.Compare(a.Group.Name, b.Group.Name)
|
||||||
})
|
})
|
||||||
return gAlerts
|
return gAlerts
|
||||||
}
|
}
|
||||||
|
|
||||||
func (rh *requestHandler) listAlerts(gf *groupsFilter) ([]byte, *httpserver.ErrorWithStatusCode) {
|
func (rh *requestHandler) listAlerts(rf *rulesFilter) ([]byte, error) {
|
||||||
rh.m.groupsMu.RLock()
|
rh.m.groupsMu.RLock()
|
||||||
defer rh.m.groupsMu.RUnlock()
|
defer rh.m.groupsMu.RUnlock()
|
||||||
|
|
||||||
lr := listAlertsResponse{Status: "success"}
|
lr := listAlertsResponse{Status: "success"}
|
||||||
lr.Data.Alerts = make([]*rule.ApiAlert, 0)
|
lr.Data.Alerts = make([]*apiAlert, 0)
|
||||||
for _, group := range rh.m.groups {
|
for _, group := range rh.m.groups {
|
||||||
if !gf.matches(group) {
|
if !rf.matchesGroup(group) {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
g := group.ToAPI()
|
for _, r := range group.Rules {
|
||||||
for _, r := range g.Rules {
|
a, ok := r.(*rule.AlertingRule)
|
||||||
if r.Type != rule.TypeAlerting {
|
if !ok {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
lr.Data.Alerts = append(lr.Data.Alerts, r.Alerts...)
|
lr.Data.Alerts = append(lr.Data.Alerts, ruleToAPIAlert(a)...)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// sort list of alerts for deterministic output
|
// sort list of alerts for deterministic output
|
||||||
slices.SortFunc(lr.Data.Alerts, func(a, b *rule.ApiAlert) int {
|
slices.SortFunc(lr.Data.Alerts, func(a, b *apiAlert) int {
|
||||||
return strings.Compare(a.ID, b.ID)
|
return strings.Compare(a.ID, b.ID)
|
||||||
})
|
})
|
||||||
|
|
||||||
b, err := json.Marshal(lr)
|
b, err := json.Marshal(lr)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, errResponse(fmt.Errorf(`error encoding list of active alerts: %w`, err), http.StatusInternalServerError)
|
return nil, &httpserver.ErrorWithStatusCode{
|
||||||
|
Err: fmt.Errorf(`error encoding list of active alerts: %w`, err),
|
||||||
|
StatusCode: http.StatusInternalServerError,
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return b, nil
|
return b, nil
|
||||||
}
|
}
|
||||||
@@ -577,33 +443,35 @@ func (rh *requestHandler) listAlerts(gf *groupsFilter) ([]byte, *httpserver.Erro
|
|||||||
type listNotifiersResponse struct {
|
type listNotifiersResponse struct {
|
||||||
Status string `json:"status"`
|
Status string `json:"status"`
|
||||||
Data struct {
|
Data struct {
|
||||||
Notifiers []*notifier.ApiNotifier `json:"notifiers"`
|
Notifiers []*apiNotifier `json:"notifiers"`
|
||||||
} `json:"data"`
|
} `json:"data"`
|
||||||
}
|
}
|
||||||
|
|
||||||
func (rh *requestHandler) listNotifiers() ([]byte, *httpserver.ErrorWithStatusCode) {
|
func (rh *requestHandler) listNotifiers() ([]byte, error) {
|
||||||
targets := notifier.GetTargets()
|
targets := notifier.GetTargets()
|
||||||
|
|
||||||
lr := listNotifiersResponse{Status: "success"}
|
lr := listNotifiersResponse{Status: "success"}
|
||||||
lr.Data.Notifiers = make([]*notifier.ApiNotifier, 0)
|
lr.Data.Notifiers = make([]*apiNotifier, 0)
|
||||||
for protoName, protoTargets := range targets {
|
for protoName, protoTargets := range targets {
|
||||||
nr := ¬ifier.ApiNotifier{
|
notifier := &apiNotifier{
|
||||||
Kind: protoName,
|
Kind: string(protoName),
|
||||||
Targets: make([]*notifier.ApiTarget, 0, len(protoTargets)),
|
Targets: make([]*apiTarget, 0, len(protoTargets)),
|
||||||
}
|
}
|
||||||
for _, target := range protoTargets {
|
for _, target := range protoTargets {
|
||||||
nr.Targets = append(nr.Targets, ¬ifier.ApiTarget{
|
notifier.Targets = append(notifier.Targets, &apiTarget{
|
||||||
Address: target.Addr(),
|
Address: target.Addr(),
|
||||||
Labels: target.Labels.ToMap(),
|
Labels: target.Labels.ToMap(),
|
||||||
LastError: target.LastError(),
|
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
lr.Data.Notifiers = append(lr.Data.Notifiers, nr)
|
lr.Data.Notifiers = append(lr.Data.Notifiers, notifier)
|
||||||
}
|
}
|
||||||
|
|
||||||
b, err := json.Marshal(lr)
|
b, err := json.Marshal(lr)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, errResponse(fmt.Errorf(`error encoding list of notifiers: %w`, err), http.StatusInternalServerError)
|
return nil, &httpserver.ErrorWithStatusCode{
|
||||||
|
Err: fmt.Errorf(`error encoding list of notifiers: %w`, err),
|
||||||
|
StatusCode: http.StatusInternalServerError,
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return b, nil
|
return b, nil
|
||||||
}
|
}
|
||||||
@@ -614,8 +482,3 @@ func errResponse(err error, sc int) *httpserver.ErrorWithStatusCode {
|
|||||||
StatusCode: sc,
|
StatusCode: sc,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func errJson(w http.ResponseWriter, r *http.Request, err *httpserver.ErrorWithStatusCode) {
|
|
||||||
w.Header().Set("Content-Type", "application/json")
|
|
||||||
httpserver.Errorf(w, r, `{"error":%q,"errorType":%d}`, err, err.StatusCode)
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -8,11 +8,9 @@
|
|||||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/tpl"
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/tpl"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/vmalertutil"
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/vmalertutil"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/rule"
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
|
|
||||||
) %}
|
) %}
|
||||||
|
|
||||||
{% func Controls(prefix, currentIcon, currentText string, icons, states map[string]string, search bool) %}
|
{% func Controls(prefix, currentIcon, currentText string, icons, filters map[string]string, search bool) %}
|
||||||
<div class="btn-toolbar mb-3" role="toolbar">
|
<div class="btn-toolbar mb-3" role="toolbar">
|
||||||
<div class="d-flex gap-2 justify-content-between w-100">
|
<div class="d-flex gap-2 justify-content-between w-100">
|
||||||
<div class="d-flex gap-2 align-items-center">
|
<div class="d-flex gap-2 align-items-center">
|
||||||
@@ -28,10 +26,10 @@
|
|||||||
<use href="{%s prefix %}static/icons/icons.svg#expand"/>
|
<use href="{%s prefix %}static/icons/icons.svg#expand"/>
|
||||||
</svg>
|
</svg>
|
||||||
</a>
|
</a>
|
||||||
{% if len(states) > 0 %}
|
{% if len(filters) > 0 %}
|
||||||
<span class="d-none d-md-inline-block">Filter by status:</span>
|
<span class="d-none d-md-inline-block">Filter by status:</span>
|
||||||
<svg class="d-md-none" width="20" height="20">
|
<svg class="d-md-none" width="20" height="20">
|
||||||
<use href="{%s prefix %}static/icons/icons.svg#state">
|
<use href="{%s prefix %}static/icons/icons.svg#filter">
|
||||||
</svg>
|
</svg>
|
||||||
<div class="dropdown">
|
<div class="dropdown">
|
||||||
<button
|
<button
|
||||||
@@ -46,10 +44,10 @@
|
|||||||
</svg>
|
</svg>
|
||||||
</button>
|
</button>
|
||||||
<ul class="dropdown-menu">
|
<ul class="dropdown-menu">
|
||||||
{% for key, title := range states %}
|
{% for key, title := range filters %}
|
||||||
{% if title != currentText %}
|
{% if title != currentText %}
|
||||||
<li>
|
<li>
|
||||||
<a class="dropdown-item" onclick="groupForState('{%s key %}')">
|
<a class="dropdown-item" onclick="groupFilter('{%s key %}')">
|
||||||
<span class="d-none d-md-inline-block">{%s title %}</span>
|
<span class="d-none d-md-inline-block">{%s title %}</span>
|
||||||
<svg class="d-md-none" width="22" height="22">
|
<svg class="d-md-none" width="22" height="22">
|
||||||
<use href="{%s prefix %}static/icons/icons.svg#{%s icons[key] %}"/>
|
<use href="{%s prefix %}static/icons/icons.svg#{%s icons[key] %}"/>
|
||||||
@@ -79,8 +77,6 @@
|
|||||||
{% func Welcome(r *http.Request) %}
|
{% func Welcome(r *http.Request) %}
|
||||||
{%= tpl.Header(r, navItems, "vmalert", getLastConfigError()) %}
|
{%= tpl.Header(r, navItems, "vmalert", getLastConfigError()) %}
|
||||||
<p>
|
<p>
|
||||||
Version {%s buildinfo.Version %} <br>
|
|
||||||
|
|
||||||
API:<br>
|
API:<br>
|
||||||
{% for _, p := range apiLinks %}
|
{% for _, p := range apiLinks %}
|
||||||
{%code p, doc := p[0], p[1] %}
|
{%code p, doc := p[0], p[1] %}
|
||||||
@@ -97,10 +93,10 @@
|
|||||||
{%= tpl.Footer(r) %}
|
{%= tpl.Footer(r) %}
|
||||||
{% endfunc %}
|
{% endfunc %}
|
||||||
|
|
||||||
{% func ListGroups(r *http.Request, groups []*rule.ApiGroup, state string) %}
|
{% func ListGroups(r *http.Request, groups []*apiGroup, filter string) %}
|
||||||
{%code
|
{%code
|
||||||
prefix := vmalertutil.Prefix(r.URL.Path)
|
prefix := vmalertutil.Prefix(r.URL.Path)
|
||||||
states := map[string]string{
|
filters := map[string]string{
|
||||||
"": "All",
|
"": "All",
|
||||||
"unhealthy": "Unhealthy",
|
"unhealthy": "Unhealthy",
|
||||||
"nomatch": "No Match",
|
"nomatch": "No Match",
|
||||||
@@ -110,29 +106,26 @@
|
|||||||
"unhealthy": "unhealthy",
|
"unhealthy": "unhealthy",
|
||||||
"nomatch": "nomatch",
|
"nomatch": "nomatch",
|
||||||
}
|
}
|
||||||
currentText := states[state]
|
currentText := filters[filter]
|
||||||
currentIcon := icons[state]
|
currentIcon := icons[filter]
|
||||||
%}
|
%}
|
||||||
{%= tpl.Header(r, navItems, "Groups", getLastConfigError()) %}
|
{%= tpl.Header(r, navItems, "Groups", getLastConfigError()) %}
|
||||||
{%= Controls(prefix, currentIcon, currentText, icons, states, true) %}
|
{%= Controls(prefix, currentIcon, currentText, icons, filters, true) %}
|
||||||
{% if len(groups) > 0 %}
|
{% if len(groups) > 0 %}
|
||||||
{% for _, g := range groups %}
|
{% for _, g := range groups %}
|
||||||
<div id="group-{%s g.ID %}" class="w-100 border-0 flex-column vm-group{% if g.States["unhealthy"] > 0 %} alert-danger{% endif %}">
|
<div id="group-{%s g.ID %}" class="d-flex w-100 border-0 flex-column group-items{% if g.Unhealthy > 0 %} alert-danger{% endif %}">
|
||||||
<span class="d-flex justify-content-between">
|
<span class="d-flex justify-content-between">
|
||||||
<a
|
<a href="#group-{%s g.ID %}">{%s g.Name %}{% if g.Type != "prometheus" %} ({%s g.Type %}){% endif %} (every {%f.0 g.Interval %}s) #</a>
|
||||||
class="vm-group-search"
|
|
||||||
href="#group-{%s g.ID %}"
|
|
||||||
>{%s g.Name %}{% if g.Type != "prometheus" %} ({%s g.Type %}){% endif %} (every {%f.0 g.Interval %}s) #</a>
|
|
||||||
<span
|
<span
|
||||||
class="flex-grow-1 d-flex justify-content-end"
|
class="flex-grow-1 d-flex justify-content-end"
|
||||||
role="button"
|
role="button"
|
||||||
data-bs-toggle="collapse"
|
data-bs-toggle="collapse"
|
||||||
data-bs-target="#item-{%s g.ID %}"
|
data-bs-target="#sub-{%s g.ID %}"
|
||||||
>
|
>
|
||||||
<span class="d-flex gap-2">
|
<span class="d-flex gap-2">
|
||||||
{% if g.States["unhealthy"] > 0 %}<span class="badge bg-danger" title="Number of rules with status Error">{%d g.States["unhealthy"] %}</span> {% endif %}
|
{% if g.Unhealthy > 0 %}<span class="badge bg-danger" title="Number of rules with status Error">{%d g.Unhealthy %}</span> {% endif %}
|
||||||
{% if g.States["nomatch"] > 0 %}<span class="badge bg-warning" title="Number of rules with status NoMatch">{%d g.States["nomatch"] %}</span> {% endif %}
|
{% if g.NoMatch > 0 %}<span class="badge bg-warning" title="Number of rules with status NoMatch">{%d g.NoMatch %}</span> {% endif %}
|
||||||
<span class="badge bg-success" title="Number of rules with status Ok">{%d g.States["ok"] %}</span>
|
<span class="badge bg-success" title="Number of rules with status Ok">{%d g.Healthy %}</span>
|
||||||
</span>
|
</span>
|
||||||
</span>
|
</span>
|
||||||
</span>
|
</span>
|
||||||
@@ -140,9 +133,9 @@
|
|||||||
class="d-flex flex-column row-gap-2 mb-2"
|
class="d-flex flex-column row-gap-2 mb-2"
|
||||||
role="button"
|
role="button"
|
||||||
data-bs-toggle="collapse"
|
data-bs-toggle="collapse"
|
||||||
data-bs-target="#item-{%s g.ID %}"
|
data-bs-target="#sub-{%s g.ID %}"
|
||||||
>
|
>
|
||||||
<span class="fs-6 text-start vm-group-search w-100 fw-lighter">{%s g.File %}</span>
|
<span class="fs-6 text-start w-100 fw-lighter">{%s g.File %}</span>
|
||||||
{% if len(g.Params) > 0 %}
|
{% if len(g.Params) > 0 %}
|
||||||
<span class="fs-6 text-start w-100 d-flex justify-content-between fw-lighter">
|
<span class="fs-6 text-start w-100 d-flex justify-content-between fw-lighter">
|
||||||
<span>Extra params</span>
|
<span>Extra params</span>
|
||||||
@@ -164,7 +157,7 @@
|
|||||||
</span>
|
</span>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
</span>
|
</span>
|
||||||
<div class="collapse" id="item-{%s g.ID %}">
|
<div class="collapse sub-items" id="sub-{%s g.ID %}">
|
||||||
<table class="table table-striped table-hover table-sm">
|
<table class="table table-striped table-hover table-sm">
|
||||||
<thead>
|
<thead>
|
||||||
<tr>
|
<tr>
|
||||||
@@ -175,7 +168,7 @@
|
|||||||
</thead>
|
</thead>
|
||||||
<tbody>
|
<tbody>
|
||||||
{% for _, r := range g.Rules %}
|
{% for _, r := range g.Rules %}
|
||||||
<tr class="vm-item{% if r.LastError != "" %} alert-danger{% endif %}">
|
<tr class="sub-item{% if r.LastError != "" %} alert-danger{% endif %}">
|
||||||
<td>
|
<td>
|
||||||
<div class="row">
|
<div class="row">
|
||||||
<div class="col-12 mb-2">
|
<div class="col-12 mb-2">
|
||||||
@@ -189,7 +182,7 @@
|
|||||||
<b>record:</b> {%s r.Name %}
|
<b>record:</b> {%s r.Name %}
|
||||||
{% endif %}
|
{% endif %}
|
||||||
|
|
|
|
||||||
{%= seriesFetchedWarn(prefix, &r) %}
|
{%= seriesFetchedWarn(prefix, r) %}
|
||||||
<span><a target="_blank" href="{%s prefix+r.WebLink() %}">Details</a></span>
|
<span><a target="_blank" href="{%s prefix+r.WebLink() %}">Details</a></span>
|
||||||
</div>
|
</div>
|
||||||
<div class="col-12">
|
<div class="col-12">
|
||||||
@@ -212,12 +205,7 @@
|
|||||||
</div>
|
</div>
|
||||||
</td>
|
</td>
|
||||||
<td class="text-center">{%d r.LastSamples %}</td>
|
<td class="text-center">{%d r.LastSamples %}</td>
|
||||||
<td class="text-center">{% if r.LastEvaluation.IsZero() %}
|
<td class="text-center">{%f.3 time.Since(r.LastEvaluation).Seconds() %}s ago</td>
|
||||||
Never
|
|
||||||
{% else %}
|
|
||||||
{%f.3 time.Since(r.LastEvaluation).Seconds() %}s ago
|
|
||||||
{% endif %}
|
|
||||||
</td>
|
|
||||||
</tr>
|
</tr>
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
</tbody>
|
</tbody>
|
||||||
@@ -234,7 +222,7 @@
|
|||||||
{% endfunc %}
|
{% endfunc %}
|
||||||
|
|
||||||
|
|
||||||
{% func ListAlerts(r *http.Request, groupAlerts []rule.GroupAlerts) %}
|
{% func ListAlerts(r *http.Request, groupAlerts []groupAlerts) %}
|
||||||
{%code prefix := vmalertutil.Prefix(r.URL.Path) %}
|
{%code prefix := vmalertutil.Prefix(r.URL.Path) %}
|
||||||
{%= tpl.Header(r, navItems, "Alerts", getLastConfigError()) %}
|
{%= tpl.Header(r, navItems, "Alerts", getLastConfigError()) %}
|
||||||
{%= Controls(prefix, "", "", nil, nil, true) %}
|
{%= Controls(prefix, "", "", nil, nil, true) %}
|
||||||
@@ -243,7 +231,7 @@
|
|||||||
{%code
|
{%code
|
||||||
g := ga.Group
|
g := ga.Group
|
||||||
var keys []string
|
var keys []string
|
||||||
alertsByRule := make(map[string][]*rule.ApiAlert)
|
alertsByRule := make(map[string][]*apiAlert)
|
||||||
for _, alert := range ga.Alerts {
|
for _, alert := range ga.Alerts {
|
||||||
if len(alertsByRule[alert.RuleID]) < 1 {
|
if len(alertsByRule[alert.RuleID]) < 1 {
|
||||||
keys = append(keys, alert.RuleID)
|
keys = append(keys, alert.RuleID)
|
||||||
@@ -252,14 +240,14 @@
|
|||||||
}
|
}
|
||||||
sort.Strings(keys)
|
sort.Strings(keys)
|
||||||
%}
|
%}
|
||||||
<div class="w-100 flex-column vm-group alert-danger">
|
<div class="d-flex w-100 flex-column group-items alert-danger">
|
||||||
<span id="group-{%s g.ID %}" class="d-flex justify-content-between">
|
<span id="group-{%s g.ID %}" class="d-flex justify-content-between">
|
||||||
<a href="#group-{%s g.ID %}">{%s g.Name %}{% if g.Type != "prometheus" %} ({%s g.Type %}){% endif %}</a>
|
<a href="#group-{%s g.ID %}">{%s g.Name %}{% if g.Type != "prometheus" %} ({%s g.Type %}){% endif %}</a>
|
||||||
<span
|
<span
|
||||||
class="flex-grow-1 d-flex justify-content-end"
|
class="flex-grow-1 d-flex justify-content-end"
|
||||||
role="button"
|
role="button"
|
||||||
data-bs-toggle="collapse"
|
data-bs-toggle="collapse"
|
||||||
data-bs-target="#item-{%s g.ID %}"
|
data-bs-target="#sub-{%s g.ID %}"
|
||||||
>
|
>
|
||||||
<span class="badge bg-danger" title="Number of active alerts">{%d len(ga.Alerts) %}</span>
|
<span class="badge bg-danger" title="Number of active alerts">{%d len(ga.Alerts) %}</span>
|
||||||
</span>
|
</span>
|
||||||
@@ -269,10 +257,10 @@
|
|||||||
class="fs-6 text-start w-100 fw-lighter"
|
class="fs-6 text-start w-100 fw-lighter"
|
||||||
role="button"
|
role="button"
|
||||||
data-bs-toggle="collapse"
|
data-bs-toggle="collapse"
|
||||||
data-bs-target="#item-{%s g.ID %}"
|
data-bs-target="#sub-{%s g.ID %}"
|
||||||
>{%s g.File %}</span>
|
>{%s g.File %}</span>
|
||||||
</span>
|
</span>
|
||||||
<div class="collapse" id="item-{%s g.ID %}">
|
<div class="collapse sub-items" id="sub-{%s g.ID %}">
|
||||||
{% for _, ruleID := range keys %}
|
{% for _, ruleID := range keys %}
|
||||||
{%code
|
{%code
|
||||||
defaultAR := alertsByRule[ruleID][0]
|
defaultAR := alertsByRule[ruleID][0]
|
||||||
@@ -283,7 +271,7 @@
|
|||||||
sort.Strings(labelKeys)
|
sort.Strings(labelKeys)
|
||||||
%}
|
%}
|
||||||
<br>
|
<br>
|
||||||
<div class="vm-item">
|
<div class="sub-item">
|
||||||
<b>alert:</b> {%s defaultAR.Name %} ({%d len(alertsByRule[ruleID]) %})
|
<b>alert:</b> {%s defaultAR.Name %} ({%d len(alertsByRule[ruleID]) %})
|
||||||
| <span><a target="_blank" href="{%s defaultAR.SourceLink %}">Source</a></span>
|
| <span><a target="_blank" href="{%s defaultAR.SourceLink %}">Source</a></span>
|
||||||
<br>
|
<br>
|
||||||
@@ -348,20 +336,20 @@
|
|||||||
typeK, ns := keys[i], targets[notifier.TargetType(keys[i])]
|
typeK, ns := keys[i], targets[notifier.TargetType(keys[i])]
|
||||||
count := len(ns)
|
count := len(ns)
|
||||||
%}
|
%}
|
||||||
<div class="w-100 flex-column vm-group">
|
<div class="d-flex w-100 flex-column group-items">
|
||||||
<span class="d-flex justify-content-between" id="group-{%s typeK %}">
|
<span class="d-flex justify-content-between" id="group-{%s typeK %}">
|
||||||
<a href="#group-{%s typeK %}">{%s typeK %} ({%d count %})</a>
|
<a href="#group-{%s typeK %}">{%s typeK %} ({%d count %})</a>
|
||||||
<span
|
<span
|
||||||
class="flex-grow-1"
|
class="flex-grow-1"
|
||||||
role="button"
|
role="button"
|
||||||
data-bs-toggle="collapse"
|
data-bs-toggle="collapse"
|
||||||
data-bs-target="#item-{%s typeK %}"
|
data-bs-target="#sub-{%s typeK %}"
|
||||||
></span>
|
></span>
|
||||||
</span>
|
</span>
|
||||||
<div id="item-{%s typeK %}" class="collapse show">
|
<div id="sub-{%s typeK %}" class="collapse show sub-items">
|
||||||
<table class="table table-striped table-hover table-sm">
|
<table class="table table-striped table-hover table-sm">
|
||||||
<thead>
|
<thead>
|
||||||
<tr class="vm-item">
|
<tr class="sub-item">
|
||||||
<th scope="col">Labels</th>
|
<th scope="col">Labels</th>
|
||||||
<th scope="col">Address</th>
|
<th scope="col">Address</th>
|
||||||
</tr>
|
</tr>
|
||||||
@@ -390,7 +378,7 @@
|
|||||||
{%= tpl.Footer(r) %}
|
{%= tpl.Footer(r) %}
|
||||||
{% endfunc %}
|
{% endfunc %}
|
||||||
|
|
||||||
{% func Alert(r *http.Request, alert *rule.ApiAlert) %}
|
{% func Alert(r *http.Request, alert *apiAlert) %}
|
||||||
{%code prefix := vmalertutil.Prefix(r.URL.Path) %}
|
{%code prefix := vmalertutil.Prefix(r.URL.Path) %}
|
||||||
{%= tpl.Header(r, navItems, "", getLastConfigError()) %}
|
{%= tpl.Header(r, navItems, "", getLastConfigError()) %}
|
||||||
{%code
|
{%code
|
||||||
@@ -446,7 +434,7 @@
|
|||||||
<div class="col">
|
<div class="col">
|
||||||
{% for _, k := range annotationKeys %}
|
{% for _, k := range annotationKeys %}
|
||||||
<b>{%s k %}:</b><br>
|
<b>{%s k %}:</b><br>
|
||||||
<p class="annotations">{%s alert.Annotations[k] %}</p>
|
<p>{%s alert.Annotations[k] %}</p>
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
@@ -476,7 +464,7 @@
|
|||||||
{% endfunc %}
|
{% endfunc %}
|
||||||
|
|
||||||
|
|
||||||
{% func Rule(r *http.Request, rule rule.ApiRule) %}
|
{% func RuleDetails(r *http.Request, rule apiRule) %}
|
||||||
{%code prefix := vmalertutil.Prefix(r.URL.Path) %}
|
{%code prefix := vmalertutil.Prefix(r.URL.Path) %}
|
||||||
{%= tpl.Header(r, navItems, "", getLastConfigError()) %}
|
{%= tpl.Header(r, navItems, "", getLastConfigError()) %}
|
||||||
{%code
|
{%code
|
||||||
@@ -560,7 +548,7 @@
|
|||||||
<div class="col">
|
<div class="col">
|
||||||
{% for _, k := range annotationKeys %}
|
{% for _, k := range annotationKeys %}
|
||||||
<b>{%s k %}:</b><br>
|
<b>{%s k %}:</b><br>
|
||||||
<p class="annotations">{%s rule.Annotations[k] %}</p>
|
<p>{%s rule.Annotations[k] %}</p>
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
@@ -605,11 +593,11 @@
|
|||||||
<table class="table table-striped table-hover table-sm">
|
<table class="table table-striped table-hover table-sm">
|
||||||
<thead>
|
<thead>
|
||||||
<tr>
|
<tr>
|
||||||
<th scope="col" title="The time when the rule was executed">Updated at</th>
|
<th scope="col" title="The time when event was created">Updated at</th>
|
||||||
<th scope="col" class="w-10 text-center" title="How many series expression returns. Each series will represent an alert.">Series returned</th>
|
<th scope="col" class="w-10 text-center" title="How many series expression returns. Each series will represent an alert.">Series returned</th>
|
||||||
{% if seriesFetchedEnabled %}<th scope="col" class="w-10 text-center" title="How many series were scanned by datasource during the evaluation">Series fetched</th>{% endif %}
|
{% if seriesFetchedEnabled %}<th scope="col" class="w-10 text-center" title="How many series were scanned by datasource during the evaluation">Series fetched</th>{% endif %}
|
||||||
<th scope="col" class="w-10 text-center" title="How many seconds request took">Duration</th>
|
<th scope="col" class="w-10 text-center" title="How many seconds request took">Duration</th>
|
||||||
<th scope="col" class="text-center" title="The time used in execution query request">Execution timestamp</th>
|
<th scope="col" class="text-center" title="Time used for rule execution">Executed at</th>
|
||||||
<th scope="col" class="text-center" title="cURL command with request example">cURL</th>
|
<th scope="col" class="text-center" title="cURL command with request example">cURL</th>
|
||||||
</tr>
|
</tr>
|
||||||
</thead>
|
</thead>
|
||||||
@@ -661,8 +649,8 @@
|
|||||||
<span class="badge bg-warning text-dark" title="This firing state is kept because of `keep_firing_for`">stabilizing</span>
|
<span class="badge bg-warning text-dark" title="This firing state is kept because of `keep_firing_for`">stabilizing</span>
|
||||||
{% endfunc %}
|
{% endfunc %}
|
||||||
|
|
||||||
{% func seriesFetchedWarn(prefix string, r *rule.ApiRule) %}
|
{% func seriesFetchedWarn(prefix string, r apiRule) %}
|
||||||
{% if r.IsNoMatch() %}
|
{% if isNoMatch(r) %}
|
||||||
<svg
|
<svg
|
||||||
data-bs-toggle="tooltip"
|
data-bs-toggle="tooltip"
|
||||||
title="No match! This rule's last evaluation hasn't selected any time series from the datasource.
|
title="No match! This rule's last evaluation hasn't selected any time series from the datasource.
|
||||||
@@ -673,3 +661,9 @@
|
|||||||
</svg>
|
</svg>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
{% endfunc %}
|
{% endfunc %}
|
||||||
|
|
||||||
|
{%code
|
||||||
|
func isNoMatch (r apiRule) bool {
|
||||||
|
return r.LastSamples == 0 && r.LastSeriesFetched != nil && *r.LastSeriesFetched == 0
|
||||||
|
}
|
||||||
|
%}
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -23,12 +23,8 @@ func TestHandler(t *testing.T) {
|
|||||||
Timestamps: []int64{0},
|
Timestamps: []int64{0},
|
||||||
})
|
})
|
||||||
m := &manager{groups: map[uint64]*rule.Group{}}
|
m := &manager{groups: map[uint64]*rule.Group{}}
|
||||||
_, cleanup := notifier.InitFakeNotifier()
|
|
||||||
defer cleanup()
|
|
||||||
|
|
||||||
var ar *rule.AlertingRule
|
var ar *rule.AlertingRule
|
||||||
var rr *rule.RecordingRule
|
var rr *rule.RecordingRule
|
||||||
var groupIDs []uint64
|
|
||||||
for _, dsType := range []string{"prometheus", "", "graphite"} {
|
for _, dsType := range []string{"prometheus", "", "graphite"} {
|
||||||
g := rule.NewGroup(config.Group{
|
g := rule.NewGroup(config.Group{
|
||||||
Name: "group",
|
Name: "group",
|
||||||
@@ -48,10 +44,8 @@ func TestHandler(t *testing.T) {
|
|||||||
}, fq, 1*time.Minute, nil)
|
}, fq, 1*time.Minute, nil)
|
||||||
ar = g.Rules[0].(*rule.AlertingRule)
|
ar = g.Rules[0].(*rule.AlertingRule)
|
||||||
rr = g.Rules[1].(*rule.RecordingRule)
|
rr = g.Rules[1].(*rule.RecordingRule)
|
||||||
g.ExecOnce(context.Background(), nil, time.Time{})
|
g.ExecOnce(context.Background(), func() []notifier.Notifier { return nil }, nil, time.Time{})
|
||||||
id := g.CreateID()
|
m.groups[g.CreateID()] = g
|
||||||
m.groups[id] = g
|
|
||||||
groupIDs = append(groupIDs, id)
|
|
||||||
}
|
}
|
||||||
rh := &requestHandler{m: m}
|
rh := &requestHandler{m: m}
|
||||||
|
|
||||||
@@ -88,22 +82,22 @@ func TestHandler(t *testing.T) {
|
|||||||
})
|
})
|
||||||
|
|
||||||
t.Run("/vmalert/rule", func(t *testing.T) {
|
t.Run("/vmalert/rule", func(t *testing.T) {
|
||||||
a := ar.ToAPI()
|
a := ruleToAPI(ar)
|
||||||
getResp(t, ts.URL+"/vmalert/"+a.WebLink(), nil, 200)
|
getResp(t, ts.URL+"/vmalert/"+a.WebLink(), nil, 200)
|
||||||
r := rr.ToAPI()
|
r := ruleToAPI(rr)
|
||||||
getResp(t, ts.URL+"/vmalert/"+r.WebLink(), nil, 200)
|
getResp(t, ts.URL+"/vmalert/"+r.WebLink(), nil, 200)
|
||||||
})
|
})
|
||||||
t.Run("/vmalert/alert", func(t *testing.T) {
|
t.Run("/vmalert/alert", func(t *testing.T) {
|
||||||
alerts := ar.AlertsToAPI()
|
alerts := ruleToAPIAlert(ar)
|
||||||
for _, a := range alerts {
|
for _, a := range alerts {
|
||||||
getResp(t, ts.URL+"/vmalert/"+a.WebLink(), nil, 200)
|
getResp(t, ts.URL+"/vmalert/"+a.WebLink(), nil, 200)
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
t.Run("/vmalert/rule?badParam", func(t *testing.T) {
|
t.Run("/vmalert/rule?badParam", func(t *testing.T) {
|
||||||
params := fmt.Sprintf("?%s=0&%s=1", rule.ParamGroupID, rule.ParamRuleID)
|
params := fmt.Sprintf("?%s=0&%s=1", paramGroupID, paramRuleID)
|
||||||
getResp(t, ts.URL+"/vmalert/rule"+params, nil, 404)
|
getResp(t, ts.URL+"/vmalert/rule"+params, nil, 404)
|
||||||
|
|
||||||
params = fmt.Sprintf("?%s=1&%s=0", rule.ParamGroupID, rule.ParamRuleID)
|
params = fmt.Sprintf("?%s=1&%s=0", paramGroupID, paramRuleID)
|
||||||
getResp(t, ts.URL+"/vmalert/rule"+params, nil, 404)
|
getResp(t, ts.URL+"/vmalert/rule"+params, nil, 404)
|
||||||
})
|
})
|
||||||
|
|
||||||
@@ -130,14 +124,14 @@ func TestHandler(t *testing.T) {
|
|||||||
}
|
}
|
||||||
})
|
})
|
||||||
t.Run("/api/v1/alert?alertID&groupID", func(t *testing.T) {
|
t.Run("/api/v1/alert?alertID&groupID", func(t *testing.T) {
|
||||||
expAlert := rule.NewAlertAPI(ar, ar.GetAlerts()[0])
|
expAlert := newAlertAPI(ar, ar.GetAlerts()[0])
|
||||||
alert := &rule.ApiAlert{}
|
alert := &apiAlert{}
|
||||||
getResp(t, ts.URL+"/"+expAlert.APILink(), alert, 200)
|
getResp(t, ts.URL+"/"+expAlert.APILink(), alert, 200)
|
||||||
if !reflect.DeepEqual(alert, expAlert) {
|
if !reflect.DeepEqual(alert, expAlert) {
|
||||||
t.Fatalf("expected %v is equal to %v", alert, expAlert)
|
t.Fatalf("expected %v is equal to %v", alert, expAlert)
|
||||||
}
|
}
|
||||||
|
|
||||||
alert = &rule.ApiAlert{}
|
alert = &apiAlert{}
|
||||||
getResp(t, ts.URL+"/vmalert/"+expAlert.APILink(), alert, 200)
|
getResp(t, ts.URL+"/vmalert/"+expAlert.APILink(), alert, 200)
|
||||||
if !reflect.DeepEqual(alert, expAlert) {
|
if !reflect.DeepEqual(alert, expAlert) {
|
||||||
t.Fatalf("expected %v is equal to %v", alert, expAlert)
|
t.Fatalf("expected %v is equal to %v", alert, expAlert)
|
||||||
@@ -145,16 +139,16 @@ func TestHandler(t *testing.T) {
|
|||||||
})
|
})
|
||||||
|
|
||||||
t.Run("/api/v1/alert?badParams", func(t *testing.T) {
|
t.Run("/api/v1/alert?badParams", func(t *testing.T) {
|
||||||
params := fmt.Sprintf("?%s=0&%s=1", rule.ParamGroupID, rule.ParamAlertID)
|
params := fmt.Sprintf("?%s=0&%s=1", paramGroupID, paramAlertID)
|
||||||
getResp(t, ts.URL+"/api/v1/alert"+params, nil, 404)
|
getResp(t, ts.URL+"/api/v1/alert"+params, nil, 404)
|
||||||
getResp(t, ts.URL+"/vmalert/api/v1/alert"+params, nil, 404)
|
getResp(t, ts.URL+"/vmalert/api/v1/alert"+params, nil, 404)
|
||||||
|
|
||||||
params = fmt.Sprintf("?%s=1&%s=0", rule.ParamGroupID, rule.ParamAlertID)
|
params = fmt.Sprintf("?%s=1&%s=0", paramGroupID, paramAlertID)
|
||||||
getResp(t, ts.URL+"/api/v1/alert"+params, nil, 404)
|
getResp(t, ts.URL+"/api/v1/alert"+params, nil, 404)
|
||||||
getResp(t, ts.URL+"/vmalert/api/v1/alert"+params, nil, 404)
|
getResp(t, ts.URL+"/vmalert/api/v1/alert"+params, nil, 404)
|
||||||
|
|
||||||
// bad request, alertID is missing
|
// bad request, alertID is missing
|
||||||
params = fmt.Sprintf("?%s=1", rule.ParamGroupID)
|
params = fmt.Sprintf("?%s=1", paramGroupID)
|
||||||
getResp(t, ts.URL+"/api/v1/alert"+params, nil, 400)
|
getResp(t, ts.URL+"/api/v1/alert"+params, nil, 400)
|
||||||
getResp(t, ts.URL+"/vmalert/api/v1/alert"+params, nil, 400)
|
getResp(t, ts.URL+"/vmalert/api/v1/alert"+params, nil, 400)
|
||||||
})
|
})
|
||||||
@@ -173,44 +167,29 @@ func TestHandler(t *testing.T) {
|
|||||||
}
|
}
|
||||||
})
|
})
|
||||||
t.Run("/api/v1/rule?ruleID&groupID", func(t *testing.T) {
|
t.Run("/api/v1/rule?ruleID&groupID", func(t *testing.T) {
|
||||||
expRule := ar.ToAPI()
|
expRule := ruleToAPI(ar)
|
||||||
gotRule := rule.ApiRule{}
|
gotRule := apiRule{}
|
||||||
getResp(t, ts.URL+"/"+expRule.APILink(), &gotRule, 200)
|
getResp(t, ts.URL+"/"+expRule.APILink(), &gotRule, 200)
|
||||||
|
|
||||||
if expRule.ID != gotRule.ID {
|
if expRule.ID != gotRule.ID {
|
||||||
t.Fatalf("expected to get Rule %q; got %q instead", expRule.ID, gotRule.ID)
|
t.Fatalf("expected to get Rule %q; got %q instead", expRule.ID, gotRule.ID)
|
||||||
}
|
}
|
||||||
|
|
||||||
gotRule = rule.ApiRule{}
|
gotRule = apiRule{}
|
||||||
getResp(t, ts.URL+"/vmalert/"+expRule.APILink(), &gotRule, 200)
|
getResp(t, ts.URL+"/vmalert/"+expRule.APILink(), &gotRule, 200)
|
||||||
|
|
||||||
if expRule.ID != gotRule.ID {
|
if expRule.ID != gotRule.ID {
|
||||||
t.Fatalf("expected to get Rule %q; got %q instead", expRule.ID, gotRule.ID)
|
t.Fatalf("expected to get Rule %q; got %q instead", expRule.ID, gotRule.ID)
|
||||||
}
|
}
|
||||||
|
|
||||||
gotRuleWithUpdates := rule.ApiRuleWithUpdates{}
|
gotRuleWithUpdates := apiRuleWithUpdates{}
|
||||||
getResp(t, ts.URL+"/"+expRule.APILink(), &gotRuleWithUpdates, 200)
|
getResp(t, ts.URL+"/"+expRule.APILink(), &gotRuleWithUpdates, 200)
|
||||||
if len(gotRuleWithUpdates.StateUpdates) < 1 {
|
if len(gotRuleWithUpdates.StateUpdates) < 1 {
|
||||||
t.Fatalf("expected %+v to have state updates field not empty", gotRuleWithUpdates.StateUpdates)
|
t.Fatalf("expected %+v to have state updates field not empty", gotRuleWithUpdates.StateUpdates)
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
t.Run("/api/v1/group?groupID", func(t *testing.T) {
|
|
||||||
id := groupIDs[0]
|
|
||||||
g := m.groups[id]
|
|
||||||
expGroup := g.ToAPI()
|
|
||||||
gotGroup := rule.ApiGroup{}
|
|
||||||
getResp(t, ts.URL+"/"+expGroup.APILink(), &gotGroup, 200)
|
|
||||||
if expGroup.ID != gotGroup.ID {
|
|
||||||
t.Fatalf("expected to get Group %q; got %q instead", expGroup.ID, gotGroup.ID)
|
|
||||||
}
|
|
||||||
gotGroup = rule.ApiGroup{}
|
|
||||||
getResp(t, ts.URL+"/vmalert/"+expGroup.APILink(), &gotGroup, 200)
|
|
||||||
if expGroup.ID != gotGroup.ID {
|
|
||||||
t.Fatalf("expected to get Group %q; got %q instead", expGroup.ID, gotGroup.ID)
|
|
||||||
}
|
|
||||||
})
|
|
||||||
|
|
||||||
t.Run("/api/v1/rules&states", func(t *testing.T) {
|
t.Run("/api/v1/rules&filters", func(t *testing.T) {
|
||||||
check := func(url string, statusCode, expGroups, expRules int) {
|
check := func(url string, statusCode, expGroups, expRules int) {
|
||||||
t.Helper()
|
t.Helper()
|
||||||
lr := listGroupsResponse{}
|
lr := listGroupsResponse{}
|
||||||
@@ -252,15 +231,9 @@ func TestHandler(t *testing.T) {
|
|||||||
check("/api/v1/rules?rule_group[]=group&file[]=foo", 200, 0, 0)
|
check("/api/v1/rules?rule_group[]=group&file[]=foo", 200, 0, 0)
|
||||||
check("/api/v1/rules?rule_group[]=group&file[]=rules.yaml", 200, 3, 6)
|
check("/api/v1/rules?rule_group[]=group&file[]=rules.yaml", 200, 3, 6)
|
||||||
|
|
||||||
check("/api/v1/rules?rule_group[]=group&file[]=rules.yaml&rule_name[]=foo", 200, 0, 0)
|
check("/api/v1/rules?rule_group[]=group&file[]=rules.yaml&rule_name[]=foo", 200, 3, 0)
|
||||||
check("/api/v1/rules?rule_group[]=group&file[]=rules.yaml&rule_name[]=alert", 200, 3, 3)
|
check("/api/v1/rules?rule_group[]=group&file[]=rules.yaml&rule_name[]=alert", 200, 3, 3)
|
||||||
check("/api/v1/rules?rule_group[]=group&file[]=rules.yaml&rule_name[]=alert&rule_name[]=record", 200, 3, 6)
|
check("/api/v1/rules?rule_group[]=group&file[]=rules.yaml&rule_name[]=alert&rule_name[]=record", 200, 3, 6)
|
||||||
|
|
||||||
check("/api/v1/rules?group_limit=1", 200, 1, 2)
|
|
||||||
check("/api/v1/rules?group_limit=1&type=alert", 200, 1, 1)
|
|
||||||
check("/api/v1/rules?group_limit=1&type=record", 200, 1, 1)
|
|
||||||
check("/api/v1/rules?group_limit=2", 200, 2, 4)
|
|
||||||
check(fmt.Sprintf("/api/v1/rules?group_limit=1&page_num=%d", 1), 200, 1, 2)
|
|
||||||
})
|
})
|
||||||
t.Run("/api/v1/rules&exclude_alerts=true", func(t *testing.T) {
|
t.Run("/api/v1/rules&exclude_alerts=true", func(t *testing.T) {
|
||||||
// check if response returns active alerts by default
|
// check if response returns active alerts by default
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
package rule
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
@@ -8,28 +8,79 @@ import (
|
|||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
|
||||||
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/rule"
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
// ParamGroupID is group id key in url parameter
|
// ParamGroupID is group id key in url parameter
|
||||||
ParamGroupID = "group_id"
|
paramGroupID = "group_id"
|
||||||
// ParamAlertID is alert id key in url parameter
|
// ParamAlertID is alert id key in url parameter
|
||||||
ParamAlertID = "alert_id"
|
paramAlertID = "alert_id"
|
||||||
// ParamRuleID is rule id key in url parameter
|
// ParamRuleID is rule id key in url parameter
|
||||||
ParamRuleID = "rule_id"
|
paramRuleID = "rule_id"
|
||||||
|
|
||||||
// TypeRecording is a RecordingRule type
|
|
||||||
TypeRecording = "recording"
|
|
||||||
// TypeAlerting is an AlertingRule type
|
|
||||||
TypeAlerting = "alerting"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
// ApiGroup represents a Group for web view
|
type apiNotifier struct {
|
||||||
type ApiGroup struct {
|
Kind string `json:"kind"`
|
||||||
|
Targets []*apiTarget `json:"targets"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type apiTarget struct {
|
||||||
|
Address string `json:"address"`
|
||||||
|
Labels map[string]string `json:"labels"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// apiAlert represents a notifier.AlertingRule state
|
||||||
|
// for WEB view
|
||||||
|
// https://github.com/prometheus/compliance/blob/main/alert_generator/specification.md#get-apiv1rules
|
||||||
|
type apiAlert struct {
|
||||||
|
State string `json:"state"`
|
||||||
|
Name string `json:"name"`
|
||||||
|
Value string `json:"value"`
|
||||||
|
Labels map[string]string `json:"labels,omitempty"`
|
||||||
|
Annotations map[string]string `json:"annotations"`
|
||||||
|
ActiveAt time.Time `json:"activeAt"`
|
||||||
|
|
||||||
|
// Additional fields
|
||||||
|
|
||||||
|
// ID is an unique Alert's ID within a group
|
||||||
|
ID string `json:"id"`
|
||||||
|
// RuleID is an unique Rule's ID within a group
|
||||||
|
RuleID string `json:"rule_id"`
|
||||||
|
// GroupID is an unique Group's ID
|
||||||
|
GroupID string `json:"group_id"`
|
||||||
|
// Expression contains the PromQL/MetricsQL expression
|
||||||
|
// for Rule's evaluation
|
||||||
|
Expression string `json:"expression"`
|
||||||
|
// SourceLink contains a link to a system which should show
|
||||||
|
// why Alert was generated
|
||||||
|
SourceLink string `json:"source"`
|
||||||
|
// Restored shows whether Alert's state was restored on restart
|
||||||
|
Restored bool `json:"restored"`
|
||||||
|
// Stabilizing shows when firing state is kept because of
|
||||||
|
// `keep_firing_for` instead of real alert
|
||||||
|
Stabilizing bool `json:"stabilizing"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// WebLink returns a link to the alert which can be used in UI.
|
||||||
|
func (aa *apiAlert) WebLink() string {
|
||||||
|
return fmt.Sprintf("alert?%s=%s&%s=%s",
|
||||||
|
paramGroupID, aa.GroupID, paramAlertID, aa.ID)
|
||||||
|
}
|
||||||
|
|
||||||
|
// APILink returns a link to the alert's JSON representation.
|
||||||
|
func (aa *apiAlert) APILink() string {
|
||||||
|
return fmt.Sprintf("api/v1/alert?%s=%s&%s=%s",
|
||||||
|
paramGroupID, aa.GroupID, paramAlertID, aa.ID)
|
||||||
|
}
|
||||||
|
|
||||||
|
// apiGroup represents Group for web view
|
||||||
|
// https://github.com/prometheus/compliance/blob/main/alert_generator/specification.md#get-apiv1rules
|
||||||
|
type apiGroup struct {
|
||||||
// Name is the group name as present in the config
|
// Name is the group name as present in the config
|
||||||
Name string `json:"name"`
|
Name string `json:"name"`
|
||||||
// Rules contains both recording and alerting rules
|
// Rules contains both recording and alerting rules
|
||||||
Rules []ApiRule `json:"rules"`
|
Rules []apiRule `json:"rules"`
|
||||||
// Interval is the Group's evaluation interval in float seconds as present in the file.
|
// Interval is the Group's evaluation interval in float seconds as present in the file.
|
||||||
Interval float64 `json:"interval"`
|
Interval float64 `json:"interval"`
|
||||||
// LastEvaluation is the timestamp of the last time the Group was executed
|
// LastEvaluation is the timestamp of the last time the Group was executed
|
||||||
@@ -57,24 +108,23 @@ type ApiGroup struct {
|
|||||||
EvalOffset float64 `json:"eval_offset,omitempty"`
|
EvalOffset float64 `json:"eval_offset,omitempty"`
|
||||||
// EvalDelay will adjust the `time` parameter of rule evaluation requests to compensate intentional query delay from datasource.
|
// EvalDelay will adjust the `time` parameter of rule evaluation requests to compensate intentional query delay from datasource.
|
||||||
EvalDelay float64 `json:"eval_delay,omitempty"`
|
EvalDelay float64 `json:"eval_delay,omitempty"`
|
||||||
// States represents counts per each rule state
|
// Unhealthy unhealthy rules count
|
||||||
States map[string]int `json:"states"`
|
Unhealthy int
|
||||||
|
// Healthy passing rules count
|
||||||
|
Healthy int
|
||||||
|
// NoMatch not matching rules count
|
||||||
|
NoMatch int
|
||||||
}
|
}
|
||||||
|
|
||||||
// APILink returns a link to the group's JSON representation.
|
// groupAlerts represents a group of alerts for WEB view
|
||||||
func (ag *ApiGroup) APILink() string {
|
type groupAlerts struct {
|
||||||
return fmt.Sprintf("api/v1/group?%s=%s", ParamGroupID, ag.ID)
|
Group *apiGroup
|
||||||
|
Alerts []*apiAlert
|
||||||
}
|
}
|
||||||
|
|
||||||
// GroupAlerts represents a Group with its Alerts for web view
|
// apiRule represents a Rule for web view
|
||||||
type GroupAlerts struct {
|
|
||||||
Group *ApiGroup
|
|
||||||
Alerts []*ApiAlert
|
|
||||||
}
|
|
||||||
|
|
||||||
// ApiRule represents a Rule for web view
|
|
||||||
// see https://github.com/prometheus/compliance/blob/main/alert_generator/specification.md#get-apiv1rules
|
// see https://github.com/prometheus/compliance/blob/main/alert_generator/specification.md#get-apiv1rules
|
||||||
type ApiRule struct {
|
type apiRule struct {
|
||||||
// State must be one of these under following scenarios
|
// State must be one of these under following scenarios
|
||||||
// "pending": at least 1 alert in the rule in pending state and no other alert in firing ruleState.
|
// "pending": at least 1 alert in the rule in pending state and no other alert in firing ruleState.
|
||||||
// "firing": at least 1 alert in the rule in firing state.
|
// "firing": at least 1 alert in the rule in firing state.
|
||||||
@@ -96,7 +146,7 @@ type ApiRule struct {
|
|||||||
// LastEvaluation is the timestamp of the last time the rule was executed
|
// LastEvaluation is the timestamp of the last time the rule was executed
|
||||||
LastEvaluation time.Time `json:"lastEvaluation"`
|
LastEvaluation time.Time `json:"lastEvaluation"`
|
||||||
// Alerts is the list of all the alerts in this rule that are currently pending or firing
|
// Alerts is the list of all the alerts in this rule that are currently pending or firing
|
||||||
Alerts []*ApiAlert `json:"alerts,omitempty"`
|
Alerts []*apiAlert `json:"alerts,omitempty"`
|
||||||
// Health is the health of rule evaluation.
|
// Health is the health of rule evaluation.
|
||||||
// It MUST be one of "ok", "err", "unknown"
|
// It MUST be one of "ok", "err", "unknown"
|
||||||
Health string `json:"health"`
|
Health string `json:"health"`
|
||||||
@@ -127,92 +177,143 @@ type ApiRule struct {
|
|||||||
// MaxUpdates is the max number of recorded ruleStateEntry objects
|
// MaxUpdates is the max number of recorded ruleStateEntry objects
|
||||||
MaxUpdates int `json:"max_updates_entries"`
|
MaxUpdates int `json:"max_updates_entries"`
|
||||||
// Updates contains the ordered list of recorded ruleStateEntry objects
|
// Updates contains the ordered list of recorded ruleStateEntry objects
|
||||||
Updates []StateEntry `json:"-"`
|
Updates []rule.StateEntry `json:"-"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// IsNoMatch returns true if rule is in nomatch state
|
// apiRuleWithUpdates represents apiRule but with extra fields for marshalling
|
||||||
func (r *ApiRule) IsNoMatch() bool {
|
type apiRuleWithUpdates struct {
|
||||||
return r.LastSamples == 0 && r.LastSeriesFetched != nil && *r.LastSeriesFetched == 0
|
apiRule
|
||||||
}
|
|
||||||
|
|
||||||
// ApiAlert represents a notifier.AlertingRule state
|
|
||||||
// for WEB view
|
|
||||||
// https://github.com/prometheus/compliance/blob/main/alert_generator/specification.md#get-apiv1rules
|
|
||||||
type ApiAlert struct {
|
|
||||||
State string `json:"state"`
|
|
||||||
Name string `json:"name"`
|
|
||||||
Value string `json:"value"`
|
|
||||||
Labels map[string]string `json:"labels,omitempty"`
|
|
||||||
Annotations map[string]string `json:"annotations"`
|
|
||||||
ActiveAt time.Time `json:"activeAt"`
|
|
||||||
|
|
||||||
// Additional fields
|
|
||||||
|
|
||||||
// ID is an unique Alert's ID within a group
|
|
||||||
ID string `json:"id"`
|
|
||||||
// RuleID is an unique Rule's ID within a group
|
|
||||||
RuleID string `json:"rule_id"`
|
|
||||||
// GroupID is an unique Group's ID
|
|
||||||
GroupID string `json:"group_id"`
|
|
||||||
// Expression contains the PromQL/MetricsQL expression
|
|
||||||
// for Rule's evaluation
|
|
||||||
Expression string `json:"expression"`
|
|
||||||
// SourceLink contains a link to a system which should show
|
|
||||||
// why Alert was generated
|
|
||||||
SourceLink string `json:"source"`
|
|
||||||
// Restored shows whether Alert's state was restored on restart
|
|
||||||
Restored bool `json:"restored"`
|
|
||||||
// Stabilizing shows when firing state is kept because of
|
|
||||||
// `keep_firing_for` instead of real alert
|
|
||||||
Stabilizing bool `json:"stabilizing"`
|
|
||||||
}
|
|
||||||
|
|
||||||
// WebLink returns a link to the alert which can be used in UI.
|
|
||||||
func (aa *ApiAlert) WebLink() string {
|
|
||||||
return fmt.Sprintf("alert?%s=%s&%s=%s",
|
|
||||||
ParamGroupID, aa.GroupID, ParamAlertID, aa.ID)
|
|
||||||
}
|
|
||||||
|
|
||||||
// APILink returns a link to the alert's JSON representation.
|
|
||||||
func (aa *ApiAlert) APILink() string {
|
|
||||||
return fmt.Sprintf("api/v1/alert?%s=%s&%s=%s",
|
|
||||||
ParamGroupID, aa.GroupID, ParamAlertID, aa.ID)
|
|
||||||
}
|
|
||||||
|
|
||||||
// ApiRuleWithUpdates represents ApiRule but with extra fields for marshalling
|
|
||||||
type ApiRuleWithUpdates struct {
|
|
||||||
ApiRule
|
|
||||||
// Updates contains the ordered list of recorded ruleStateEntry objects
|
// Updates contains the ordered list of recorded ruleStateEntry objects
|
||||||
StateUpdates []StateEntry `json:"updates,omitempty"`
|
StateUpdates []rule.StateEntry `json:"updates,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// APILink returns a link to the rule's JSON representation.
|
// APILink returns a link to the rule's JSON representation.
|
||||||
func (ar ApiRule) APILink() string {
|
func (ar apiRule) APILink() string {
|
||||||
return fmt.Sprintf("api/v1/rule?%s=%s&%s=%s",
|
return fmt.Sprintf("api/v1/rule?%s=%s&%s=%s",
|
||||||
ParamGroupID, ar.GroupID, ParamRuleID, ar.ID)
|
paramGroupID, ar.GroupID, paramRuleID, ar.ID)
|
||||||
}
|
}
|
||||||
|
|
||||||
// WebLink returns a link to the alert which can be used in UI.
|
// WebLink returns a link to the alert which can be used in UI.
|
||||||
func (ar ApiRule) WebLink() string {
|
func (ar apiRule) WebLink() string {
|
||||||
return fmt.Sprintf("rule?%s=%s&%s=%s",
|
return fmt.Sprintf("rule?%s=%s&%s=%s",
|
||||||
ParamGroupID, ar.GroupID, ParamRuleID, ar.ID)
|
paramGroupID, ar.GroupID, paramRuleID, ar.ID)
|
||||||
}
|
}
|
||||||
|
|
||||||
// AlertsToAPI returns list of ApiAlert objects from existing alerts
|
func ruleToAPI(r any) apiRule {
|
||||||
func (ar *AlertingRule) AlertsToAPI() []*ApiAlert {
|
if ar, ok := r.(*rule.AlertingRule); ok {
|
||||||
var alerts []*ApiAlert
|
return alertingToAPI(ar)
|
||||||
|
}
|
||||||
|
if rr, ok := r.(*rule.RecordingRule); ok {
|
||||||
|
return recordingToAPI(rr)
|
||||||
|
}
|
||||||
|
return apiRule{}
|
||||||
|
}
|
||||||
|
|
||||||
|
const (
|
||||||
|
ruleTypeRecording = "recording"
|
||||||
|
ruleTypeAlerting = "alerting"
|
||||||
|
)
|
||||||
|
|
||||||
|
func recordingToAPI(rr *rule.RecordingRule) apiRule {
|
||||||
|
lastState := rule.GetLastEntry(rr)
|
||||||
|
r := apiRule{
|
||||||
|
Type: ruleTypeRecording,
|
||||||
|
DatasourceType: rr.Type.String(),
|
||||||
|
Name: rr.Name,
|
||||||
|
Query: rr.Expr,
|
||||||
|
Labels: rr.Labels,
|
||||||
|
LastEvaluation: lastState.Time,
|
||||||
|
EvaluationTime: lastState.Duration.Seconds(),
|
||||||
|
Health: "ok",
|
||||||
|
LastSamples: lastState.Samples,
|
||||||
|
LastSeriesFetched: lastState.SeriesFetched,
|
||||||
|
MaxUpdates: rule.GetRuleStateSize(rr),
|
||||||
|
Updates: rule.GetAllRuleState(rr),
|
||||||
|
|
||||||
|
// encode as strings to avoid rounding
|
||||||
|
ID: fmt.Sprintf("%d", rr.ID()),
|
||||||
|
GroupID: fmt.Sprintf("%d", rr.GroupID),
|
||||||
|
GroupName: rr.GroupName,
|
||||||
|
File: rr.File,
|
||||||
|
}
|
||||||
|
if lastState.Err != nil {
|
||||||
|
r.LastError = lastState.Err.Error()
|
||||||
|
r.Health = "err"
|
||||||
|
}
|
||||||
|
return r
|
||||||
|
}
|
||||||
|
|
||||||
|
// alertingToAPI returns Rule representation in form of apiRule
|
||||||
|
func alertingToAPI(ar *rule.AlertingRule) apiRule {
|
||||||
|
lastState := rule.GetLastEntry(ar)
|
||||||
|
r := apiRule{
|
||||||
|
Type: ruleTypeAlerting,
|
||||||
|
DatasourceType: ar.Type.String(),
|
||||||
|
Name: ar.Name,
|
||||||
|
Query: ar.Expr,
|
||||||
|
Duration: ar.For.Seconds(),
|
||||||
|
KeepFiringFor: ar.KeepFiringFor.Seconds(),
|
||||||
|
Labels: ar.Labels,
|
||||||
|
Annotations: ar.Annotations,
|
||||||
|
LastEvaluation: lastState.Time,
|
||||||
|
EvaluationTime: lastState.Duration.Seconds(),
|
||||||
|
Health: "ok",
|
||||||
|
State: "inactive",
|
||||||
|
Alerts: ruleToAPIAlert(ar),
|
||||||
|
LastSamples: lastState.Samples,
|
||||||
|
LastSeriesFetched: lastState.SeriesFetched,
|
||||||
|
MaxUpdates: rule.GetRuleStateSize(ar),
|
||||||
|
Updates: rule.GetAllRuleState(ar),
|
||||||
|
Debug: ar.Debug,
|
||||||
|
|
||||||
|
// encode as strings to avoid rounding in JSON
|
||||||
|
ID: fmt.Sprintf("%d", ar.ID()),
|
||||||
|
GroupID: fmt.Sprintf("%d", ar.GroupID),
|
||||||
|
GroupName: ar.GroupName,
|
||||||
|
File: ar.File,
|
||||||
|
}
|
||||||
|
if lastState.Err != nil {
|
||||||
|
r.LastError = lastState.Err.Error()
|
||||||
|
r.Health = "err"
|
||||||
|
}
|
||||||
|
// satisfy apiRule.State logic
|
||||||
|
if len(r.Alerts) > 0 {
|
||||||
|
r.State = notifier.StatePending.String()
|
||||||
|
stateFiring := notifier.StateFiring.String()
|
||||||
|
for _, a := range r.Alerts {
|
||||||
|
if a.State == stateFiring {
|
||||||
|
r.State = stateFiring
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return r
|
||||||
|
}
|
||||||
|
|
||||||
|
// ruleToAPIAlert generates list of apiAlert objects from existing alerts
|
||||||
|
func ruleToAPIAlert(ar *rule.AlertingRule) []*apiAlert {
|
||||||
|
var alerts []*apiAlert
|
||||||
for _, a := range ar.GetAlerts() {
|
for _, a := range ar.GetAlerts() {
|
||||||
if a.State == notifier.StateInactive {
|
if a.State == notifier.StateInactive {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
alerts = append(alerts, NewAlertAPI(ar, a))
|
alerts = append(alerts, newAlertAPI(ar, a))
|
||||||
}
|
}
|
||||||
return alerts
|
return alerts
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// alertToAPI generates apiAlert object from alert by its id(hash)
|
||||||
|
func alertToAPI(ar *rule.AlertingRule, id uint64) *apiAlert {
|
||||||
|
a := ar.GetAlert(id)
|
||||||
|
if a == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return newAlertAPI(ar, a)
|
||||||
|
}
|
||||||
|
|
||||||
// NewAlertAPI creates apiAlert for notifier.Alert
|
// NewAlertAPI creates apiAlert for notifier.Alert
|
||||||
func NewAlertAPI(ar *AlertingRule, a *notifier.Alert) *ApiAlert {
|
func newAlertAPI(ar *rule.AlertingRule, a *notifier.Alert) *apiAlert {
|
||||||
aa := &ApiAlert{
|
aa := &apiAlert{
|
||||||
// encode as strings to avoid rounding
|
// encode as strings to avoid rounding
|
||||||
ID: fmt.Sprintf("%d", a.ID),
|
ID: fmt.Sprintf("%d", a.ID),
|
||||||
GroupID: fmt.Sprintf("%d", a.GroupID),
|
GroupID: fmt.Sprintf("%d", a.GroupID),
|
||||||
@@ -227,8 +328,8 @@ func NewAlertAPI(ar *AlertingRule, a *notifier.Alert) *ApiAlert {
|
|||||||
Restored: a.Restored,
|
Restored: a.Restored,
|
||||||
Value: strconv.FormatFloat(a.Value, 'f', -1, 32),
|
Value: strconv.FormatFloat(a.Value, 'f', -1, 32),
|
||||||
}
|
}
|
||||||
if notifier.AlertURLGeneratorFn != nil {
|
if alertURLGeneratorFn != nil {
|
||||||
aa.SourceLink = notifier.AlertURLGeneratorFn(*a)
|
aa.SourceLink = alertURLGeneratorFn(*a)
|
||||||
}
|
}
|
||||||
if a.State == notifier.StateFiring && !a.KeepFiringSince.IsZero() {
|
if a.State == notifier.StateFiring && !a.KeepFiringSince.IsZero() {
|
||||||
aa.Stabilizing = true
|
aa.Stabilizing = true
|
||||||
@@ -236,25 +337,9 @@ func NewAlertAPI(ar *AlertingRule, a *notifier.Alert) *ApiAlert {
|
|||||||
return aa
|
return aa
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *ApiRule) ExtendState() {
|
func groupToAPI(g *rule.Group) *apiGroup {
|
||||||
if len(r.Alerts) > 0 {
|
g = g.DeepCopy()
|
||||||
return
|
ag := apiGroup{
|
||||||
}
|
|
||||||
if r.State == "" {
|
|
||||||
r.State = "ok"
|
|
||||||
}
|
|
||||||
if r.Health != "ok" {
|
|
||||||
r.State = "unhealthy"
|
|
||||||
} else if r.IsNoMatch() {
|
|
||||||
r.State = "nomatch"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// ToAPI returns ApiGroup representation of g
|
|
||||||
func (g *Group) ToAPI() *ApiGroup {
|
|
||||||
g.mu.RLock()
|
|
||||||
defer g.mu.RUnlock()
|
|
||||||
ag := ApiGroup{
|
|
||||||
// encode as string to avoid rounding
|
// encode as string to avoid rounding
|
||||||
ID: strconv.FormatUint(g.GetID(), 10),
|
ID: strconv.FormatUint(g.GetID(), 10),
|
||||||
Name: g.Name,
|
Name: g.Name,
|
||||||
@@ -267,7 +352,6 @@ func (g *Group) ToAPI() *ApiGroup {
|
|||||||
Headers: headersToStrings(g.Headers),
|
Headers: headersToStrings(g.Headers),
|
||||||
NotifierHeaders: headersToStrings(g.NotifierHeaders),
|
NotifierHeaders: headersToStrings(g.NotifierHeaders),
|
||||||
Labels: g.Labels,
|
Labels: g.Labels,
|
||||||
States: make(map[string]int),
|
|
||||||
}
|
}
|
||||||
if g.EvalOffset != nil {
|
if g.EvalOffset != nil {
|
||||||
ag.EvalOffset = g.EvalOffset.Seconds()
|
ag.EvalOffset = g.EvalOffset.Seconds()
|
||||||
@@ -275,10 +359,9 @@ func (g *Group) ToAPI() *ApiGroup {
|
|||||||
if g.EvalDelay != nil {
|
if g.EvalDelay != nil {
|
||||||
ag.EvalDelay = g.EvalDelay.Seconds()
|
ag.EvalDelay = g.EvalDelay.Seconds()
|
||||||
}
|
}
|
||||||
ag.Rules = make([]ApiRule, 0, len(g.Rules))
|
ag.Rules = make([]apiRule, 0)
|
||||||
for _, r := range g.Rules {
|
for _, r := range g.Rules {
|
||||||
ar := r.ToAPI()
|
ag.Rules = append(ag.Rules, ruleToAPI(r))
|
||||||
ag.Rules = append(ag.Rules, ar)
|
|
||||||
}
|
}
|
||||||
return &ag
|
return &ag
|
||||||
}
|
}
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
package rule
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
@@ -8,6 +8,7 @@ import (
|
|||||||
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config"
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
|
||||||
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/rule"
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestRecordingToApi(t *testing.T) {
|
func TestRecordingToApi(t *testing.T) {
|
||||||
@@ -16,7 +17,7 @@ func TestRecordingToApi(t *testing.T) {
|
|||||||
Values: []float64{1}, Timestamps: []int64{0},
|
Values: []float64{1}, Timestamps: []int64{0},
|
||||||
})
|
})
|
||||||
entriesLimit := 44
|
entriesLimit := 44
|
||||||
g := NewGroup(config.Group{
|
g := rule.NewGroup(config.Group{
|
||||||
Name: "group",
|
Name: "group",
|
||||||
File: "rules.yaml",
|
File: "rules.yaml",
|
||||||
Concurrency: 1,
|
Concurrency: 1,
|
||||||
@@ -30,24 +31,24 @@ func TestRecordingToApi(t *testing.T) {
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
}, fq, 1*time.Minute, nil)
|
}, fq, 1*time.Minute, nil)
|
||||||
rr := g.Rules[0].(*RecordingRule)
|
rr := g.Rules[0].(*rule.RecordingRule)
|
||||||
|
|
||||||
expectedRes := ApiRule{
|
expectedRes := apiRule{
|
||||||
Name: "record_name",
|
Name: "record_name",
|
||||||
Query: "up",
|
Query: "up",
|
||||||
Labels: map[string]string{"label": "value"},
|
Labels: map[string]string{"label": "value"},
|
||||||
Health: "ok",
|
Health: "ok",
|
||||||
Type: TypeRecording,
|
Type: ruleTypeRecording,
|
||||||
DatasourceType: "prometheus",
|
DatasourceType: "prometheus",
|
||||||
ID: "1248",
|
ID: "1248",
|
||||||
GroupID: fmt.Sprintf("%d", g.CreateID()),
|
GroupID: fmt.Sprintf("%d", g.CreateID()),
|
||||||
GroupName: "group",
|
GroupName: "group",
|
||||||
File: "rules.yaml",
|
File: "rules.yaml",
|
||||||
MaxUpdates: 44,
|
MaxUpdates: 44,
|
||||||
Updates: make([]StateEntry, 0),
|
Updates: make([]rule.StateEntry, 0),
|
||||||
}
|
}
|
||||||
|
|
||||||
res := rr.ToAPI()
|
res := recordingToAPI(rr)
|
||||||
|
|
||||||
if !reflect.DeepEqual(res, expectedRes) {
|
if !reflect.DeepEqual(res, expectedRes) {
|
||||||
t.Fatalf("expected to have: \n%v;\ngot: \n%v", expectedRes, res)
|
t.Fatalf("expected to have: \n%v;\ngot: \n%v", expectedRes, res)
|
||||||
@@ -27,9 +27,6 @@ vmauth-linux-ppc64le-prod:
|
|||||||
vmauth-linux-386-prod:
|
vmauth-linux-386-prod:
|
||||||
APP_NAME=vmauth $(MAKE) app-via-docker-linux-386
|
APP_NAME=vmauth $(MAKE) app-via-docker-linux-386
|
||||||
|
|
||||||
vmauth-linux-s390x-prod:
|
|
||||||
APP_NAME=vmauth $(MAKE) app-via-docker-linux-s390x
|
|
||||||
|
|
||||||
vmauth-darwin-amd64-prod:
|
vmauth-darwin-amd64-prod:
|
||||||
APP_NAME=vmauth $(MAKE) app-via-docker-darwin-amd64
|
APP_NAME=vmauth $(MAKE) app-via-docker-darwin-amd64
|
||||||
|
|
||||||
|
|||||||
@@ -4,7 +4,6 @@ import (
|
|||||||
"bytes"
|
"bytes"
|
||||||
"context"
|
"context"
|
||||||
"encoding/base64"
|
"encoding/base64"
|
||||||
"errors"
|
|
||||||
"flag"
|
"flag"
|
||||||
"fmt"
|
"fmt"
|
||||||
"math"
|
"math"
|
||||||
@@ -13,7 +12,6 @@ import (
|
|||||||
"net/url"
|
"net/url"
|
||||||
"os"
|
"os"
|
||||||
"regexp"
|
"regexp"
|
||||||
"slices"
|
|
||||||
"sort"
|
"sort"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
@@ -29,7 +27,6 @@ import (
|
|||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs/fscore"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs/fscore"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
|
||||||
@@ -44,9 +41,6 @@ var (
|
|||||||
"See https://docs.victoriametrics.com/victoriametrics/vmauth/#load-balancing for details")
|
"See https://docs.victoriametrics.com/victoriametrics/vmauth/#load-balancing for details")
|
||||||
defaultLoadBalancingPolicy = flag.String("loadBalancingPolicy", "least_loaded", "The default load balancing policy to use for backend urls specified inside url_prefix section. "+
|
defaultLoadBalancingPolicy = flag.String("loadBalancingPolicy", "least_loaded", "The default load balancing policy to use for backend urls specified inside url_prefix section. "+
|
||||||
"Supported policies: least_loaded, first_available. See https://docs.victoriametrics.com/victoriametrics/vmauth/#load-balancing")
|
"Supported policies: least_loaded, first_available. See https://docs.victoriametrics.com/victoriametrics/vmauth/#load-balancing")
|
||||||
defaultMergeQueryArgs = flagutil.NewArrayString("mergeQueryArgs", "An optional list of client query arg names, which must be merged with args at backend urls. "+
|
|
||||||
"The rest of client query args are replaced by the corresponding query args from backend urls for security reasons; "+
|
|
||||||
"see https://docs.victoriametrics.com/victoriametrics/vmauth/#query-args-handling")
|
|
||||||
discoverBackendIPsGlobal = flag.Bool("discoverBackendIPs", false, "Whether to discover backend IPs via periodic DNS queries to hostnames specified in url_prefix. "+
|
discoverBackendIPsGlobal = flag.Bool("discoverBackendIPs", false, "Whether to discover backend IPs via periodic DNS queries to hostnames specified in url_prefix. "+
|
||||||
"This may be useful when url_prefix points to a hostname with dynamically scaled instances behind it. See https://docs.victoriametrics.com/victoriametrics/vmauth/#discovering-backend-ips")
|
"This may be useful when url_prefix points to a hostname with dynamically scaled instances behind it. See https://docs.victoriametrics.com/victoriametrics/vmauth/#discovering-backend-ips")
|
||||||
discoverBackendIPsInterval = flag.Duration("discoverBackendIPsInterval", 10*time.Second, "The interval for re-discovering backend IPs if -discoverBackendIPs command-line flag is set. "+
|
discoverBackendIPsInterval = flag.Duration("discoverBackendIPsInterval", 10*time.Second, "The interval for re-discovering backend IPs if -discoverBackendIPs command-line flag is set. "+
|
||||||
@@ -67,11 +61,10 @@ type AuthConfig struct {
|
|||||||
type UserInfo struct {
|
type UserInfo struct {
|
||||||
Name string `yaml:"name,omitempty"`
|
Name string `yaml:"name,omitempty"`
|
||||||
|
|
||||||
BearerToken string `yaml:"bearer_token,omitempty"`
|
BearerToken string `yaml:"bearer_token,omitempty"`
|
||||||
JWT *JWTConfig `yaml:"jwt,omitempty"`
|
AuthToken string `yaml:"auth_token,omitempty"`
|
||||||
AuthToken string `yaml:"auth_token,omitempty"`
|
Username string `yaml:"username,omitempty"`
|
||||||
Username string `yaml:"username,omitempty"`
|
Password string `yaml:"password,omitempty"`
|
||||||
Password string `yaml:"password,omitempty"`
|
|
||||||
|
|
||||||
URLPrefix *URLPrefix `yaml:"url_prefix,omitempty"`
|
URLPrefix *URLPrefix `yaml:"url_prefix,omitempty"`
|
||||||
DiscoverBackendIPs *bool `yaml:"discover_backend_ips,omitempty"`
|
DiscoverBackendIPs *bool `yaml:"discover_backend_ips,omitempty"`
|
||||||
@@ -82,7 +75,6 @@ type UserInfo struct {
|
|||||||
DefaultURL *URLPrefix `yaml:"default_url,omitempty"`
|
DefaultURL *URLPrefix `yaml:"default_url,omitempty"`
|
||||||
RetryStatusCodes []int `yaml:"retry_status_codes,omitempty"`
|
RetryStatusCodes []int `yaml:"retry_status_codes,omitempty"`
|
||||||
LoadBalancingPolicy string `yaml:"load_balancing_policy,omitempty"`
|
LoadBalancingPolicy string `yaml:"load_balancing_policy,omitempty"`
|
||||||
MergeQueryArgs []string `yaml:"merge_query_args,omitempty"`
|
|
||||||
DropSrcPathPrefixParts *int `yaml:"drop_src_path_prefix_parts,omitempty"`
|
DropSrcPathPrefixParts *int `yaml:"drop_src_path_prefix_parts,omitempty"`
|
||||||
TLSCAFile string `yaml:"tls_ca_file,omitempty"`
|
TLSCAFile string `yaml:"tls_ca_file,omitempty"`
|
||||||
TLSCertFile string `yaml:"tls_cert_file,omitempty"`
|
TLSCertFile string `yaml:"tls_cert_file,omitempty"`
|
||||||
@@ -92,79 +84,30 @@ type UserInfo struct {
|
|||||||
|
|
||||||
MetricLabels map[string]string `yaml:"metric_labels,omitempty"`
|
MetricLabels map[string]string `yaml:"metric_labels,omitempty"`
|
||||||
|
|
||||||
AccessLog *AccessLog `yaml:"access_log,omitempty"`
|
|
||||||
|
|
||||||
concurrencyLimitCh chan struct{}
|
concurrencyLimitCh chan struct{}
|
||||||
concurrencyLimitReached *metrics.Counter
|
concurrencyLimitReached *metrics.Counter
|
||||||
|
|
||||||
rt http.RoundTripper
|
rt http.RoundTripper
|
||||||
|
|
||||||
requests *metrics.Counter
|
requests *metrics.Counter
|
||||||
requestErrors *metrics.Counter
|
|
||||||
backendRequests *metrics.Counter
|
|
||||||
backendErrors *metrics.Counter
|
backendErrors *metrics.Counter
|
||||||
requestsDuration *metrics.Summary
|
requestsDuration *metrics.Summary
|
||||||
}
|
}
|
||||||
|
|
||||||
// AccessLog represents configuration for access log settings.
|
|
||||||
type AccessLog struct {
|
|
||||||
Filters *AccessLogFilters `yaml:"filters"`
|
|
||||||
}
|
|
||||||
|
|
||||||
// AccessLogFilters represents list of filters for access logs printing
|
|
||||||
type AccessLogFilters struct {
|
|
||||||
// SkipStatusCodes is a list of HTTP status codes for which access logs will be skipped
|
|
||||||
SkipStatusCodes []int `yaml:"skip_status_codes"`
|
|
||||||
}
|
|
||||||
|
|
||||||
func (ui *UserInfo) logRequest(r *http.Request, userName string, statusCode int, duration time.Duration) {
|
|
||||||
if ui.AccessLog == nil {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
filters := ui.AccessLog.Filters
|
|
||||||
if filters != nil && len(filters.SkipStatusCodes) > 0 {
|
|
||||||
if slices.Contains(filters.SkipStatusCodes, statusCode) {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
remoteAddr := httpserver.GetQuotedRemoteAddr(r)
|
|
||||||
requestURI := httpserver.GetRequestURI(r)
|
|
||||||
logger.Infof("access_log request_host=%q request_uri=%q status_code=%d remote_addr=%s user_agent=%q referer=%q duration_ms=%d username=%q",
|
|
||||||
r.Host, requestURI, statusCode, remoteAddr, r.UserAgent(), r.Referer(), duration.Milliseconds(), userName)
|
|
||||||
}
|
|
||||||
|
|
||||||
// HeadersConf represents config for request and response headers.
|
// HeadersConf represents config for request and response headers.
|
||||||
type HeadersConf struct {
|
type HeadersConf struct {
|
||||||
RequestHeaders []*Header `yaml:"headers,omitempty"`
|
RequestHeaders []*Header `yaml:"headers,omitempty"`
|
||||||
ResponseHeaders []*Header `yaml:"response_headers,omitempty"`
|
ResponseHeaders []*Header `yaml:"response_headers,omitempty"`
|
||||||
KeepOriginalHost *bool `yaml:"keep_original_host,omitempty"`
|
KeepOriginalHost *bool `yaml:"keep_original_host,omitempty"`
|
||||||
hasAnyPlaceHolders bool
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (ui *UserInfo) beginConcurrencyLimit(ctx context.Context) error {
|
func (ui *UserInfo) beginConcurrencyLimit() error {
|
||||||
select {
|
select {
|
||||||
case ui.concurrencyLimitCh <- struct{}{}:
|
case ui.concurrencyLimitCh <- struct{}{}:
|
||||||
return nil
|
return nil
|
||||||
default:
|
default:
|
||||||
// The number of concurrently executed requests for the given user equals the limit.
|
ui.concurrencyLimitReached.Inc()
|
||||||
// Wait until some of the currently executed requests are finished, so the current request could be executed.
|
return fmt.Errorf("cannot handle more than %d concurrent requests from user %s", ui.getMaxConcurrentRequests(), ui.name())
|
||||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10078
|
|
||||||
select {
|
|
||||||
case ui.concurrencyLimitCh <- struct{}{}:
|
|
||||||
return nil
|
|
||||||
case <-ctx.Done():
|
|
||||||
err := ctx.Err()
|
|
||||||
if errors.Is(err, context.DeadlineExceeded) {
|
|
||||||
// The current request couldn't be executed until the request timeout.
|
|
||||||
ui.concurrencyLimitReached.Inc()
|
|
||||||
return fmt.Errorf("cannot start executing the request during -maxQueueDuration=%s because %d concurrent requests from the user %s are executed",
|
|
||||||
*maxQueueDuration, ui.getMaxConcurrentRequests(), ui.name())
|
|
||||||
}
|
|
||||||
|
|
||||||
return fmt.Errorf("cannot start executing the request because %d concurrent requests from the user %s are executed: %w",
|
|
||||||
ui.getMaxConcurrentRequests(), ui.name(), err)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -180,28 +123,6 @@ func (ui *UserInfo) getMaxConcurrentRequests() int {
|
|||||||
return mcr
|
return mcr
|
||||||
}
|
}
|
||||||
|
|
||||||
func (ui *UserInfo) stopHealthChecks() {
|
|
||||||
if ui == nil {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
if ui.URLPrefix != nil {
|
|
||||||
bus := ui.URLPrefix.bus.Load()
|
|
||||||
bus.stopHealthChecks()
|
|
||||||
}
|
|
||||||
if ui.DefaultURL != nil {
|
|
||||||
bus := ui.DefaultURL.bus.Load()
|
|
||||||
bus.stopHealthChecks()
|
|
||||||
}
|
|
||||||
for i := range ui.URLMaps {
|
|
||||||
um := &ui.URLMaps[i]
|
|
||||||
if um.URLPrefix != nil {
|
|
||||||
bus := um.URLPrefix.bus.Load()
|
|
||||||
bus.stopHealthChecks()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Header is `Name: Value` http header, which must be added to the proxied request.
|
// Header is `Name: Value` http header, which must be added to the proxied request.
|
||||||
type Header struct {
|
type Header struct {
|
||||||
Name string
|
Name string
|
||||||
@@ -261,11 +182,6 @@ type URLMap struct {
|
|||||||
// LoadBalancingPolicy is load balancing policy among UrlPrefix backends.
|
// LoadBalancingPolicy is load balancing policy among UrlPrefix backends.
|
||||||
LoadBalancingPolicy string `yaml:"load_balancing_policy,omitempty"`
|
LoadBalancingPolicy string `yaml:"load_balancing_policy,omitempty"`
|
||||||
|
|
||||||
// MergeQueryArgs is a list of client query args, which must be merged with the existing backend query args.
|
|
||||||
//
|
|
||||||
// The rest of client query args are replaced with the corresponding backend query args for security reasons.
|
|
||||||
MergeQueryArgs []string `yaml:"merge_query_args,omitempty"`
|
|
||||||
|
|
||||||
// DropSrcPathPrefixParts is the number of `/`-delimited request path prefix parts to drop before proxying the request to backend.
|
// DropSrcPathPrefixParts is the number of `/`-delimited request path prefix parts to drop before proxying the request to backend.
|
||||||
DropSrcPathPrefixParts *int `yaml:"drop_src_path_prefix_parts,omitempty"`
|
DropSrcPathPrefixParts *int `yaml:"drop_src_path_prefix_parts,omitempty"`
|
||||||
}
|
}
|
||||||
@@ -312,7 +228,7 @@ func (qa *QueryArg) MarshalYAML() (any, error) {
|
|||||||
return qa.sOriginal, nil
|
return qa.sOriginal, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// URLPrefix represents the `url_prefix` from auth config.
|
// URLPrefix represents passed `url_prefix`
|
||||||
type URLPrefix struct {
|
type URLPrefix struct {
|
||||||
// requests are re-tried on other backend urls for these http response status codes
|
// requests are re-tried on other backend urls for these http response status codes
|
||||||
retryStatusCodes []int
|
retryStatusCodes []int
|
||||||
@@ -320,11 +236,6 @@ type URLPrefix struct {
|
|||||||
// load balancing policy used
|
// load balancing policy used
|
||||||
loadBalancingPolicy string
|
loadBalancingPolicy string
|
||||||
|
|
||||||
// the list of client query args, which must be merged with backend query args.
|
|
||||||
//
|
|
||||||
// By default backend query args replace all the client query args for security reasons.
|
|
||||||
mergeQueryArgs []string
|
|
||||||
|
|
||||||
// how many request path prefix parts to drop before routing the request to backendURL
|
// how many request path prefix parts to drop before routing the request to backendURL
|
||||||
dropSrcPathPrefixParts int
|
dropSrcPathPrefixParts int
|
||||||
|
|
||||||
@@ -337,7 +248,7 @@ type URLPrefix struct {
|
|||||||
// the list of backend urls
|
// the list of backend urls
|
||||||
//
|
//
|
||||||
// the list can be dynamically updated if `discover_backend_ips` option is set.
|
// the list can be dynamically updated if `discover_backend_ips` option is set.
|
||||||
bus atomic.Pointer[backendURLs]
|
bus atomic.Pointer[[]*backendURL]
|
||||||
|
|
||||||
// if this option is set, then backend ips for busOriginal are periodically re-discovered and put to bus.
|
// if this option is set, then backend ips for busOriginal are periodically re-discovered and put to bus.
|
||||||
discoverBackendIPs bool
|
discoverBackendIPs bool
|
||||||
@@ -361,94 +272,21 @@ func (up *URLPrefix) setLoadBalancingPolicy(loadBalancingPolicy string) error {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
type backendURLs struct {
|
|
||||||
healthChecksContext context.Context
|
|
||||||
healthChecksCancel func()
|
|
||||||
healthChecksWG sync.WaitGroup
|
|
||||||
|
|
||||||
bus []*backendURL
|
|
||||||
}
|
|
||||||
|
|
||||||
func newBackendURLs() *backendURLs {
|
|
||||||
ctx, cancel := context.WithCancel(context.Background())
|
|
||||||
return &backendURLs{
|
|
||||||
healthChecksContext: ctx,
|
|
||||||
healthChecksCancel: cancel,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func (bus *backendURLs) add(u *url.URL) {
|
|
||||||
bus.bus = append(bus.bus, &backendURL{
|
|
||||||
url: u,
|
|
||||||
healthCheckContext: bus.healthChecksContext,
|
|
||||||
healthCheckWG: &bus.healthChecksWG,
|
|
||||||
hasPlaceHolders: hasAnyPlaceholders(u),
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
func (bus *backendURLs) stopHealthChecks() {
|
|
||||||
bus.healthChecksCancel()
|
|
||||||
bus.healthChecksWG.Wait()
|
|
||||||
}
|
|
||||||
|
|
||||||
type backendURL struct {
|
type backendURL struct {
|
||||||
broken atomic.Bool
|
brokenDeadline atomic.Uint64
|
||||||
|
|
||||||
healthCheckContext context.Context
|
|
||||||
healthCheckWG *sync.WaitGroup
|
|
||||||
|
|
||||||
concurrentRequests atomic.Int32
|
concurrentRequests atomic.Int32
|
||||||
|
|
||||||
url *url.URL
|
url *url.URL
|
||||||
|
|
||||||
hasPlaceHolders bool
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (bu *backendURL) isBroken() bool {
|
func (bu *backendURL) isBroken() bool {
|
||||||
return bu.broken.Load()
|
ct := fasttime.UnixTimestamp()
|
||||||
|
return ct < bu.brokenDeadline.Load()
|
||||||
}
|
}
|
||||||
|
|
||||||
func (bu *backendURL) setBroken() {
|
func (bu *backendURL) setBroken() {
|
||||||
if bu.broken.CompareAndSwap(false, true) {
|
deadline := fasttime.UnixTimestamp() + uint64((*failTimeout).Seconds())
|
||||||
bu.healthCheckWG.Go(func() {
|
bu.brokenDeadline.Store(deadline)
|
||||||
bu.runHealthCheck()
|
|
||||||
bu.broken.Store(false)
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func (bu *backendURL) runHealthCheck() {
|
|
||||||
port := bu.url.Port()
|
|
||||||
if port == "" {
|
|
||||||
port = "80"
|
|
||||||
}
|
|
||||||
addr := net.JoinHostPort(bu.url.Hostname(), port)
|
|
||||||
|
|
||||||
t := time.NewTicker(*failTimeout)
|
|
||||||
defer t.Stop()
|
|
||||||
|
|
||||||
for {
|
|
||||||
select {
|
|
||||||
case <-t.C:
|
|
||||||
// Verify network connectivity via TCP dial before marking backend healthy.
|
|
||||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9997
|
|
||||||
ctx, cancel := context.WithTimeout(bu.healthCheckContext, time.Second)
|
|
||||||
c, err := netutil.Dialer.DialContext(ctx, "tcp", addr)
|
|
||||||
cancel()
|
|
||||||
if err != nil {
|
|
||||||
if errors.Is(bu.healthCheckContext.Err(), context.Canceled) {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
logger.Warnf("ignoring the backend at %s for %s because of dial error: %s", addr, *failTimeout, err)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
_ = c.Close()
|
|
||||||
return
|
|
||||||
case <-bu.healthCheckContext.Done():
|
|
||||||
return
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (bu *backendURL) get() {
|
func (bu *backendURL) get() {
|
||||||
@@ -460,8 +298,8 @@ func (bu *backendURL) put() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (up *URLPrefix) getBackendsCount() int {
|
func (up *URLPrefix) getBackendsCount() int {
|
||||||
bus := up.bus.Load()
|
pbus := up.bus.Load()
|
||||||
return len(bus.bus)
|
return len(*pbus)
|
||||||
}
|
}
|
||||||
|
|
||||||
// getBackendURL returns the backendURL depending on the load balance policy.
|
// getBackendURL returns the backendURL depending on the load balance policy.
|
||||||
@@ -472,15 +310,16 @@ func (up *URLPrefix) getBackendsCount() int {
|
|||||||
func (up *URLPrefix) getBackendURL() *backendURL {
|
func (up *URLPrefix) getBackendURL() *backendURL {
|
||||||
up.discoverBackendAddrsIfNeeded()
|
up.discoverBackendAddrsIfNeeded()
|
||||||
|
|
||||||
bus := up.bus.Load()
|
pbus := up.bus.Load()
|
||||||
if len(bus.bus) == 0 {
|
bus := *pbus
|
||||||
|
if len(bus) == 0 {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
if up.loadBalancingPolicy == "first_available" {
|
if up.loadBalancingPolicy == "first_available" {
|
||||||
return getFirstAvailableBackendURL(bus.bus)
|
return getFirstAvailableBackendURL(bus)
|
||||||
}
|
}
|
||||||
return getLeastLoadedBackendURL(bus.bus, &up.n)
|
return getLeastLoadedBackendURL(bus, &up.n)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (up *URLPrefix) discoverBackendAddrsIfNeeded() {
|
func (up *URLPrefix) discoverBackendAddrsIfNeeded() {
|
||||||
@@ -554,24 +393,25 @@ func (up *URLPrefix) discoverBackendAddrsIfNeeded() {
|
|||||||
cancel()
|
cancel()
|
||||||
|
|
||||||
// generate new backendURLs for the resolved IPs
|
// generate new backendURLs for the resolved IPs
|
||||||
busNew := newBackendURLs()
|
var busNew []*backendURL
|
||||||
for _, bu := range up.busOriginal {
|
for _, bu := range up.busOriginal {
|
||||||
host := bu.Hostname()
|
host := bu.Hostname()
|
||||||
for _, addr := range hostToAddrs[host] {
|
for _, addr := range hostToAddrs[host] {
|
||||||
buCopy := *bu
|
buCopy := *bu
|
||||||
buCopy.Host = addr
|
buCopy.Host = addr
|
||||||
busNew.add(&buCopy)
|
busNew = append(busNew, &backendURL{
|
||||||
|
url: &buCopy,
|
||||||
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bus := up.bus.Load()
|
pbus := up.bus.Load()
|
||||||
if areEqualBackendURLs(bus.bus, busNew.bus) {
|
if areEqualBackendURLs(*pbus, busNew) {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// Store new backend urls
|
// Store new backend urls
|
||||||
up.bus.Store(busNew)
|
up.bus.Store(&busNew)
|
||||||
bus.stopHealthChecks()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func areEqualBackendURLs(a, b []*backendURL) bool {
|
func areEqualBackendURLs(a, b []*backendURL) bool {
|
||||||
@@ -602,66 +442,53 @@ func getFirstAvailableBackendURL(bus []*backendURL) *backendURL {
|
|||||||
for i := 1; i < len(bus); i++ {
|
for i := 1; i < len(bus); i++ {
|
||||||
if !bus[i].isBroken() {
|
if !bus[i].isBroken() {
|
||||||
bu = bus[i]
|
bu = bus[i]
|
||||||
bu.get()
|
break
|
||||||
return bu
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return nil
|
bu.get()
|
||||||
|
return bu
|
||||||
}
|
}
|
||||||
|
|
||||||
// getLeastLoadedBackendURL returns a non-broken backendURL with the lowest number of concurrent requests.
|
// getLeastLoadedBackendURL returns the backendURL with the minimum number of concurrent requests.
|
||||||
//
|
//
|
||||||
// backendURL.put() must be called on the returned backendURL after the request is complete.
|
// backendURL.put() must be called on the returned backendURL after the request is complete.
|
||||||
func getLeastLoadedBackendURL(bus []*backendURL, atomicCounter *atomic.Uint32) *backendURL {
|
func getLeastLoadedBackendURL(bus []*backendURL, atomicCounter *atomic.Uint32) *backendURL {
|
||||||
if len(bus) == 1 {
|
if len(bus) == 1 {
|
||||||
// Fast path - return the only backend url.
|
// Fast path - return the only backend url.
|
||||||
bu := bus[0]
|
bu := bus[0]
|
||||||
if bu.isBroken() {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
bu.get()
|
bu.get()
|
||||||
return bu
|
return bu
|
||||||
}
|
}
|
||||||
|
|
||||||
// Slow path - select other backend urls.
|
// Slow path - select other backend urls.
|
||||||
n := atomicCounter.Add(1) - 1
|
n := atomicCounter.Add(1) - 1
|
||||||
for i := range uint32(len(bus)) {
|
for i := uint32(0); i < uint32(len(bus)); i++ {
|
||||||
idx := (n + i) % uint32(len(bus))
|
idx := (n + i) % uint32(len(bus))
|
||||||
bu := bus[idx]
|
bu := bus[idx]
|
||||||
if bu.isBroken() {
|
if bu.isBroken() {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
if bu.concurrentRequests.Load() == 0 {
|
||||||
// The Load() in front of CompareAndSwap() avoids CAS overhead for items with values bigger than 0.
|
// Fast path - return the backend with zero concurrently executed requests.
|
||||||
if bu.concurrentRequests.Load() == 0 && bu.concurrentRequests.CompareAndSwap(0, 1) {
|
// Do not use CompareAndSwap() instead of Load(), since it is much slower on systems with many CPU cores.
|
||||||
atomicCounter.CompareAndSwap(n+1, idx+1)
|
bu.concurrentRequests.Add(1)
|
||||||
// There is no need in the call bu.get(), because we already incremented bu.concurrentRequests above.
|
|
||||||
return bu
|
return bu
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Slow path - return the backend with the minimum number of concurrently executed requests.
|
// Slow path - return the backend with the minimum number of concurrently executed requests.
|
||||||
buMinIdx := n % uint32(len(bus))
|
buMin := bus[n%uint32(len(bus))]
|
||||||
minRequests := bus[buMinIdx].concurrentRequests.Load()
|
minRequests := buMin.concurrentRequests.Load()
|
||||||
for i := uint32(1); i < uint32(len(bus)); i++ {
|
for _, bu := range bus {
|
||||||
idx := (n + i) % uint32(len(bus))
|
|
||||||
bu := bus[idx]
|
|
||||||
if bu.isBroken() {
|
if bu.isBroken() {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
if n := bu.concurrentRequests.Load(); n < minRequests || buMin.isBroken() {
|
||||||
reqs := bu.concurrentRequests.Load()
|
buMin = bu
|
||||||
if reqs < minRequests || bus[buMinIdx].isBroken() {
|
minRequests = n
|
||||||
buMinIdx = idx
|
|
||||||
minRequests = reqs
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
buMin := bus[buMinIdx]
|
|
||||||
if buMin.isBroken() {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
buMin.get()
|
buMin.get()
|
||||||
atomicCounter.CompareAndSwap(n+1, buMinIdx+1)
|
|
||||||
return buMin
|
return buMin
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -778,9 +605,11 @@ func initAuthConfig() {
|
|||||||
configTimestamp.Set(fasttime.UnixTimestamp())
|
configTimestamp.Set(fasttime.UnixTimestamp())
|
||||||
|
|
||||||
stopCh = make(chan struct{})
|
stopCh = make(chan struct{})
|
||||||
authConfigWG.Go(func() {
|
authConfigWG.Add(1)
|
||||||
|
go func() {
|
||||||
|
defer authConfigWG.Done()
|
||||||
authConfigReloader(sighupCh)
|
authConfigReloader(sighupCh)
|
||||||
})
|
}()
|
||||||
}
|
}
|
||||||
|
|
||||||
func stopAuthConfig() {
|
func stopAuthConfig() {
|
||||||
@@ -836,9 +665,6 @@ var (
|
|||||||
// authUsers contains the currently loaded auth users
|
// authUsers contains the currently loaded auth users
|
||||||
authUsers atomic.Pointer[map[string]*UserInfo]
|
authUsers atomic.Pointer[map[string]*UserInfo]
|
||||||
|
|
||||||
// jwt authentication cache
|
|
||||||
jwtAuthCache atomic.Pointer[jwtCache]
|
|
||||||
|
|
||||||
authConfigWG sync.WaitGroup
|
authConfigWG sync.WaitGroup
|
||||||
stopCh chan struct{}
|
stopCh chan struct{}
|
||||||
)
|
)
|
||||||
@@ -855,7 +681,7 @@ func reloadAuthConfig() (bool, error) {
|
|||||||
|
|
||||||
ok, err := reloadAuthConfigData(data)
|
ok, err := reloadAuthConfigData(data)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return false, fmt.Errorf("failed to parse -auth.config=%q: %w", *authConfigPath, err)
|
return false, fmt.Errorf("failed to pars -auth.config=%q: %w", *authConfigPath, err)
|
||||||
}
|
}
|
||||||
if !ok {
|
if !ok {
|
||||||
return false, nil
|
return false, nil
|
||||||
@@ -878,16 +704,6 @@ func reloadAuthConfigData(data []byte) (bool, error) {
|
|||||||
return false, fmt.Errorf("failed to parse auth config: %w", err)
|
return false, fmt.Errorf("failed to parse auth config: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
jui, oidcDP, err := parseJWTUsers(ac)
|
|
||||||
if err != nil {
|
|
||||||
return false, fmt.Errorf("failed to parse JWT users from auth config: %w", err)
|
|
||||||
}
|
|
||||||
oidcDP.startDiscovery()
|
|
||||||
jwtc := &jwtCache{
|
|
||||||
users: jui,
|
|
||||||
oidcDP: oidcDP,
|
|
||||||
}
|
|
||||||
|
|
||||||
m, err := parseAuthConfigUsers(ac)
|
m, err := parseAuthConfigUsers(ac)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return false, fmt.Errorf("failed to parse users from auth config: %w", err)
|
return false, fmt.Errorf("failed to parse users from auth config: %w", err)
|
||||||
@@ -895,24 +711,13 @@ func reloadAuthConfigData(data []byte) (bool, error) {
|
|||||||
|
|
||||||
acPrev := authConfig.Load()
|
acPrev := authConfig.Load()
|
||||||
if acPrev != nil {
|
if acPrev != nil {
|
||||||
acPrev.UnauthorizedUser.stopHealthChecks()
|
|
||||||
for i := range acPrev.Users {
|
|
||||||
acPrev.Users[i].stopHealthChecks()
|
|
||||||
}
|
|
||||||
|
|
||||||
metrics.UnregisterSet(acPrev.ms, true)
|
metrics.UnregisterSet(acPrev.ms, true)
|
||||||
}
|
}
|
||||||
metrics.RegisterSet(ac.ms)
|
metrics.RegisterSet(ac.ms)
|
||||||
|
|
||||||
jwtcPrev := jwtAuthCache.Load()
|
|
||||||
if jwtcPrev != nil {
|
|
||||||
jwtcPrev.oidcDP.stopDiscovery()
|
|
||||||
}
|
|
||||||
|
|
||||||
authConfig.Store(ac)
|
authConfig.Store(ac)
|
||||||
authConfigData.Store(&data)
|
authConfigData.Store(&data)
|
||||||
authUsers.Store(&m)
|
authUsers.Store(&m)
|
||||||
jwtAuthCache.Store(jwtc)
|
|
||||||
|
|
||||||
return true, nil
|
return true, nil
|
||||||
}
|
}
|
||||||
@@ -937,18 +742,12 @@ func parseAuthConfig(data []byte) (*AuthConfig, error) {
|
|||||||
if ui.BearerToken != "" {
|
if ui.BearerToken != "" {
|
||||||
return nil, fmt.Errorf("field bearer_token can't be specified for unauthorized_user section")
|
return nil, fmt.Errorf("field bearer_token can't be specified for unauthorized_user section")
|
||||||
}
|
}
|
||||||
if ui.JWT != nil {
|
|
||||||
return nil, fmt.Errorf("field jwt can't be specified for unauthorized_user section")
|
|
||||||
}
|
|
||||||
if ui.AuthToken != "" {
|
if ui.AuthToken != "" {
|
||||||
return nil, fmt.Errorf("field auth_token can't be specified for unauthorized_user section")
|
return nil, fmt.Errorf("field auth_token can't be specified for unauthorized_user section")
|
||||||
}
|
}
|
||||||
if ui.Name != "" {
|
if ui.Name != "" {
|
||||||
return nil, fmt.Errorf("field name can't be specified for unauthorized_user section")
|
return nil, fmt.Errorf("field name can't be specified for unauthorized_user section")
|
||||||
}
|
}
|
||||||
if err := parseJWTPlaceholdersForUserInfo(ui, false); err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
if err := ui.initURLs(); err != nil {
|
if err := ui.initURLs(); err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
@@ -958,8 +757,6 @@ func parseAuthConfig(data []byte) (*AuthConfig, error) {
|
|||||||
return nil, fmt.Errorf("cannot parse metric_labels for unauthorized_user: %w", err)
|
return nil, fmt.Errorf("cannot parse metric_labels for unauthorized_user: %w", err)
|
||||||
}
|
}
|
||||||
ui.requests = ac.ms.NewCounter(`vmauth_unauthorized_user_requests_total` + metricLabels)
|
ui.requests = ac.ms.NewCounter(`vmauth_unauthorized_user_requests_total` + metricLabels)
|
||||||
ui.requestErrors = ac.ms.NewCounter(`vmauth_unauthorized_user_request_errors_total` + metricLabels)
|
|
||||||
ui.backendRequests = ac.ms.NewCounter(`vmauth_unauthorized_user_request_backend_requests_total` + metricLabels)
|
|
||||||
ui.backendErrors = ac.ms.NewCounter(`vmauth_unauthorized_user_request_backend_errors_total` + metricLabels)
|
ui.backendErrors = ac.ms.NewCounter(`vmauth_unauthorized_user_request_backend_errors_total` + metricLabels)
|
||||||
ui.requestsDuration = ac.ms.NewSummary(`vmauth_unauthorized_user_request_duration_seconds` + metricLabels)
|
ui.requestsDuration = ac.ms.NewSummary(`vmauth_unauthorized_user_request_duration_seconds` + metricLabels)
|
||||||
ui.concurrencyLimitCh = make(chan struct{}, ui.getMaxConcurrentRequests())
|
ui.concurrencyLimitCh = make(chan struct{}, ui.getMaxConcurrentRequests())
|
||||||
@@ -989,27 +786,16 @@ func parseAuthConfigUsers(ac *AuthConfig) (map[string]*UserInfo, error) {
|
|||||||
}
|
}
|
||||||
for i := range uis {
|
for i := range uis {
|
||||||
ui := &uis[i]
|
ui := &uis[i]
|
||||||
// users with jwt tokens are parsed by parseJWTUsers function.
|
|
||||||
// the function also checks that users with jwt tokens do not have auth tokens, bearer tokens, usernames and passwords.
|
|
||||||
if ui.JWT != nil {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
ats, err := getAuthTokens(ui.AuthToken, ui.BearerToken, ui.Username, ui.Password)
|
ats, err := getAuthTokens(ui.AuthToken, ui.BearerToken, ui.Username, ui.Password)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, at := range ats {
|
for _, at := range ats {
|
||||||
if uiOld := byAuthToken[at]; uiOld != nil {
|
if uiOld := byAuthToken[at]; uiOld != nil {
|
||||||
return nil, fmt.Errorf("duplicate auth token=%q found for username=%q, name=%q; the previous one is set for username=%q, name=%q",
|
return nil, fmt.Errorf("duplicate auth token=%q found for username=%q, name=%q; the previous one is set for username=%q, name=%q",
|
||||||
at, ui.Username, ui.Name, uiOld.Username, uiOld.Name)
|
at, ui.Username, ui.Name, uiOld.Username, uiOld.Name)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := parseJWTPlaceholdersForUserInfo(ui, false); err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
if err := ui.initURLs(); err != nil {
|
if err := ui.initURLs(); err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
@@ -1019,8 +805,6 @@ func parseAuthConfigUsers(ac *AuthConfig) (map[string]*UserInfo, error) {
|
|||||||
return nil, fmt.Errorf("cannot parse metric_labels: %w", err)
|
return nil, fmt.Errorf("cannot parse metric_labels: %w", err)
|
||||||
}
|
}
|
||||||
ui.requests = ac.ms.GetOrCreateCounter(`vmauth_user_requests_total` + metricLabels)
|
ui.requests = ac.ms.GetOrCreateCounter(`vmauth_user_requests_total` + metricLabels)
|
||||||
ui.requestErrors = ac.ms.GetOrCreateCounter(`vmauth_user_request_errors_total` + metricLabels)
|
|
||||||
ui.backendRequests = ac.ms.GetOrCreateCounter(`vmauth_user_request_backend_requests_total` + metricLabels)
|
|
||||||
ui.backendErrors = ac.ms.GetOrCreateCounter(`vmauth_user_request_backend_errors_total` + metricLabels)
|
ui.backendErrors = ac.ms.GetOrCreateCounter(`vmauth_user_request_backend_errors_total` + metricLabels)
|
||||||
ui.requestsDuration = ac.ms.GetOrCreateSummary(`vmauth_user_request_duration_seconds` + metricLabels)
|
ui.requestsDuration = ac.ms.GetOrCreateSummary(`vmauth_user_request_duration_seconds` + metricLabels)
|
||||||
mcr := ui.getMaxConcurrentRequests()
|
mcr := ui.getMaxConcurrentRequests()
|
||||||
@@ -1072,7 +856,6 @@ func (ui *UserInfo) getMetricLabels() (string, error) {
|
|||||||
func (ui *UserInfo) initURLs() error {
|
func (ui *UserInfo) initURLs() error {
|
||||||
retryStatusCodes := defaultRetryStatusCodes.Values()
|
retryStatusCodes := defaultRetryStatusCodes.Values()
|
||||||
loadBalancingPolicy := *defaultLoadBalancingPolicy
|
loadBalancingPolicy := *defaultLoadBalancingPolicy
|
||||||
mergeQueryArgs := *defaultMergeQueryArgs
|
|
||||||
dropSrcPathPrefixParts := 0
|
dropSrcPathPrefixParts := 0
|
||||||
discoverBackendIPs := *discoverBackendIPsGlobal
|
discoverBackendIPs := *discoverBackendIPsGlobal
|
||||||
if ui.RetryStatusCodes != nil {
|
if ui.RetryStatusCodes != nil {
|
||||||
@@ -1081,9 +864,6 @@ func (ui *UserInfo) initURLs() error {
|
|||||||
if ui.LoadBalancingPolicy != "" {
|
if ui.LoadBalancingPolicy != "" {
|
||||||
loadBalancingPolicy = ui.LoadBalancingPolicy
|
loadBalancingPolicy = ui.LoadBalancingPolicy
|
||||||
}
|
}
|
||||||
if len(ui.MergeQueryArgs) != 0 {
|
|
||||||
mergeQueryArgs = ui.MergeQueryArgs
|
|
||||||
}
|
|
||||||
if ui.DropSrcPathPrefixParts != nil {
|
if ui.DropSrcPathPrefixParts != nil {
|
||||||
dropSrcPathPrefixParts = *ui.DropSrcPathPrefixParts
|
dropSrcPathPrefixParts = *ui.DropSrcPathPrefixParts
|
||||||
}
|
}
|
||||||
@@ -1091,25 +871,22 @@ func (ui *UserInfo) initURLs() error {
|
|||||||
discoverBackendIPs = *ui.DiscoverBackendIPs
|
discoverBackendIPs = *ui.DiscoverBackendIPs
|
||||||
}
|
}
|
||||||
|
|
||||||
up := ui.URLPrefix
|
if ui.URLPrefix != nil {
|
||||||
if up != nil {
|
if err := ui.URLPrefix.sanitizeAndInitialize(); err != nil {
|
||||||
if err := up.sanitizeAndInitialize(); err != nil {
|
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
up.retryStatusCodes = retryStatusCodes
|
ui.URLPrefix.retryStatusCodes = retryStatusCodes
|
||||||
up.dropSrcPathPrefixParts = dropSrcPathPrefixParts
|
ui.URLPrefix.dropSrcPathPrefixParts = dropSrcPathPrefixParts
|
||||||
up.discoverBackendIPs = discoverBackendIPs
|
ui.URLPrefix.discoverBackendIPs = discoverBackendIPs
|
||||||
if err := up.setLoadBalancingPolicy(loadBalancingPolicy); err != nil {
|
if err := ui.URLPrefix.setLoadBalancingPolicy(loadBalancingPolicy); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
up.mergeQueryArgs = mergeQueryArgs
|
|
||||||
}
|
}
|
||||||
if ui.DefaultURL != nil {
|
if ui.DefaultURL != nil {
|
||||||
if err := ui.DefaultURL.sanitizeAndInitialize(); err != nil {
|
if err := ui.DefaultURL.sanitizeAndInitialize(); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, e := range ui.URLMaps {
|
for _, e := range ui.URLMaps {
|
||||||
if len(e.SrcPaths) == 0 && len(e.SrcHosts) == 0 && len(e.SrcQueryArgs) == 0 && len(e.SrcHeaders) == 0 {
|
if len(e.SrcPaths) == 0 && len(e.SrcHosts) == 0 && len(e.SrcQueryArgs) == 0 && len(e.SrcHeaders) == 0 {
|
||||||
return fmt.Errorf("missing `src_paths`, `src_hosts`, `src_query_args` and `src_headers` in `url_map`")
|
return fmt.Errorf("missing `src_paths`, `src_hosts`, `src_query_args` and `src_headers` in `url_map`")
|
||||||
@@ -1122,7 +899,6 @@ func (ui *UserInfo) initURLs() error {
|
|||||||
}
|
}
|
||||||
rscs := retryStatusCodes
|
rscs := retryStatusCodes
|
||||||
lbp := loadBalancingPolicy
|
lbp := loadBalancingPolicy
|
||||||
mqa := mergeQueryArgs
|
|
||||||
dsp := dropSrcPathPrefixParts
|
dsp := dropSrcPathPrefixParts
|
||||||
dbd := discoverBackendIPs
|
dbd := discoverBackendIPs
|
||||||
if e.RetryStatusCodes != nil {
|
if e.RetryStatusCodes != nil {
|
||||||
@@ -1131,9 +907,6 @@ func (ui *UserInfo) initURLs() error {
|
|||||||
if e.LoadBalancingPolicy != "" {
|
if e.LoadBalancingPolicy != "" {
|
||||||
lbp = e.LoadBalancingPolicy
|
lbp = e.LoadBalancingPolicy
|
||||||
}
|
}
|
||||||
if len(e.MergeQueryArgs) != 0 {
|
|
||||||
mqa = e.MergeQueryArgs
|
|
||||||
}
|
|
||||||
if e.DropSrcPathPrefixParts != nil {
|
if e.DropSrcPathPrefixParts != nil {
|
||||||
dsp = *e.DropSrcPathPrefixParts
|
dsp = *e.DropSrcPathPrefixParts
|
||||||
}
|
}
|
||||||
@@ -1144,7 +917,6 @@ func (ui *UserInfo) initURLs() error {
|
|||||||
if err := e.URLPrefix.setLoadBalancingPolicy(lbp); err != nil {
|
if err := e.URLPrefix.setLoadBalancingPolicy(lbp); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
e.URLPrefix.mergeQueryArgs = mqa
|
|
||||||
e.URLPrefix.dropSrcPathPrefixParts = dsp
|
e.URLPrefix.dropSrcPathPrefixParts = dsp
|
||||||
e.URLPrefix.discoverBackendIPs = dbd
|
e.URLPrefix.discoverBackendIPs = dbd
|
||||||
}
|
}
|
||||||
@@ -1169,9 +941,6 @@ func (ui *UserInfo) name() string {
|
|||||||
h := xxhash.Sum64([]byte(ui.AuthToken))
|
h := xxhash.Sum64([]byte(ui.AuthToken))
|
||||||
return fmt.Sprintf("auth_token:hash:%016X", h)
|
return fmt.Sprintf("auth_token:hash:%016X", h)
|
||||||
}
|
}
|
||||||
if ui.JWT != nil {
|
|
||||||
return `jwt`
|
|
||||||
}
|
|
||||||
return ""
|
return ""
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1259,11 +1028,13 @@ func (up *URLPrefix) sanitizeAndInitialize() error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Initialize up.bus
|
// Initialize up.bus
|
||||||
bus := newBackendURLs()
|
bus := make([]*backendURL, len(up.busOriginal))
|
||||||
for _, bu := range up.busOriginal {
|
for i, bu := range up.busOriginal {
|
||||||
bus.add(bu)
|
bus[i] = &backendURL{
|
||||||
|
url: bu,
|
||||||
|
}
|
||||||
}
|
}
|
||||||
up.bus.Store(bus)
|
up.bus.Store(&bus)
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user