mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2026-05-18 17:26:31 +03:00
Compare commits
6 Commits
master
...
release-gu
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
9430395c18 | ||
|
|
d9e26de0a6 | ||
|
|
13e7e04727 | ||
|
|
f8b1b918c4 | ||
|
|
db52cca9df | ||
|
|
cdc374d8dd |
2
.github/ISSUE_TEMPLATE/question.yml
vendored
2
.github/ISSUE_TEMPLATE/question.yml
vendored
@@ -5,7 +5,7 @@ body:
|
||||
- type: textarea
|
||||
id: describe-the-component
|
||||
attributes:
|
||||
label: Is your question related to a specific component?
|
||||
label: Is your question request related to a specific component?
|
||||
placeholder: |
|
||||
VictoriaMetrics, vmagent, vmalert, vmui, etc...
|
||||
validations:
|
||||
|
||||
23
.github/copilot-instructions.md
vendored
Normal file
23
.github/copilot-instructions.md
vendored
Normal file
@@ -0,0 +1,23 @@
|
||||
# Project Overview
|
||||
|
||||
VictoriaMetrics is a fast, cost-saving, and scalable solution for monitoring and managing time series data. It delivers high performance and reliability, making it an ideal choice for businesses of all sizes.
|
||||
|
||||
## Folder Structure
|
||||
|
||||
- `/app`: Contains the compilable binaries.
|
||||
- `/lib`: Contains the golang reusable libraries
|
||||
- `/docs/victoriametrics`: Contains documentation for the project.
|
||||
- `/apptest/tests`: Contains integration tests.
|
||||
|
||||
## Libraries and Frameworks
|
||||
|
||||
- Backend: Golang, no framework. Use third-party libraries sparingly.
|
||||
- Frontend: React.
|
||||
|
||||
## Code review guidelines
|
||||
|
||||
Ensure the feature or bugfix includes a changelog entry in /docs/victoriametrics/changelog/CHANGELOG.md.
|
||||
Verify the entry is under the ## tip section and matches the structure and style of existing entries.
|
||||
Chore-only changes may be omitted from the changelog.
|
||||
|
||||
|
||||
4
.github/dependabot.yml
vendored
4
.github/dependabot.yml
vendored
@@ -4,8 +4,6 @@ updates:
|
||||
directory: "/"
|
||||
schedule:
|
||||
interval: "daily"
|
||||
cooldown:
|
||||
default-days: 21
|
||||
- package-ecosystem: "gomod"
|
||||
directory: "/"
|
||||
schedule:
|
||||
@@ -25,8 +23,6 @@ updates:
|
||||
directory: "/"
|
||||
schedule:
|
||||
interval: "daily"
|
||||
cooldown:
|
||||
default-days: 21
|
||||
- package-ecosystem: "npm"
|
||||
directory: "/app/vmui/packages/vmui"
|
||||
schedule:
|
||||
|
||||
11
.github/pull_request_template.md
vendored
11
.github/pull_request_template.md
vendored
@@ -1,3 +1,10 @@
|
||||
**PLEASE REMOVE LINE BELOW BEFORE SUBMITTING**
|
||||
### Describe Your Changes
|
||||
|
||||
Before creating the PR, make sure you have read and followed the [VictoriaMetrics contributing guidelines](https://docs.victoriametrics.com/victoriametrics/contributing/#pull-request-checklist).
|
||||
Please provide a brief description of the changes you made. Be as specific as possible to help others understand the purpose and impact of your modifications.
|
||||
|
||||
### Checklist
|
||||
|
||||
The following checks are **mandatory**:
|
||||
|
||||
- [ ] My change adheres to [VictoriaMetrics contributing guidelines](https://docs.victoriametrics.com/victoriametrics/contributing/#pull-request-checklist).
|
||||
- [ ] My change adheres to [VictoriaMetrics development goals](https://docs.victoriametrics.com/victoriametrics/goals/).
|
||||
|
||||
48
.github/scripts/lint-changelog-tip.sh
vendored
48
.github/scripts/lint-changelog-tip.sh
vendored
@@ -1,48 +0,0 @@
|
||||
#!/usr/bin/env sh
|
||||
|
||||
set -e
|
||||
|
||||
CHANGELOG_FILE="docs/victoriametrics/changelog/CHANGELOG.md"
|
||||
|
||||
GITHUB_BASE_REF=${GITHUB_BASE_REF:-"master"}
|
||||
GIT_REMOTE=${GIT_REMOTE:-"origin"}
|
||||
|
||||
git diff "${GIT_REMOTE}/${GITHUB_BASE_REF}"...HEAD -- $CHANGELOG_FILE > diff.txt
|
||||
if ! grep -q "^+" diff.txt; then
|
||||
echo "No additions in CHANGELOG.md"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
ADDED_LINES=$(grep "^+\S" diff.txt | sed 's/^+//')
|
||||
|
||||
START_TIP=$(grep -n "^## tip" "$CHANGELOG_FILE" | head -1 | cut -d: -f1)
|
||||
if [ -z "$START_TIP" ]; then
|
||||
echo "ERROR: ${CHANGELOG_FILE} does not contain a ## tip section"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
END_TIP=$(awk "NR>$START_TIP && /^## / {print NR; exit}" "${CHANGELOG_FILE}")
|
||||
if [ -z "$END_TIP" ]; then
|
||||
END_TIP=$(wc -l < "$CHANGELOG_FILE")
|
||||
fi
|
||||
|
||||
BAD=0
|
||||
while IFS= read -r line; do
|
||||
# Grep exact line inside the file and get line numbers
|
||||
MATCHES=$(grep -n -F "$line" "$CHANGELOG_FILE" | cut -d: -f1)
|
||||
for m in $MATCHES; do
|
||||
if [ "$m" -lt "$START_TIP" ] || [ "$m" -gt "$END_TIP" ]; then
|
||||
echo "'$line' on line ${m} is outside ## tip section (lines ${START_TIP}-${END_TIP})"
|
||||
BAD=1
|
||||
fi
|
||||
done
|
||||
done << EOF
|
||||
$ADDED_LINES
|
||||
EOF
|
||||
|
||||
if [ "$BAD" -ne 0 ]; then
|
||||
echo "CHANGELOG modifications must be placed inside the ## tip section."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "CHANGELOG modifications are valid."
|
||||
7
.github/workflows/build.yml
vendored
7
.github/workflows/build.yml
vendored
@@ -57,13 +57,11 @@ jobs:
|
||||
arch: amd64
|
||||
- os: openbsd
|
||||
arch: amd64
|
||||
- os: netbsd
|
||||
arch: amd64
|
||||
- os: windows
|
||||
arch: amd64
|
||||
steps:
|
||||
- name: Code checkout
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v5
|
||||
|
||||
- name: Setup Go
|
||||
id: go
|
||||
@@ -73,8 +71,7 @@ jobs:
|
||||
go.sum
|
||||
Makefile
|
||||
app/**/Makefile
|
||||
go-version-file: 'go.mod'
|
||||
- run: go version
|
||||
go-version: stable
|
||||
|
||||
- name: Build victoria-metrics for ${{ matrix.os }}-${{ matrix.arch }}
|
||||
run: make victoria-metrics-${{ matrix.os }}-${{ matrix.arch }}
|
||||
|
||||
19
.github/workflows/changelog-linter.yml
vendored
19
.github/workflows/changelog-linter.yml
vendored
@@ -1,19 +0,0 @@
|
||||
name: 'changelog-linter'
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
paths:
|
||||
- "docs/victoriametrics/changelog/CHANGELOG.md"
|
||||
|
||||
jobs:
|
||||
tip-lint:
|
||||
runs-on: 'ubuntu-latest'
|
||||
steps:
|
||||
- uses: 'actions/checkout@v6'
|
||||
with:
|
||||
# needed for proper diff
|
||||
fetch-depth: 0
|
||||
|
||||
- name: 'Validate that changelog changes are under ## tip'
|
||||
run: |
|
||||
GITHUB_BASE_REF=${{ github.base_ref }} ./.github/scripts/lint-changelog-tip.sh
|
||||
18
.github/workflows/check-commit-signed.yml
vendored
18
.github/workflows/check-commit-signed.yml
vendored
@@ -8,7 +8,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v5
|
||||
with:
|
||||
fetch-depth: 0 # we need full history for commit verification
|
||||
|
||||
@@ -27,21 +27,11 @@ jobs:
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Check raw commit objects for a "gpgsig" header as a fast early signal for
|
||||
# contributors. Both GPG and SSH signatures use this header.
|
||||
# This avoids relying on %G? which returns N for SSH commits.
|
||||
# This check is not a security enforcement — unsigned commits cannot be merged
|
||||
# anyway due to the GitHub repository merge policy.
|
||||
unsigned=""
|
||||
for sha in $(git rev-list $RANGE); do
|
||||
if ! git cat-file commit "$sha" | grep -q "^gpgsig"; then
|
||||
unsigned="$unsigned $sha"
|
||||
fi
|
||||
done
|
||||
unsigned=$(git log --pretty="%H %G?" $RANGE | grep -vE " (G|E)$" || true)
|
||||
if [ -n "$unsigned" ]; then
|
||||
echo "Found unsigned commits:"
|
||||
echo "$unsigned"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "All commits in PR are signed (GPG or SSH)"
|
||||
|
||||
echo "All commits in PR are signed (G or E)"
|
||||
8
.github/workflows/check-licenses.yml
vendored
8
.github/workflows/check-licenses.yml
vendored
@@ -21,20 +21,18 @@ jobs:
|
||||
id: go
|
||||
uses: actions/setup-go@v6
|
||||
with:
|
||||
go-version-file: 'go.mod'
|
||||
go-version: stable
|
||||
cache: false
|
||||
|
||||
- run: go version
|
||||
|
||||
- name: Cache Go artifacts
|
||||
uses: actions/cache@v5
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: |
|
||||
~/.cache/go-build
|
||||
~/go/pkg/mod
|
||||
~/go/bin
|
||||
key: go-artifacts-${{ runner.os }}-check-licenses-${{ steps.go.outputs.go-version }}-${{ hashFiles('go.sum', 'Makefile', 'app/**/Makefile') }}
|
||||
restore-keys: go-artifacts-${{ runner.os }}-check-licenses-${{ steps.go.outputs.go-version }}-
|
||||
restore-keys: go-artifacts-${{ runner.os }}-check-licenses-
|
||||
|
||||
- name: Check License
|
||||
run: make check-licenses
|
||||
|
||||
15
.github/workflows/codeql-analysis-go.yml
vendored
15
.github/workflows/codeql-analysis-go.yml
vendored
@@ -29,35 +29,34 @@ jobs:
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v5
|
||||
|
||||
- name: Set up Go
|
||||
id: go
|
||||
uses: actions/setup-go@v6
|
||||
with:
|
||||
cache: false
|
||||
go-version-file: 'go.mod'
|
||||
- run: go version
|
||||
go-version: stable
|
||||
|
||||
- name: Cache Go artifacts
|
||||
uses: actions/cache@v5
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: |
|
||||
~/.cache/go-build
|
||||
~/go/bin
|
||||
~/go/pkg/mod
|
||||
key: go-artifacts-${{ runner.os }}-codeql-analyze-${{ steps.go.outputs.go-version }}-${{ hashFiles('go.sum', 'Makefile', 'app/**/Makefile') }}
|
||||
restore-keys: go-artifacts-${{ runner.os }}-codeql-analyze-${{ steps.go.outputs.go-version }}-
|
||||
restore-keys: go-artifacts-${{ runner.os }}-codeql-analyze-
|
||||
|
||||
- name: Initialize CodeQL
|
||||
uses: github/codeql-action/init@v4.35.2
|
||||
uses: github/codeql-action/init@v4
|
||||
with:
|
||||
languages: go
|
||||
|
||||
- name: Autobuild
|
||||
uses: github/codeql-action/autobuild@v4.35.2
|
||||
uses: github/codeql-action/autobuild@v4
|
||||
|
||||
- name: Perform CodeQL Analysis
|
||||
uses: github/codeql-action/analyze@v4.35.2
|
||||
uses: github/codeql-action/analyze@v4
|
||||
with:
|
||||
category: 'language:go'
|
||||
|
||||
6
.github/workflows/docs.yaml
vendored
6
.github/workflows/docs.yaml
vendored
@@ -16,19 +16,19 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Code checkout
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v5
|
||||
with:
|
||||
path: __vm
|
||||
|
||||
- name: Checkout private code
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v5
|
||||
with:
|
||||
repository: VictoriaMetrics/vmdocs
|
||||
token: ${{ secrets.VM_BOT_GH_TOKEN }}
|
||||
path: __vm-docs
|
||||
|
||||
- name: Import GPG key
|
||||
uses: crazy-max/ghaction-import-gpg@v7
|
||||
uses: crazy-max/ghaction-import-gpg@v6
|
||||
id: import-gpg
|
||||
with:
|
||||
gpg_private_key: ${{ secrets.VM_BOT_GPG_PRIVATE_KEY }}
|
||||
|
||||
40
.github/workflows/test.yml
vendored
40
.github/workflows/test.yml
vendored
@@ -32,7 +32,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Code checkout
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v5
|
||||
|
||||
- name: Setup Go
|
||||
id: go
|
||||
@@ -42,17 +42,16 @@ jobs:
|
||||
go.sum
|
||||
Makefile
|
||||
app/**/Makefile
|
||||
go-version-file: 'go.mod'
|
||||
go-version: stable
|
||||
|
||||
- run: go version
|
||||
|
||||
- name: Cache golangci-lint
|
||||
uses: actions/cache@v5
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: |
|
||||
~/.cache/golangci-lint
|
||||
~/go/bin
|
||||
key: golangci-lint-${{ runner.os }}-${{ steps.go.outputs.go-version }}-${{ hashFiles('.golangci.yml') }}
|
||||
key: golangci-lint-${{ runner.os }}-${{ hashFiles('.golangci.yml') }}
|
||||
|
||||
- name: Run check-all
|
||||
run: |
|
||||
@@ -66,13 +65,13 @@ jobs:
|
||||
strategy:
|
||||
matrix:
|
||||
scenario:
|
||||
- 'test'
|
||||
- 'test-386'
|
||||
- 'test-full'
|
||||
- 'test-full-386'
|
||||
- 'test-pure'
|
||||
|
||||
steps:
|
||||
- name: Code checkout
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v5
|
||||
|
||||
- name: Setup Go
|
||||
id: go
|
||||
@@ -82,19 +81,23 @@ jobs:
|
||||
go.sum
|
||||
Makefile
|
||||
app/**/Makefile
|
||||
go-version-file: 'go.mod'
|
||||
- run: go version
|
||||
go-version: stable
|
||||
|
||||
- name: Run tests
|
||||
run: make ${{ matrix.scenario}}
|
||||
run: GOGC=10 make ${{ matrix.scenario}}
|
||||
|
||||
apptest:
|
||||
name: apptest
|
||||
runs-on: apptest
|
||||
- name: Publish coverage
|
||||
uses: codecov/codecov-action@v5
|
||||
with:
|
||||
files: ./coverage.txt
|
||||
|
||||
integration:
|
||||
name: integration
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- name: Code checkout
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v5
|
||||
|
||||
- name: Setup Go
|
||||
id: go
|
||||
@@ -104,8 +107,7 @@ jobs:
|
||||
go.sum
|
||||
Makefile
|
||||
app/**/Makefile
|
||||
go-version-file: 'go.mod'
|
||||
- run: go version
|
||||
go-version: stable
|
||||
|
||||
- name: Run app tests
|
||||
run: make apptest
|
||||
- name: Run integration tests
|
||||
run: make integration-test
|
||||
|
||||
28
.github/workflows/vmui.yml
vendored
28
.github/workflows/vmui.yml
vendored
@@ -32,41 +32,35 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Code checkout
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v5
|
||||
|
||||
- name: Cache node_modules
|
||||
id: cache
|
||||
uses: actions/cache@v5
|
||||
- name: Setup Node
|
||||
uses: actions/setup-node@v6
|
||||
with:
|
||||
path: app/vmui/packages/vmui/node_modules
|
||||
key: vmui-deps-${{ runner.os }}-${{ hashFiles('app/vmui/packages/vmui/package-lock.json', 'app/vmui/Dockerfile-build') }}
|
||||
restore-keys: |
|
||||
vmui-deps-${{ runner.os }}-
|
||||
node-version: '24.x'
|
||||
|
||||
- name: Install dependencies
|
||||
if: steps.cache.outputs.cache-hit != 'true'
|
||||
run: make vmui-install
|
||||
- name: Cache node-modules
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: |
|
||||
app/vmui/packages/vmui/node_modules
|
||||
key: vmui-artifacts-${{ runner.os }}-${{ hashFiles('package-lock.json') }}
|
||||
restore-keys: vmui-artifacts-${{ runner.os }}-
|
||||
|
||||
- name: Run lint
|
||||
id: lint
|
||||
run: make vmui-lint
|
||||
continue-on-error: true
|
||||
env:
|
||||
VMUI_SKIP_INSTALL: true
|
||||
|
||||
- name: Run tests
|
||||
id: test
|
||||
run: make vmui-test
|
||||
continue-on-error: true
|
||||
env:
|
||||
VMUI_SKIP_INSTALL: true
|
||||
|
||||
- name: Run typecheck
|
||||
id: typecheck
|
||||
run: make vmui-typecheck
|
||||
continue-on-error: true
|
||||
env:
|
||||
VMUI_SKIP_INSTALL: true
|
||||
|
||||
- name: Annotate Code Linting Results
|
||||
uses: ataylorme/eslint-annotate-action@v3
|
||||
|
||||
2
LICENSE
2
LICENSE
@@ -175,7 +175,7 @@
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
Copyright 2019-2026 VictoriaMetrics, Inc.
|
||||
Copyright 2019-2025 VictoriaMetrics, Inc.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
|
||||
57
Makefile
57
Makefile
@@ -17,7 +17,7 @@ EXTRA_GO_BUILD_TAGS ?=
|
||||
GO_BUILDINFO = -X '$(PKG_PREFIX)/lib/buildinfo.Version=$(APP_NAME)-$(DATEINFO_TAG)-$(BUILDINFO_TAG)'
|
||||
TAR_OWNERSHIP ?= --owner=1000 --group=1000
|
||||
|
||||
GOLANGCI_LINT_VERSION := 2.9.0
|
||||
GOLANGCI_LINT_VERSION := 2.4.0
|
||||
|
||||
.PHONY: $(MAKECMDGOALS)
|
||||
|
||||
@@ -435,7 +435,7 @@ release-vmutils-windows-goarch: \
|
||||
vmctl-windows-$(GOARCH)-prod.exe
|
||||
|
||||
pprof-cpu:
|
||||
go tool pprof -trim_path=github.com/VictoriaMetrics/VictoriaMetrics $(PPROF_FILE)
|
||||
go tool pprof -trim_path=github.com/VictoriaMetrics/VictoriaMetrics@ $(PPROF_FILE)
|
||||
|
||||
fmt:
|
||||
gofmt -l -w -s ./lib
|
||||
@@ -443,7 +443,7 @@ fmt:
|
||||
gofmt -l -w -s ./apptest
|
||||
|
||||
vet:
|
||||
go vet -tags 'synctest' ./lib/...
|
||||
GOEXPERIMENT=synctest go vet ./lib/...
|
||||
go vet ./app/...
|
||||
go vet ./apptest/...
|
||||
|
||||
@@ -452,55 +452,39 @@ check-all: fmt vet golangci-lint govulncheck
|
||||
clean-checkers: remove-golangci-lint remove-govulncheck
|
||||
|
||||
test:
|
||||
go test -tags 'synctest' ./lib/... ./app/...
|
||||
GOEXPERIMENT=synctest go test ./lib/... ./app/...
|
||||
|
||||
test-race:
|
||||
go test -tags 'synctest' -race ./lib/... ./app/...
|
||||
|
||||
test-386:
|
||||
GOARCH=386 go test -tags 'synctest' ./lib/... ./app/...
|
||||
GOEXPERIMENT=synctest go test -race ./lib/... ./app/...
|
||||
|
||||
test-pure:
|
||||
CGO_ENABLED=0 go test -tags 'synctest' ./lib/... ./app/...
|
||||
GOEXPERIMENT=synctest CGO_ENABLED=0 go test ./lib/... ./app/...
|
||||
|
||||
test-full:
|
||||
go test -tags 'synctest' -coverprofile=coverage.txt -covermode=atomic ./lib/... ./app/...
|
||||
GOEXPERIMENT=synctest go test -coverprofile=coverage.txt -covermode=atomic ./lib/... ./app/...
|
||||
|
||||
test-full-386:
|
||||
GOARCH=386 go test -tags 'synctest' -coverprofile=coverage.txt -covermode=atomic ./lib/... ./app/...
|
||||
GOEXPERIMENT=synctest GOARCH=386 go test -coverprofile=coverage.txt -covermode=atomic ./lib/... ./app/...
|
||||
|
||||
integration-test:
|
||||
$(MAKE) apptest
|
||||
|
||||
apptest:
|
||||
$(MAKE) victoria-metrics-race vmagent-race vmalert-race vmauth-race vmctl-race vmbackup-race vmrestore-race
|
||||
go test ./apptest/... -skip="^Test(Cluster|Legacy).*"
|
||||
|
||||
apptest-legacy: victoria-metrics-race vmbackup-race vmrestore-race
|
||||
OS=$$(uname | tr '[:upper:]' '[:lower:]'); \
|
||||
ARCH=$$(uname -m | tr '[:upper:]' '[:lower:]' | sed 's/x86_64/amd64/'); \
|
||||
VERSION=v1.132.0; \
|
||||
VMSINGLE=victoria-metrics-$${OS}-$${ARCH}-$${VERSION}.tar.gz; \
|
||||
VMCLUSTER=victoria-metrics-$${OS}-$${ARCH}-$${VERSION}-cluster.tar.gz; \
|
||||
URL=https://github.com/VictoriaMetrics/VictoriaMetrics/releases/download/$${VERSION}; \
|
||||
DIR=/tmp/$${VERSION}; \
|
||||
test -d $${DIR} || (mkdir $${DIR} && \
|
||||
curl --output-dir /tmp -LO $${URL}/$${VMSINGLE} && tar xzf /tmp/$${VMSINGLE} -C $${DIR} && \
|
||||
curl --output-dir /tmp -LO $${URL}/$${VMCLUSTER} && tar xzf /tmp/$${VMCLUSTER} -C $${DIR} \
|
||||
); \
|
||||
VM_LEGACY_VMSINGLE_PATH=$${DIR}/victoria-metrics-prod \
|
||||
VM_LEGACY_VMSTORAGE_PATH=$${DIR}/vmstorage-prod \
|
||||
go test ./apptest/tests -run="^TestLegacySingle.*"
|
||||
$(MAKE) victoria-metrics vmagent vmalert vmauth vmctl vmbackup vmrestore
|
||||
go test ./apptest/... -skip="^TestCluster.*"
|
||||
|
||||
benchmark:
|
||||
go test -run=NO_TESTS -bench=. ./lib/...
|
||||
go test -run=NO_TESTS -bench=. ./app/...
|
||||
GOEXPERIMENT=synctest go test -bench=. ./lib/...
|
||||
go test -bench=. ./app/...
|
||||
|
||||
benchmark-pure:
|
||||
CGO_ENABLED=0 go test -run=NO_TESTS -bench=. ./lib/...
|
||||
CGO_ENABLED=0 go test -run=NO_TESTS -bench=. ./app/...
|
||||
GOEXPERIMENT=synctest CGO_ENABLED=0 go test -bench=. ./lib/...
|
||||
CGO_ENABLED=0 go test -bench=. ./app/...
|
||||
|
||||
vendor-update:
|
||||
go get -u ./lib/...
|
||||
go get -u ./app/...
|
||||
go mod tidy -compat=1.26
|
||||
go mod tidy -compat=1.24
|
||||
go mod vendor
|
||||
|
||||
app-local:
|
||||
@@ -516,15 +500,14 @@ app-local-windows-goarch:
|
||||
CGO_ENABLED=0 GOOS=windows GOARCH=$(GOARCH) go build $(RACE) -ldflags "$(GO_BUILDINFO)" -tags "$(EXTRA_GO_BUILD_TAGS)" -o bin/$(APP_NAME)-windows-$(GOARCH)$(RACE).exe $(PKG_PREFIX)/app/$(APP_NAME)
|
||||
|
||||
quicktemplate-gen: install-qtc
|
||||
qtc -dir=lib
|
||||
qtc -dir=app
|
||||
qtc
|
||||
|
||||
install-qtc:
|
||||
which qtc || go install github.com/valyala/quicktemplate/qtc@latest
|
||||
|
||||
|
||||
golangci-lint: install-golangci-lint
|
||||
golangci-lint run --build-tags 'synctest'
|
||||
GOEXPERIMENT=synctest golangci-lint run
|
||||
|
||||
install-golangci-lint:
|
||||
which golangci-lint && (golangci-lint --version | grep -q $(GOLANGCI_LINT_VERSION)) || curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(shell go env GOPATH)/bin v$(GOLANGCI_LINT_VERSION)
|
||||
|
||||
24
README.md
24
README.md
@@ -1,11 +1,12 @@
|
||||
# VictoriaMetrics
|
||||
|
||||
[](https://github.com/VictoriaMetrics/VictoriaMetrics/releases)
|
||||
[](https://hub.docker.com/u/victoriametrics)
|
||||

|
||||
[](https://goreportcard.com/report/github.com/VictoriaMetrics/VictoriaMetrics)
|
||||
[](https://github.com/VictoriaMetrics/VictoriaMetrics/actions/workflows/build.yml)
|
||||
[](https://app.codecov.io/gh/VictoriaMetrics/VictoriaMetrics)
|
||||
[](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/LICENSE)
|
||||
[](https://slack.victoriametrics.com)
|
||||

|
||||
[](https://x.com/VictoriaMetrics/)
|
||||
[](https://www.reddit.com/r/VictoriaMetrics/)
|
||||
|
||||
@@ -15,21 +16,16 @@
|
||||
<img src="docs/victoriametrics/logo.webp" width="300" alt="VictoriaMetrics logo">
|
||||
</picture>
|
||||
|
||||
VictoriaMetrics is a fast, cost-effective, and scalable solution for monitoring and managing time series data. It delivers high performance and reliability, making it an ideal choice for businesses of all sizes.
|
||||
VictoriaMetrics is a fast, cost-saving, and scalable solution for monitoring and managing time series data. It delivers high performance and reliability, making it an ideal choice for businesses of all sizes.
|
||||
|
||||
Here are some resources and information about VictoriaMetrics:
|
||||
|
||||
- **Case studies**: [Grammarly, Roblox, Wix, Spotify,...](https://docs.victoriametrics.com/victoriametrics/casestudies/).
|
||||
- **Available**: [Binary releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest), Docker images on [Docker Hub](https://hub.docker.com/r/victoriametrics/victoria-metrics/) and [Quay](https://quay.io/repository/victoriametrics/victoria-metrics), [Source code](https://github.com/VictoriaMetrics/VictoriaMetrics).
|
||||
- **Deployment types**: [Single-node version](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) and [Cluster version](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/) under [Apache License 2.0](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/LICENSE).
|
||||
- **Getting started:** Read [key concepts](https://docs.victoriametrics.com/victoriametrics/keyconcepts/) and follow the
|
||||
[quick start guide](https://docs.victoriametrics.com/victoriametrics/quick-start/).
|
||||
- **Community**: [Slack](https://slack.victoriametrics.com/) (join via [Slack Inviter](https://slack.victoriametrics.com/)), [X (Twitter)](https://x.com/VictoriaMetrics), [YouTube](https://www.youtube.com/@VictoriaMetrics). See full list [here](https://docs.victoriametrics.com/victoriametrics/#community-and-contributions).
|
||||
- **Changelog**: Project evolves fast - check the [CHANGELOG](https://docs.victoriametrics.com/victoriametrics/changelog/), and [How to upgrade](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-upgrade-victoriametrics).
|
||||
- **Enterprise support:** [Contact us](mailto:info@victoriametrics.com) for commercial support with additional [enterprise features](https://docs.victoriametrics.com/victoriametrics/enterprise/).
|
||||
- **Enterprise releases:** Enterprise and [long-term support releases (LTS)](https://docs.victoriametrics.com/victoriametrics/lts-releases/) are publicly available and can be evaluated for free
|
||||
using a [free trial license](https://victoriametrics.com/products/enterprise/trial/).
|
||||
- **Security:** we achieved [security certifications](https://victoriametrics.com/security/) for Database Software Development and Software-Based Monitoring Services.
|
||||
- Documentation: [docs.victoriametrics.com](https://docs.victoriametrics.com)
|
||||
- Case studies: [Grammarly, Roblox, Wix,...](https://docs.victoriametrics.com/victoriametrics/casestudies/).
|
||||
- Available: [Binary releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest), docker images [Docker Hub](https://hub.docker.com/r/victoriametrics/victoria-metrics/) and [Quay](https://quay.io/repository/victoriametrics/victoria-metrics), [Source code](https://github.com/VictoriaMetrics/VictoriaMetrics)
|
||||
- Deployment types: [Single-node version](https://docs.victoriametrics.com/), [Cluster version](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/), and [Enterprise version](https://docs.victoriametrics.com/victoriametrics/enterprise/)
|
||||
- Changelog: [CHANGELOG](https://docs.victoriametrics.com/victoriametrics/changelog/), and [How to upgrade](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-upgrade-victoriametrics)
|
||||
- Community: [Slack](https://slack.victoriametrics.com/), [X (Twitter)](https://x.com/VictoriaMetrics), [LinkedIn](https://www.linkedin.com/company/victoriametrics/), [YouTube](https://www.youtube.com/@VictoriaMetrics)
|
||||
|
||||
Yes, we open-source both the single-node VictoriaMetrics and the cluster version.
|
||||
|
||||
|
||||
15
SECURITY.md
15
SECURITY.md
@@ -1,4 +1,17 @@
|
||||
# Security Policy
|
||||
|
||||
You can find out about our security policy and VictoriaMetrics version support on the [security page](https://docs.victoriametrics.com/victoriametrics/#security) in the documentation.
|
||||
## Supported Versions
|
||||
|
||||
The following versions of VictoriaMetrics receive regular security fixes:
|
||||
|
||||
| Version | Supported |
|
||||
|--------------------------------------------------------------------------------|--------------------|
|
||||
| [Latest release](https://docs.victoriametrics.com/victoriametrics/changelog/) | :white_check_mark: |
|
||||
| [LTS releases](https://docs.victoriametrics.com/victoriametrics/lts-releases/) | :white_check_mark: |
|
||||
| other releases | :x: |
|
||||
|
||||
See [this page](https://victoriametrics.com/security/) for more details.
|
||||
|
||||
## Reporting a Vulnerability
|
||||
|
||||
Please report any security issues to <security@victoriametrics.com>
|
||||
|
||||
@@ -118,8 +118,8 @@ func main() {
|
||||
logger.Fatalf("cannot stop the webservice: %s", err)
|
||||
}
|
||||
logger.Infof("successfully shut down the webservice in %.3f seconds", time.Since(startTime).Seconds())
|
||||
vminsertcommon.StopIngestionRateLimiter()
|
||||
vminsert.Stop()
|
||||
vminsertcommon.StopIngestionRateLimiter()
|
||||
|
||||
vmstorage.Stop()
|
||||
vmselect.Stop()
|
||||
@@ -134,7 +134,6 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
}
|
||||
w.Header().Add("Content-Type", "text/html; charset=utf-8")
|
||||
fmt.Fprintf(w, "<h2>Single-node VictoriaMetrics</h2></br>")
|
||||
fmt.Fprintf(w, "Version %s<br>", buildinfo.Version)
|
||||
fmt.Fprintf(w, "See docs at <a href='https://docs.victoriametrics.com/'>https://docs.victoriametrics.com/</a></br>")
|
||||
fmt.Fprintf(w, "Useful endpoints:</br>")
|
||||
httpserver.WriteAPIHelp(w, [][2]string{
|
||||
|
||||
@@ -10,11 +10,9 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/decimal"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prommetadata"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/prometheus"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage/metricsmetadata"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timeserieslimits"
|
||||
)
|
||||
|
||||
@@ -29,9 +27,11 @@ var selfScraperWG sync.WaitGroup
|
||||
|
||||
func startSelfScraper() {
|
||||
selfScraperStopCh = make(chan struct{})
|
||||
selfScraperWG.Go(func() {
|
||||
selfScraperWG.Add(1)
|
||||
go func() {
|
||||
defer selfScraperWG.Done()
|
||||
selfScraper(*selfScrapeInterval)
|
||||
})
|
||||
}()
|
||||
}
|
||||
|
||||
func stopSelfScraper() {
|
||||
@@ -48,7 +48,6 @@ func selfScraper(scrapeInterval time.Duration) {
|
||||
|
||||
var bb bytesutil.ByteBuffer
|
||||
var rows prometheus.Rows
|
||||
var metadataRows prometheus.MetadataRows
|
||||
var mrs []storage.MetricRow
|
||||
var labels []prompb.Label
|
||||
t := time.NewTicker(scrapeInterval)
|
||||
@@ -58,12 +57,8 @@ func selfScraper(scrapeInterval time.Duration) {
|
||||
appmetrics.WritePrometheusMetrics(&bb)
|
||||
s := bytesutil.ToUnsafeString(bb.B)
|
||||
rows.Reset()
|
||||
// Parse metrics and optionally metadata when enabled
|
||||
if prommetadata.IsEnabled() {
|
||||
rows, metadataRows = prometheus.UnmarshalWithMetadata(rows, metadataRows, s, nil)
|
||||
} else {
|
||||
rows.UnmarshalWithErrLogger(s, nil)
|
||||
}
|
||||
// VictoriaMetrics components don't expose metadata yet, only need to parse samples
|
||||
rows.UnmarshalWithErrLogger(s, nil)
|
||||
mrs = mrs[:0]
|
||||
for i := range rows.Rows {
|
||||
r := &rows.Rows[i]
|
||||
@@ -96,19 +91,6 @@ func selfScraper(scrapeInterval time.Duration) {
|
||||
if err := vmstorage.AddRows(mrs); err != nil {
|
||||
logger.Errorf("cannot store self-scraped metrics: %s", err)
|
||||
}
|
||||
if len(metadataRows.Rows) > 0 {
|
||||
mms := make([]metricsmetadata.Row, 0, len(metadataRows.Rows))
|
||||
for _, mm := range metadataRows.Rows {
|
||||
mms = append(mms, metricsmetadata.Row{
|
||||
MetricFamilyName: bytesutil.ToUnsafeBytes(mm.Metric),
|
||||
Help: bytesutil.ToUnsafeBytes(mm.Help),
|
||||
Type: mm.Type,
|
||||
})
|
||||
}
|
||||
if err := vmstorage.AddMetadataRows(mms); err != nil {
|
||||
logger.Errorf("cannot store self-scraped metrics metadata: %s", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
for {
|
||||
select {
|
||||
|
||||
@@ -33,13 +33,13 @@ func PopulateTimeTpl(b []byte, tGlobal time.Time) []byte {
|
||||
}
|
||||
switch strings.TrimSpace(parts[0]) {
|
||||
case `TIME_S`:
|
||||
return fmt.Appendf(nil, "%d", t.Unix())
|
||||
return []byte(fmt.Sprintf("%d", t.Unix()))
|
||||
case `TIME_MSZ`:
|
||||
return fmt.Appendf(nil, "%d", t.Unix()*1e3)
|
||||
return []byte(fmt.Sprintf("%d", t.Unix()*1e3))
|
||||
case `TIME_MS`:
|
||||
return fmt.Appendf(nil, "%d", timeToMillis(t))
|
||||
return []byte(fmt.Sprintf("%d", timeToMillis(t)))
|
||||
case `TIME_NS`:
|
||||
return fmt.Appendf(nil, "%d", t.UnixNano())
|
||||
return []byte(fmt.Sprintf("%d", t.UnixNano()))
|
||||
default:
|
||||
log.Fatalf("unknown time pattern %s in %s", parts[0], repl)
|
||||
}
|
||||
|
||||
@@ -49,11 +49,6 @@ func insertRows(at *auth.Token, sketches []*datadogsketches.Sketch, extraLabels
|
||||
Name: "__name__",
|
||||
Value: m.Name,
|
||||
})
|
||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10557
|
||||
labels = append(labels, prompb.Label{
|
||||
Name: "host",
|
||||
Value: sketch.Host,
|
||||
})
|
||||
for _, label := range m.Labels {
|
||||
labels = append(labels, prompb.Label{
|
||||
Name: label.Name,
|
||||
@@ -62,6 +57,9 @@ func insertRows(at *auth.Token, sketches []*datadogsketches.Sketch, extraLabels
|
||||
}
|
||||
for _, tag := range sketch.Tags {
|
||||
name, value := datadogutil.SplitTag(tag)
|
||||
if name == "host" {
|
||||
name = "exported_host"
|
||||
}
|
||||
labels = append(labels, prompb.Label{
|
||||
Name: name,
|
||||
Value: value,
|
||||
|
||||
@@ -27,7 +27,6 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/promremotewrite"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/vmimport"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/zabbixconnector"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
@@ -83,9 +82,6 @@ var (
|
||||
maxLabelsPerTimeseries = flag.Int("maxLabelsPerTimeseries", 0, "The maximum number of labels per time series to be accepted. Series with superfluous labels are ignored. In this case the vm_rows_ignored_total{reason=\"too_many_labels\"} metric at /metrics page is incremented")
|
||||
maxLabelNameLen = flag.Int("maxLabelNameLen", 0, "The maximum length of label names in the accepted time series. Series with longer label name are ignored. In this case the vm_rows_ignored_total{reason=\"too_long_label_name\"} metric at /metrics page is incremented")
|
||||
maxLabelValueLen = flag.Int("maxLabelValueLen", 0, "The maximum length of label values in the accepted time series. Series with longer label value are ignored. In this case the vm_rows_ignored_total{reason=\"too_long_label_value\"} metric at /metrics page is incremented")
|
||||
|
||||
enableMultitenancyViaHeaders = flag.Bool("enableMultitenancyViaHeaders", false, "Enables multitenancy via HTTP headers. "+
|
||||
"See https://docs.victoriametrics.com/victoriametrics/vmagent/#multitenancy")
|
||||
)
|
||||
|
||||
var (
|
||||
@@ -219,7 +215,7 @@ func getOpenTSDBHTTPInsertHandler() func(req *http.Request) error {
|
||||
}
|
||||
return func(req *http.Request) error {
|
||||
path := strings.ReplaceAll(req.URL.Path, "//", "/")
|
||||
at, err := getAuthTokenFromPath(path, req.Header)
|
||||
at, err := getAuthTokenFromPath(path)
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot obtain auth token from path %q: %w", path, err)
|
||||
}
|
||||
@@ -227,15 +223,8 @@ func getOpenTSDBHTTPInsertHandler() func(req *http.Request) error {
|
||||
}
|
||||
}
|
||||
|
||||
func parsePath(path string, header http.Header) (*httpserver.Path, error) {
|
||||
if *enableMultitenancyViaHeaders {
|
||||
return httpserver.ParsePathAndHeaders(path, header)
|
||||
}
|
||||
return httpserver.ParsePath(path)
|
||||
}
|
||||
|
||||
func getAuthTokenFromPath(path string, header http.Header) (*auth.Token, error) {
|
||||
p, err := parsePath(path, header)
|
||||
func getAuthTokenFromPath(path string) (*auth.Token, error) {
|
||||
p, err := httpserver.ParsePath(path)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse multitenant path: %w", err)
|
||||
}
|
||||
@@ -255,7 +244,6 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
}
|
||||
w.Header().Add("Content-Type", "text/html; charset=utf-8")
|
||||
fmt.Fprintf(w, "<h2>vmagent</h2>")
|
||||
fmt.Fprintf(w, "Version %s<br>", buildinfo.Version)
|
||||
fmt.Fprintf(w, "See docs at <a href='https://docs.victoriametrics.com/victoriametrics/vmagent/'>https://docs.victoriametrics.com/victoriametrics/vmagent/</a></br>")
|
||||
fmt.Fprintf(w, "Useful endpoints:</br>")
|
||||
httpserver.WriteAPIHelp(w, [][2]string{
|
||||
@@ -362,17 +350,6 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
}
|
||||
firehose.WriteSuccessResponse(w, r)
|
||||
return true
|
||||
case "/zabbixconnector/api/v1/history":
|
||||
zabbixconnectorHistoryRequests.Inc()
|
||||
if err := zabbixconnector.InsertHandlerForHTTP(nil, r); err != nil {
|
||||
zabbixconnectorHistoryErrors.Inc()
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.WriteHeader(http.StatusBadRequest)
|
||||
fmt.Fprintf(w, `{"error":%q}`, err.Error())
|
||||
return true
|
||||
}
|
||||
w.WriteHeader(http.StatusOK)
|
||||
return true
|
||||
case "/newrelic":
|
||||
newrelicCheckRequest.Inc()
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
@@ -569,15 +546,14 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
}
|
||||
|
||||
func processMultitenantRequest(w http.ResponseWriter, r *http.Request, path string) bool {
|
||||
p, err := parsePath(path, r.Header)
|
||||
p, err := httpserver.ParsePath(path)
|
||||
if err != nil {
|
||||
// Cannot parse multitenant path. Skip it - probably it will be parsed later.
|
||||
return false
|
||||
}
|
||||
if p.Prefix != "insert" {
|
||||
// processMultitenantRequest is called for all unmatched path variants,
|
||||
// but we should try parsing only /insert prefixed to avoid catching all possible paths.
|
||||
return false
|
||||
httpserver.Errorf(w, r, `unsupported multitenant prefix: %q; expected "insert"`, p.Prefix)
|
||||
return true
|
||||
}
|
||||
at, err := auth.NewTokenPossibleMultitenant(p.AuthToken)
|
||||
if err != nil {
|
||||
@@ -668,17 +644,6 @@ func processMultitenantRequest(w http.ResponseWriter, r *http.Request, path stri
|
||||
}
|
||||
firehose.WriteSuccessResponse(w, r)
|
||||
return true
|
||||
case "zabbixconnector/api/v1/history":
|
||||
zabbixconnectorHistoryRequests.Inc()
|
||||
if err := zabbixconnector.InsertHandlerForHTTP(at, r); err != nil {
|
||||
zabbixconnectorHistoryErrors.Inc()
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.WriteHeader(http.StatusBadRequest)
|
||||
fmt.Fprintf(w, `{"error":%q}`, err.Error())
|
||||
return true
|
||||
}
|
||||
w.WriteHeader(http.StatusOK)
|
||||
return true
|
||||
case "newrelic":
|
||||
newrelicCheckRequest.Inc()
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
@@ -800,9 +765,6 @@ var (
|
||||
opentelemetryPushRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/opentelemetry/v1/metrics", protocol="opentelemetry"}`)
|
||||
opentelemetryPushErrors = metrics.NewCounter(`vmagent_http_request_errors_total{path="/opentelemetry/v1/metrics", protocol="opentelemetry"}`)
|
||||
|
||||
zabbixconnectorHistoryRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/zabbixconnector/api/v1/history", protocol="zabbixconnector"}`)
|
||||
zabbixconnectorHistoryErrors = metrics.NewCounter(`vmagent_http_request_errors_total{path="/zabbixconnector/api/v1/history", protocol="zabbixconnector"}`)
|
||||
|
||||
newrelicWriteRequests = metrics.NewCounter(`vm_http_requests_total{path="/newrelic/infra/v2/metrics/events/bulk", protocol="newrelic"}`)
|
||||
newrelicWriteErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/newrelic/infra/v2/metrics/events/bulk", protocol="newrelic"}`)
|
||||
|
||||
|
||||
@@ -78,7 +78,7 @@ func insertRows(at *auth.Token, rows []newrelic.Row, extraLabels []prompb.Label)
|
||||
if !remotewrite.TryPush(at, &ctx.WriteRequest) {
|
||||
return remotewrite.ErrQueueFullHTTPRetry
|
||||
}
|
||||
rowsInserted.Add(samplesCount)
|
||||
rowsInserted.Add(len(rows))
|
||||
if at != nil {
|
||||
rowsTenantInserted.Get(at).Add(samplesCount)
|
||||
}
|
||||
|
||||
@@ -25,7 +25,7 @@ var (
|
||||
rowsPerInsert = metrics.NewHistogram(`vmagent_rows_per_insert{type="opentelemetry"}`)
|
||||
)
|
||||
|
||||
// InsertHandlerForReader processes metrics from given reader.
|
||||
// InsertHandler processes metrics from given reader.
|
||||
func InsertHandlerForReader(at *auth.Token, r io.Reader, encoding string) error {
|
||||
return stream.ParseStream(r, encoding, nil, func(tss []prompb.TimeSeries, mms []prompb.MetricMetadata) error {
|
||||
return insertRows(at, tss, mms, nil)
|
||||
@@ -77,6 +77,16 @@ func insertRows(at *auth.Token, tss []prompb.TimeSeries, mms []prompb.MetricMeta
|
||||
|
||||
var metadataTotal int
|
||||
if prommetadata.IsEnabled() {
|
||||
var accountID, projectID uint32
|
||||
if at != nil {
|
||||
accountID = at.AccountID
|
||||
projectID = at.ProjectID
|
||||
for i := range mms {
|
||||
mm := &mms[i]
|
||||
mm.AccountID = accountID
|
||||
mm.ProjectID = projectID
|
||||
}
|
||||
}
|
||||
ctx.WriteRequest.Metadata = mms
|
||||
metadataTotal = len(mms)
|
||||
}
|
||||
|
||||
@@ -75,6 +75,11 @@ func insertRows(at *auth.Token, rows []prometheus.Row, mms []prometheus.Metadata
|
||||
Samples: samples[len(samples)-1:],
|
||||
})
|
||||
}
|
||||
var accountID, projectID uint32
|
||||
if at != nil {
|
||||
accountID = at.AccountID
|
||||
projectID = at.ProjectID
|
||||
}
|
||||
for i := range mms {
|
||||
mm := &mms[i]
|
||||
mmsDst = append(mmsDst, prompb.MetricMetadata{
|
||||
@@ -83,6 +88,8 @@ func insertRows(at *auth.Token, rows []prometheus.Row, mms []prometheus.Metadata
|
||||
Type: mm.Type,
|
||||
// there is no unit in Prometheus exposition formats
|
||||
|
||||
AccountID: accountID,
|
||||
ProjectID: projectID,
|
||||
})
|
||||
}
|
||||
ctx.WriteRequest.Timeseries = tssDst
|
||||
|
||||
@@ -72,6 +72,11 @@ func insertRows(at *auth.Token, timeseries []prompb.TimeSeries, mms []prompb.Met
|
||||
|
||||
var metadataTotal int
|
||||
if prommetadata.IsEnabled() {
|
||||
var accountID, projectID uint32
|
||||
if at != nil {
|
||||
accountID = at.AccountID
|
||||
projectID = at.ProjectID
|
||||
}
|
||||
for i := range mms {
|
||||
mm := &mms[i]
|
||||
mmsDst = append(mmsDst, prompb.MetricMetadata{
|
||||
@@ -80,8 +85,8 @@ func insertRows(at *auth.Token, timeseries []prompb.TimeSeries, mms []prompb.Met
|
||||
Type: mm.Type,
|
||||
Unit: mm.Unit,
|
||||
|
||||
AccountID: mm.AccountID,
|
||||
ProjectID: mm.ProjectID,
|
||||
AccountID: accountID,
|
||||
ProjectID: projectID,
|
||||
})
|
||||
}
|
||||
ctx.WriteRequest.Metadata = mmsDst
|
||||
|
||||
@@ -13,18 +13,19 @@ import (
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
"github.com/golang/snappy"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/awsapi"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding/zstd"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httputil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/persistentqueue"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/ratelimiter"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timerpool"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timeutil"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
"github.com/golang/snappy"
|
||||
)
|
||||
|
||||
var (
|
||||
@@ -59,8 +60,6 @@ var (
|
||||
"Multiple headers must be delimited by '^^': -remoteWrite.headers='header1:value1^^header2:value2'")
|
||||
|
||||
basicAuthUsername = flagutil.NewArrayString("remoteWrite.basicAuth.username", "Optional basic auth username to use for the corresponding -remoteWrite.url")
|
||||
basicAuthUsernameFile = flagutil.NewArrayString("remoteWrite.basicAuth.usernameFile", "Optional path to basic auth username to use for the corresponding -remoteWrite.url. "+
|
||||
"The file is re-read every second")
|
||||
basicAuthPassword = flagutil.NewArrayString("remoteWrite.basicAuth.password", "Optional basic auth password to use for the corresponding -remoteWrite.url")
|
||||
basicAuthPasswordFile = flagutil.NewArrayString("remoteWrite.basicAuth.passwordFile", "Optional path to basic auth password to use for the corresponding -remoteWrite.url. "+
|
||||
"The file is re-read every second")
|
||||
@@ -204,10 +203,14 @@ func (c *client) init(argIdx, concurrency int, sanitizedURL string) {
|
||||
c.retriesCount = metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_retries_count_total{url=%q}`, c.sanitizedURL))
|
||||
c.sendDuration = metrics.GetOrCreateFloatCounter(fmt.Sprintf(`vmagent_remotewrite_send_duration_seconds_total{url=%q}`, c.sanitizedURL))
|
||||
metrics.GetOrCreateGauge(fmt.Sprintf(`vmagent_remotewrite_queues{url=%q}`, c.sanitizedURL), func() float64 {
|
||||
return float64(concurrency)
|
||||
return float64(*queues)
|
||||
})
|
||||
for range concurrency {
|
||||
c.wg.Go(c.runWorker)
|
||||
for i := 0; i < concurrency; i++ {
|
||||
c.wg.Add(1)
|
||||
go func() {
|
||||
defer c.wg.Done()
|
||||
c.runWorker()
|
||||
}()
|
||||
}
|
||||
logger.Infof("initialized client for -remoteWrite.url=%q", c.sanitizedURL)
|
||||
}
|
||||
@@ -225,14 +228,12 @@ func getAuthConfig(argIdx int) (*promauth.Config, error) {
|
||||
hdrs = strings.Split(headersValue, "^^")
|
||||
}
|
||||
username := basicAuthUsername.GetOptionalArg(argIdx)
|
||||
usernameFile := basicAuthUsernameFile.GetOptionalArg(argIdx)
|
||||
password := basicAuthPassword.GetOptionalArg(argIdx)
|
||||
passwordFile := basicAuthPasswordFile.GetOptionalArg(argIdx)
|
||||
var basicAuthCfg *promauth.BasicAuthConfig
|
||||
if username != "" || usernameFile != "" || password != "" || passwordFile != "" {
|
||||
if username != "" || password != "" || passwordFile != "" {
|
||||
basicAuthCfg = &promauth.BasicAuthConfig{
|
||||
Username: username,
|
||||
UsernameFile: usernameFile,
|
||||
Password: promauth.NewSecret(password),
|
||||
PasswordFile: passwordFile,
|
||||
}
|
||||
@@ -294,7 +295,7 @@ func getAWSAPIConfig(argIdx int) (*awsapi.Config, error) {
|
||||
accessKey := awsAccessKey.GetOptionalArg(argIdx)
|
||||
secretKey := awsSecretKey.GetOptionalArg(argIdx)
|
||||
service := awsService.GetOptionalArg(argIdx)
|
||||
cfg, err := awsapi.NewConfig(ec2Endpoint, stsEndpoint, region, roleARN, accessKey, secretKey, service, "")
|
||||
cfg, err := awsapi.NewConfig(ec2Endpoint, stsEndpoint, region, roleARN, accessKey, secretKey, service)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -409,7 +410,8 @@ func (c *client) newRequest(url string, body []byte) (*http.Request, error) {
|
||||
// Otherwise, it tries sending the block to remote storage indefinitely.
|
||||
func (c *client) sendBlockHTTP(block []byte) bool {
|
||||
c.rl.Register(len(block))
|
||||
bt := timeutil.NewBackoffTimer(c.retryMinInterval, c.retryMaxInterval)
|
||||
maxRetryDuration := timeutil.AddJitterToDuration(c.retryMaxInterval)
|
||||
retryDuration := timeutil.AddJitterToDuration(c.retryMinInterval)
|
||||
retriesCount := 0
|
||||
|
||||
again:
|
||||
@@ -418,10 +420,19 @@ again:
|
||||
c.requestDuration.UpdateDuration(startTime)
|
||||
if err != nil {
|
||||
c.errorsCount.Inc()
|
||||
remoteWriteRetryLogger.Warnf("couldn't send a block with size %d bytes to %q: %s; re-sending the block in %s",
|
||||
len(block), c.sanitizedURL, err, bt.CurrentDelay())
|
||||
if !bt.Wait(c.stopCh) {
|
||||
retryDuration *= 2
|
||||
if retryDuration > maxRetryDuration {
|
||||
retryDuration = maxRetryDuration
|
||||
}
|
||||
remoteWriteRetryLogger.Warnf("couldn't send a block with size %d bytes to %q: %s; re-sending the block in %.3f seconds",
|
||||
len(block), c.sanitizedURL, err, retryDuration.Seconds())
|
||||
t := timerpool.Get(retryDuration)
|
||||
select {
|
||||
case <-c.stopCh:
|
||||
timerpool.Put(t)
|
||||
return false
|
||||
case <-t.C:
|
||||
timerpool.Put(t)
|
||||
}
|
||||
c.retriesCount.Inc()
|
||||
goto again
|
||||
@@ -470,7 +481,7 @@ again:
|
||||
goto again
|
||||
}
|
||||
|
||||
logger.Warnf("failed to repack zstd block (%d bytes) to snappy: %s; The block will be rejected. "+
|
||||
logger.Warnf("failed to repack zstd block (%s bytes) to snappy: %s; The block will be rejected. "+
|
||||
"Possible cause: ungraceful shutdown leading to persisted queue corruption.",
|
||||
zstdBlockLen, err)
|
||||
}
|
||||
@@ -487,10 +498,7 @@ again:
|
||||
// Unexpected status code returned
|
||||
retriesCount++
|
||||
retryAfterHeader := parseRetryAfterHeader(resp.Header.Get("Retry-After"))
|
||||
// retryAfterDuration has the highest priority duration
|
||||
if retryAfterHeader > 0 {
|
||||
bt.SetDelay(retryAfterHeader)
|
||||
}
|
||||
retryDuration = getRetryDuration(retryAfterHeader, retryDuration, maxRetryDuration)
|
||||
|
||||
// Handle response
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
@@ -499,10 +507,15 @@ again:
|
||||
logger.Errorf("cannot read response body from %q during retry #%d: %s", c.sanitizedURL, retriesCount, err)
|
||||
} else {
|
||||
logger.Errorf("unexpected status code received after sending a block with size %d bytes to %q during retry #%d: %d; response body=%q; "+
|
||||
"re-sending the block in %s", len(block), c.sanitizedURL, retriesCount, statusCode, body, bt.CurrentDelay())
|
||||
"re-sending the block in %.3f seconds", len(block), c.sanitizedURL, retriesCount, statusCode, body, retryDuration.Seconds())
|
||||
}
|
||||
if !bt.Wait(c.stopCh) {
|
||||
t := timerpool.Get(retryDuration)
|
||||
select {
|
||||
case <-c.stopCh:
|
||||
timerpool.Put(t)
|
||||
return false
|
||||
case <-t.C:
|
||||
timerpool.Put(t)
|
||||
}
|
||||
c.retriesCount.Inc()
|
||||
goto again
|
||||
@@ -511,6 +524,27 @@ again:
|
||||
var remoteWriteRejectedLogger = logger.WithThrottler("remoteWriteRejected", 5*time.Second)
|
||||
var remoteWriteRetryLogger = logger.WithThrottler("remoteWriteRetry", 5*time.Second)
|
||||
|
||||
// getRetryDuration returns retry duration.
|
||||
// retryAfterDuration has the highest priority.
|
||||
// If retryAfterDuration is not specified, retryDuration gets doubled.
|
||||
// retryDuration can't exceed maxRetryDuration.
|
||||
//
|
||||
// Also see: https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6097
|
||||
func getRetryDuration(retryAfterDuration, retryDuration, maxRetryDuration time.Duration) time.Duration {
|
||||
// retryAfterDuration has the highest priority duration
|
||||
if retryAfterDuration > 0 {
|
||||
return timeutil.AddJitterToDuration(retryAfterDuration)
|
||||
}
|
||||
|
||||
// default backoff retry policy
|
||||
retryDuration *= 2
|
||||
if retryDuration > maxRetryDuration {
|
||||
retryDuration = maxRetryDuration
|
||||
}
|
||||
|
||||
return retryDuration
|
||||
}
|
||||
|
||||
// repackBlockFromZstdToSnappy repacks the given zstd-compressed block to snappy-compressed block.
|
||||
//
|
||||
// The input block may be corrupted, for example, if vmagent was shut down ungracefully and
|
||||
@@ -520,9 +554,9 @@ var remoteWriteRetryLogger = logger.WithThrottler("remoteWriteRetry", 5*time.Sec
|
||||
// For more details, see: https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9417
|
||||
func repackBlockFromZstdToSnappy(zstdBlock []byte) ([]byte, error) {
|
||||
plainBlock := make([]byte, 0, len(zstdBlock)*2)
|
||||
plainBlock, err := encoding.DecompressZSTD(plainBlock, zstdBlock)
|
||||
plainBlock, err := zstd.Decompress(plainBlock, zstdBlock)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
return nil, fmt.Errorf("zstd: decompress: %s", err)
|
||||
}
|
||||
|
||||
return snappy.Encode(nil, plainBlock), nil
|
||||
@@ -541,20 +575,24 @@ func logBlockRejected(block []byte, sanitizedURL string, resp *http.Response) {
|
||||
}
|
||||
|
||||
// parseRetryAfterHeader parses `Retry-After` value retrieved from HTTP response header.
|
||||
//
|
||||
// s should be in either HTTP-date or a number of seconds.
|
||||
// It returns time.Duration(0) if s does not follow RFC 7231.
|
||||
func parseRetryAfterHeader(s string) time.Duration {
|
||||
if s == "" {
|
||||
return 0
|
||||
// retryAfterString should be in either HTTP-date or a number of seconds.
|
||||
// It will return time.Duration(0) if `retryAfterString` does not follow RFC 7231.
|
||||
func parseRetryAfterHeader(retryAfterString string) (retryAfterDuration time.Duration) {
|
||||
if retryAfterString == "" {
|
||||
return retryAfterDuration
|
||||
}
|
||||
|
||||
defer func() {
|
||||
v := retryAfterDuration.Seconds()
|
||||
logger.Infof("'Retry-After: %s' parsed into %.2f second(s)", retryAfterString, v)
|
||||
}()
|
||||
|
||||
// Retry-After could be in "Mon, 02 Jan 2006 15:04:05 GMT" format.
|
||||
if parsedTime, err := time.Parse(http.TimeFormat, s); err == nil {
|
||||
if parsedTime, err := time.Parse(http.TimeFormat, retryAfterString); err == nil {
|
||||
return time.Duration(time.Until(parsedTime).Seconds()) * time.Second
|
||||
}
|
||||
// Retry-After could be in seconds.
|
||||
if seconds, err := strconv.Atoi(s); err == nil {
|
||||
if seconds, err := strconv.Atoi(retryAfterString); err == nil {
|
||||
return time.Duration(seconds) * time.Second
|
||||
}
|
||||
|
||||
|
||||
@@ -6,12 +6,66 @@ import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/golang/snappy"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
||||
"github.com/golang/snappy"
|
||||
)
|
||||
|
||||
func TestCalculateRetryDuration(t *testing.T) {
|
||||
// `testFunc` call `calculateRetryDuration` for `n` times
|
||||
// and evaluate if the result of `calculateRetryDuration` is
|
||||
// 1. >= expectMinDuration
|
||||
// 2. <= expectMinDuration + 10% (see timeutil.AddJitterToDuration)
|
||||
f := func(retryAfterDuration, retryDuration time.Duration, n int, expectMinDuration time.Duration) {
|
||||
t.Helper()
|
||||
|
||||
for i := 0; i < n; i++ {
|
||||
retryDuration = getRetryDuration(retryAfterDuration, retryDuration, time.Minute)
|
||||
}
|
||||
|
||||
expectMaxDuration := helper(expectMinDuration)
|
||||
expectMinDuration = expectMinDuration - (1000 * time.Millisecond) // Avoid edge case when calculating time.Until(now)
|
||||
|
||||
if retryDuration < expectMinDuration || retryDuration > expectMaxDuration {
|
||||
t.Fatalf(
|
||||
"incorrect retry duration, want (ms): [%d, %d], got (ms): %d",
|
||||
expectMinDuration.Milliseconds(), expectMaxDuration.Milliseconds(),
|
||||
retryDuration.Milliseconds(),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
// Call calculateRetryDuration for 1 time.
|
||||
{
|
||||
// default backoff policy
|
||||
f(0, time.Second, 1, 2*time.Second)
|
||||
// default backoff policy exceed max limit"
|
||||
f(0, 10*time.Minute, 1, time.Minute)
|
||||
|
||||
// retry after > default backoff policy
|
||||
f(10*time.Second, 1*time.Second, 1, 10*time.Second)
|
||||
// retry after < default backoff policy
|
||||
f(1*time.Second, 10*time.Second, 1, 1*time.Second)
|
||||
// retry after invalid and < default backoff policy
|
||||
f(0, time.Second, 1, 2*time.Second)
|
||||
|
||||
}
|
||||
|
||||
// Call calculateRetryDuration for multiple times.
|
||||
{
|
||||
// default backoff policy 2 times
|
||||
f(0, time.Second, 2, 4*time.Second)
|
||||
// default backoff policy 3 times
|
||||
f(0, time.Second, 3, 8*time.Second)
|
||||
// default backoff policy N times exceed max limit
|
||||
f(0, time.Second, 10, time.Minute)
|
||||
|
||||
// retry after 120s 1 times
|
||||
f(120*time.Second, time.Second, 1, 120*time.Second)
|
||||
// retry after 120s 2 times
|
||||
f(120*time.Second, time.Second, 2, 120*time.Second)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseRetryAfterHeader(t *testing.T) {
|
||||
f := func(retryAfterString string, expectResult time.Duration) {
|
||||
t.Helper()
|
||||
@@ -37,32 +91,11 @@ func TestParseRetryAfterHeader(t *testing.T) {
|
||||
f(time.Now().Add(10*time.Second).Format("Mon, 02 Jan 2006 15:04:05 FAKETZ"), 0)
|
||||
}
|
||||
|
||||
func TestInitSecretFlags(t *testing.T) {
|
||||
showRemoteWriteURLOrig := *showRemoteWriteURL
|
||||
defer func() {
|
||||
*showRemoteWriteURL = showRemoteWriteURLOrig
|
||||
flagutil.UnregisterAllSecretFlags()
|
||||
}()
|
||||
// helper calculate the max possible time duration calculated by timeutil.AddJitterToDuration.
|
||||
func helper(d time.Duration) time.Duration {
|
||||
dv := min(d/10, 10*time.Second)
|
||||
|
||||
flagutil.UnregisterAllSecretFlags()
|
||||
*showRemoteWriteURL = false
|
||||
InitSecretFlags()
|
||||
if !flagutil.IsSecretFlag("remotewrite.url") {
|
||||
t.Fatalf("expecting remoteWrite.url to be secret")
|
||||
}
|
||||
if !flagutil.IsSecretFlag("remotewrite.headers") {
|
||||
t.Fatalf("expecting remoteWrite.headers to be secret")
|
||||
}
|
||||
|
||||
flagutil.UnregisterAllSecretFlags()
|
||||
*showRemoteWriteURL = true
|
||||
InitSecretFlags()
|
||||
if flagutil.IsSecretFlag("remotewrite.url") {
|
||||
t.Fatalf("remoteWrite.url must remain visible when -remoteWrite.showURL is set")
|
||||
}
|
||||
if !flagutil.IsSecretFlag("remotewrite.headers") {
|
||||
t.Fatalf("expecting remoteWrite.headers to remain secret")
|
||||
}
|
||||
return d + dv
|
||||
}
|
||||
|
||||
func TestRepackBlockFromZstdToSnappy(t *testing.T) {
|
||||
|
||||
@@ -48,7 +48,11 @@ func newPendingSeries(fq *persistentqueue.FastQueue, isVMRemoteWrite *atomic.Boo
|
||||
ps.wr.significantFigures = significantFigures
|
||||
ps.wr.roundDigits = roundDigits
|
||||
ps.stopCh = make(chan struct{})
|
||||
ps.periodicFlusherWG.Go(ps.periodicFlusher)
|
||||
ps.periodicFlusherWG.Add(1)
|
||||
go func() {
|
||||
defer ps.periodicFlusherWG.Done()
|
||||
ps.periodicFlusher()
|
||||
}()
|
||||
return &ps
|
||||
}
|
||||
|
||||
@@ -211,9 +215,6 @@ func (wr *writeRequest) copyMetadata(dst, src *prompb.MetricMetadata) {
|
||||
dst.Type = src.Type
|
||||
dst.Unit = src.Unit
|
||||
|
||||
dst.AccountID = src.AccountID
|
||||
dst.ProjectID = src.ProjectID
|
||||
|
||||
// Pre-allocate memory for all string fields.
|
||||
neededBufLen := len(src.MetricFamilyName) + len(src.Help)
|
||||
bufLen := len(wr.metadatabuf)
|
||||
|
||||
@@ -51,9 +51,9 @@ func testPushWriteRequest(t *testing.T, rowsCount, expectedBlockLenProm, expecte
|
||||
|
||||
func newTestWriteRequest(seriesCount, labelsCount int) *prompb.WriteRequest {
|
||||
var wr prompb.WriteRequest
|
||||
for i := range seriesCount {
|
||||
for i := 0; i < seriesCount; i++ {
|
||||
var labels []prompb.Label
|
||||
for j := range labelsCount {
|
||||
for j := 0; j < labelsCount; j++ {
|
||||
labels = append(labels, prompb.Label{
|
||||
Name: fmt.Sprintf("label_%d_%d", i, j),
|
||||
Value: fmt.Sprintf("value_%d_%d", i, j),
|
||||
|
||||
@@ -9,18 +9,19 @@ import (
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
"gopkg.in/yaml.v2"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
|
||||
"go.yaml.in/yaml/v3"
|
||||
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
var (
|
||||
unparsedLabelsGlobal = flagutil.NewArrayString("remoteWrite.label", "Optional label in the form 'name=value' to add to all the metrics before sending them to all -remoteWrite.url.")
|
||||
unparsedLabelsGlobal = flagutil.NewArrayString("remoteWrite.label", "Optional label in the form 'name=value' to add to all the metrics before sending them to -remoteWrite.url. "+
|
||||
"Pass multiple -remoteWrite.label flags in order to add multiple labels to metrics before sending them to remote storage")
|
||||
relabelConfigPathGlobal = flag.String("remoteWrite.relabelConfig", "", "Optional path to file with relabeling configs, which are applied "+
|
||||
"to all the metrics before sending them to -remoteWrite.url. See also -remoteWrite.urlRelabelConfig. "+
|
||||
"The path can point either to local file or to http url. "+
|
||||
@@ -38,7 +39,7 @@ var (
|
||||
labelsGlobal []prompb.Label
|
||||
|
||||
remoteWriteRelabelConfigData atomic.Pointer[[]byte]
|
||||
remoteWriteURLRelabelConfigData atomic.Pointer[[]any]
|
||||
remoteWriteURLRelabelConfigData atomic.Pointer[[]interface{}]
|
||||
|
||||
relabelConfigReloads *metrics.Counter
|
||||
relabelConfigReloadErrors *metrics.Counter
|
||||
@@ -90,8 +91,8 @@ func WriteURLRelabelConfigData(w io.Writer) {
|
||||
return
|
||||
}
|
||||
type urlRelabelCfg struct {
|
||||
Url string `yaml:"url"`
|
||||
RelabelConfig any `yaml:"relabel_config"`
|
||||
Url string `yaml:"url"`
|
||||
RelabelConfig interface{} `yaml:"relabel_config"`
|
||||
}
|
||||
var cs []urlRelabelCfg
|
||||
for i, url := range *remoteWriteURLs {
|
||||
@@ -138,13 +139,12 @@ func loadRelabelConfigs() (*relabelConfigs, error) {
|
||||
remoteWriteRelabelConfigData.Store(&rawCfg)
|
||||
rcs.global = global
|
||||
}
|
||||
|
||||
if len(*relabelConfigPaths) > len(*remoteWriteURLs) {
|
||||
return nil, fmt.Errorf("too many -remoteWrite.urlRelabelConfig args: %d; it mustn't exceed the number of -remoteWrite.url args: %d",
|
||||
len(*relabelConfigPaths), (len(*remoteWriteURLs)))
|
||||
}
|
||||
|
||||
var urlRelabelCfgs []any
|
||||
var urlRelabelCfgs []interface{}
|
||||
rcs.perURL = make([]*promrelabel.ParsedConfigs, len(*remoteWriteURLs))
|
||||
for i, path := range *relabelConfigPaths {
|
||||
if len(path) == 0 {
|
||||
@@ -157,7 +157,7 @@ func loadRelabelConfigs() (*relabelConfigs, error) {
|
||||
}
|
||||
rcs.perURL[i] = prc
|
||||
|
||||
var parsedCfg any
|
||||
var parsedCfg interface{}
|
||||
_ = yaml.Unmarshal(rawCfg, &parsedCfg)
|
||||
urlRelabelCfgs = append(urlRelabelCfgs, parsedCfg)
|
||||
}
|
||||
@@ -176,9 +176,19 @@ type relabelConfigs struct {
|
||||
perURL []*promrelabel.ParsedConfigs
|
||||
}
|
||||
|
||||
// isSet indicates whether (global or per-URL) command-line flags is set
|
||||
func (rcs *relabelConfigs) isSet() bool {
|
||||
return *relabelConfigPathGlobal != "" || len(*relabelConfigPaths) > 0
|
||||
if rcs == nil {
|
||||
return false
|
||||
}
|
||||
if rcs.global.Len() > 0 {
|
||||
return true
|
||||
}
|
||||
for _, pc := range rcs.perURL {
|
||||
if pc.Len() > 0 {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// initLabelsGlobal must be called after parsing command-line flags.
|
||||
|
||||
@@ -3,7 +3,6 @@ package remotewrite
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"math"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"path/filepath"
|
||||
@@ -12,10 +11,6 @@ import (
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/cespare/xxhash/v2"
|
||||
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bloomfilter"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
@@ -28,7 +23,6 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/memory"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/persistentqueue"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prommetadata"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutil"
|
||||
@@ -36,6 +30,8 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/slicesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/streamaggr"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timeserieslimits"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
"github.com/cespare/xxhash/v2"
|
||||
)
|
||||
|
||||
var (
|
||||
@@ -63,7 +59,7 @@ var (
|
||||
"See also -remoteWrite.maxDiskUsagePerURL and -remoteWrite.disableOnDiskQueue")
|
||||
keepDanglingQueues = flag.Bool("remoteWrite.keepDanglingQueues", false, "Keep persistent queues contents at -remoteWrite.tmpDataPath in case there are no matching -remoteWrite.url. "+
|
||||
"Useful when -remoteWrite.url is changed temporarily and persistent queue files will be needed later on.")
|
||||
queues = flagutil.NewArrayInt("remoteWrite.queues", cgroup.AvailableCPUs()*2, "The number of concurrent queues to each -remoteWrite.url. Set more queues if default number of queues "+
|
||||
queues = flag.Int("remoteWrite.queues", cgroup.AvailableCPUs()*2, "The number of concurrent queues to each -remoteWrite.url. Set more queues if default number of queues "+
|
||||
"isn't enough for sending high volume of collected data to remote storage. "+
|
||||
"Default value depends on the number of available CPU cores. It should work fine in most cases since it minimizes resource usage")
|
||||
showRemoteWriteURL = flag.Bool("remoteWrite.showURL", false, "Whether to show -remoteWrite.url in the exported metrics. "+
|
||||
@@ -84,14 +80,10 @@ var (
|
||||
`This may be needed for reducing memory usage at remote storage when the order of labels in incoming samples is random. `+
|
||||
`For example, if m{k1="v1",k2="v2"} may be sent as m{k2="v2",k1="v1"}`+
|
||||
`Enabled sorting for labels can slow down ingestion performance a bit`)
|
||||
maxHourlySeries = flag.Int64("remoteWrite.maxHourlySeries", 0, "The maximum number of unique series vmagent can send to remote storage systems during the last hour. "+
|
||||
"Excess series are logged and dropped. This can be useful for limiting series cardinality. "+
|
||||
fmt.Sprintf("Setting this flag to '-1' sets limit to maximum possible value (%d) which is useful in order to enable series tracking without enforcing limits. ", math.MaxInt32)+
|
||||
"See https://docs.victoriametrics.com/victoriametrics/vmagent/#cardinality-limiter")
|
||||
maxDailySeries = flag.Int64("remoteWrite.maxDailySeries", 0, "The maximum number of unique series vmagent can send to remote storage systems during the last 24 hours. "+
|
||||
"Excess series are logged and dropped. This can be useful for limiting series churn rate. "+
|
||||
fmt.Sprintf("Setting this flag to '-1' sets limit to maximum possible value (%d) which is useful in order to enable series tracking without enforcing limits. ", math.MaxInt32)+
|
||||
"See https://docs.victoriametrics.com/victoriametrics/vmagent/#cardinality-limiter")
|
||||
maxHourlySeries = flag.Int("remoteWrite.maxHourlySeries", 0, "The maximum number of unique series vmagent can send to remote storage systems during the last hour. "+
|
||||
"Excess series are logged and dropped. This can be useful for limiting series cardinality. See https://docs.victoriametrics.com/victoriametrics/vmagent/#cardinality-limiter")
|
||||
maxDailySeries = flag.Int("remoteWrite.maxDailySeries", 0, "The maximum number of unique series vmagent can send to remote storage systems during the last 24 hours. "+
|
||||
"Excess series are logged and dropped. This can be useful for limiting series churn rate. See https://docs.victoriametrics.com/victoriametrics/vmagent/#cardinality-limiter")
|
||||
maxIngestionRate = flag.Int("maxIngestionRate", 0, "The maximum number of samples vmagent can receive per second. Data ingestion is paused when the limit is exceeded. "+
|
||||
"By default there are no limits on samples ingestion rate. See also -remoteWrite.rateLimit")
|
||||
|
||||
@@ -100,8 +92,6 @@ var (
|
||||
"See https://docs.victoriametrics.com/victoriametrics/vmagent/#disabling-on-disk-persistence . See also -remoteWrite.dropSamplesOnOverload")
|
||||
dropSamplesOnOverload = flag.Bool("remoteWrite.dropSamplesOnOverload", false, "Whether to drop samples when -remoteWrite.disableOnDiskQueue is set and if the samples "+
|
||||
"cannot be pushed into the configured -remoteWrite.url systems in a timely manner. See https://docs.victoriametrics.com/victoriametrics/vmagent/#disabling-on-disk-persistence")
|
||||
disableMetadataPerURL = flagutil.NewArrayBool("remoteWrite.disableMetadata", "Whether to disable sending metadata to the corresponding -remoteWrite.url. "+
|
||||
"By default, metadata sending is controlled by the global -enableMetadata flag")
|
||||
)
|
||||
|
||||
var (
|
||||
@@ -151,8 +141,6 @@ func InitSecretFlags() {
|
||||
// remoteWrite.url can contain authentication codes, so hide it at `/metrics` output.
|
||||
flagutil.RegisterSecretFlag("remoteWrite.url")
|
||||
}
|
||||
// remoteWrite.headers can contain auth headers such as Authorization and API keys.
|
||||
flagutil.RegisterSecretFlag("remoteWrite.headers")
|
||||
}
|
||||
|
||||
var (
|
||||
@@ -169,20 +157,8 @@ func Init() {
|
||||
if len(*remoteWriteURLs) == 0 {
|
||||
logger.Fatalf("at least one `-remoteWrite.url` command-line flag must be set")
|
||||
}
|
||||
if *shardByURL && len(*disableOnDiskQueue) > 1 {
|
||||
disableOnDiskQueues := *disableOnDiskQueue
|
||||
|
||||
firstValue := disableOnDiskQueues[0]
|
||||
for _, v := range disableOnDiskQueues[1:] {
|
||||
if firstValue != v {
|
||||
logger.Fatalf("all -remoteWrite.url targets must have the same -remoteWrite.disableOnDiskQueue setting when -remoteWrite.shardByURL is enabled; " +
|
||||
"either enable or disable -remoteWrite.disableOnDiskQueue for all targets")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if limit := getMaxHourlySeries(); limit > 0 {
|
||||
hourlySeriesLimiter = bloomfilter.NewLimiter(limit, time.Hour)
|
||||
if *maxHourlySeries > 0 {
|
||||
hourlySeriesLimiter = bloomfilter.NewLimiter(*maxHourlySeries, time.Hour)
|
||||
_ = metrics.NewGauge(`vmagent_hourly_series_limit_max_series`, func() float64 {
|
||||
return float64(hourlySeriesLimiter.MaxItems())
|
||||
})
|
||||
@@ -190,8 +166,8 @@ func Init() {
|
||||
return float64(hourlySeriesLimiter.CurrentItems())
|
||||
})
|
||||
}
|
||||
if limit := getMaxDailySeries(); limit > 0 {
|
||||
dailySeriesLimiter = bloomfilter.NewLimiter(limit, 24*time.Hour)
|
||||
if *maxDailySeries > 0 {
|
||||
dailySeriesLimiter = bloomfilter.NewLimiter(*maxDailySeries, 24*time.Hour)
|
||||
_ = metrics.NewGauge(`vmagent_daily_series_limit_max_series`, func() float64 {
|
||||
return float64(dailySeriesLimiter.MaxItems())
|
||||
})
|
||||
@@ -200,6 +176,13 @@ func Init() {
|
||||
})
|
||||
}
|
||||
|
||||
if *queues > maxQueues {
|
||||
*queues = maxQueues
|
||||
}
|
||||
if *queues <= 0 {
|
||||
*queues = 1
|
||||
}
|
||||
|
||||
if len(*shardByURLLabels) > 0 && len(*shardByURLIgnoreLabels) > 0 {
|
||||
logger.Fatalf("-remoteWrite.shardByURL.labels and -remoteWrite.shardByURL.ignoreLabels cannot be set simultaneously; " +
|
||||
"see https://docs.victoriametrics.com/victoriametrics/vmagent/#sharding-among-remote-storages")
|
||||
@@ -232,7 +215,9 @@ func Init() {
|
||||
dropDanglingQueues()
|
||||
|
||||
// Start config reloader.
|
||||
configReloaderWG.Go(func() {
|
||||
configReloaderWG.Add(1)
|
||||
go func() {
|
||||
defer configReloaderWG.Done()
|
||||
for {
|
||||
select {
|
||||
case <-configReloaderStopCh:
|
||||
@@ -242,7 +227,7 @@ func Init() {
|
||||
reloadRelabelConfigs()
|
||||
reloadStreamAggrConfigs()
|
||||
}
|
||||
})
|
||||
}()
|
||||
}
|
||||
|
||||
func dropDanglingQueues() {
|
||||
@@ -282,6 +267,17 @@ func initRemoteWriteCtxs(urls []string) {
|
||||
if len(urls) == 0 {
|
||||
logger.Panicf("BUG: urls must be non-empty")
|
||||
}
|
||||
|
||||
maxInmemoryBlocks := memory.Allowed() / len(urls) / *maxRowsPerBlock / 100
|
||||
if maxInmemoryBlocks / *queues > 100 {
|
||||
// There is no much sense in keeping higher number of blocks in memory,
|
||||
// since this means that the producer outperforms consumer and the queue
|
||||
// will continue growing. It is better storing the queue to file.
|
||||
maxInmemoryBlocks = 100 * *queues
|
||||
}
|
||||
if maxInmemoryBlocks < 2 {
|
||||
maxInmemoryBlocks = 2
|
||||
}
|
||||
rwctxs := make([]*remoteWriteCtx, len(urls))
|
||||
rwctxIdx := make([]int, len(urls))
|
||||
if retryMaxTime.String() != "" {
|
||||
@@ -296,10 +292,9 @@ func initRemoteWriteCtxs(urls []string) {
|
||||
if *showRemoteWriteURL {
|
||||
sanitizedURL = fmt.Sprintf("%d:%s", i+1, remoteWriteURL)
|
||||
}
|
||||
rwctxs[i] = newRemoteWriteCtx(i, remoteWriteURL, sanitizedURL)
|
||||
rwctxs[i] = newRemoteWriteCtx(i, remoteWriteURL, maxInmemoryBlocks, sanitizedURL)
|
||||
rwctxIdx[i] = i
|
||||
}
|
||||
fs.RegisterPathFsMetrics(*tmpDataPath)
|
||||
|
||||
if *shardByURL {
|
||||
consistentHashNodes := make([]string, 0, len(urls))
|
||||
@@ -413,7 +408,7 @@ func tryPush(at *auth.Token, wr *prompb.WriteRequest, forceDropSamplesOnFailure
|
||||
|
||||
// Push metadata separately from time series, since it doesn't need sharding,
|
||||
// relabeling, stream aggregation, deduplication, etc.
|
||||
if !tryPushMetadataToRemoteStorages(at, rwctxs, mms, forceDropSamplesOnFailure) {
|
||||
if !tryPushMetadataToRemoteStorages(rwctxs, mms, forceDropSamplesOnFailure) {
|
||||
return false
|
||||
}
|
||||
|
||||
@@ -513,9 +508,7 @@ func tryPush(at *auth.Token, wr *prompb.WriteRequest, forceDropSamplesOnFailure
|
||||
//
|
||||
// calculateHealthyRwctxIdx will rely on the order of rwctx to be in ascending order.
|
||||
func getEligibleRemoteWriteCtxs(tss []prompb.TimeSeries, forceDropSamplesOnFailure bool) ([]*remoteWriteCtx, bool) {
|
||||
// When -remoteWrite.shardByURL=true always use all configured remote writes to preserve stable metrics distribution across shards.
|
||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10507
|
||||
if !disableOnDiskQueueAny || *shardByURL {
|
||||
if !disableOnDiskQueueAny {
|
||||
return rwctxsGlobal, true
|
||||
}
|
||||
|
||||
@@ -530,6 +523,12 @@ func getEligibleRemoteWriteCtxs(tss []prompb.TimeSeries, forceDropSamplesOnFailu
|
||||
return nil, false
|
||||
}
|
||||
rowsCount := getRowsCount(tss)
|
||||
if *shardByURL {
|
||||
// Todo: When shardByURL is enabled, the following metrics won't be 100% accurate. Because vmagent don't know
|
||||
// which rwctx should data be pushed to yet. Let's consider the hashing algorithm fair and will distribute
|
||||
// data to all rwctxs evenly.
|
||||
rowsCount = rowsCount / len(rwctxsGlobal)
|
||||
}
|
||||
rwctx.rowsDroppedOnPushFailure.Add(rowsCount)
|
||||
}
|
||||
}
|
||||
@@ -547,18 +546,11 @@ func pushTimeSeriesToRemoteStoragesTrackDropped(tss []prompb.TimeSeries) {
|
||||
}
|
||||
}
|
||||
|
||||
func tryPushMetadataToRemoteStorages(at *auth.Token, rwctxs []*remoteWriteCtx, mms []prompb.MetricMetadata, forceDropSamplesOnFailure bool) bool {
|
||||
func tryPushMetadataToRemoteStorages(rwctxs []*remoteWriteCtx, mms []prompb.MetricMetadata, forceDropSamplesOnFailure bool) bool {
|
||||
if len(mms) == 0 {
|
||||
// Nothing to push
|
||||
return true
|
||||
}
|
||||
if at != nil {
|
||||
for idx := range mms {
|
||||
mm := &mms[idx]
|
||||
mm.AccountID = at.AccountID
|
||||
mm.ProjectID = at.ProjectID
|
||||
}
|
||||
}
|
||||
// Do not shard metadata even if -remoteWrite.shardByURL is set, just replicate it among rwctxs.
|
||||
// Since metadata is usually small and there is no guarantee that metadata can be sent to
|
||||
// the same remote storage with the corresponding metrics.
|
||||
@@ -566,13 +558,11 @@ func tryPushMetadataToRemoteStorages(at *auth.Token, rwctxs []*remoteWriteCtx, m
|
||||
// Push metadata to remote storage systems in parallel to reduce
|
||||
// the time needed for sending the data to multiple remote storage systems.
|
||||
var wg sync.WaitGroup
|
||||
wg.Add(len(rwctxs))
|
||||
var anyPushFailed atomic.Bool
|
||||
for _, rwctx := range rwctxs {
|
||||
if !rwctx.enableMetadata {
|
||||
// Skip remote storage with disabled metadata
|
||||
continue
|
||||
}
|
||||
wg.Go(func() {
|
||||
go func(rwctx *remoteWriteCtx) {
|
||||
defer wg.Done()
|
||||
if !rwctx.tryPushMetadataInternal(mms) {
|
||||
rwctx.pushFailures.Inc()
|
||||
if forceDropSamplesOnFailure {
|
||||
@@ -581,7 +571,7 @@ func tryPushMetadataToRemoteStorages(at *auth.Token, rwctxs []*remoteWriteCtx, m
|
||||
}
|
||||
anyPushFailed.Store(true)
|
||||
}
|
||||
})
|
||||
}(rwctx)
|
||||
}
|
||||
wg.Wait()
|
||||
return !anyPushFailed.Load()
|
||||
@@ -613,13 +603,15 @@ func tryPushTimeSeriesToRemoteStorages(rwctxs []*remoteWriteCtx, tssBlock []prom
|
||||
// Push tssBlock to remote storage systems in parallel to reduce
|
||||
// the time needed for sending the data to multiple remote storage systems.
|
||||
var wg sync.WaitGroup
|
||||
wg.Add(len(rwctxs))
|
||||
var anyPushFailed atomic.Bool
|
||||
for _, rwctx := range rwctxs {
|
||||
wg.Go(func() {
|
||||
go func(rwctx *remoteWriteCtx) {
|
||||
defer wg.Done()
|
||||
if !rwctx.TryPushTimeSeries(tssBlock, forceDropSamplesOnFailure) {
|
||||
anyPushFailed.Store(true)
|
||||
}
|
||||
})
|
||||
}(rwctx)
|
||||
}
|
||||
wg.Wait()
|
||||
return !anyPushFailed.Load()
|
||||
@@ -641,11 +633,13 @@ func tryShardingTimeSeriesAmongRemoteStorages(rwctxs []*remoteWriteCtx, tssBlock
|
||||
if len(shard) == 0 {
|
||||
continue
|
||||
}
|
||||
wg.Go(func() {
|
||||
if !rwctx.TryPushTimeSeries(shard, forceDropSamplesOnFailure) {
|
||||
wg.Add(1)
|
||||
go func(rwctx *remoteWriteCtx, tss []prompb.TimeSeries) {
|
||||
defer wg.Done()
|
||||
if !rwctx.TryPushTimeSeries(tss, forceDropSamplesOnFailure) {
|
||||
anyPushFailed.Store(true)
|
||||
}
|
||||
})
|
||||
}(rwctx, shard)
|
||||
}
|
||||
wg.Wait()
|
||||
return !anyPushFailed.Load()
|
||||
@@ -709,7 +703,7 @@ func shardAmountRemoteWriteCtx(tssBlock []prompb.TimeSeries, shards [][]prompb.T
|
||||
}
|
||||
tmpLabels.Labels = hashLabels
|
||||
}
|
||||
h := getLabelsHashForShard(hashLabels)
|
||||
h := getLabelsHash(hashLabels)
|
||||
|
||||
// Get the rwctxIdx through consistent hashing and then map it to the index in shards.
|
||||
// The rwctxIdx is not always equal to the shardIdx, for example, when some rwctx are not available.
|
||||
@@ -800,28 +794,11 @@ var (
|
||||
dailySeriesLimitRowsDropped = metrics.NewCounter(`vmagent_daily_series_limit_rows_dropped_total`)
|
||||
)
|
||||
|
||||
// getLabelsHashForShard is a separate function from getLabelsHash because
|
||||
// it omits the '=' separator between label name and value for backward compatibility.
|
||||
// Changing it would re-shard all series across remoteWrite targets.
|
||||
func getLabelsHashForShard(labels []prompb.Label) uint64 {
|
||||
bb := labelsHashBufPool.Get()
|
||||
b := bb.B[:0]
|
||||
for _, label := range labels {
|
||||
b = append(b, label.Name...)
|
||||
b = append(b, label.Value...)
|
||||
}
|
||||
h := xxhash.Sum64(b)
|
||||
bb.B = b
|
||||
labelsHashBufPool.Put(bb)
|
||||
return h
|
||||
}
|
||||
|
||||
func getLabelsHash(labels []prompb.Label) uint64 {
|
||||
bb := labelsHashBufPool.Get()
|
||||
b := bb.B[:0]
|
||||
for _, label := range labels {
|
||||
b = append(b, label.Name...)
|
||||
b = append(b, '=')
|
||||
b = append(b, label.Value...)
|
||||
}
|
||||
h := xxhash.Sum64(b)
|
||||
@@ -860,11 +837,6 @@ type remoteWriteCtx struct {
|
||||
streamAggrKeepInput bool
|
||||
streamAggrDropInput bool
|
||||
|
||||
// enableMetadata indicates whether metadata should be sent to this remote storage.
|
||||
// It is determined by -remoteWrite.enableMetadata per-URL flag if set,
|
||||
// otherwise by the global -enableMetadata flag.
|
||||
enableMetadata bool
|
||||
|
||||
pss []*pendingSeries
|
||||
pssNextIdx atomic.Uint64
|
||||
|
||||
@@ -876,19 +848,7 @@ type remoteWriteCtx struct {
|
||||
rowsDroppedOnPushFailure *metrics.Counter
|
||||
}
|
||||
|
||||
// isMetadataEnabledForURL returns true if metadata should be sent to the remote storage at argIdx.
|
||||
// It checks the per-URL -remoteWrite.disableMetadata flag first.
|
||||
// If not set, it falls back to the global -enableMetadata flag.
|
||||
func isMetadataEnabledForURL(argIdx int) bool {
|
||||
if disableMetadataPerURL.GetOptionalArg(argIdx) {
|
||||
// Metadata is explicitly disabled for this URL
|
||||
return false
|
||||
}
|
||||
// Use global -enableMetadata value
|
||||
return prommetadata.IsEnabled()
|
||||
}
|
||||
|
||||
func newRemoteWriteCtx(argIdx int, remoteWriteURL *url.URL, sanitizedURL string) *remoteWriteCtx {
|
||||
func newRemoteWriteCtx(argIdx int, remoteWriteURL *url.URL, maxInmemoryBlocks int, sanitizedURL string) *remoteWriteCtx {
|
||||
// strip query params, otherwise changing params resets pq
|
||||
pqURL := *remoteWriteURL
|
||||
pqURL.RawQuery = ""
|
||||
@@ -903,23 +863,6 @@ func newRemoteWriteCtx(argIdx int, remoteWriteURL *url.URL, sanitizedURL string)
|
||||
}
|
||||
|
||||
isPQDisabled := disableOnDiskQueue.GetOptionalArg(argIdx)
|
||||
queuesSize := queues.GetOptionalArg(argIdx)
|
||||
if queuesSize > maxQueues {
|
||||
queuesSize = maxQueues
|
||||
} else if queuesSize <= 0 {
|
||||
queuesSize = 1
|
||||
}
|
||||
|
||||
maxInmemoryBlocks := memory.Allowed() / len(*remoteWriteURLs) / *maxRowsPerBlock / 100
|
||||
if maxInmemoryBlocks/queuesSize > 100 {
|
||||
// There is no much sense in keeping higher number of blocks in memory,
|
||||
// since this means that the producer outperforms consumer and the queue
|
||||
// will continue growing. It is better storing the queue to file.
|
||||
maxInmemoryBlocks = 100 * queuesSize
|
||||
}
|
||||
if maxInmemoryBlocks < 2 {
|
||||
maxInmemoryBlocks = 2
|
||||
}
|
||||
fq := persistentqueue.MustOpenFastQueue(queuePath, sanitizedURL, maxInmemoryBlocks, maxPendingBytes, isPQDisabled)
|
||||
_ = metrics.GetOrCreateGauge(fmt.Sprintf(`vmagent_remotewrite_pending_data_bytes{path=%q, url=%q}`, queuePath, sanitizedURL), func() float64 {
|
||||
return float64(fq.GetPendingBytes())
|
||||
@@ -937,16 +880,16 @@ func newRemoteWriteCtx(argIdx int, remoteWriteURL *url.URL, sanitizedURL string)
|
||||
var c *client
|
||||
switch remoteWriteURL.Scheme {
|
||||
case "http", "https":
|
||||
c = newHTTPClient(argIdx, remoteWriteURL.String(), sanitizedURL, fq, queuesSize)
|
||||
c = newHTTPClient(argIdx, remoteWriteURL.String(), sanitizedURL, fq, *queues)
|
||||
default:
|
||||
logger.Fatalf("unsupported scheme: %s for remoteWriteURL: %s, want `http`, `https`", remoteWriteURL.Scheme, sanitizedURL)
|
||||
}
|
||||
c.init(argIdx, queuesSize, sanitizedURL)
|
||||
c.init(argIdx, *queues, sanitizedURL)
|
||||
|
||||
// Initialize pss
|
||||
sf := significantFigures.GetOptionalArg(argIdx)
|
||||
rd := roundDigits.GetOptionalArg(argIdx)
|
||||
pssLen := queuesSize
|
||||
pssLen := *queues
|
||||
if n := cgroup.AvailableCPUs(); pssLen > n {
|
||||
// There is no sense in running more than availableCPUs concurrent pendingSeries,
|
||||
// since every pendingSeries can saturate up to a single CPU.
|
||||
@@ -958,11 +901,10 @@ func newRemoteWriteCtx(argIdx int, remoteWriteURL *url.URL, sanitizedURL string)
|
||||
}
|
||||
|
||||
rwctx := &remoteWriteCtx{
|
||||
idx: argIdx,
|
||||
fq: fq,
|
||||
c: c,
|
||||
pss: pss,
|
||||
enableMetadata: isMetadataEnabledForURL(argIdx),
|
||||
idx: argIdx,
|
||||
fq: fq,
|
||||
c: c,
|
||||
pss: pss,
|
||||
|
||||
rowsPushedAfterRelabel: metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_rows_pushed_after_relabel_total{path=%q,url=%q}`, queuePath, sanitizedURL)),
|
||||
rowsDroppedByRelabel: metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_relabel_metrics_dropped_total{path=%q,url=%q}`, queuePath, sanitizedURL)),
|
||||
@@ -1147,7 +1089,7 @@ func (rwctx *remoteWriteCtx) tryPushTimeSeriesInternal(tss []prompb.TimeSeries)
|
||||
}()
|
||||
|
||||
if len(labelsGlobal) > 0 {
|
||||
// Make a copy of tss before adding extra labels to prevent
|
||||
// Make a copy of tss before adding extra labels in order to prevent
|
||||
// from affecting time series for other remoteWrite.url configs.
|
||||
rctx = getRelabelCtx()
|
||||
v = tssPool.Get().(*[]prompb.TimeSeries)
|
||||
@@ -1183,21 +1125,3 @@ func newMapFromStrings(a []string) map[string]struct{} {
|
||||
}
|
||||
return m
|
||||
}
|
||||
|
||||
func getMaxHourlySeries() int {
|
||||
limit := *maxHourlySeries
|
||||
if limit == -1 || limit > math.MaxInt32 {
|
||||
return math.MaxInt32
|
||||
}
|
||||
|
||||
return int(limit)
|
||||
}
|
||||
|
||||
func getMaxDailySeries() int {
|
||||
limit := *maxDailySeries
|
||||
if limit == -1 || limit > math.MaxInt32 {
|
||||
return math.MaxInt32
|
||||
}
|
||||
|
||||
return int(limit)
|
||||
}
|
||||
|
||||
@@ -25,15 +25,15 @@ func TestGetLabelsHash_Distribution(t *testing.T) {
|
||||
t.Helper()
|
||||
|
||||
// Distribute itemsCount hashes returned by getLabelsHash() across bucketsCount buckets.
|
||||
itemsCount := 10_000 * bucketsCount
|
||||
itemsCount := 1_000 * bucketsCount
|
||||
m := make([]int, bucketsCount)
|
||||
var labels []prompb.Label
|
||||
for i := range itemsCount {
|
||||
for i := 0; i < itemsCount; i++ {
|
||||
labels = append(labels[:0], prompb.Label{
|
||||
Name: "__name__",
|
||||
Value: fmt.Sprintf("some_name_%d", i),
|
||||
})
|
||||
for j := range 10 {
|
||||
for j := 0; j < 10; j++ {
|
||||
labels = append(labels, prompb.Label{
|
||||
Name: fmt.Sprintf("label_%d", j),
|
||||
Value: fmt.Sprintf("value_%d_%d", i, j),
|
||||
@@ -44,12 +44,10 @@ func TestGetLabelsHash_Distribution(t *testing.T) {
|
||||
}
|
||||
|
||||
// Verify that the distribution is even
|
||||
expectedItemsPerBucket := float64(itemsCount / bucketsCount)
|
||||
allowedDeviation := math.Round(float64(expectedItemsPerBucket) * 0.04)
|
||||
expectedItemsPerBucket := itemsCount / bucketsCount
|
||||
for _, n := range m {
|
||||
if math.Abs(expectedItemsPerBucket-float64(n)) > allowedDeviation {
|
||||
t.Fatalf("unexpected items in the bucket for %d buckets; got %d; want in range [%.0f, %.0f]",
|
||||
bucketsCount, n, expectedItemsPerBucket-allowedDeviation, expectedItemsPerBucket+allowedDeviation)
|
||||
if math.Abs(1-float64(n)/float64(expectedItemsPerBucket)) > 0.04 {
|
||||
t.Fatalf("unexpected items in the bucket for %d buckets; got %d; want around %d", bucketsCount, n, expectedItemsPerBucket)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -250,7 +248,7 @@ func TestShardAmountRemoteWriteCtx(t *testing.T) {
|
||||
seriesCount := 100000
|
||||
// build 1000000 series
|
||||
tssBlock := make([]prompb.TimeSeries, 0, seriesCount)
|
||||
for i := range seriesCount {
|
||||
for i := 0; i < seriesCount; i++ {
|
||||
tssBlock = append(tssBlock, prompb.TimeSeries{
|
||||
Labels: []prompb.Label{
|
||||
{
|
||||
@@ -271,7 +269,7 @@ func TestShardAmountRemoteWriteCtx(t *testing.T) {
|
||||
// build active time series set
|
||||
nodes := make([]string, 0, remoteWriteCount)
|
||||
activeTimeSeriesByNodes := make([]map[string]struct{}, remoteWriteCount)
|
||||
for i := range remoteWriteCount {
|
||||
for i := 0; i < remoteWriteCount; i++ {
|
||||
nodes = append(nodes, fmt.Sprintf("node%d", i))
|
||||
activeTimeSeriesByNodes[i] = make(map[string]struct{})
|
||||
}
|
||||
|
||||
@@ -1,80 +0,0 @@
|
||||
package zabbixconnector
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/protoparserutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/zabbixconnector"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/zabbixconnector/stream"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
var (
|
||||
rowsInserted = metrics.NewCounter(`vmagent_rows_inserted_total{type="zabbixconnector"}`)
|
||||
rowsTenantInserted = tenantmetrics.NewCounterMap(`vmagent_tenant_inserted_rows_total{type="zabbixconnector"}`)
|
||||
rowsPerInsert = metrics.NewHistogram(`vmagent_rows_per_insert{type="zabbixconnector"}`)
|
||||
)
|
||||
|
||||
// InsertHandlerForHTTP processes remote write for ZabbixConnector POST /zabbixconnector/v1/history request.
|
||||
func InsertHandlerForHTTP(at *auth.Token, req *http.Request) error {
|
||||
extraLabels, err := protoparserutil.GetExtraLabels(req)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
encoding := req.Header.Get("Content-Encoding")
|
||||
return stream.Parse(req.Body, encoding, func(rows []zabbixconnector.Row) error {
|
||||
return insertRows(at, rows, extraLabels)
|
||||
})
|
||||
}
|
||||
|
||||
func insertRows(at *auth.Token, rows []zabbixconnector.Row, extraLabels []prompb.Label) error {
|
||||
ctx := common.GetPushCtx()
|
||||
defer common.PutPushCtx(ctx)
|
||||
|
||||
rowsTotal := len(rows)
|
||||
tssDst := ctx.WriteRequest.Timeseries[:0]
|
||||
labels := ctx.Labels[:0]
|
||||
samples := ctx.Samples[:0]
|
||||
for i := range rows {
|
||||
r := &rows[i]
|
||||
|
||||
labelsLen := len(labels)
|
||||
for j := range r.Tags {
|
||||
tag := &r.Tags[j]
|
||||
labels = append(labels, prompb.Label{
|
||||
Name: bytesutil.ToUnsafeString(tag.Key),
|
||||
Value: bytesutil.ToUnsafeString(tag.Value),
|
||||
})
|
||||
}
|
||||
labels = append(labels, extraLabels...)
|
||||
|
||||
samplesLen := len(samples)
|
||||
samples = append(samples, prompb.Sample{
|
||||
Value: r.Value,
|
||||
Timestamp: r.Timestamp,
|
||||
})
|
||||
|
||||
tssDst = append(tssDst, prompb.TimeSeries{
|
||||
Labels: labels[labelsLen:],
|
||||
Samples: samples[samplesLen:],
|
||||
})
|
||||
}
|
||||
ctx.WriteRequest.Timeseries = tssDst
|
||||
ctx.Labels = labels
|
||||
ctx.Samples = samples
|
||||
if !remotewrite.TryPush(at, &ctx.WriteRequest) {
|
||||
return remotewrite.ErrQueueFullHTTPRetry
|
||||
}
|
||||
rowsInserted.Add(rowsTotal)
|
||||
if at != nil {
|
||||
rowsTenantInserted.Get(at).Add(rowsTotal)
|
||||
}
|
||||
rowsPerInsert.Update(float64(rowsTotal))
|
||||
return nil
|
||||
}
|
||||
@@ -41,7 +41,7 @@ func TestParseInputValue_Success(t *testing.T) {
|
||||
if len(outputExpected) != len(output) {
|
||||
t.Fatalf("unexpected output length; got %d; want %d", len(outputExpected), len(output))
|
||||
}
|
||||
for i := range outputExpected {
|
||||
for i := 0; i < len(outputExpected); i++ {
|
||||
if outputExpected[i].Omitted != output[i].Omitted {
|
||||
t.Fatalf("unexpected Omitted field in the output\ngot\n%v\nwant\n%v", output, outputExpected)
|
||||
}
|
||||
|
||||
@@ -4,7 +4,6 @@ import (
|
||||
"context"
|
||||
"flag"
|
||||
"fmt"
|
||||
"maps"
|
||||
"net"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
@@ -13,7 +12,6 @@ import (
|
||||
"os/signal"
|
||||
"path/filepath"
|
||||
"reflect"
|
||||
"slices"
|
||||
"sort"
|
||||
"strings"
|
||||
"syscall"
|
||||
@@ -61,7 +59,7 @@ func UnitTest(files []string, disableGroupLabel bool, externalLabels []string, e
|
||||
}
|
||||
eu, err := url.Parse(externalURL)
|
||||
if err != nil {
|
||||
logger.Fatalf("failed to parse external URL: %s", err)
|
||||
logger.Fatalf("failed to parse external URL: %w", err)
|
||||
}
|
||||
if err := templates.Load([]string{}, *eu); err != nil {
|
||||
logger.Fatalf("failed to load template: %v", err)
|
||||
@@ -350,7 +348,9 @@ func (tg *testGroup) test(evalInterval time.Duration, groupOrderMap map[string]i
|
||||
for k := range alertEvalTimesMap {
|
||||
alertEvalTimes = append(alertEvalTimes, k)
|
||||
}
|
||||
slices.Sort(alertEvalTimes)
|
||||
sort.Slice(alertEvalTimes, func(i, j int) bool {
|
||||
return alertEvalTimes[i] < alertEvalTimes[j]
|
||||
})
|
||||
|
||||
// sort group eval order according to the given "group_eval_order".
|
||||
sort.Slice(testGroups, func(i, j int) bool {
|
||||
@@ -361,8 +361,12 @@ func (tg *testGroup) test(evalInterval time.Duration, groupOrderMap map[string]i
|
||||
var groups []*rule.Group
|
||||
for _, group := range testGroups {
|
||||
mergedExternalLabels := make(map[string]string)
|
||||
maps.Copy(mergedExternalLabels, tg.ExternalLabels)
|
||||
maps.Copy(mergedExternalLabels, externalLabels)
|
||||
for k, v := range tg.ExternalLabels {
|
||||
mergedExternalLabels[k] = v
|
||||
}
|
||||
for k, v := range externalLabels {
|
||||
mergedExternalLabels[k] = v
|
||||
}
|
||||
ng := rule.NewGroup(group, q, time.Minute, mergedExternalLabels)
|
||||
ng.Init()
|
||||
groups = append(groups, ng)
|
||||
|
||||
@@ -81,9 +81,12 @@ func (g *Group) Validate(validateTplFn ValidateTplFn, validateExpressions bool)
|
||||
if g.Interval.Duration() < 0 {
|
||||
return fmt.Errorf("interval shouldn't be lower than 0")
|
||||
}
|
||||
// if `eval_offset` is set, the group interval must be specified explicitly(instead of inherited from global evaluationInterval flag) and must bigger than offset.
|
||||
if g.EvalOffset.Duration().Abs() > g.Interval.Duration() {
|
||||
return fmt.Errorf("the abs value of eval_offset should be smaller than interval; now eval_offset: %v, interval: %v", g.EvalOffset.Duration(), g.Interval.Duration())
|
||||
if g.EvalOffset.Duration() < 0 {
|
||||
return fmt.Errorf("eval_offset shouldn't be lower than 0")
|
||||
}
|
||||
// if `eval_offset` is set, interval won't use global evaluationInterval flag and must bigger than offset.
|
||||
if g.EvalOffset.Duration() > g.Interval.Duration() {
|
||||
return fmt.Errorf("eval_offset should be smaller than interval; now eval_offset: %v, interval: %v", g.EvalOffset.Duration(), g.Interval.Duration())
|
||||
}
|
||||
if g.EvalOffset != nil && g.EvalDelay != nil {
|
||||
return fmt.Errorf("eval_offset cannot be used with eval_delay")
|
||||
@@ -222,9 +225,6 @@ func (r *Rule) Validate() error {
|
||||
if r.Expr == "" {
|
||||
return fmt.Errorf("expression can't be empty")
|
||||
}
|
||||
if _, ok := r.Labels["__name__"]; ok {
|
||||
return fmt.Errorf("invalid rule label __name__")
|
||||
}
|
||||
return checkOverflow(r.XXX, "rule")
|
||||
}
|
||||
|
||||
|
||||
@@ -116,7 +116,7 @@ func TestParse_Failure(t *testing.T) {
|
||||
|
||||
f([]string{"testdata/rules/rules_interval_bad.rules"}, "eval_offset should be smaller than interval")
|
||||
f([]string{"testdata/rules/rules0-bad.rules"}, "unexpected token")
|
||||
f([]string{"testdata/dir/rules0-bad.rules"}, "invalid annotations")
|
||||
f([]string{"testdata/dir/rules0-bad.rules"}, "error parsing annotation")
|
||||
f([]string{"testdata/dir/rules1-bad.rules"}, "duplicate in file")
|
||||
f([]string{"testdata/dir/rules2-bad.rules"}, "function \"unknown\" not defined")
|
||||
f([]string{"testdata/dir/rules3-bad.rules"}, "either `record` or `alert` must be set")
|
||||
@@ -136,9 +136,6 @@ func TestRuleValidate(t *testing.T) {
|
||||
if err := (&Rule{Alert: "alert"}).Validate(); err == nil {
|
||||
t.Fatalf("expected empty expr error")
|
||||
}
|
||||
if err := (&Rule{Record: "record", Expr: "sum(test)", Labels: map[string]string{"__name__": "test"}}).Validate(); err == nil {
|
||||
t.Fatalf("invalid rule label; got %s", err)
|
||||
}
|
||||
if err := (&Rule{Alert: "alert", Expr: "test>0"}).Validate(); err != nil {
|
||||
t.Fatalf("expected valid rule; got %s", err)
|
||||
}
|
||||
@@ -179,17 +176,11 @@ func TestGroupValidate_Failure(t *testing.T) {
|
||||
}, false, "interval shouldn't be lower than 0")
|
||||
|
||||
f(&Group{
|
||||
Name: "too big eval_offset",
|
||||
Name: "wrong eval_offset",
|
||||
Interval: promutil.NewDuration(time.Minute),
|
||||
EvalOffset: promutil.NewDuration(2 * time.Minute),
|
||||
}, false, "eval_offset should be smaller than interval")
|
||||
|
||||
f(&Group{
|
||||
Name: "too big negative eval_offset",
|
||||
Interval: promutil.NewDuration(time.Minute),
|
||||
EvalOffset: promutil.NewDuration(-2 * time.Minute),
|
||||
}, false, "eval_offset should be smaller than interval")
|
||||
|
||||
limit := -1
|
||||
f(&Group{
|
||||
Name: "wrong limit",
|
||||
@@ -352,6 +343,7 @@ func TestGroupValidate_Failure(t *testing.T) {
|
||||
},
|
||||
},
|
||||
}, true, "bad prometheus expr")
|
||||
|
||||
}
|
||||
|
||||
func TestGroupValidate_Success(t *testing.T) {
|
||||
|
||||
@@ -2,7 +2,6 @@ package config
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"slices"
|
||||
"strings"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaLogs/lib/logstorage"
|
||||
@@ -77,12 +76,13 @@ func (t *Type) ValidateExpr(expr string) error {
|
||||
if err != nil {
|
||||
return fmt.Errorf("bad LogsQL expr: %q, err: %w", expr, err)
|
||||
}
|
||||
labels, err := q.GetStatsLabels()
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot obtain labels from LogsQL expr: %q, err: %w", expr, err)
|
||||
}
|
||||
if slices.Contains(labels, "_time") {
|
||||
return fmt.Errorf("bad LogsQL expr: %q, err: cannot contain time buckets stats pipe `stats by (_time:step)`", expr)
|
||||
fields, _ := q.GetStatsByFields()
|
||||
for i := range fields {
|
||||
// VictoriaLogs inserts `_time` field as a label in result when query with `stats by (_time:step)`,
|
||||
// making the result meaningless and may lead to cardinality issues.
|
||||
if fields[i] == "_time" {
|
||||
return fmt.Errorf("bad LogsQL expr: %q, err: cannot contain time buckets stats pipe `stats by (_time:step)`", expr)
|
||||
}
|
||||
}
|
||||
default:
|
||||
return fmt.Errorf("unknown datasource type=%q", t.Name)
|
||||
|
||||
@@ -5,7 +5,6 @@ import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"maps"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"strings"
|
||||
@@ -92,7 +91,9 @@ func (c *Client) Clone() *Client {
|
||||
ns.extraHeaders = make([]keyValue, len(c.extraHeaders))
|
||||
copy(ns.extraHeaders, c.extraHeaders)
|
||||
}
|
||||
maps.Copy(ns.extraParams, c.extraParams)
|
||||
for k, v := range c.extraParams {
|
||||
ns.extraParams[k] = v
|
||||
}
|
||||
|
||||
return ns
|
||||
}
|
||||
|
||||
@@ -34,7 +34,7 @@ type promResponse struct {
|
||||
// Stats supported by VictoriaMetrics since v1.90
|
||||
Stats struct {
|
||||
SeriesFetched *string `json:"seriesFetched,omitempty"`
|
||||
} `json:"stats"`
|
||||
} `json:"stats,omitempty"`
|
||||
// IsPartial supported by VictoriaMetrics
|
||||
IsPartial *bool `json:"isPartial,omitempty"`
|
||||
}
|
||||
|
||||
@@ -772,7 +772,7 @@ func TestHeaders(t *testing.T) {
|
||||
|
||||
// basic auth
|
||||
f(func() *Client {
|
||||
cfg, err := vmalertutil.AuthConfig(vmalertutil.WithBasicAuth("foo", "", "bar", ""))
|
||||
cfg, err := vmalertutil.AuthConfig(vmalertutil.WithBasicAuth("foo", "bar", ""))
|
||||
if err != nil {
|
||||
t.Fatalf("Error get auth config: %s", err)
|
||||
}
|
||||
@@ -817,7 +817,7 @@ func TestHeaders(t *testing.T) {
|
||||
|
||||
// custom header overrides basic auth
|
||||
f(func() *Client {
|
||||
cfg, err := vmalertutil.AuthConfig(vmalertutil.WithBasicAuth("foo", "", "bar", ""))
|
||||
cfg, err := vmalertutil.AuthConfig(vmalertutil.WithBasicAuth("foo", "bar", ""))
|
||||
if err != nil {
|
||||
t.Fatalf("Error get auth config: %s", err)
|
||||
}
|
||||
|
||||
@@ -87,7 +87,6 @@ func (m *Metric) DelLabel(key string) {
|
||||
for i, l := range m.Labels {
|
||||
if l.Name == key {
|
||||
m.Labels = append(m.Labels[:i], m.Labels[i+1:]...)
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -135,7 +134,7 @@ func (ls Labels) String() string {
|
||||
func LabelCompare(a, b Labels) int {
|
||||
l := min(len(b), len(a))
|
||||
|
||||
for i := range l {
|
||||
for i := 0; i < l; i++ {
|
||||
if a[i].Name != b[i].Name {
|
||||
if a[i].Name < b[i].Name {
|
||||
return -1
|
||||
|
||||
@@ -27,7 +27,6 @@ var (
|
||||
"Multiple headers must be delimited by '^^': -datasource.headers='header1:value1^^header2:value2'")
|
||||
|
||||
basicAuthUsername = flag.String("datasource.basicAuth.username", "", "Optional basic auth username for -datasource.url")
|
||||
basicAuthUsernameFile = flag.String("datasource.basicAuth.usernameFile", "", "Optional path to basic auth username to use for -datasource.url")
|
||||
basicAuthPassword = flag.String("datasource.basicAuth.password", "", "Optional basic auth password for -datasource.url")
|
||||
basicAuthPasswordFile = flag.String("datasource.basicAuth.passwordFile", "", "Optional path to basic auth password to use for -datasource.url")
|
||||
|
||||
@@ -106,7 +105,7 @@ func Init(extraParams url.Values) (QuerierBuilder, error) {
|
||||
return nil, fmt.Errorf("cannot parse JSON for -datasource.oauth2.endpointParams=%s: %w", *oauth2EndpointParams, err)
|
||||
}
|
||||
authCfg, err := vmalertutil.AuthConfig(
|
||||
vmalertutil.WithBasicAuth(*basicAuthUsername, *basicAuthUsernameFile, *basicAuthPassword, *basicAuthPasswordFile),
|
||||
vmalertutil.WithBasicAuth(*basicAuthUsername, *basicAuthPassword, *basicAuthPasswordFile),
|
||||
vmalertutil.WithBearer(*bearerToken, *bearerTokenFile),
|
||||
vmalertutil.WithOAuth(*oauth2ClientID, *oauth2ClientSecret, *oauth2ClientSecretFile, *oauth2TokenURL, *oauth2Scopes, endpointParams),
|
||||
vmalertutil.WithHeaders(*headers))
|
||||
|
||||
@@ -13,7 +13,7 @@ func BenchmarkPromInstantUnmarshal(b *testing.B) {
|
||||
|
||||
// BenchmarkParsePrometheusResponse/Instant_std+fastjson-10 1760 668959 ns/op 280147 B/op 5781 allocs/op
|
||||
b.Run("Instant std+fastjson", func(b *testing.B) {
|
||||
for range b.N {
|
||||
for i := 0; i < b.N; i++ {
|
||||
var pi promInstant
|
||||
err = pi.Unmarshal(data)
|
||||
if err != nil {
|
||||
|
||||
@@ -56,7 +56,7 @@ absolute path to all .tpl files in root.
|
||||
-rule.templates="dir/**/*.tpl". Includes all the .tpl files in "dir" subfolders recursively.
|
||||
`)
|
||||
|
||||
configCheckInterval = flag.Duration("configCheckInterval", 0, "Interval for checking for changes in '-rule', '-rule.templates' and '-notifier.config' files. "+
|
||||
configCheckInterval = flag.Duration("configCheckInterval", 0, "Interval for checking for changes in '-rule' or '-notifier.config' files. "+
|
||||
"By default, the checking is disabled. Send SIGHUP signal in order to force config check for changes.")
|
||||
|
||||
httpListenAddrs = flagutil.NewArrayString("httpListenAddr", "Address to listen for incoming http requests. See also -tls and -httpListenAddr.useProxyProtocol")
|
||||
@@ -81,7 +81,9 @@ absolute path to all .tpl files in root.
|
||||
dryRun = flag.Bool("dryRun", false, "Whether to check only config files without running vmalert. The rules file are validated. The -rule flag must be specified.")
|
||||
)
|
||||
|
||||
var extURL *url.URL
|
||||
var (
|
||||
extURL *url.URL
|
||||
)
|
||||
|
||||
func main() {
|
||||
// Write flags and help message to stdout, since it is easier to grep or pipe.
|
||||
@@ -159,7 +161,7 @@ func main() {
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
manager, err := newManager(ctx)
|
||||
if err != nil {
|
||||
logger.Fatalf("failed to create manager: %s", err)
|
||||
logger.Fatalf("failed to init: %s", err)
|
||||
}
|
||||
logger.Infof("reading rules configuration file from %q", strings.Join(*rulePath, ";"))
|
||||
groupsCfg, err := config.Parse(*rulePath, validateTplFn, *validateExpressions)
|
||||
|
||||
@@ -3,7 +3,6 @@ package main
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"strconv"
|
||||
"sync"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config"
|
||||
@@ -46,15 +45,13 @@ func (m *manager) ruleAPI(gID, rID uint64) (rule.ApiRule, error) {
|
||||
m.groupsMu.RLock()
|
||||
defer m.groupsMu.RUnlock()
|
||||
|
||||
group, ok := m.groups[gID]
|
||||
g, ok := m.groups[gID]
|
||||
if !ok {
|
||||
return rule.ApiRule{}, fmt.Errorf("can't find group with id %d", gID)
|
||||
}
|
||||
g := group.ToAPI()
|
||||
ruleID := strconv.FormatUint(rID, 10)
|
||||
for _, r := range g.Rules {
|
||||
if r.ID == ruleID {
|
||||
return r, nil
|
||||
if r.ID() == rID {
|
||||
return r.ToAPI(), nil
|
||||
}
|
||||
}
|
||||
return rule.ApiRule{}, fmt.Errorf("can't find rule with id %d in group %q", rID, g.Name)
|
||||
@@ -65,20 +62,17 @@ func (m *manager) alertAPI(gID, aID uint64) (*rule.ApiAlert, error) {
|
||||
m.groupsMu.RLock()
|
||||
defer m.groupsMu.RUnlock()
|
||||
|
||||
group, ok := m.groups[gID]
|
||||
g, ok := m.groups[gID]
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("can't find group with id %d", gID)
|
||||
}
|
||||
g := group.ToAPI()
|
||||
for _, r := range g.Rules {
|
||||
if r.Type != rule.TypeAlerting {
|
||||
ar, ok := r.(*rule.AlertingRule)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
alertID := strconv.FormatUint(aID, 10)
|
||||
for _, a := range r.Alerts {
|
||||
if a.ID == alertID {
|
||||
return a, nil
|
||||
}
|
||||
if apiAlert := ar.AlertToAPI(aID); apiAlert != nil {
|
||||
return apiAlert, nil
|
||||
}
|
||||
}
|
||||
return nil, fmt.Errorf("can't find alert with id %d in group %q", aID, g.Name)
|
||||
@@ -98,7 +92,7 @@ func (m *manager) close() {
|
||||
m.wg.Wait()
|
||||
}
|
||||
|
||||
func (m *manager) startGroup(ctx context.Context, g *rule.Group, restore bool) {
|
||||
func (m *manager) startGroup(ctx context.Context, g *rule.Group, restore bool) error {
|
||||
id := g.GetID()
|
||||
g.Init()
|
||||
m.wg.Go(func() {
|
||||
@@ -110,6 +104,7 @@ func (m *manager) startGroup(ctx context.Context, g *rule.Group, restore bool) {
|
||||
})
|
||||
|
||||
m.groups[id] = g
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *manager) update(ctx context.Context, groupsCfg []config.Group, restore bool) error {
|
||||
@@ -118,7 +113,7 @@ func (m *manager) update(ctx context.Context, groupsCfg []config.Group, restore
|
||||
for _, cfg := range groupsCfg {
|
||||
for _, r := range cfg.Rules {
|
||||
if rrPresent && arPresent {
|
||||
break
|
||||
continue
|
||||
}
|
||||
if r.Record != "" {
|
||||
rrPresent = true
|
||||
@@ -161,7 +156,10 @@ func (m *manager) update(ctx context.Context, groupsCfg []config.Group, restore
|
||||
}
|
||||
}
|
||||
for _, ng := range groupsRegistry {
|
||||
m.startGroup(ctx, ng, restore)
|
||||
if err := m.startGroup(ctx, ng, restore); err != nil {
|
||||
m.groupsMu.Unlock()
|
||||
return err
|
||||
}
|
||||
}
|
||||
m.groupsMu.Unlock()
|
||||
|
||||
|
||||
@@ -65,11 +65,13 @@ func TestManagerUpdateConcurrent(t *testing.T) {
|
||||
|
||||
const workers = 500
|
||||
const iterations = 10
|
||||
var wg sync.WaitGroup
|
||||
for n := range workers {
|
||||
wg.Go(func() {
|
||||
wg := sync.WaitGroup{}
|
||||
wg.Add(workers)
|
||||
for i := 0; i < workers; i++ {
|
||||
go func(n int) {
|
||||
defer wg.Done()
|
||||
r := rand.New(rand.NewSource(int64(n)))
|
||||
for range iterations {
|
||||
for i := 0; i < iterations; i++ {
|
||||
rnd := r.Intn(len(paths))
|
||||
cfg, err := config.Parse([]string{paths[rnd]}, notifier.ValidateTemplates, true)
|
||||
if err != nil { // update can fail and this is expected
|
||||
@@ -77,7 +79,7 @@ func TestManagerUpdateConcurrent(t *testing.T) {
|
||||
}
|
||||
_ = m.update(context.Background(), cfg, false)
|
||||
}
|
||||
})
|
||||
}(i)
|
||||
}
|
||||
wg.Wait()
|
||||
}
|
||||
@@ -259,7 +261,7 @@ func compareGroups(t *testing.T, a, b *rule.Group) {
|
||||
for i, r := range a.Rules {
|
||||
got, want := r, b.Rules[i]
|
||||
if a.CreateID() != b.CreateID() {
|
||||
t.Fatalf("expected to have rule %d; got %d", want.ID(), got.ID())
|
||||
t.Fatalf("expected to have rule %q; got %q", want.ID(), got.ID())
|
||||
}
|
||||
if err := rule.CompareRules(t, want, got); err != nil {
|
||||
t.Fatalf("comparison error: %s", err)
|
||||
|
||||
@@ -80,15 +80,14 @@ func (as AlertState) String() string {
|
||||
|
||||
// AlertTplData is used to execute templating
|
||||
type AlertTplData struct {
|
||||
Type string
|
||||
Labels map[string]string
|
||||
Value float64
|
||||
Expr string
|
||||
AlertID uint64
|
||||
GroupID uint64
|
||||
ActiveAt time.Time
|
||||
For time.Duration
|
||||
IsPartial bool
|
||||
Type string
|
||||
Labels map[string]string
|
||||
Value float64
|
||||
Expr string
|
||||
AlertID uint64
|
||||
GroupID uint64
|
||||
ActiveAt time.Time
|
||||
For time.Duration
|
||||
}
|
||||
|
||||
var tplHeaders = []string{
|
||||
@@ -102,7 +101,6 @@ var tplHeaders = []string{
|
||||
"{{ $groupID := .GroupID }}",
|
||||
"{{ $activeAt := .ActiveAt }}",
|
||||
"{{ $for := .For }}",
|
||||
"{{ $isPartial := .IsPartial }}",
|
||||
}
|
||||
|
||||
// ExecTemplate executes the Alert template for given
|
||||
@@ -168,8 +166,8 @@ func templateAnnotations(annotations map[string]string, data AlertTplData, tmpl
|
||||
ctmpl, _ := tmpl.Clone()
|
||||
ctmpl = ctmpl.Option("missingkey=zero")
|
||||
if err := templateAnnotation(&buf, builder.String(), tData, ctmpl, execute); err != nil {
|
||||
r[key] = err.Error()
|
||||
eg.Add(fmt.Errorf("(key: %q, value: %q): %w", key, text, err))
|
||||
r[key] = text
|
||||
eg.Add(fmt.Errorf("key %q, template %q: %w", key, text, err))
|
||||
continue
|
||||
}
|
||||
r[key] = buf.String()
|
||||
@@ -186,13 +184,13 @@ type tplData struct {
|
||||
func templateAnnotation(dst io.Writer, text string, data tplData, tpl *textTpl.Template, execute bool) error {
|
||||
tpl, err := tpl.Parse(text)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error parsing template: %w", err)
|
||||
return fmt.Errorf("error parsing annotation template: %w", err)
|
||||
}
|
||||
if !execute {
|
||||
return nil
|
||||
}
|
||||
if err = tpl.Execute(dst, data); err != nil {
|
||||
return fmt.Errorf("error evaluating template: %w", err)
|
||||
return fmt.Errorf("error evaluating annotation template: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -3,7 +3,6 @@ package notifier
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
@@ -14,6 +13,7 @@ import (
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/vmalertutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httputil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
|
||||
@@ -86,11 +86,6 @@ func (am *AlertManager) Send(ctx context.Context, alerts []Alert, alertLabels []
|
||||
err := am.send(ctx, alerts, alertLabels, headers)
|
||||
am.metrics.alertsSendDuration.UpdateDuration(startTime)
|
||||
if err != nil {
|
||||
// the context can be cancelled on graceful shutdown
|
||||
// or on group update. So no need to handle the error as usual.
|
||||
if errors.Is(err, context.Canceled) {
|
||||
return nil
|
||||
}
|
||||
am.metrics.alertsSendErrors.Add(len(alerts))
|
||||
am.lastError = err.Error()
|
||||
} else {
|
||||
@@ -171,6 +166,11 @@ const alertManagerPath = "/api/v2/alerts"
|
||||
func NewAlertManager(alertManagerURL string, fn AlertURLGenerator, authCfg promauth.HTTPClientConfig,
|
||||
relabelCfg *promrelabel.ParsedConfigs, timeout time.Duration,
|
||||
) (*AlertManager, error) {
|
||||
|
||||
if err := httputil.CheckURL(alertManagerURL); err != nil {
|
||||
return nil, fmt.Errorf("invalid alertmanager URL: %w", err)
|
||||
}
|
||||
|
||||
tls := &promauth.TLSConfig{}
|
||||
if authCfg.TLSConfig != nil {
|
||||
tls = authCfg.TLSConfig
|
||||
@@ -191,7 +191,7 @@ func NewAlertManager(alertManagerURL string, fn AlertURLGenerator, authCfg proma
|
||||
}
|
||||
|
||||
aCfg, err := vmalertutil.AuthConfig(
|
||||
vmalertutil.WithBasicAuth(ba.Username, ba.UsernameFile, ba.Password.String(), ba.PasswordFile),
|
||||
vmalertutil.WithBasicAuth(ba.Username, ba.Password.String(), ba.PasswordFile),
|
||||
vmalertutil.WithBearer(authCfg.BearerToken.String(), authCfg.BearerTokenFile),
|
||||
vmalertutil.WithOAuth(oauth.ClientID, oauth.ClientSecret.String(), oauth.ClientSecretFile, oauth.TokenURL, strings.Join(oauth.Scopes, ";"), oauth.EndpointParams),
|
||||
vmalertutil.WithHeaders(strings.Join(authCfg.Headers, "^^")),
|
||||
|
||||
@@ -105,7 +105,7 @@ func (cw *configWatcher) add(typeK TargetType, interval time.Duration, targetsFn
|
||||
}
|
||||
targetMetadata, errors := getTargetMetadata(targetsFn, cw.cfg)
|
||||
for _, err := range errors {
|
||||
logger.Errorf("failed to init notifier for %q: %s", typeK, err)
|
||||
logger.Errorf("failed to init notifier for %q: %w", typeK, err)
|
||||
}
|
||||
cw.updateTargets(typeK, targetMetadata, cw.cfg, cw.genFn)
|
||||
}
|
||||
@@ -274,7 +274,7 @@ func (cw *configWatcher) updateTargets(key TargetType, targetMts map[string]targ
|
||||
for addr, metadata := range targetMts {
|
||||
am, err := NewAlertManager(addr, genFn, cfg.HTTPClientConfig, metadata.alertRelabelConfigs, cfg.Timeout.Duration())
|
||||
if err != nil {
|
||||
logger.Errorf("failed to init %s notifier with addr %q: %s", key, addr, err)
|
||||
logger.Errorf("failed to init %s notifier with addr %q: %w", key, addr, err)
|
||||
continue
|
||||
}
|
||||
updatedTargets = append(updatedTargets, Target{
|
||||
|
||||
@@ -212,16 +212,18 @@ consul_sd_configs:
|
||||
|
||||
const workers = 500
|
||||
const iterations = 10
|
||||
var wg sync.WaitGroup
|
||||
for n := range workers {
|
||||
wg.Go(func() {
|
||||
wg := sync.WaitGroup{}
|
||||
wg.Add(workers)
|
||||
for i := 0; i < workers; i++ {
|
||||
go func(n int) {
|
||||
defer wg.Done()
|
||||
r := rand.New(rand.NewSource(int64(n)))
|
||||
for range iterations {
|
||||
for i := 0; i < iterations; i++ {
|
||||
rnd := r.Intn(len(paths))
|
||||
_ = cw.reload(paths[rnd]) // update can fail and this is expected
|
||||
_ = cw.notifiers()
|
||||
}
|
||||
})
|
||||
}(i)
|
||||
}
|
||||
wg.Wait()
|
||||
}
|
||||
|
||||
@@ -11,8 +11,8 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/vmalertutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httputil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||
@@ -36,7 +36,6 @@ var (
|
||||
"For example, -remoteWrite.headers='My-Auth:foobar' would send 'My-Auth: foobar' HTTP header with every request to the corresponding -notifier.url. "+
|
||||
"Multiple headers must be delimited by '^^': -notifier.headers='header1:value1^^header2:value2,header3:value3'")
|
||||
basicAuthUsername = flagutil.NewArrayString("notifier.basicAuth.username", "Optional basic auth username for -notifier.url")
|
||||
basicAuthUsernameFile = flagutil.NewArrayString("notifier.basicAuth.usernameFile", "Optional path to basic auth username file for -notifier.url")
|
||||
basicAuthPassword = flagutil.NewArrayString("notifier.basicAuth.password", "Optional basic auth password for -notifier.url")
|
||||
basicAuthPasswordFile = flagutil.NewArrayString("notifier.basicAuth.passwordFile", "Optional path to basic auth password file for -notifier.url")
|
||||
|
||||
@@ -214,7 +213,6 @@ func notifiersFromFlags(gen AlertURLGenerator) ([]Notifier, error) {
|
||||
},
|
||||
BasicAuth: &promauth.BasicAuthConfig{
|
||||
Username: basicAuthUsername.GetOptionalArg(i),
|
||||
UsernameFile: basicAuthUsernameFile.GetOptionalArg(i),
|
||||
Password: promauth.NewSecret(basicAuthPassword.GetOptionalArg(i)),
|
||||
PasswordFile: basicAuthPasswordFile.GetOptionalArg(i),
|
||||
},
|
||||
@@ -231,9 +229,6 @@ func notifiersFromFlags(gen AlertURLGenerator) ([]Notifier, error) {
|
||||
Headers: []string{headers.GetOptionalArg(i)},
|
||||
}
|
||||
|
||||
if err := httputil.CheckURL(addr); err != nil {
|
||||
return nil, fmt.Errorf("invalid notifier.url %q: %w", addr, err)
|
||||
}
|
||||
addr = strings.TrimSuffix(addr, "/")
|
||||
am, err := NewAlertManager(addr+alertManagerPath, gen, authCfg, nil, sendTimeout.GetOptionalArg(i))
|
||||
if err != nil {
|
||||
@@ -271,7 +266,7 @@ func GetTargets() map[TargetType][]Target {
|
||||
if getActiveNotifiers == nil {
|
||||
return nil
|
||||
}
|
||||
targets := make(map[TargetType][]Target)
|
||||
var targets = make(map[TargetType][]Target)
|
||||
// use cached targets from configWatcher instead of getActiveNotifiers for the extra target labels
|
||||
if cw != nil {
|
||||
cw.targetsMu.RLock()
|
||||
@@ -292,7 +287,7 @@ func GetTargets() map[TargetType][]Target {
|
||||
}
|
||||
|
||||
// Send sends alerts to all active notifiers
|
||||
func Send(ctx context.Context, alerts []Alert, notifierHeaders map[string]string) chan error {
|
||||
func Send(ctx context.Context, alerts []Alert, notifierHeaders map[string]string) *vmalertutil.ErrGroup {
|
||||
alertsToSend := make([]Alert, 0, len(alerts))
|
||||
lblss := make([][]prompb.Label, 0, len(alerts))
|
||||
// apply global relabel config first without modifying original alerts in alerts
|
||||
@@ -305,18 +300,17 @@ func Send(ctx context.Context, alerts []Alert, notifierHeaders map[string]string
|
||||
lblss = append(lblss, lbls)
|
||||
}
|
||||
|
||||
errGr := new(vmalertutil.ErrGroup)
|
||||
wg := sync.WaitGroup{}
|
||||
activeNotifiers := getActiveNotifiers()
|
||||
errCh := make(chan error, len(activeNotifiers))
|
||||
defer close(errCh)
|
||||
for i := range activeNotifiers {
|
||||
nt := activeNotifiers[i]
|
||||
wg.Go(func() {
|
||||
if err := nt.Send(ctx, alertsToSend, lblss, notifierHeaders); err != nil {
|
||||
errCh <- fmt.Errorf("failed to send alerts to addr %q: %w", nt.Addr(), err)
|
||||
errGr.Add(fmt.Errorf("failed to send alerts to addr %q: %w", nt.Addr(), err))
|
||||
}
|
||||
})
|
||||
}
|
||||
wg.Wait()
|
||||
return errCh
|
||||
return errGr
|
||||
}
|
||||
|
||||
@@ -55,9 +55,9 @@ func TestInitNegative(t *testing.T) {
|
||||
*blackHole = oldBlackHole
|
||||
}()
|
||||
|
||||
f := func(path string, addr []string, bh bool) {
|
||||
f := func(path, addr string, bh bool) {
|
||||
*configPath = path
|
||||
*addrs = flagutil.ArrayString(addr)
|
||||
*addrs = flagutil.ArrayString{addr}
|
||||
*blackHole = bh
|
||||
if err := Init(nil, ""); err == nil {
|
||||
t.Fatalf("expected to get error; got nil instead")
|
||||
@@ -65,12 +65,9 @@ func TestInitNegative(t *testing.T) {
|
||||
}
|
||||
|
||||
// *configPath, *addrs and *blackhole are mutually exclusive
|
||||
f("/dummy/path", []string{"127.0.0.1"}, false)
|
||||
f("/dummy/path", []string{}, true)
|
||||
f("", []string{"127.0.0.1"}, true)
|
||||
// addr cannot be ""
|
||||
f("", []string{""}, false)
|
||||
f("", []string{"127.0.0.1", ""}, false)
|
||||
f("/dummy/path", "127.0.0.1", false)
|
||||
f("/dummy/path", "", true)
|
||||
f("", "127.0.0.1", true)
|
||||
}
|
||||
|
||||
func TestBlackHole(t *testing.T) {
|
||||
@@ -205,9 +202,7 @@ alert_relabel_configs:
|
||||
},
|
||||
}
|
||||
errG := Send(context.Background(), firingAlerts, nil)
|
||||
for err := range errG {
|
||||
if err != nil {
|
||||
t.Errorf("unexpected error when sending alerts: %s", err)
|
||||
}
|
||||
if errG.Err() != nil {
|
||||
t.Fatalf("unexpected error when sending alerts: %s", err)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -14,7 +14,7 @@ type Notifier interface {
|
||||
Send(ctx context.Context, alerts []Alert, alertLabels [][]prompb.Label, notifierHeaders map[string]string) error
|
||||
// Addr returns address where alerts are sent.
|
||||
Addr() string
|
||||
// LastError returns error, that occurred during last attempt to send data
|
||||
// LastError returns error, that occured during last attempt to send data
|
||||
LastError() string
|
||||
// Close is a destructor for the Notifier
|
||||
Close()
|
||||
|
||||
@@ -28,7 +28,6 @@ var (
|
||||
"Multiple headers must be delimited by '^^': -remoteRead.headers='header1:value1^^header2:value2'")
|
||||
|
||||
basicAuthUsername = flag.String("remoteRead.basicAuth.username", "", "Optional basic auth username for -remoteRead.url")
|
||||
basicAuthUsernameFile = flag.String("remoteRead.basicAuth.usernameFile", "", "Optional path to basic auth username to use for -remoteRead.url")
|
||||
basicAuthPassword = flag.String("remoteRead.basicAuth.password", "", "Optional basic auth password for -remoteRead.url")
|
||||
basicAuthPasswordFile = flag.String("remoteRead.basicAuth.passwordFile", "", "Optional path to basic auth password to use for -remoteRead.url")
|
||||
|
||||
@@ -81,7 +80,7 @@ func Init() (datasource.QuerierBuilder, error) {
|
||||
return nil, fmt.Errorf("cannot parse JSON for -remoteRead.oauth2.endpointParams=%s: %w", *oauth2EndpointParams, err)
|
||||
}
|
||||
authCfg, err := vmalertutil.AuthConfig(
|
||||
vmalertutil.WithBasicAuth(*basicAuthUsername, *basicAuthUsernameFile, *basicAuthPassword, *basicAuthPasswordFile),
|
||||
vmalertutil.WithBasicAuth(*basicAuthUsername, *basicAuthPassword, *basicAuthPasswordFile),
|
||||
vmalertutil.WithBearer(*bearerToken, *bearerTokenFile),
|
||||
vmalertutil.WithOAuth(*oauth2ClientID, *oauth2ClientSecret, *oauth2ClientSecretFile, *oauth2TokenURL, *oauth2Scopes, endpointParams),
|
||||
vmalertutil.WithHeaders(*headers))
|
||||
|
||||
@@ -11,23 +11,16 @@ import (
|
||||
"path"
|
||||
"strings"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/cespare/xxhash/v2"
|
||||
"github.com/golang/snappy"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding/zstd"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httputil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timeutil"
|
||||
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
@@ -60,11 +53,6 @@ type Client struct {
|
||||
|
||||
wg sync.WaitGroup
|
||||
doneCh chan struct{}
|
||||
|
||||
// Whether to encode the write request with VictoriaMetrics remote write protocol.
|
||||
// It is set to true by default, and will be switched to false if the client
|
||||
// receives specific errors indicating that the remote storage doesn't support VictoriaMetrics remote write protocol.
|
||||
isVMRemoteWrite atomic.Bool
|
||||
}
|
||||
|
||||
// Config is config for remote write client.
|
||||
@@ -124,12 +112,9 @@ func NewClient(ctx context.Context, cfg Config) (*Client, error) {
|
||||
doneCh: make(chan struct{}),
|
||||
input: make(chan prompb.TimeSeries, cfg.MaxQueueSize),
|
||||
}
|
||||
c.isVMRemoteWrite.Store(true)
|
||||
|
||||
for i := 0; i < cc; i++ {
|
||||
c.wg.Go(func() {
|
||||
c.run(ctx, i)
|
||||
})
|
||||
c.run(ctx)
|
||||
}
|
||||
return c, nil
|
||||
}
|
||||
@@ -171,7 +156,8 @@ func (c *Client) Close() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *Client) run(ctx context.Context, id int) {
|
||||
func (c *Client) run(ctx context.Context) {
|
||||
ticker := time.NewTicker(c.flushInterval)
|
||||
wr := &prompb.WriteRequest{}
|
||||
shutdown := func() {
|
||||
lastCtx, cancel := context.WithTimeout(context.Background(), defaultWriteTimeout)
|
||||
@@ -188,72 +174,40 @@ func (c *Client) run(ctx context.Context, id int) {
|
||||
cancel()
|
||||
}
|
||||
|
||||
// add jitter to spread remote write flushes over the flush interval to avoid congestion at the remote write destination
|
||||
h := xxhash.Sum64(bytesutil.ToUnsafeBytes(fmt.Sprintf("%d", id)))
|
||||
randJitter := uint64(float64(c.flushInterval) * (float64(h) / (1 << 64)))
|
||||
timer := time.NewTimer(time.Duration(randJitter))
|
||||
addJitter:
|
||||
for {
|
||||
select {
|
||||
case <-c.doneCh:
|
||||
timer.Stop()
|
||||
shutdown()
|
||||
return
|
||||
case <-ctx.Done():
|
||||
timer.Stop()
|
||||
shutdown()
|
||||
return
|
||||
case <-timer.C:
|
||||
break addJitter
|
||||
}
|
||||
}
|
||||
|
||||
ticker := time.NewTicker(c.flushInterval)
|
||||
defer ticker.Stop()
|
||||
for {
|
||||
select {
|
||||
case <-c.doneCh:
|
||||
shutdown()
|
||||
return
|
||||
case <-ctx.Done():
|
||||
shutdown()
|
||||
return
|
||||
case <-ticker.C:
|
||||
c.flush(ctx, wr)
|
||||
// drain the potential stale tick to avoid small or empty flushes after a slow flush.
|
||||
c.wg.Go(func() {
|
||||
defer ticker.Stop()
|
||||
for {
|
||||
select {
|
||||
case <-c.doneCh:
|
||||
shutdown()
|
||||
return
|
||||
case <-ctx.Done():
|
||||
shutdown()
|
||||
return
|
||||
case <-ticker.C:
|
||||
default:
|
||||
}
|
||||
case ts, ok := <-c.input:
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
wr.Timeseries = append(wr.Timeseries, ts)
|
||||
if len(wr.Timeseries) >= c.maxBatchSize {
|
||||
c.flush(ctx, wr)
|
||||
case ts, ok := <-c.input:
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
wr.Timeseries = append(wr.Timeseries, ts)
|
||||
if len(wr.Timeseries) >= c.maxBatchSize {
|
||||
c.flush(ctx, wr)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
var (
|
||||
rwErrors = metrics.NewCounter(`vmalert_remotewrite_errors_total`)
|
||||
rwTotal = metrics.NewCounter(`vmalert_remotewrite_total`)
|
||||
|
||||
// sentRows and sentBytes are historical counters that can now be replaced by flushedRows and flushedBytes histograms. They may be deprecated in the future after the new histograms have been adopted for some time.
|
||||
sentRows = metrics.NewCounter(`vmalert_remotewrite_sent_rows_total`)
|
||||
sentBytes = metrics.NewCounter(`vmalert_remotewrite_sent_bytes_total`)
|
||||
flushedRows = metrics.NewHistogram(`vmalert_remotewrite_sent_rows`)
|
||||
flushedBytes = metrics.NewHistogram(`vmalert_remotewrite_sent_bytes`)
|
||||
droppedRows = metrics.NewCounter(`vmalert_remotewrite_dropped_rows_total`)
|
||||
sendDuration = metrics.NewFloatCounter(`vmalert_remotewrite_send_duration_seconds_total`)
|
||||
bufferFlushDuration = metrics.NewHistogram(`vmalert_remotewrite_flush_duration_seconds`)
|
||||
remoteWriteQueueSize = metrics.NewHistogram(`vmalert_remotewrite_queue_size`)
|
||||
|
||||
_ = metrics.NewGauge(`vmalert_remotewrite_queue_capacity`, func() float64 {
|
||||
return float64(*maxQueueSize)
|
||||
})
|
||||
sentRows = metrics.NewCounter(`vmalert_remotewrite_sent_rows_total`)
|
||||
sentBytes = metrics.NewCounter(`vmalert_remotewrite_sent_bytes_total`)
|
||||
droppedRows = metrics.NewCounter(`vmalert_remotewrite_dropped_rows_total`)
|
||||
sendDuration = metrics.NewFloatCounter(`vmalert_remotewrite_send_duration_seconds_total`)
|
||||
bufferFlushDuration = metrics.NewHistogram(`vmalert_remotewrite_flush_duration_seconds`)
|
||||
|
||||
_ = metrics.NewGauge(`vmalert_remotewrite_concurrency`, func() float64 {
|
||||
return float64(*concurrency)
|
||||
@@ -267,45 +221,34 @@ func GetDroppedRows() int { return int(droppedRows.Get()) }
|
||||
// it to remote-write endpoint. Flush performs limited amount of retries
|
||||
// if request fails.
|
||||
func (c *Client) flush(ctx context.Context, wr *prompb.WriteRequest) {
|
||||
remoteWriteQueueSize.Update(float64(len(c.input)))
|
||||
if len(wr.Timeseries) < 1 {
|
||||
return
|
||||
}
|
||||
defer wr.Reset()
|
||||
defer bufferFlushDuration.UpdateDuration(time.Now())
|
||||
|
||||
bb := writeRequestBufPool.Get()
|
||||
bb.B = wr.MarshalProtobuf(bb.B[:0])
|
||||
zb := compressBufPool.Get()
|
||||
defer compressBufPool.Put(zb)
|
||||
if c.isVMRemoteWrite.Load() {
|
||||
zb.B = zstd.CompressLevel(zb.B[:0], bb.B, 0)
|
||||
} else {
|
||||
zb.B = snappy.Encode(zb.B[:cap(zb.B)], bb.B)
|
||||
}
|
||||
writeRequestBufPool.Put(bb)
|
||||
data := wr.MarshalProtobuf(nil)
|
||||
b := snappy.Encode(nil, data)
|
||||
|
||||
maxRetryInterval := *retryMaxTime
|
||||
bt := timeutil.NewBackoffTimer(*retryMinInterval, maxRetryInterval)
|
||||
retryInterval, maxRetryInterval := *retryMinInterval, *retryMaxTime
|
||||
if retryInterval > maxRetryInterval {
|
||||
retryInterval = maxRetryInterval
|
||||
}
|
||||
timeStart := time.Now()
|
||||
defer func() {
|
||||
sendDuration.Add(time.Since(timeStart).Seconds())
|
||||
}()
|
||||
|
||||
attempts := 0
|
||||
L:
|
||||
for {
|
||||
err := c.send(ctx, zb.B)
|
||||
for attempts := 0; ; attempts++ {
|
||||
err := c.send(ctx, b)
|
||||
if err != nil && (errors.Is(err, io.EOF) || netutil.IsTrivialNetworkError(err)) {
|
||||
// Something in the middle between client and destination might be closing
|
||||
// the connection. So we do a one more attempt in hope request will succeed.
|
||||
err = c.send(ctx, zb.B)
|
||||
err = c.send(ctx, b)
|
||||
}
|
||||
if err == nil {
|
||||
sentRows.Add(len(wr.Timeseries))
|
||||
sentBytes.Add(len(zb.B))
|
||||
flushedRows.Update(float64(len(wr.Timeseries)))
|
||||
flushedBytes.Update(float64(len(zb.B)))
|
||||
sentBytes.Add(len(b))
|
||||
return
|
||||
}
|
||||
|
||||
@@ -331,13 +274,13 @@ L:
|
||||
break
|
||||
}
|
||||
|
||||
if bt.CurrentDelay() > timeLeftForRetries {
|
||||
bt.SetDelay(timeLeftForRetries)
|
||||
if retryInterval > timeLeftForRetries {
|
||||
retryInterval = timeLeftForRetries
|
||||
}
|
||||
// sleeping to prevent remote db hammering
|
||||
bt.Wait(ctx.Done())
|
||||
time.Sleep(retryInterval)
|
||||
retryInterval *= 2
|
||||
|
||||
attempts++
|
||||
}
|
||||
|
||||
rwErrors.Inc()
|
||||
@@ -357,16 +300,12 @@ func (c *Client) send(ctx context.Context, data []byte) error {
|
||||
return fmt.Errorf("failed to create new HTTP request: %w", err)
|
||||
}
|
||||
|
||||
req.Header.Set("User-Agent", "vmalert")
|
||||
// RFC standard compliant headers
|
||||
req.Header.Set("Content-Encoding", "snappy")
|
||||
req.Header.Set("Content-Type", "application/x-protobuf")
|
||||
|
||||
if encoding.IsZstd(data) {
|
||||
req.Header.Set("Content-Encoding", "zstd")
|
||||
req.Header.Set("X-VictoriaMetrics-Remote-Write-Version", "1")
|
||||
} else {
|
||||
req.Header.Set("Content-Encoding", "snappy")
|
||||
req.Header.Set("X-Prometheus-Remote-Write-Version", "0.1.0")
|
||||
}
|
||||
// Prometheus compliant headers
|
||||
req.Header.Set("X-Prometheus-Remote-Write-Version", "0.1.0")
|
||||
|
||||
if c.authCfg != nil {
|
||||
err = c.authCfg.SetHeaders(req, true)
|
||||
@@ -395,29 +334,6 @@ func (c *Client) send(ctx context.Context, data []byte) error {
|
||||
// respond with HTTP 2xx status code when write is successful.
|
||||
return nil
|
||||
case 4:
|
||||
// - Remote Write v1 specification implicitly expects a `400 Bad Request` when the encoding is not supported.
|
||||
// - Remote Write v2 specification explicitly specifies a `415 Unsupported Media Type` for unsupported encodings.
|
||||
// - Real-world implementations of v1 use both 400 and 415 status codes.
|
||||
// See more in research: https://github.com/VictoriaMetrics/VictoriaMetrics/pull/8462#issuecomment-2786918054
|
||||
if resp.StatusCode == http.StatusUnsupportedMediaType || resp.StatusCode == http.StatusBadRequest {
|
||||
if encoding.IsZstd(data) {
|
||||
logger.Infof("received unsupported media type or bad request from remote storage at %q. Re-packing the block to Prometheus remote write and retrying."+
|
||||
"See https://docs.victoriametrics.com/victoriametrics/vmagent/#victoriametrics-remote-write-protocol", req.URL.Redacted())
|
||||
zstdBlockLen := len(data)
|
||||
data, err = repackBlockFromZstdToSnappy(data)
|
||||
if err == nil {
|
||||
logger.Infof("received unsupported media type or bad request from remote storage at %q. Downgrading protocol from VictoriaMetrics to Prometheus remote write for all future requests. "+
|
||||
"See https://docs.victoriametrics.com/victoriametrics/vmagent/#victoriametrics-remote-write-protocol", req.URL.Redacted())
|
||||
c.isVMRemoteWrite.Store(false)
|
||||
return c.send(ctx, data)
|
||||
}
|
||||
|
||||
logger.Warnf("failed to repack zstd block (%d bytes) to snappy: %s; The block will be rejected. "+
|
||||
"Possible cause: ungraceful shutdown leading to persisted queue corruption.",
|
||||
zstdBlockLen, err)
|
||||
}
|
||||
}
|
||||
|
||||
if resp.StatusCode != http.StatusTooManyRequests {
|
||||
// MUST NOT retry write requests on HTTP 4xx responses other than 429
|
||||
return &nonRetriableError{
|
||||
@@ -438,19 +354,3 @@ type nonRetriableError struct {
|
||||
func (e *nonRetriableError) Error() string {
|
||||
return e.err.Error()
|
||||
}
|
||||
|
||||
var (
|
||||
writeRequestBufPool bytesutil.ByteBufferPool
|
||||
compressBufPool bytesutil.ByteBufferPool
|
||||
)
|
||||
|
||||
// repackBlockFromZstdToSnappy repacks the given zstd-compressed block to snappy-compressed block.
|
||||
func repackBlockFromZstdToSnappy(zstdBlock []byte) ([]byte, error) {
|
||||
plainBlock := make([]byte, 0, len(zstdBlock)*2)
|
||||
plainBlock, err := encoding.DecompressZSTD(plainBlock, zstdBlock)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return snappy.Encode(nil, plainBlock), nil
|
||||
}
|
||||
|
||||
@@ -12,7 +12,8 @@ import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding/zstd"
|
||||
"github.com/golang/snappy"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||
)
|
||||
|
||||
@@ -43,7 +44,7 @@ func TestClient_Push(t *testing.T) {
|
||||
|
||||
r := rand.New(rand.NewSource(1))
|
||||
const rowsN = int(1e4)
|
||||
for range rowsN {
|
||||
for i := 0; i < rowsN; i++ {
|
||||
s := prompb.TimeSeries{
|
||||
Samples: []prompb.Sample{{
|
||||
Value: r.Float64(),
|
||||
@@ -101,11 +102,8 @@ func TestClient_run_maxBatchSizeDuringShutdown(t *testing.T) {
|
||||
}
|
||||
|
||||
// push time series to the client.
|
||||
for range pushCnt {
|
||||
if err = rwClient.Push(prompb.TimeSeries{
|
||||
Labels: []prompb.Label{{Name: "__name__", Value: "m"}},
|
||||
Samples: []prompb.Sample{{Value: 1, Timestamp: 1000}},
|
||||
}); err != nil {
|
||||
for i := 0; i < pushCnt; i++ {
|
||||
if err = rwClient.Push(prompb.TimeSeries{}); err != nil {
|
||||
t.Fatalf("cannot time series to the client: %s", err)
|
||||
}
|
||||
}
|
||||
@@ -158,8 +156,8 @@ func (rw *rwServer) handler(w http.ResponseWriter, r *http.Request) {
|
||||
}
|
||||
|
||||
h := r.Header.Get("Content-Encoding")
|
||||
if h != "zstd" {
|
||||
rw.err(w, fmt.Errorf("header read error: Content-Encoding is not zstd (%q)", h))
|
||||
if h != "snappy" {
|
||||
rw.err(w, fmt.Errorf("header read error: Content-Encoding is not snappy (%q)", h))
|
||||
}
|
||||
|
||||
h = r.Header.Get("Content-Type")
|
||||
@@ -167,9 +165,9 @@ func (rw *rwServer) handler(w http.ResponseWriter, r *http.Request) {
|
||||
rw.err(w, fmt.Errorf("header read error: Content-Type is not x-protobuf (%q)", h))
|
||||
}
|
||||
|
||||
h = r.Header.Get("X-VictoriaMetrics-Remote-Write-Version")
|
||||
if h != "1" {
|
||||
rw.err(w, fmt.Errorf("header read error: X-VictoriaMetrics-Remote-Write-Version is not 1 (%q)", h))
|
||||
h = r.Header.Get("X-Prometheus-Remote-Write-Version")
|
||||
if h != "0.1.0" {
|
||||
rw.err(w, fmt.Errorf("header read error: X-Prometheus-Remote-Write-Version is not 0.1.0 (%q)", h))
|
||||
}
|
||||
|
||||
data, err := io.ReadAll(r.Body)
|
||||
@@ -179,7 +177,7 @@ func (rw *rwServer) handler(w http.ResponseWriter, r *http.Request) {
|
||||
}
|
||||
defer func() { _ = r.Body.Close() }()
|
||||
|
||||
b, err := zstd.Decompress(nil, data)
|
||||
b, err := snappy.Decode(nil, data)
|
||||
if err != nil {
|
||||
rw.err(w, fmt.Errorf("decode err: %w", err))
|
||||
return
|
||||
|
||||
@@ -9,7 +9,8 @@ import (
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding/zstd"
|
||||
"github.com/golang/snappy"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httputil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||
@@ -63,17 +64,19 @@ func (c *DebugClient) Close() error {
|
||||
}
|
||||
|
||||
func (c *DebugClient) send(data []byte) error {
|
||||
b := zstd.CompressLevel(nil, data, 0)
|
||||
b := snappy.Encode(nil, data)
|
||||
r := bytes.NewReader(b)
|
||||
req, err := http.NewRequest(http.MethodPost, c.addr, r)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create new HTTP request: %w", err)
|
||||
}
|
||||
|
||||
req.Header.Set("Content-Encoding", "zstd")
|
||||
// RFC standard compliant headers
|
||||
req.Header.Set("Content-Encoding", "snappy")
|
||||
req.Header.Set("Content-Type", "application/x-protobuf")
|
||||
|
||||
req.Header.Set("X-VictoriaMetrics-Remote-Write-Version", "1")
|
||||
// Prometheus compliant headers
|
||||
req.Header.Set("X-Prometheus-Remote-Write-Version", "0.1.0")
|
||||
|
||||
if !*disablePathAppend {
|
||||
req.URL.Path = path.Join(req.URL.Path, "/api/v1/write")
|
||||
|
||||
@@ -22,7 +22,7 @@ func TestDebugClient_Push(t *testing.T) {
|
||||
|
||||
const rowsN = 100
|
||||
var sent int
|
||||
for i := range rowsN {
|
||||
for i := 0; i < rowsN; i++ {
|
||||
s := prompb.TimeSeries{
|
||||
Samples: []prompb.Sample{{
|
||||
Value: float64(i),
|
||||
|
||||
@@ -13,8 +13,8 @@ import (
|
||||
)
|
||||
|
||||
var (
|
||||
addr = flag.String("remoteWrite.url", "", "Optional URL to persist alerts state and recording rules results in form of timeseries. "+
|
||||
"It must support either VictoriaMetrics remote write protocol or Prometheus remote_write protocol. "+
|
||||
addr = flag.String("remoteWrite.url", "", "Optional URL to VictoriaMetrics or vminsert where to persist alerts state "+
|
||||
"and recording rules results in form of timeseries. "+
|
||||
"Supports address in the form of IP address with a port (e.g., http://127.0.0.1:8428) or DNS SRV record. "+
|
||||
"For example, if -remoteWrite.url=http://127.0.0.1:8428 is specified, "+
|
||||
"then the alerts state will be written to http://127.0.0.1:8428/api/v1/write . See also -remoteWrite.disablePathAppend, '-remoteWrite.showURL'.")
|
||||
@@ -26,7 +26,6 @@ var (
|
||||
"Multiple headers must be delimited by '^^': -remoteWrite.headers='header1:value1^^header2:value2'")
|
||||
|
||||
basicAuthUsername = flag.String("remoteWrite.basicAuth.username", "", "Optional basic auth username for -remoteWrite.url")
|
||||
basicAuthUsernameFile = flag.String("remoteWrite.basicAuth.usernameFile", "", "Optional path to basic auth username to use for -remoteWrite.url")
|
||||
basicAuthPassword = flag.String("remoteWrite.basicAuth.password", "", "Optional basic auth password for -remoteWrite.url")
|
||||
basicAuthPasswordFile = flag.String("remoteWrite.basicAuth.passwordFile", "", "Optional path to basic auth password to use for -remoteWrite.url")
|
||||
|
||||
@@ -84,7 +83,7 @@ func Init(ctx context.Context) (*Client, error) {
|
||||
return nil, fmt.Errorf("cannot parse JSON for -remoteWrite.oauth2.endpointParams=%s: %w", *oauth2EndpointParams, err)
|
||||
}
|
||||
authCfg, err := vmalertutil.AuthConfig(
|
||||
vmalertutil.WithBasicAuth(*basicAuthUsername, *basicAuthUsernameFile, *basicAuthPassword, *basicAuthPasswordFile),
|
||||
vmalertutil.WithBasicAuth(*basicAuthUsername, *basicAuthPassword, *basicAuthPasswordFile),
|
||||
vmalertutil.WithBearer(*bearerToken, *bearerTokenFile),
|
||||
vmalertutil.WithOAuth(*oauth2ClientID, *oauth2ClientSecret, *oauth2ClientSecretFile, *oauth2TokenURL, *oauth2Scopes, endpointParams),
|
||||
vmalertutil.WithHeaders(*headers))
|
||||
|
||||
@@ -2,7 +2,6 @@ package rule
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"hash/fnv"
|
||||
"math"
|
||||
@@ -247,6 +246,16 @@ func (ar *AlertingRule) GetAlerts() []*notifier.Alert {
|
||||
return alerts
|
||||
}
|
||||
|
||||
// GetAlert returns alert if id exists
|
||||
func (ar *AlertingRule) GetAlert(id uint64) *notifier.Alert {
|
||||
ar.alertsMu.RLock()
|
||||
defer ar.alertsMu.RUnlock()
|
||||
if ar.alerts == nil {
|
||||
return nil
|
||||
}
|
||||
return ar.alerts[id]
|
||||
}
|
||||
|
||||
func (ar *AlertingRule) logDebugf(at time.Time, a *notifier.Alert, format string, args ...any) {
|
||||
if !ar.Debug {
|
||||
return
|
||||
@@ -312,13 +321,6 @@ type labelSet struct {
|
||||
// On k conflicts in origin set, the original value is preferred and copied
|
||||
// to processed with `exported_%k` key. The copy happens only if passed v isn't equal to origin[k] value.
|
||||
func (ls *labelSet) add(k, v string) {
|
||||
// do not add label with empty value to the result, as it has no meaning:
|
||||
// if the label already exists in the original query result, remove it to preserve compatibility with relabeling, see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10766.
|
||||
// otherwise, ignore the label, see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9984.
|
||||
if v == "" {
|
||||
delete(ls.processed, k)
|
||||
return
|
||||
}
|
||||
ls.processed[k] = v
|
||||
ov, ok := ls.origin[k]
|
||||
if !ok {
|
||||
@@ -348,13 +350,14 @@ func (ar *AlertingRule) toLabels(m datasource.Metric, qFn templates.QueryFn) (*l
|
||||
ls.processed[l.Name] = l.Value
|
||||
}
|
||||
|
||||
// labels only support limited templating variables,
|
||||
// including `labels`, `value` and `expr`, to avoid breaking alert states or causing cardinality issue with results
|
||||
extraLabels, err := notifier.ExecTemplate(qFn, ar.Labels, notifier.AlertTplData{
|
||||
Labels: ls.origin,
|
||||
Value: m.Values[0],
|
||||
Expr: ar.Expr,
|
||||
})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to expand labels: %w", err)
|
||||
}
|
||||
for k, v := range extraLabels {
|
||||
ls.add(k, v)
|
||||
}
|
||||
@@ -365,7 +368,7 @@ func (ar *AlertingRule) toLabels(m datasource.Metric, qFn templates.QueryFn) (*l
|
||||
if !*disableAlertGroupLabel && ar.GroupName != "" {
|
||||
ls.add(alertGroupNameLabel, ar.GroupName)
|
||||
}
|
||||
return ls, err
|
||||
return ls, nil
|
||||
}
|
||||
|
||||
// execRange executes alerting rule on the given time range similarly to exec.
|
||||
@@ -391,7 +394,11 @@ func (ar *AlertingRule) execRange(ctx context.Context, start, end time.Time) ([]
|
||||
return nil, err
|
||||
}
|
||||
alertID := hash(ls.processed)
|
||||
a := ar.newAlert(s, time.Time{}, ls.processed, nil) // initial alert
|
||||
as, err := ar.expandAnnotationTemplates(s, qFn, time.Time{}, ls)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
a := ar.newAlert(s, time.Time{}, ls.processed, as) // initial alert
|
||||
|
||||
prevT := time.Time{}
|
||||
for i := range s.Values {
|
||||
@@ -407,6 +414,8 @@ func (ar *AlertingRule) execRange(ctx context.Context, start, end time.Time) ([]
|
||||
// reset to Pending if there are gaps > EvalInterval between DPs
|
||||
a.State = notifier.StatePending
|
||||
a.ActiveAt = at
|
||||
// re-template the annotations as active timestamp is changed
|
||||
a.Annotations, _ = ar.expandAnnotationTemplates(s, qFn, at, ls)
|
||||
a.Start = time.Time{}
|
||||
} else if at.Sub(a.ActiveAt) >= ar.For && a.State != notifier.StateFiring {
|
||||
a.State = notifier.StateFiring
|
||||
@@ -452,7 +461,7 @@ func (ar *AlertingRule) exec(ctx context.Context, ts time.Time, limit int) ([]pr
|
||||
|
||||
defer func() {
|
||||
ar.state.add(curState)
|
||||
if curState.Err != nil && !errors.Is(curState.Err, context.Canceled) {
|
||||
if curState.Err != nil {
|
||||
ar.metrics.errors.Inc()
|
||||
}
|
||||
}()
|
||||
@@ -461,8 +470,7 @@ func (ar *AlertingRule) exec(ctx context.Context, ts time.Time, limit int) ([]pr
|
||||
return nil, fmt.Errorf("failed to execute query %q: %w", ar.Expr, err)
|
||||
}
|
||||
|
||||
isPartial := isPartialResponse(res)
|
||||
ar.logDebugf(ts, nil, "query returned %d series (elapsed: %s, isPartial: %t)", curState.Samples, curState.Duration, isPartial)
|
||||
ar.logDebugf(ts, nil, "query returned %d series (elapsed: %s, isPartial: %t)", curState.Samples, curState.Duration, isPartialResponse(res))
|
||||
qFn := func(query string) ([]datasource.Metric, error) {
|
||||
res, _, err := ar.q.Query(ctx, query, ts)
|
||||
return res.Data, err
|
||||
@@ -476,9 +484,8 @@ func (ar *AlertingRule) exec(ctx context.Context, ts time.Time, limit int) ([]pr
|
||||
for i, m := range res.Data {
|
||||
ls, err := ar.expandLabelTemplates(m, qFn)
|
||||
if err != nil {
|
||||
// only set error in current state, but do not break alert processing
|
||||
curState.Err = err
|
||||
logger.Errorf("got templating error in rule %s: %q", ar.Name, err)
|
||||
return nil, curState.Err
|
||||
}
|
||||
at := ts
|
||||
alertID := hash(ls.processed)
|
||||
@@ -488,11 +495,10 @@ func (ar *AlertingRule) exec(ctx context.Context, ts time.Time, limit int) ([]pr
|
||||
at = a.ActiveAt
|
||||
}
|
||||
}
|
||||
as, err := ar.expandAnnotationTemplates(m, qFn, at, ls, isPartial)
|
||||
as, err := ar.expandAnnotationTemplates(m, qFn, at, ls)
|
||||
if err != nil {
|
||||
// only set error in current state, but do not break alert processing
|
||||
curState.Err = err
|
||||
logger.Errorf("got templating error in rule %s: %q", ar.Name, err)
|
||||
return nil, curState.Err
|
||||
}
|
||||
expandedLabels[i] = ls
|
||||
expandedAnnotations[i] = as
|
||||
@@ -601,26 +607,25 @@ func (ar *AlertingRule) exec(ctx context.Context, ts time.Time, limit int) ([]pr
|
||||
func (ar *AlertingRule) expandLabelTemplates(m datasource.Metric, qFn templates.QueryFn) (*labelSet, error) {
|
||||
ls, err := ar.toLabels(m, qFn)
|
||||
if err != nil {
|
||||
return ls, fmt.Errorf("failed to expand label templates: %s", err)
|
||||
return nil, fmt.Errorf("failed to expand label templates: %s", err)
|
||||
}
|
||||
return ls, nil
|
||||
}
|
||||
|
||||
func (ar *AlertingRule) expandAnnotationTemplates(m datasource.Metric, qFn templates.QueryFn, activeAt time.Time, ls *labelSet, isPartial bool) (map[string]string, error) {
|
||||
func (ar *AlertingRule) expandAnnotationTemplates(m datasource.Metric, qFn templates.QueryFn, activeAt time.Time, ls *labelSet) (map[string]string, error) {
|
||||
tplData := notifier.AlertTplData{
|
||||
Value: m.Values[0],
|
||||
Type: ar.Type.String(),
|
||||
Labels: ls.origin,
|
||||
Expr: ar.Expr,
|
||||
AlertID: hash(ls.processed),
|
||||
GroupID: ar.GroupID,
|
||||
ActiveAt: activeAt,
|
||||
For: ar.For,
|
||||
IsPartial: isPartial,
|
||||
Value: m.Values[0],
|
||||
Type: ar.Type.String(),
|
||||
Labels: ls.origin,
|
||||
Expr: ar.Expr,
|
||||
AlertID: hash(ls.processed),
|
||||
GroupID: ar.GroupID,
|
||||
ActiveAt: activeAt,
|
||||
For: ar.For,
|
||||
}
|
||||
as, err := notifier.ExecTemplate(qFn, ar.Annotations, tplData)
|
||||
if err != nil {
|
||||
return as, fmt.Errorf("failed to expand annotation templates: %s", err)
|
||||
return nil, fmt.Errorf("failed to expand annotation templates: %s", err)
|
||||
}
|
||||
return as, nil
|
||||
}
|
||||
@@ -820,9 +825,7 @@ func (ar *AlertingRule) restore(ctx context.Context, q datasource.Querier, ts ti
|
||||
expr := fmt.Sprintf("default_rollup(%s{%s%s}[%ds])",
|
||||
alertForStateMetricName, nameStr, labelsFilter, int(lookback.Seconds()))
|
||||
|
||||
// query ALERTS_FOR_STATE at `ts-1s` instead `ts` to avoid retrieving data written in the current run,
|
||||
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10335
|
||||
res, _, err := q.Query(ctx, expr, ts.Add(-1*time.Second))
|
||||
res, _, err := q.Query(ctx, expr, ts)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to execute restore query %q: %w ", expr, err)
|
||||
}
|
||||
|
||||
@@ -1,106 +0,0 @@
|
||||
//go:build synctest
|
||||
|
||||
package rule
|
||||
|
||||
import (
|
||||
"context"
|
||||
"strings"
|
||||
"testing"
|
||||
"testing/synctest"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
|
||||
)
|
||||
|
||||
// TestAlertingRule_ActiveAtPreservedInAnnotations ensures that the fix for
|
||||
// https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9543 is preserved
|
||||
// while allowing query templates in labels (https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9783)
|
||||
func TestAlertingRule_ActiveAtPreservedInAnnotations(t *testing.T) {
|
||||
// wrap into synctest because of time manipulations
|
||||
synctest.Test(t, func(t *testing.T) {
|
||||
fq := &datasource.FakeQuerier{}
|
||||
|
||||
ar := &AlertingRule{
|
||||
Name: "TestActiveAtPreservation",
|
||||
Labels: map[string]string{
|
||||
"test_query_in_label": `{{ "static_value" }}`,
|
||||
},
|
||||
Annotations: map[string]string{
|
||||
"description": "Alert active since {{ $activeAt }}",
|
||||
},
|
||||
alerts: make(map[uint64]*notifier.Alert),
|
||||
q: fq,
|
||||
state: &ruleState{
|
||||
entries: make([]StateEntry, 10),
|
||||
},
|
||||
}
|
||||
|
||||
// Mock query result - return empty result to make suppress_for_mass_alert = false
|
||||
// (no need to add anything to fq for empty result)
|
||||
|
||||
// Add a metric that should trigger the alert
|
||||
fq.Add(metricWithValueAndLabels(t, 1, "instance", "server1"))
|
||||
|
||||
// First execution - creates new alert
|
||||
ts1 := time.Now()
|
||||
_, err := ar.exec(context.TODO(), ts1, 0)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error on first exec: %s", err)
|
||||
}
|
||||
|
||||
if len(ar.alerts) != 1 {
|
||||
t.Fatalf("expected 1 alert, got %d", len(ar.alerts))
|
||||
}
|
||||
|
||||
firstAlert := ar.GetAlerts()[0]
|
||||
// Verify first execution: activeAt should be ts1 and annotation should reflect it
|
||||
if !firstAlert.ActiveAt.Equal(ts1) {
|
||||
t.Fatalf("expected activeAt to be %v, got %v", ts1, firstAlert.ActiveAt)
|
||||
}
|
||||
|
||||
// Extract time from annotation (format will be like "Alert active since 2025-09-30 08:55:13.638551611 -0400 EDT m=+0.002928464")
|
||||
expectedTimeStr := ts1.Format("2006-01-02 15:04:05")
|
||||
if !strings.Contains(firstAlert.Annotations["description"], expectedTimeStr) {
|
||||
t.Fatalf("first exec annotation should contain time %s, got: %s", expectedTimeStr, firstAlert.Annotations["description"])
|
||||
}
|
||||
|
||||
// Second execution - should preserve activeAt in annotation
|
||||
|
||||
// Ensure different timestamp with different seconds
|
||||
// sleep is non-blocking thanks to synctest
|
||||
time.Sleep(2 * time.Second)
|
||||
ts2 := time.Now()
|
||||
_, err = ar.exec(context.TODO(), ts2, 0)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error on second exec: %s", err)
|
||||
}
|
||||
|
||||
// Get the alert again (should be the same alert)
|
||||
if len(ar.alerts) != 1 {
|
||||
t.Fatalf("expected 1 alert, got %d", len(ar.alerts))
|
||||
}
|
||||
secondAlert := ar.GetAlerts()[0]
|
||||
|
||||
// Critical test: activeAt should still be ts1, not ts2
|
||||
if !secondAlert.ActiveAt.Equal(ts1) {
|
||||
t.Fatalf("activeAt should be preserved as %v, but got %v", ts1, secondAlert.ActiveAt)
|
||||
}
|
||||
|
||||
// Critical test: annotation should still contain ts1 time, not ts2
|
||||
if !strings.Contains(secondAlert.Annotations["description"], expectedTimeStr) {
|
||||
t.Fatalf("second exec annotation should still contain original time %s, got: %s", expectedTimeStr, secondAlert.Annotations["description"])
|
||||
}
|
||||
|
||||
// Additional verification: annotation should NOT contain ts2 time
|
||||
ts2TimeStr := ts2.Format("2006-01-02 15:04:05")
|
||||
if strings.Contains(secondAlert.Annotations["description"], ts2TimeStr) {
|
||||
t.Fatalf("annotation should NOT contain new eval time %s, got: %s", ts2TimeStr, secondAlert.Annotations["description"])
|
||||
}
|
||||
|
||||
// Verify query template in labels still works (this would fail if query templates were broken)
|
||||
if firstAlert.Labels["test_query_in_label"] != "static_value" {
|
||||
t.Fatalf("expected test_query_in_label=static_value, got %s", firstAlert.Labels["test_query_in_label"])
|
||||
}
|
||||
})
|
||||
}
|
||||
@@ -10,6 +10,7 @@ import (
|
||||
"strings"
|
||||
"sync"
|
||||
"testing"
|
||||
"testing/synctest"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
@@ -663,7 +664,7 @@ func TestAlertingRuleExecRange(t *testing.T) {
|
||||
Name: "for-pending",
|
||||
Type: config.NewPrometheusType().String(),
|
||||
Labels: map[string]string{"alertname": "for-pending"},
|
||||
Annotations: map[string]string{},
|
||||
Annotations: map[string]string{"activeAt": "5000"},
|
||||
State: notifier.StatePending,
|
||||
ActiveAt: time.Unix(5, 0),
|
||||
Value: 1,
|
||||
@@ -683,7 +684,7 @@ func TestAlertingRuleExecRange(t *testing.T) {
|
||||
Name: "for-firing",
|
||||
Type: config.NewPrometheusType().String(),
|
||||
Labels: map[string]string{"alertname": "for-firing"},
|
||||
Annotations: map[string]string{},
|
||||
Annotations: map[string]string{"activeAt": "1000"},
|
||||
State: notifier.StateFiring,
|
||||
ActiveAt: time.Unix(1, 0),
|
||||
Start: time.Unix(5, 0),
|
||||
@@ -704,7 +705,7 @@ func TestAlertingRuleExecRange(t *testing.T) {
|
||||
Name: "for-hold-pending",
|
||||
Type: config.NewPrometheusType().String(),
|
||||
Labels: map[string]string{"alertname": "for-hold-pending"},
|
||||
Annotations: map[string]string{},
|
||||
Annotations: map[string]string{"activeAt": "5000"},
|
||||
State: notifier.StatePending,
|
||||
ActiveAt: time.Unix(5, 0),
|
||||
Value: 1,
|
||||
@@ -1119,7 +1120,7 @@ func TestAlertingRuleLimit_Success(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestAlertingRule_Template(t *testing.T) {
|
||||
f := func(rule *AlertingRule, metrics []datasource.Metric, isResponsePartial bool, alertsExpected map[uint64]*notifier.Alert) {
|
||||
f := func(rule *AlertingRule, metrics []datasource.Metric, alertsExpected map[uint64]*notifier.Alert) {
|
||||
t.Helper()
|
||||
|
||||
fakeGroup := Group{
|
||||
@@ -1132,7 +1133,6 @@ func TestAlertingRule_Template(t *testing.T) {
|
||||
entries: make([]StateEntry, 10),
|
||||
}
|
||||
fq.Add(metrics...)
|
||||
fq.SetPartialResponse(isResponsePartial)
|
||||
|
||||
if _, err := rule.exec(context.TODO(), time.Now(), 0); err != nil {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
@@ -1163,7 +1163,7 @@ func TestAlertingRule_Template(t *testing.T) {
|
||||
}, []datasource.Metric{
|
||||
metricWithValueAndLabels(t, 1, "instance", "foo"),
|
||||
metricWithValueAndLabels(t, 1, "instance", "bar"),
|
||||
}, false, map[uint64]*notifier.Alert{
|
||||
}, map[uint64]*notifier.Alert{
|
||||
hash(map[string]string{alertNameLabel: "common", "region": "east", "instance": "foo"}): {
|
||||
Annotations: map[string]string{
|
||||
"summary": `common: Too high connection number for "foo"`,
|
||||
@@ -1192,14 +1192,14 @@ func TestAlertingRule_Template(t *testing.T) {
|
||||
"instance": "{{ $labels.instance }}",
|
||||
},
|
||||
Annotations: map[string]string{
|
||||
"summary": `{{ $labels.__name__ }}: Too high connection number for "{{ $labels.instance }}".{{ if $isPartial }} WARNING: Partial response detected - this alert may be incomplete. Please verify the results manually.{{ end }}`,
|
||||
"summary": `{{ $labels.__name__ }}: Too high connection number for "{{ $labels.instance }}"`,
|
||||
"description": `{{ $labels.alertname}}: It is {{ $value }} connections for "{{ $labels.instance }}"`,
|
||||
},
|
||||
alerts: make(map[uint64]*notifier.Alert),
|
||||
}, []datasource.Metric{
|
||||
metricWithValueAndLabels(t, 2, "__name__", "first", "instance", "foo", alertNameLabel, "override"),
|
||||
metricWithValueAndLabels(t, 10, "__name__", "second", "instance", "bar", alertNameLabel, "override"),
|
||||
}, false, map[uint64]*notifier.Alert{
|
||||
}, map[uint64]*notifier.Alert{
|
||||
hash(map[string]string{alertNameLabel: "override label", "exported_alertname": "override", "instance": "foo"}): {
|
||||
Labels: map[string]string{
|
||||
alertNameLabel: "override label",
|
||||
@@ -1207,7 +1207,7 @@ func TestAlertingRule_Template(t *testing.T) {
|
||||
"instance": "foo",
|
||||
},
|
||||
Annotations: map[string]string{
|
||||
"summary": `first: Too high connection number for "foo".`,
|
||||
"summary": `first: Too high connection number for "foo"`,
|
||||
"description": `override: It is 2 connections for "foo"`,
|
||||
},
|
||||
},
|
||||
@@ -1218,7 +1218,7 @@ func TestAlertingRule_Template(t *testing.T) {
|
||||
"instance": "bar",
|
||||
},
|
||||
Annotations: map[string]string{
|
||||
"summary": `second: Too high connection number for "bar".`,
|
||||
"summary": `second: Too high connection number for "bar"`,
|
||||
"description": `override: It is 10 connections for "bar"`,
|
||||
},
|
||||
},
|
||||
@@ -1231,7 +1231,7 @@ func TestAlertingRule_Template(t *testing.T) {
|
||||
"instance": "{{ $labels.instance }}",
|
||||
},
|
||||
Annotations: map[string]string{
|
||||
"summary": `Alert "{{ $labels.alertname }}({{ $labels.alertgroup }})" for instance {{ $labels.instance }}.{{ if $isPartial }} WARNING: Partial response detected - this alert may be incomplete. Please verify the results manually.{{ end }}`,
|
||||
"summary": `Alert "{{ $labels.alertname }}({{ $labels.alertgroup }})" for instance {{ $labels.instance }}`,
|
||||
},
|
||||
alerts: make(map[uint64]*notifier.Alert),
|
||||
}, []datasource.Metric{
|
||||
@@ -1239,7 +1239,7 @@ func TestAlertingRule_Template(t *testing.T) {
|
||||
alertNameLabel, "originAlertname",
|
||||
alertGroupNameLabel, "originGroupname",
|
||||
"instance", "foo"),
|
||||
}, true, map[uint64]*notifier.Alert{
|
||||
}, map[uint64]*notifier.Alert{
|
||||
hash(map[string]string{
|
||||
alertNameLabel: "OriginLabels",
|
||||
"exported_alertname": "originAlertname",
|
||||
@@ -1255,7 +1255,7 @@ func TestAlertingRule_Template(t *testing.T) {
|
||||
"instance": "foo",
|
||||
},
|
||||
Annotations: map[string]string{
|
||||
"summary": `Alert "originAlertname(originGroupname)" for instance foo. WARNING: Partial response detected - this alert may be incomplete. Please verify the results manually.`,
|
||||
"summary": `Alert "originAlertname(originGroupname)" for instance foo`,
|
||||
},
|
||||
},
|
||||
})
|
||||
@@ -1363,7 +1363,6 @@ func TestAlertingRule_ToLabels(t *testing.T) {
|
||||
{Name: "instance", Value: "0.0.0.0:8800"},
|
||||
{Name: "group", Value: "vmalert"},
|
||||
{Name: "alertname", Value: "ConfigurationReloadFailure"},
|
||||
{Name: "pod", Value: "vmalert-0"},
|
||||
},
|
||||
Values: []float64{1},
|
||||
Timestamps: []int64{time.Now().UnixNano()},
|
||||
@@ -1371,11 +1370,8 @@ func TestAlertingRule_ToLabels(t *testing.T) {
|
||||
|
||||
ar := &AlertingRule{
|
||||
Labels: map[string]string{
|
||||
"instance": "override", // this should override instance with new value
|
||||
"group": "vmalert", // this shouldn't have effect since value in metric is equal
|
||||
"invalid_label": "{{ .Values.mustRuntimeFail }}",
|
||||
"empty_label": "", // this should be dropped
|
||||
"pod": "", // this should remove the pod label from query result
|
||||
"instance": "override", // this should override instance with new value
|
||||
"group": "vmalert", // this shouldn't have effect since value in metric is equal
|
||||
},
|
||||
Expr: "sum(vmalert_alerting_rules_error) by(instance, group, alertname) > 0",
|
||||
Name: "AlertingRulesError",
|
||||
@@ -1383,12 +1379,10 @@ func TestAlertingRule_ToLabels(t *testing.T) {
|
||||
}
|
||||
|
||||
expectedOriginLabels := map[string]string{
|
||||
"instance": "0.0.0.0:8800",
|
||||
"group": "vmalert",
|
||||
"alertname": "ConfigurationReloadFailure",
|
||||
"alertgroup": "vmalert",
|
||||
"pod": "vmalert-0",
|
||||
"invalid_label": `error evaluating template: template: :1:298: executing "" at <.Values.mustRuntimeFail>: can't evaluate field Values in type notifier.tplData`,
|
||||
"instance": "0.0.0.0:8800",
|
||||
"group": "vmalert",
|
||||
"alertname": "ConfigurationReloadFailure",
|
||||
"alertgroup": "vmalert",
|
||||
}
|
||||
|
||||
expectedProcessedLabels := map[string]string{
|
||||
@@ -1398,12 +1392,11 @@ func TestAlertingRule_ToLabels(t *testing.T) {
|
||||
"exported_alertname": "ConfigurationReloadFailure",
|
||||
"group": "vmalert",
|
||||
"alertgroup": "vmalert",
|
||||
"invalid_label": `error evaluating template: template: :1:298: executing "" at <.Values.mustRuntimeFail>: can't evaluate field Values in type notifier.tplData`,
|
||||
}
|
||||
|
||||
ls, err := ar.toLabels(metric, nil)
|
||||
if err == nil || !strings.Contains(err.Error(), "error evaluating template") {
|
||||
t.Fatalf("unexpected error %q", err.Error())
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
|
||||
if !reflect.DeepEqual(ls.origin, expectedOriginLabels) {
|
||||
@@ -1481,3 +1474,95 @@ func TestAlertingRule_QueryTemplateInLabels(t *testing.T) {
|
||||
t.Fatalf("expected 'suppress_for_mass_alert' label to be 'true' or 'false', got '%s'", suppressLabel)
|
||||
}
|
||||
}
|
||||
|
||||
// TestAlertingRule_ActiveAtPreservedInAnnotations ensures that the fix for
|
||||
// https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9543 is preserved
|
||||
// while allowing query templates in labels (https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9783)
|
||||
func TestAlertingRule_ActiveAtPreservedInAnnotations(t *testing.T) {
|
||||
// wrap into synctest because of time manipulations
|
||||
synctest.Test(t, func(t *testing.T) {
|
||||
fq := &datasource.FakeQuerier{}
|
||||
|
||||
ar := &AlertingRule{
|
||||
Name: "TestActiveAtPreservation",
|
||||
Labels: map[string]string{
|
||||
"test_query_in_label": `{{ "static_value" }}`,
|
||||
},
|
||||
Annotations: map[string]string{
|
||||
"description": "Alert active since {{ $activeAt }}",
|
||||
},
|
||||
alerts: make(map[uint64]*notifier.Alert),
|
||||
q: fq,
|
||||
state: &ruleState{
|
||||
entries: make([]StateEntry, 10),
|
||||
},
|
||||
}
|
||||
|
||||
// Mock query result - return empty result to make suppress_for_mass_alert = false
|
||||
// (no need to add anything to fq for empty result)
|
||||
|
||||
// Add a metric that should trigger the alert
|
||||
fq.Add(metricWithValueAndLabels(t, 1, "instance", "server1"))
|
||||
|
||||
// First execution - creates new alert
|
||||
ts1 := time.Now()
|
||||
_, err := ar.exec(context.TODO(), ts1, 0)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error on first exec: %s", err)
|
||||
}
|
||||
|
||||
if len(ar.alerts) != 1 {
|
||||
t.Fatalf("expected 1 alert, got %d", len(ar.alerts))
|
||||
}
|
||||
|
||||
firstAlert := ar.GetAlerts()[0]
|
||||
// Verify first execution: activeAt should be ts1 and annotation should reflect it
|
||||
if !firstAlert.ActiveAt.Equal(ts1) {
|
||||
t.Fatalf("expected activeAt to be %v, got %v", ts1, firstAlert.ActiveAt)
|
||||
}
|
||||
|
||||
// Extract time from annotation (format will be like "Alert active since 2025-09-30 08:55:13.638551611 -0400 EDT m=+0.002928464")
|
||||
expectedTimeStr := ts1.Format("2006-01-02 15:04:05")
|
||||
if !strings.Contains(firstAlert.Annotations["description"], expectedTimeStr) {
|
||||
t.Fatalf("first exec annotation should contain time %s, got: %s", expectedTimeStr, firstAlert.Annotations["description"])
|
||||
}
|
||||
|
||||
// Second execution - should preserve activeAt in annotation
|
||||
|
||||
// Ensure different timestamp with different seconds
|
||||
// sleep is non-blocking thanks to synctest
|
||||
time.Sleep(2 * time.Second)
|
||||
ts2 := time.Now()
|
||||
_, err = ar.exec(context.TODO(), ts2, 0)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error on second exec: %s", err)
|
||||
}
|
||||
|
||||
// Get the alert again (should be the same alert)
|
||||
if len(ar.alerts) != 1 {
|
||||
t.Fatalf("expected 1 alert, got %d", len(ar.alerts))
|
||||
}
|
||||
secondAlert := ar.GetAlerts()[0]
|
||||
|
||||
// Critical test: activeAt should still be ts1, not ts2
|
||||
if !secondAlert.ActiveAt.Equal(ts1) {
|
||||
t.Fatalf("activeAt should be preserved as %v, but got %v", ts1, secondAlert.ActiveAt)
|
||||
}
|
||||
|
||||
// Critical test: annotation should still contain ts1 time, not ts2
|
||||
if !strings.Contains(secondAlert.Annotations["description"], expectedTimeStr) {
|
||||
t.Fatalf("second exec annotation should still contain original time %s, got: %s", expectedTimeStr, secondAlert.Annotations["description"])
|
||||
}
|
||||
|
||||
// Additional verification: annotation should NOT contain ts2 time
|
||||
ts2TimeStr := ts2.Format("2006-01-02 15:04:05")
|
||||
if strings.Contains(secondAlert.Annotations["description"], ts2TimeStr) {
|
||||
t.Fatalf("annotation should NOT contain new eval time %s, got: %s", ts2TimeStr, secondAlert.Annotations["description"])
|
||||
}
|
||||
|
||||
// Verify query template in labels still works (this would fail if query templates were broken)
|
||||
if firstAlert.Labels["test_query_in_label"] != "static_value" {
|
||||
t.Fatalf("expected test_query_in_label=static_value, got %s", firstAlert.Labels["test_query_in_label"])
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
@@ -6,9 +6,7 @@ import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"hash/fnv"
|
||||
"maps"
|
||||
"net/url"
|
||||
"path"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
@@ -20,7 +18,6 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/remotewrite"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/vmalertutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||
)
|
||||
@@ -32,8 +29,8 @@ var (
|
||||
"0 means no limit.")
|
||||
ruleUpdateEntriesLimit = flag.Int("rule.updateEntriesLimit", 20, "Defines the max number of rule's state updates stored in-memory. "+
|
||||
"Rule's updates are available on rule's Details page and are used for debugging purposes. The number of stored updates can be overridden per rule via update_entries_limit param.")
|
||||
resendDelay = flag.Duration("rule.resendDelay", 0, "Minimum amount of time to wait before resending an alert to notifier.")
|
||||
maxResolveDuration = flag.Duration("rule.maxResolveDuration", 0, "Limits the maximum duration for automatic alert expiration, "+
|
||||
resendDelay = flag.Duration("rule.resendDelay", 0, "MiniMum amount of time to wait before resending an alert to notifier.")
|
||||
maxResolveDuration = flag.Duration("rule.maxResolveDuration", 0, "Limits the maxiMum duration for automatic alert expiration, "+
|
||||
"which by default is 4 times evaluationInterval of the parent group")
|
||||
evalDelay = flag.Duration("rule.evalDelay", 30*time.Second, "Adjustment of the 'time' parameter for rule evaluation requests to compensate intentional data delay from the datasource. "+
|
||||
"Normally, should be equal to '-search.latencyOffset' (cmd-line flag configured for VictoriaMetrics single-node or vmselect). "+
|
||||
@@ -43,9 +40,6 @@ var (
|
||||
"For example, if lookback=1h then range from now() to now()-1h will be scanned.")
|
||||
maxStartDelay = flag.Duration("group.maxStartDelay", 5*time.Minute, "Defines the max delay before starting the group evaluation. Group's start is artificially delayed for random duration on interval"+
|
||||
" [0..min(--group.maxStartDelay, group.interval)]. This helps smoothing out the load on the configured datasource, so evaluations aren't executed too close to each other.")
|
||||
ruleStripFilePath = flag.Bool("rule.stripFilePath", false, "Whether to strip rule file paths in logs and all API responses, including /metrics. "+
|
||||
"For example, file path '/path/to/tenant_id/rules.yml' will be stripped to 'groupHashID/rules.yml'. "+
|
||||
"This flag may be useful for hiding sensitive information in file paths, such as S3 bucket details.")
|
||||
)
|
||||
|
||||
// Group is an entity for grouping rules
|
||||
@@ -102,7 +96,9 @@ type groupMetrics struct {
|
||||
// set2 has priority over set1.
|
||||
func mergeLabels(groupName, ruleName string, set1, set2 map[string]string) map[string]string {
|
||||
r := map[string]string{}
|
||||
maps.Copy(r, set1)
|
||||
for k, v := range set1 {
|
||||
r[k] = v
|
||||
}
|
||||
for k, v := range set2 {
|
||||
if prevV, ok := r[k]; ok {
|
||||
logger.Infof("label %q=%q for rule %q.%q overwritten with external label %q=%q",
|
||||
@@ -151,12 +147,6 @@ func NewGroup(cfg config.Group, qb datasource.QuerierBuilder, defaultInterval ti
|
||||
g.EvalDelay = &cfg.EvalDelay.D
|
||||
}
|
||||
g.id = g.CreateID()
|
||||
// strip file path from group.File after generated group ID when ruleStripFilePath is set,
|
||||
// so it won't be exposed in logs and api responses
|
||||
if *ruleStripFilePath {
|
||||
_, filename := path.Split(g.File)
|
||||
g.File = fmt.Sprintf("%d/%s", g.id, filename)
|
||||
}
|
||||
for _, h := range cfg.Headers {
|
||||
g.Headers[h.Key] = h.Value
|
||||
}
|
||||
@@ -384,17 +374,15 @@ func (g *Group) Start(ctx context.Context, rw remotewrite.RWClient, rr datasourc
|
||||
|
||||
g.infof("started")
|
||||
|
||||
eval := func(ctx context.Context, ts time.Time) time.Time {
|
||||
eval := func(ctx context.Context, ts time.Time) {
|
||||
g.metrics.iterationTotal.Inc()
|
||||
|
||||
start := time.Now()
|
||||
|
||||
if len(g.Rules) < 1 {
|
||||
g.metrics.iterationDuration.UpdateDuration(start)
|
||||
g.mu.Lock()
|
||||
g.LastEvaluation = start
|
||||
g.mu.Unlock()
|
||||
return ts
|
||||
return
|
||||
}
|
||||
|
||||
resolveDuration := getResolveDuration(g.Interval, *resendDelay, *maxResolveDuration)
|
||||
@@ -407,10 +395,7 @@ func (g *Group) Start(ctx context.Context, rw remotewrite.RWClient, rr datasourc
|
||||
}
|
||||
}
|
||||
g.metrics.iterationDuration.UpdateDuration(start)
|
||||
g.mu.Lock()
|
||||
g.LastEvaluation = start
|
||||
g.mu.Unlock()
|
||||
return ts
|
||||
}
|
||||
|
||||
evalCtx, cancel := context.WithCancel(ctx)
|
||||
@@ -419,18 +404,15 @@ func (g *Group) Start(ctx context.Context, rw remotewrite.RWClient, rr datasourc
|
||||
g.mu.Unlock()
|
||||
defer g.evalCancel()
|
||||
|
||||
// start the interval ticker before the first evaluation,
|
||||
// so that the evaluation timestamps of groups with the `eval_offset` option are also aligned,
|
||||
// see https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10773
|
||||
eval(evalCtx, evalTS)
|
||||
|
||||
t := time.NewTicker(g.Interval)
|
||||
defer t.Stop()
|
||||
|
||||
realEvalTS := eval(evalCtx, evalTS)
|
||||
|
||||
// restore the rules state after the first evaluation
|
||||
// so only active alerts can be restored.
|
||||
if rr != nil {
|
||||
err := g.restore(ctx, rr, realEvalTS, *remoteReadLookBack)
|
||||
err := g.restore(ctx, rr, evalTS, *remoteReadLookBack)
|
||||
if err != nil {
|
||||
logger.Errorf("error while restoring ruleState for group %q: %s", g.Name, err)
|
||||
}
|
||||
@@ -501,15 +483,8 @@ func (g *Group) UpdateWith(newGroup *Group) {
|
||||
// delayBeforeStart calculates delay based on Group ID, so all groups will start at different moments of time.
|
||||
func (g *Group) delayBeforeStart(ts time.Time, maxDelay time.Duration) time.Duration {
|
||||
if g.EvalOffset != nil {
|
||||
offset := *g.EvalOffset
|
||||
// adjust the offset for negative evalOffset, the rule is:
|
||||
// `eval_offset: -x` is equivalent to `eval_offset: y` for `interval: x+y`.
|
||||
// For example, `eval_offset: -6m` is equivalent to `eval_offset: 4m` for `interval: 10m`.
|
||||
if offset < 0 {
|
||||
offset += g.Interval
|
||||
}
|
||||
// if offset is specified, ignore the maxDelay and return a duration aligned with offset
|
||||
currentOffsetPoint := ts.Truncate(g.Interval).Add(offset)
|
||||
currentOffsetPoint := ts.Truncate(g.Interval).Add(*g.EvalOffset)
|
||||
if currentOffsetPoint.Before(ts) {
|
||||
// wait until the next offset point
|
||||
return currentOffsetPoint.Add(g.Interval).Sub(ts)
|
||||
@@ -518,8 +493,11 @@ func (g *Group) delayBeforeStart(ts time.Time, maxDelay time.Duration) time.Dura
|
||||
}
|
||||
|
||||
// otherwise, return a random duration between [0..min(interval, maxDelay)] based on group ID
|
||||
// artificially limit interval, so groups with big intervals could start sooner.
|
||||
interval := min(g.Interval, maxDelay)
|
||||
interval := g.Interval
|
||||
if interval > maxDelay {
|
||||
// artificially limit interval, so groups with big intervals could start sooner.
|
||||
interval = maxDelay
|
||||
}
|
||||
var randSleep time.Duration
|
||||
randSleep = time.Duration(float64(interval) * (float64(g.GetID()) / (1 << 64)))
|
||||
sleepOffset := time.Duration(ts.UnixNano() % interval.Nanoseconds())
|
||||
@@ -777,7 +755,6 @@ func (e *executor) exec(ctx context.Context, r Rule, ts time.Time, resolveDurati
|
||||
return fmt.Errorf("rule %q: failed to execute: %w", r, err)
|
||||
}
|
||||
|
||||
var errG vmalertutil.ErrGroup
|
||||
if e.Rw != nil {
|
||||
pushToRW := func(tss []prompb.TimeSeries) error {
|
||||
var lastErr error
|
||||
@@ -789,26 +766,20 @@ func (e *executor) exec(ctx context.Context, r Rule, ts time.Time, resolveDurati
|
||||
return lastErr
|
||||
}
|
||||
if err := pushToRW(tss); err != nil {
|
||||
errG.Add(err)
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
ar, ok := r.(*AlertingRule)
|
||||
if !ok {
|
||||
return errG.Err()
|
||||
return nil
|
||||
}
|
||||
|
||||
alerts := ar.alertsToSend(resolveDuration, *resendDelay)
|
||||
if len(alerts) < 1 {
|
||||
return errG.Err()
|
||||
return nil
|
||||
}
|
||||
|
||||
notifierErr := notifier.Send(ctx, alerts, e.notifierHeaders)
|
||||
for err := range notifierErr {
|
||||
if err != nil {
|
||||
errG.Add(fmt.Errorf("rule %q: notifier failure: %w", r, err))
|
||||
}
|
||||
}
|
||||
|
||||
return errG.Err()
|
||||
errGr := notifier.Send(ctx, alerts, e.notifierHeaders)
|
||||
return errGr.Err()
|
||||
}
|
||||
|
||||
@@ -405,8 +405,7 @@ func TestGroupStart(t *testing.T) {
|
||||
|
||||
var cur uint64
|
||||
prev := g.metrics.iterationTotal.Get()
|
||||
i := 0
|
||||
for {
|
||||
for i := 0; ; i++ {
|
||||
if i > 40 {
|
||||
t.Fatalf("group wasn't able to perform %d evaluations during %d eval intervals", n, i)
|
||||
}
|
||||
@@ -415,7 +414,6 @@ func TestGroupStart(t *testing.T) {
|
||||
return
|
||||
}
|
||||
time.Sleep(interval)
|
||||
i++
|
||||
}
|
||||
}
|
||||
|
||||
@@ -606,15 +604,6 @@ func TestGroupStartDelay(t *testing.T) {
|
||||
f("2023-01-01T00:03:30.000+00:00", "2023-01-01T00:08:00.000+00:00")
|
||||
f("2023-01-01T00:08:00.000+00:00", "2023-01-01T00:08:00.000+00:00")
|
||||
|
||||
// test group with negative offset -2min, which is equivalent to 3min offset for 5min interval
|
||||
offset = -2 * time.Minute
|
||||
g.EvalOffset = &offset
|
||||
|
||||
f("2023-01-01T00:00:15.000+00:00", "2023-01-01T00:03:00.000+00:00")
|
||||
f("2023-01-01T00:01:00.000+00:00", "2023-01-01T00:03:00.000+00:00")
|
||||
f("2023-01-01T00:03:30.000+00:00", "2023-01-01T00:08:00.000+00:00")
|
||||
f("2023-01-01T00:08:00.000+00:00", "2023-01-01T00:08:00.000+00:00")
|
||||
|
||||
maxDelay = time.Minute * 1
|
||||
g.EvalOffset = nil
|
||||
|
||||
@@ -742,64 +731,3 @@ func parseTime(t *testing.T, s string) time.Time {
|
||||
}
|
||||
return tt
|
||||
}
|
||||
|
||||
func TestRuleStripFilePath(t *testing.T) {
|
||||
configG := config.Group{
|
||||
Name: "group",
|
||||
File: "/var/local/test/rules.yaml",
|
||||
Type: config.NewRawType("prometheus"),
|
||||
Concurrency: 1,
|
||||
Rules: []config.Rule{
|
||||
{
|
||||
ID: 0,
|
||||
Alert: "alert",
|
||||
},
|
||||
{
|
||||
ID: 1,
|
||||
Record: "record",
|
||||
},
|
||||
}}
|
||||
qb := &datasource.FakeQuerier{}
|
||||
g := NewGroup(configG, qb, 1*time.Minute, nil)
|
||||
|
||||
gID := g.id
|
||||
if g.File != "/var/local/test/rules.yaml" {
|
||||
t.Fatalf("expected file path to be unchanged; got %q instead", g.File)
|
||||
}
|
||||
|
||||
for _, r := range g.Rules {
|
||||
if ar, ok := r.(*AlertingRule); ok {
|
||||
if ar.File != "/var/local/test/rules.yaml" {
|
||||
t.Fatalf("expected rule file path to be unchanged; got %q instead", ar.File)
|
||||
}
|
||||
}
|
||||
if rr, ok := r.(*RecordingRule); ok {
|
||||
if rr.File != "/var/local/test/rules.yaml" {
|
||||
t.Fatalf("expected rule file path to be unchanged; got %q instead", rr.File)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
oldRuleStripFilePath := *ruleStripFilePath
|
||||
*ruleStripFilePath = true
|
||||
defer func() {
|
||||
*ruleStripFilePath = oldRuleStripFilePath
|
||||
}()
|
||||
g = NewGroup(configG, qb, 1*time.Minute, nil)
|
||||
|
||||
if g.File != fmt.Sprintf("%d/rules.yaml", gID) {
|
||||
t.Fatalf("expected file path to be stripped to %q; got %q instead", fmt.Sprintf("%d/rules.yaml", gID), g.File)
|
||||
}
|
||||
for _, r := range g.Rules {
|
||||
if ar, ok := r.(*AlertingRule); ok {
|
||||
if ar.File != fmt.Sprintf("%d/rules.yaml", gID) {
|
||||
t.Fatalf("expected rule file path to be unchanged; got %q instead", ar.File)
|
||||
}
|
||||
}
|
||||
if rr, ok := r.(*RecordingRule); ok {
|
||||
if rr.File != fmt.Sprintf("%d/rules.yaml", gID) {
|
||||
t.Fatalf("expected rule file path to be unchanged; got %q instead", rr.File)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,7 +2,6 @@ package rule
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"strings"
|
||||
"time"
|
||||
@@ -198,7 +197,7 @@ func (rr *RecordingRule) exec(ctx context.Context, ts time.Time, limit int) ([]p
|
||||
|
||||
defer func() {
|
||||
rr.state.add(curState)
|
||||
if curState.Err != nil && !errors.Is(curState.Err, context.Canceled) {
|
||||
if curState.Err != nil {
|
||||
rr.metrics.errors.Inc()
|
||||
}
|
||||
}()
|
||||
@@ -237,8 +236,7 @@ func (rr *RecordingRule) exec(ctx context.Context, ts time.Time, limit int) ([]p
|
||||
Labels: stringToLabels(k),
|
||||
Samples: []prompb.Sample{
|
||||
{Value: decimal.StaleNaN, Timestamp: ts.UnixNano() / 1e6},
|
||||
},
|
||||
})
|
||||
}})
|
||||
}
|
||||
rr.lastEvaluation = curEvaluation
|
||||
return tss, nil
|
||||
@@ -293,13 +291,6 @@ func (rr *RecordingRule) toTimeSeries(m datasource.Metric) prompb.TimeSeries {
|
||||
}
|
||||
// add extra labels configured by user
|
||||
for k := range rr.Labels {
|
||||
// do not add label with empty value to the result, as it has no meaning:
|
||||
// if the label already exists in the original query result, remove it to preserve compatibility with relabeling, see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10766.
|
||||
// otherwise, ignore the label, see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9984.
|
||||
if rr.Labels[k] == "" {
|
||||
m.DelLabel(k)
|
||||
continue
|
||||
}
|
||||
existingLabel := promrelabel.GetLabelByName(m.Labels, k)
|
||||
if existingLabel != nil { // there is a conflict between extra and existing label
|
||||
if existingLabel.Value == rr.Labels[k] {
|
||||
|
||||
@@ -163,13 +163,11 @@ func TestRecordingRule_Exec(t *testing.T) {
|
||||
f(&RecordingRule{
|
||||
Name: "job:foo",
|
||||
Labels: map[string]string{
|
||||
"source": "test",
|
||||
"empty_label": "", // this should be dropped
|
||||
"pod": "", // this should remove the pod label from query result
|
||||
"source": "test",
|
||||
},
|
||||
}, [][]datasource.Metric{{
|
||||
metricWithValueAndLabels(t, 2, "__name__", "foo", "job", "foo", "pod", "vmalert-0"),
|
||||
metricWithValueAndLabels(t, 1, "__name__", "bar", "job", "bar", "source", "origin", "pod", "vmalert-1"),
|
||||
metricWithValueAndLabels(t, 2, "__name__", "foo", "job", "foo"),
|
||||
metricWithValueAndLabels(t, 1, "__name__", "bar", "job", "bar", "source", "origin"),
|
||||
metricWithValueAndLabels(t, 1, "__name__", "baz", "job", "baz", "source", "test"),
|
||||
}}, [][]prompb.TimeSeries{{
|
||||
newTimeSeries([]float64{2}, []int64{ts.UnixNano()}, []prompb.Label{
|
||||
|
||||
@@ -121,7 +121,7 @@ func (s *ruleState) add(e StateEntry) {
|
||||
func replayRule(r Rule, start, end time.Time, rw remotewrite.RWClient, replayRuleRetryAttempts int) (int, error) {
|
||||
var err error
|
||||
var tss []prompb.TimeSeries
|
||||
for i := range replayRuleRetryAttempts {
|
||||
for i := 0; i < replayRuleRetryAttempts; i++ {
|
||||
tss, err = r.execRange(context.Background(), start, end)
|
||||
if err == nil {
|
||||
break
|
||||
|
||||
@@ -40,7 +40,7 @@ func TestRule_state(t *testing.T) {
|
||||
}
|
||||
|
||||
var last time.Time
|
||||
for range stateEntriesN * 2 {
|
||||
for i := 0; i < stateEntriesN*2; i++ {
|
||||
last = time.Now()
|
||||
r.state.add(StateEntry{At: last})
|
||||
}
|
||||
@@ -65,15 +65,17 @@ func TestRule_stateConcurrent(_ *testing.T) {
|
||||
r := &AlertingRule{state: &ruleState{entries: make([]StateEntry, 20)}}
|
||||
const workers = 50
|
||||
const iterations = 100
|
||||
var wg sync.WaitGroup
|
||||
for range workers {
|
||||
wg.Go(func() {
|
||||
for range iterations {
|
||||
wg := sync.WaitGroup{}
|
||||
wg.Add(workers)
|
||||
for i := 0; i < workers; i++ {
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
for i := 0; i < iterations; i++ {
|
||||
r.state.add(StateEntry{At: time.Now()})
|
||||
r.state.getAll()
|
||||
r.state.getLast()
|
||||
}
|
||||
})
|
||||
}()
|
||||
}
|
||||
wg.Wait()
|
||||
}
|
||||
|
||||
@@ -19,13 +19,13 @@ func CompareRules(t *testing.T, a, b Rule) error {
|
||||
case *AlertingRule:
|
||||
br, ok := b.(*AlertingRule)
|
||||
if !ok {
|
||||
return fmt.Errorf("rule %d supposed to be of type AlertingRule", b.ID())
|
||||
return fmt.Errorf("rule %q supposed to be of type AlertingRule", b.ID())
|
||||
}
|
||||
return compareAlertingRules(t, v, br)
|
||||
case *RecordingRule:
|
||||
br, ok := b.(*RecordingRule)
|
||||
if !ok {
|
||||
return fmt.Errorf("rule %d supposed to be of type RecordingRule", b.ID())
|
||||
return fmt.Errorf("rule %q supposed to be of type RecordingRule", b.ID())
|
||||
}
|
||||
return compareRecordingRules(t, v, br)
|
||||
default:
|
||||
|
||||
@@ -57,8 +57,12 @@ type ApiGroup struct {
|
||||
EvalOffset float64 `json:"eval_offset,omitempty"`
|
||||
// EvalDelay will adjust the `time` parameter of rule evaluation requests to compensate intentional query delay from datasource.
|
||||
EvalDelay float64 `json:"eval_delay,omitempty"`
|
||||
// States represents counts per each rule state
|
||||
States map[string]int `json:"states"`
|
||||
// Unhealthy unhealthy rules count
|
||||
Unhealthy int
|
||||
// Healthy passing rules count
|
||||
Healthy int
|
||||
// NoMatch not matching rules count
|
||||
NoMatch int
|
||||
}
|
||||
|
||||
// APILink returns a link to the group's JSON representation.
|
||||
@@ -130,11 +134,6 @@ type ApiRule struct {
|
||||
Updates []StateEntry `json:"-"`
|
||||
}
|
||||
|
||||
// IsNoMatch returns true if rule is in nomatch state
|
||||
func (r *ApiRule) IsNoMatch() bool {
|
||||
return r.LastSamples == 0 && r.LastSeriesFetched != nil && *r.LastSeriesFetched == 0
|
||||
}
|
||||
|
||||
// ApiAlert represents a notifier.AlertingRule state
|
||||
// for WEB view
|
||||
// https://github.com/prometheus/compliance/blob/main/alert_generator/specification.md#get-apiv1rules
|
||||
@@ -210,6 +209,15 @@ func (ar *AlertingRule) AlertsToAPI() []*ApiAlert {
|
||||
return alerts
|
||||
}
|
||||
|
||||
// AlertToAPI generates apiAlert object from alert by its id(hash)
|
||||
func (ar *AlertingRule) AlertToAPI(id uint64) *ApiAlert {
|
||||
a := ar.GetAlert(id)
|
||||
if a == nil {
|
||||
return nil
|
||||
}
|
||||
return NewAlertAPI(ar, a)
|
||||
}
|
||||
|
||||
// NewAlertAPI creates apiAlert for notifier.Alert
|
||||
func NewAlertAPI(ar *AlertingRule, a *notifier.Alert) *ApiAlert {
|
||||
aa := &ApiAlert{
|
||||
@@ -236,25 +244,8 @@ func NewAlertAPI(ar *AlertingRule, a *notifier.Alert) *ApiAlert {
|
||||
return aa
|
||||
}
|
||||
|
||||
func (r *ApiRule) ExtendState() {
|
||||
if len(r.Alerts) > 0 {
|
||||
return
|
||||
}
|
||||
if r.State == "" {
|
||||
r.State = "ok"
|
||||
}
|
||||
if r.Health != "ok" {
|
||||
r.State = "unhealthy"
|
||||
} else if r.IsNoMatch() {
|
||||
r.State = "nomatch"
|
||||
}
|
||||
}
|
||||
|
||||
// ToAPI returns ApiGroup representation of g
|
||||
func (g *Group) ToAPI() *ApiGroup {
|
||||
if g == nil {
|
||||
return &ApiGroup{}
|
||||
}
|
||||
g.mu.RLock()
|
||||
defer g.mu.RUnlock()
|
||||
ag := ApiGroup{
|
||||
@@ -270,7 +261,6 @@ func (g *Group) ToAPI() *ApiGroup {
|
||||
Headers: headersToStrings(g.Headers),
|
||||
NotifierHeaders: headersToStrings(g.NotifierHeaders),
|
||||
Labels: g.Labels,
|
||||
States: make(map[string]int),
|
||||
}
|
||||
if g.EvalOffset != nil {
|
||||
ag.EvalOffset = g.EvalOffset.Seconds()
|
||||
@@ -278,10 +268,9 @@ func (g *Group) ToAPI() *ApiGroup {
|
||||
if g.EvalDelay != nil {
|
||||
ag.EvalDelay = g.EvalDelay.Seconds()
|
||||
}
|
||||
ag.Rules = make([]ApiRule, 0, len(g.Rules))
|
||||
ag.Rules = make([]ApiRule, 0)
|
||||
for _, r := range g.Rules {
|
||||
ar := r.ToAPI()
|
||||
ag.Rules = append(ag.Rules, ar)
|
||||
ag.Rules = append(ag.Rules, r.ToAPI())
|
||||
}
|
||||
return &ag
|
||||
}
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
<path d="M224.163 175.27a1.9 1.9 0 0 0 2.8 0l6-5.9a2.1 2.1 0 0 0 .2-2.7 1.9 1.9 0 0 0-3-.2l-2.6 2.6v-5.2c0-1.54-1.667-2.502-3-1.732-.619.357-1 1.017-1 1.732v5.2l-2.6-2.6a1.9 1.9 0 0 0-3 .2 2.1 2.1 0 0 0 .2 2.7zm-16.459-23.297h36c1.54 0 2.502-1.667 1.732-3a2 2 0 0 0-1.732-1h-36c-1.54 0-2.502 1.667-1.732 3 .357.619 1.017 1 1.732 1m36 4h-36c-1.54 0-2.502 1.667-1.732 3 .357.619 1.017 1 1.732 1h36c1.54 0 2.502-1.667 1.732-3a2 2 0 0 0-1.732-1m-16.59-23.517a1.9 1.9 0 0 0-2.8 0l-6 5.9a2.1 2.1 0 0 0-.2 2.7 1.9 1.9 0 0 0 3 .2l2.6-2.6v5.2c0 1.54 1.667 2.502 3 1.732.619-.357 1-1.017 1-1.732v-5.2l2.6 2.6a1.9 1.9 0 0 0 3-.2 2.1 2.1 0 0 0-.2-2.7z"/>
|
||||
</symbol>
|
||||
|
||||
<symbol id="state" viewBox="-10 -10 320 310">
|
||||
<symbol id="filter" viewBox="-10 -10 320 310">
|
||||
<path d="M288.953 0h-277c-5.522 0-10 4.478-10 10v49.531c0 5.522 4.478 10 10 10h12.372l91.378 107.397v113.978a10 10 0 0 0 15.547 8.32l49.5-33a10 10 0 0 0 4.453-8.32v-80.978l91.378-107.397h12.372c5.522 0 10-4.478 10-10V10c0-5.522-4.477-10-10-10M167.587 166.77a10 10 0 0 0-2.384 6.48v79.305l-29.5 19.666V173.25a10 10 0 0 0-2.384-6.48L50.585 69.531h199.736zM278.953 49.531h-257V20h257z"/>
|
||||
</symbol>
|
||||
|
||||
|
||||
|
Before Width: | Height: | Size: 4.7 KiB After Width: | Height: | Size: 4.7 KiB |
@@ -8,9 +8,9 @@ function actionAll(isCollapse) {
|
||||
});
|
||||
}
|
||||
|
||||
function groupForState(key) {
|
||||
function groupFilter(key) {
|
||||
if (key) {
|
||||
location.href = `?state=${key}`;
|
||||
location.href = `?filter=${key}`;
|
||||
} else {
|
||||
window.location = window.location.pathname;
|
||||
}
|
||||
|
||||
@@ -402,20 +402,6 @@ func templateFuncs() textTpl.FuncMap {
|
||||
return t, nil
|
||||
},
|
||||
|
||||
// formatTime formats the given Unix timestamp with the provided layout.
|
||||
// For example: {{ now | formatTime "2006-01-02T15:04:05Z07:00" }}
|
||||
"formatTime": func(layout string, i any) (string, error) {
|
||||
v, err := toFloat64(i)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("formatTime: %w", err)
|
||||
}
|
||||
if math.IsNaN(v) || math.IsInf(v, 0) {
|
||||
return "", fmt.Errorf("formatTime: cannot convert %v to time", v)
|
||||
}
|
||||
t := timeFromUnixTimestamp(v).Time().UTC()
|
||||
return t.Format(layout), nil
|
||||
},
|
||||
|
||||
/* URLs */
|
||||
|
||||
// externalURL returns value of `external.url` flag
|
||||
|
||||
@@ -6,7 +6,6 @@ import (
|
||||
"strings"
|
||||
"testing"
|
||||
textTpl "text/template"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestTemplateFuncs_StringConversion(t *testing.T) {
|
||||
@@ -104,26 +103,6 @@ func TestTemplateFuncs_Formatting(t *testing.T) {
|
||||
f("humanizeTimestamp", 1679055557, "2023-03-17 12:19:17 +0000 UTC")
|
||||
}
|
||||
|
||||
func TestTemplateFuncs_FormatTime(t *testing.T) {
|
||||
funcs := templateFuncs()
|
||||
formatTime := funcs["formatTime"].(func(layout string, i any) (string, error))
|
||||
|
||||
f := func(layout string, input any, expected string) {
|
||||
t.Helper()
|
||||
result, err := formatTime(layout, input)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error for formatTime(%q, %v): %s", layout, input, err)
|
||||
}
|
||||
if result != expected {
|
||||
t.Fatalf("unexpected result for formatTime(%q, %v); got\n%s\nwant\n%s", layout, input, result, expected)
|
||||
}
|
||||
}
|
||||
|
||||
f(time.RFC3339, float64(1679055557), "2023-03-17T12:19:17Z")
|
||||
f("2006-01-02T15:04:05", int64(1679055557), "2023-03-17T12:19:17")
|
||||
f(time.RFC822, int(1679055557), "17 Mar 23 12:19 UTC")
|
||||
}
|
||||
|
||||
func mkTemplate(current, replacement any) textTemplate {
|
||||
tmpl := textTemplate{}
|
||||
if current != nil {
|
||||
|
||||
@@ -20,12 +20,11 @@ func AuthConfig(filterOptions ...AuthConfigOptions) (*promauth.Config, error) {
|
||||
}
|
||||
|
||||
// WithBasicAuth returns AuthConfigOptions and initialized promauth.BasicAuthConfig based on given params
|
||||
func WithBasicAuth(username, usernameFile, password, passwordFile string) AuthConfigOptions {
|
||||
func WithBasicAuth(username, password, passwordFile string) AuthConfigOptions {
|
||||
return func(config *promauth.HTTPClientConfig) {
|
||||
if username != "" || usernameFile != "" || password != "" || passwordFile != "" {
|
||||
if username != "" || password != "" || passwordFile != "" {
|
||||
config.BasicAuth = &promauth.BasicAuthConfig{
|
||||
Username: username,
|
||||
UsernameFile: usernameFile,
|
||||
Password: promauth.NewSecret(password),
|
||||
PasswordFile: passwordFile,
|
||||
}
|
||||
|
||||
@@ -45,7 +45,7 @@ func (eg *ErrGroup) Error() string {
|
||||
return ""
|
||||
}
|
||||
var b strings.Builder
|
||||
fmt.Fprintf(&b, "errors(%d): \n", len(eg.errs))
|
||||
fmt.Fprintf(&b, "errors(%d): ", len(eg.errs))
|
||||
for i, err := range eg.errs {
|
||||
b.WriteString(err.Error())
|
||||
if i != len(eg.errs)-1 {
|
||||
|
||||
@@ -30,8 +30,8 @@ func TestErrGroup(t *testing.T) {
|
||||
}
|
||||
|
||||
f(nil, "")
|
||||
f([]error{errors.New("timeout")}, "errors(1): \ntimeout")
|
||||
f([]error{errors.New("timeout"), errors.New("deadline")}, "errors(2): \ntimeout\ndeadline")
|
||||
f([]error{errors.New("timeout")}, "errors(1): timeout")
|
||||
f([]error{errors.New("timeout"), errors.New("deadline")}, "errors(2): timeout\ndeadline")
|
||||
}
|
||||
|
||||
// TestErrGroupConcurrent supposed to test concurrent
|
||||
@@ -42,7 +42,7 @@ func TestErrGroupConcurrent(_ *testing.T) {
|
||||
|
||||
const writersN = 4
|
||||
payload := make(chan error, writersN)
|
||||
for range writersN {
|
||||
for i := 0; i < writersN; i++ {
|
||||
go func() {
|
||||
for err := range payload {
|
||||
eg.Add(err)
|
||||
@@ -51,7 +51,7 @@ func TestErrGroupConcurrent(_ *testing.T) {
|
||||
}
|
||||
|
||||
const iterations = 500
|
||||
for i := range iterations {
|
||||
for i := 0; i < iterations; i++ {
|
||||
payload <- fmt.Errorf("error %d", i)
|
||||
if i%10 == 0 {
|
||||
_ = eg.Err()
|
||||
|
||||
@@ -1,11 +1,9 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"cmp"
|
||||
"embed"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"math"
|
||||
"net/http"
|
||||
"slices"
|
||||
"strconv"
|
||||
@@ -52,13 +50,6 @@ var (
|
||||
"alert": rule.TypeAlerting,
|
||||
"record": rule.TypeRecording,
|
||||
}
|
||||
|
||||
// The "recovering", "noData", "normal", "error" states are used by Grafana.
|
||||
// Ignore "recovering" since it is not currently acknowledged by vmalert,
|
||||
// treat "noData" as an alias for "nomatch",
|
||||
// treat "normal" as an alias for "inactive",
|
||||
// treat "error" as an alias for "unhealthy"
|
||||
ruleStates = []string{"ok", "nomatch", "inactive", "firing", "pending", "unhealthy", "recovering", "noData", "normal", "error"}
|
||||
)
|
||||
|
||||
type requestHandler struct {
|
||||
@@ -72,14 +63,6 @@ var (
|
||||
staticServer = http.StripPrefix("/vmalert", staticHandler)
|
||||
)
|
||||
|
||||
func marshalJson(v any, kind string) ([]byte, *httpserver.ErrorWithStatusCode) {
|
||||
data, err := json.Marshal(v)
|
||||
if err != nil {
|
||||
return nil, errResponse(fmt.Errorf("failed to marshal %s: %s", kind, err), http.StatusInternalServerError)
|
||||
}
|
||||
return data, nil
|
||||
}
|
||||
|
||||
func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
|
||||
if strings.HasPrefix(r.URL.Path, "/vmalert/static") {
|
||||
staticServer.ServeHTTP(w, r)
|
||||
@@ -111,32 +94,40 @@ func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return true
|
||||
}
|
||||
WriteRule(w, r, rule)
|
||||
WriteRuleDetails(w, r, rule)
|
||||
return true
|
||||
// current used by old vmalert UI and Grafana Alerts
|
||||
case "/vmalert/groups", "/rules":
|
||||
case "/vmalert/groups":
|
||||
rf, err := newRulesFilter(r)
|
||||
if err != nil {
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return true
|
||||
}
|
||||
// only support filtering by a single state
|
||||
state := ""
|
||||
if len(rf.states) > 0 {
|
||||
state = rf.states[0]
|
||||
rf.states = rf.states[:1]
|
||||
}
|
||||
lr := rh.groups(rf)
|
||||
WriteListGroups(w, r, lr.Data.Groups, state)
|
||||
data := rh.groups(rf)
|
||||
WriteListGroups(w, r, data, rf.filter)
|
||||
return true
|
||||
case "/vmalert/notifiers":
|
||||
WriteListTargets(w, r, notifier.GetTargets())
|
||||
return true
|
||||
|
||||
// special cases for Grafana requests,
|
||||
// served without `vmalert` prefix:
|
||||
case "/rules":
|
||||
// Grafana makes an extra request to `/rules`
|
||||
// handler in addition to `/api/v1/rules` calls in alerts UI
|
||||
var data []*rule.ApiGroup
|
||||
rf, err := newRulesFilter(r)
|
||||
if err != nil {
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return true
|
||||
}
|
||||
data = rh.groups(rf)
|
||||
WriteListGroups(w, r, data, rf.filter)
|
||||
return true
|
||||
|
||||
case "/vmalert/api/v1/notifiers", "/api/v1/notifiers":
|
||||
data, err := rh.listNotifiers()
|
||||
if err != nil {
|
||||
errJson(w, r, err)
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return true
|
||||
}
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
@@ -144,14 +135,15 @@ func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
|
||||
return true
|
||||
case "/vmalert/api/v1/rules", "/api/v1/rules":
|
||||
// path used by Grafana for ng alerting
|
||||
var data []byte
|
||||
rf, err := newRulesFilter(r)
|
||||
if err != nil {
|
||||
errJson(w, r, err)
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return true
|
||||
}
|
||||
data, err := rh.listGroups(rf)
|
||||
data, err = rh.listGroups(rf)
|
||||
if err != nil {
|
||||
errJson(w, r, err)
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return true
|
||||
}
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
@@ -160,14 +152,14 @@ func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
|
||||
|
||||
case "/vmalert/api/v1/alerts", "/api/v1/alerts":
|
||||
// path used by Grafana for ng alerting
|
||||
gf, err := newGroupsFilter(r)
|
||||
rf, err := newRulesFilter(r)
|
||||
if err != nil {
|
||||
errJson(w, r, err)
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return true
|
||||
}
|
||||
data, err := rh.listAlerts(gf)
|
||||
data, err := rh.listAlerts(rf)
|
||||
if err != nil {
|
||||
errJson(w, r, err)
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return true
|
||||
}
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
@@ -176,12 +168,12 @@ func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
|
||||
case "/vmalert/api/v1/alert", "/api/v1/alert":
|
||||
alert, err := rh.getAlert(r)
|
||||
if err != nil {
|
||||
errJson(w, r, err)
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return true
|
||||
}
|
||||
data, err := marshalJson(alert, "alert")
|
||||
data, err := json.Marshal(alert)
|
||||
if err != nil {
|
||||
errJson(w, r, err)
|
||||
httpserver.Errorf(w, r, "failed to marshal alert: %s", err)
|
||||
return true
|
||||
}
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
@@ -190,16 +182,16 @@ func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
|
||||
case "/vmalert/api/v1/rule", "/api/v1/rule":
|
||||
apiRule, err := rh.getRule(r)
|
||||
if err != nil {
|
||||
errJson(w, r, err)
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return true
|
||||
}
|
||||
rwu := rule.ApiRuleWithUpdates{
|
||||
ApiRule: apiRule,
|
||||
StateUpdates: apiRule.Updates,
|
||||
}
|
||||
data, err := marshalJson(rwu, "rule")
|
||||
data, err := json.Marshal(rwu)
|
||||
if err != nil {
|
||||
errJson(w, r, err)
|
||||
httpserver.Errorf(w, r, "failed to marshal rule: %s", err)
|
||||
return true
|
||||
}
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
@@ -208,12 +200,12 @@ func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
|
||||
case "/vmalert/api/v1/group", "/api/v1/group":
|
||||
group, err := rh.getGroup(r)
|
||||
if err != nil {
|
||||
errJson(w, r, err)
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return true
|
||||
}
|
||||
data, err := marshalJson(group, "group")
|
||||
data, err := json.Marshal(group)
|
||||
if err != nil {
|
||||
errJson(w, r, err)
|
||||
httpserver.Errorf(w, r, "failed to marshal group: %s", err)
|
||||
return true
|
||||
}
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
@@ -233,10 +225,10 @@ func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
|
||||
}
|
||||
}
|
||||
|
||||
func (rh *requestHandler) getGroup(r *http.Request) (*rule.ApiGroup, *httpserver.ErrorWithStatusCode) {
|
||||
func (rh *requestHandler) getGroup(r *http.Request) (*rule.ApiGroup, error) {
|
||||
groupID, err := strconv.ParseUint(r.FormValue(rule.ParamGroupID), 10, 64)
|
||||
if err != nil {
|
||||
return nil, errResponse(fmt.Errorf("failed to read %q param: %w", rule.ParamGroupID, err), http.StatusBadRequest)
|
||||
return nil, fmt.Errorf("failed to read %q param: %w", rule.ParamGroupID, err)
|
||||
}
|
||||
obj, err := rh.m.groupAPI(groupID)
|
||||
if err != nil {
|
||||
@@ -245,14 +237,14 @@ func (rh *requestHandler) getGroup(r *http.Request) (*rule.ApiGroup, *httpserver
|
||||
return obj, nil
|
||||
}
|
||||
|
||||
func (rh *requestHandler) getRule(r *http.Request) (rule.ApiRule, *httpserver.ErrorWithStatusCode) {
|
||||
func (rh *requestHandler) getRule(r *http.Request) (rule.ApiRule, error) {
|
||||
groupID, err := strconv.ParseUint(r.FormValue(rule.ParamGroupID), 10, 64)
|
||||
if err != nil {
|
||||
return rule.ApiRule{}, errResponse(fmt.Errorf("failed to read %q param: %w", rule.ParamGroupID, err), http.StatusBadRequest)
|
||||
return rule.ApiRule{}, fmt.Errorf("failed to read %q param: %w", rule.ParamGroupID, err)
|
||||
}
|
||||
ruleID, err := strconv.ParseUint(r.FormValue(rule.ParamRuleID), 10, 64)
|
||||
if err != nil {
|
||||
return rule.ApiRule{}, errResponse(fmt.Errorf("failed to read %q param: %w", rule.ParamRuleID, err), http.StatusBadRequest)
|
||||
return rule.ApiRule{}, fmt.Errorf("failed to read %q param: %w", rule.ParamRuleID, err)
|
||||
}
|
||||
obj, err := rh.m.ruleAPI(groupID, ruleID)
|
||||
if err != nil {
|
||||
@@ -261,14 +253,14 @@ func (rh *requestHandler) getRule(r *http.Request) (rule.ApiRule, *httpserver.Er
|
||||
return obj, nil
|
||||
}
|
||||
|
||||
func (rh *requestHandler) getAlert(r *http.Request) (*rule.ApiAlert, *httpserver.ErrorWithStatusCode) {
|
||||
func (rh *requestHandler) getAlert(r *http.Request) (*rule.ApiAlert, error) {
|
||||
groupID, err := strconv.ParseUint(r.FormValue(rule.ParamGroupID), 10, 64)
|
||||
if err != nil {
|
||||
return nil, errResponse(fmt.Errorf("failed to read %q param: %w", rule.ParamGroupID, err), http.StatusBadRequest)
|
||||
return nil, fmt.Errorf("failed to read %q param: %w", rule.ParamGroupID, err)
|
||||
}
|
||||
alertID, err := strconv.ParseUint(r.FormValue(rule.ParamAlertID), 10, 64)
|
||||
if err != nil {
|
||||
return nil, errResponse(fmt.Errorf("failed to read %q param: %w", rule.ParamAlertID, err), http.StatusBadRequest)
|
||||
return nil, fmt.Errorf("failed to read %q param: %w", rule.ParamAlertID, err)
|
||||
}
|
||||
a, err := rh.m.alertAPI(groupID, alertID)
|
||||
if err != nil {
|
||||
@@ -278,76 +270,28 @@ func (rh *requestHandler) getAlert(r *http.Request) (*rule.ApiAlert, *httpserver
|
||||
}
|
||||
|
||||
type listGroupsResponse struct {
|
||||
Status string `json:"status"`
|
||||
Page int `json:"page,omitempty"`
|
||||
TotalPages int `json:"total_pages,omitempty"`
|
||||
TotalGroups int `json:"total_groups,omitempty"`
|
||||
TotalRules int `json:"total_rules,omitempty"`
|
||||
Data struct {
|
||||
Status string `json:"status"`
|
||||
Data struct {
|
||||
Groups []*rule.ApiGroup `json:"groups"`
|
||||
} `json:"data"`
|
||||
}
|
||||
|
||||
type groupsFilter struct {
|
||||
groupNames []string
|
||||
files []string
|
||||
dsType config.Type
|
||||
}
|
||||
|
||||
func newGroupsFilter(r *http.Request) (*groupsFilter, *httpserver.ErrorWithStatusCode) {
|
||||
_ = r.ParseForm()
|
||||
vs := r.Form
|
||||
gf := &groupsFilter{
|
||||
groupNames: vs["rule_group[]"],
|
||||
files: vs["file[]"],
|
||||
}
|
||||
dsType := vs.Get("datasource_type")
|
||||
if len(dsType) > 0 {
|
||||
if config.SupportedType(dsType) {
|
||||
gf.dsType = config.NewRawType(dsType)
|
||||
} else {
|
||||
return nil, errResponse(fmt.Errorf(`invalid parameter "datasource_type": not supported value %q`, dsType), http.StatusBadRequest)
|
||||
}
|
||||
}
|
||||
return gf, nil
|
||||
}
|
||||
|
||||
func (gf *groupsFilter) matches(group *rule.Group) bool {
|
||||
if len(gf.groupNames) > 0 && !slices.Contains(gf.groupNames, group.Name) {
|
||||
return false
|
||||
}
|
||||
if len(gf.files) > 0 && !slices.Contains(gf.files, group.File) {
|
||||
return false
|
||||
}
|
||||
if len(gf.dsType.Name) > 0 && gf.dsType.String() != group.Type.String() {
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// see https://prometheus.io/docs/prometheus/latest/querying/api/#rules
|
||||
type rulesFilter struct {
|
||||
gf *groupsFilter
|
||||
ruleNames []string
|
||||
ruleType string
|
||||
excludeAlerts bool
|
||||
states []string
|
||||
maxGroups int
|
||||
pageNum int
|
||||
search string
|
||||
extendedStates bool
|
||||
files []string
|
||||
groupNames []string
|
||||
ruleNames []string
|
||||
ruleType string
|
||||
excludeAlerts bool
|
||||
filter string
|
||||
dsType config.Type
|
||||
}
|
||||
|
||||
func newRulesFilter(r *http.Request) (*rulesFilter, *httpserver.ErrorWithStatusCode) {
|
||||
gf, err := newGroupsFilter(r)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
func newRulesFilter(r *http.Request) (*rulesFilter, error) {
|
||||
rf := &rulesFilter{}
|
||||
query := r.URL.Query()
|
||||
|
||||
var rf rulesFilter
|
||||
rf.gf = gf
|
||||
vs := r.Form
|
||||
ruleTypeParam := vs.Get("type")
|
||||
ruleTypeParam := query.Get("type")
|
||||
if len(ruleTypeParam) > 0 {
|
||||
if ruleType, ok := ruleTypeMap[ruleTypeParam]; ok {
|
||||
rf.ruleType = ruleType
|
||||
@@ -356,155 +300,102 @@ func newRulesFilter(r *http.Request) (*rulesFilter, *httpserver.ErrorWithStatusC
|
||||
}
|
||||
}
|
||||
|
||||
states := vs["state"]
|
||||
if len(states) == 0 {
|
||||
states = vs["filter"]
|
||||
dsType := query.Get("datasource_type")
|
||||
if len(dsType) > 0 {
|
||||
if config.SupportedType(dsType) {
|
||||
rf.dsType = config.NewRawType(dsType)
|
||||
} else {
|
||||
return nil, errResponse(fmt.Errorf(`invalid parameter "datasource_type": not supported value %q`, dsType), http.StatusBadRequest)
|
||||
}
|
||||
}
|
||||
for _, s := range states {
|
||||
values := strings.Split(s, ",")
|
||||
for _, v := range values {
|
||||
if len(v) == 0 {
|
||||
continue
|
||||
}
|
||||
if !slices.Contains(ruleStates, v) {
|
||||
return nil, errResponse(fmt.Errorf(`invalid parameter "state": contains not supported value %q`, v), http.StatusBadRequest)
|
||||
}
|
||||
// Replace grafana states with supported internal states
|
||||
switch v {
|
||||
case "noData":
|
||||
v = "nomatch"
|
||||
case "normal":
|
||||
v = "inactive"
|
||||
case "error":
|
||||
v = "unhealthy"
|
||||
}
|
||||
rf.states = append(rf.states, v)
|
||||
|
||||
filter := strings.ToLower(query.Get("filter"))
|
||||
if len(filter) > 0 {
|
||||
if filter == "nomatch" || filter == "unhealthy" {
|
||||
rf.filter = filter
|
||||
} else {
|
||||
return nil, errResponse(fmt.Errorf(`invalid parameter "filter": not supported value %q`, filter), http.StatusBadRequest)
|
||||
}
|
||||
}
|
||||
|
||||
rf.excludeAlerts = httputil.GetBool(r, "exclude_alerts")
|
||||
rf.extendedStates = httputil.GetBool(r, "extended_states")
|
||||
rf.ruleNames = append([]string{}, vs["rule_name[]"]...)
|
||||
rf.search = strings.ToLower(vs.Get("search"))
|
||||
|
||||
pageNum := vs.Get("page_num")
|
||||
maxGroups := vs.Get("group_limit")
|
||||
if pageNum != "" {
|
||||
if maxGroups == "" {
|
||||
return nil, errResponse(fmt.Errorf(`"group_limit" needs to be present in order to paginate over the groups`), http.StatusBadRequest)
|
||||
}
|
||||
v, err := strconv.Atoi(pageNum)
|
||||
if err != nil || v <= 0 {
|
||||
return nil, errResponse(fmt.Errorf(`"page_num" is expected to be a positive number, found %q`, pageNum), http.StatusBadRequest)
|
||||
}
|
||||
rf.pageNum = v
|
||||
}
|
||||
if maxGroups != "" {
|
||||
v, err := strconv.Atoi(maxGroups)
|
||||
if err != nil || v <= 0 {
|
||||
return nil, errResponse(fmt.Errorf(`"group_limit" is expected to be a positive number, found %q`, maxGroups), http.StatusBadRequest)
|
||||
}
|
||||
rf.maxGroups = v
|
||||
}
|
||||
return &rf, nil
|
||||
rf.ruleNames = append([]string{}, r.Form["rule_name[]"]...)
|
||||
rf.groupNames = append([]string{}, r.Form["rule_group[]"]...)
|
||||
rf.files = append([]string{}, r.Form["file[]"]...)
|
||||
return rf, nil
|
||||
}
|
||||
|
||||
func (rf *rulesFilter) matchesRule(r *rule.ApiRule) bool {
|
||||
if rf.ruleType != "" && rf.ruleType != r.Type {
|
||||
func (rf *rulesFilter) matchesGroup(group *rule.Group) bool {
|
||||
if len(rf.groupNames) > 0 && !slices.Contains(rf.groupNames, group.Name) {
|
||||
return false
|
||||
}
|
||||
if len(rf.ruleNames) > 0 && !slices.Contains(rf.ruleNames, r.Name) {
|
||||
if len(rf.files) > 0 && !slices.Contains(rf.files, group.File) {
|
||||
return false
|
||||
}
|
||||
if len(rf.states) == 0 {
|
||||
return true
|
||||
if len(rf.dsType.Name) > 0 && rf.dsType.String() != group.Type.String() {
|
||||
return false
|
||||
}
|
||||
return slices.Contains(rf.states, r.State)
|
||||
return true
|
||||
}
|
||||
|
||||
func (rh *requestHandler) groups(rf *rulesFilter) *listGroupsResponse {
|
||||
func (rh *requestHandler) groups(rf *rulesFilter) []*rule.ApiGroup {
|
||||
rh.m.groupsMu.RLock()
|
||||
defer rh.m.groupsMu.RUnlock()
|
||||
|
||||
skipGroups := (rf.pageNum - 1) * rf.maxGroups
|
||||
lr := &listGroupsResponse{
|
||||
Status: "success",
|
||||
}
|
||||
lr.Data.Groups = make([]*rule.ApiGroup, 0)
|
||||
if skipGroups >= len(rh.m.groups) {
|
||||
return lr
|
||||
}
|
||||
// sort list of groups for deterministic output
|
||||
groups := make([]*rule.Group, 0, len(rh.m.groups))
|
||||
groups := make([]*rule.ApiGroup, 0)
|
||||
for _, group := range rh.m.groups {
|
||||
groups = append(groups, group)
|
||||
}
|
||||
|
||||
slices.SortFunc(groups, func(a, b *rule.Group) int {
|
||||
nameCmp := cmp.Compare(a.Name, b.Name)
|
||||
if nameCmp != 0 {
|
||||
return nameCmp
|
||||
}
|
||||
return cmp.Compare(a.File, b.File)
|
||||
})
|
||||
for _, group := range groups {
|
||||
if !rf.gf.matches(group) {
|
||||
if !rf.matchesGroup(group) {
|
||||
continue
|
||||
}
|
||||
groupFound := len(rf.search) == 0 || strings.Contains(strings.ToLower(group.Name), rf.search) || strings.Contains(strings.ToLower(group.File), rf.search)
|
||||
g := group.ToAPI()
|
||||
// the returned list should always be non-nil
|
||||
// https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4221
|
||||
filteredRules := make([]rule.ApiRule, 0)
|
||||
for _, rule := range g.Rules {
|
||||
if !groupFound && !strings.Contains(strings.ToLower(rule.Name), rf.search) {
|
||||
if rf.ruleType != "" && rf.ruleType != rule.Type {
|
||||
continue
|
||||
}
|
||||
if rf.extendedStates {
|
||||
rule.ExtendState()
|
||||
if len(rf.ruleNames) > 0 && !slices.Contains(rf.ruleNames, rule.Name) {
|
||||
continue
|
||||
}
|
||||
if !rf.matchesRule(&rule) {
|
||||
if (rule.LastError == "" && rf.filter == "unhealthy") || (!isNoMatch(rule) && rf.filter == "nomatch") {
|
||||
continue
|
||||
}
|
||||
if rf.excludeAlerts {
|
||||
rule.Alerts = nil
|
||||
}
|
||||
g.States[rule.State]++
|
||||
if rule.LastError != "" {
|
||||
g.Unhealthy++
|
||||
} else {
|
||||
g.Healthy++
|
||||
}
|
||||
if isNoMatch(rule) {
|
||||
g.NoMatch++
|
||||
}
|
||||
filteredRules = append(filteredRules, rule)
|
||||
}
|
||||
if len(g.Rules) == 0 || len(filteredRules) > 0 {
|
||||
if rf.maxGroups > 0 {
|
||||
lr.TotalGroups++
|
||||
lr.TotalRules += len(filteredRules)
|
||||
}
|
||||
if skipGroups > 0 {
|
||||
skipGroups--
|
||||
continue
|
||||
}
|
||||
if rf.maxGroups == 0 || len(lr.Data.Groups) < rf.maxGroups {
|
||||
g.Rules = filteredRules
|
||||
lr.Data.Groups = append(lr.Data.Groups, g)
|
||||
}
|
||||
g.Rules = filteredRules
|
||||
groups = append(groups, g)
|
||||
}
|
||||
// sort list of groups for deterministic output
|
||||
slices.SortFunc(groups, func(a, b *rule.ApiGroup) int {
|
||||
if a.Name != b.Name {
|
||||
return strings.Compare(a.Name, b.Name)
|
||||
}
|
||||
}
|
||||
if rf.maxGroups > 0 {
|
||||
lr.Page = rf.pageNum
|
||||
lr.TotalPages = max(int(math.Ceil(float64(lr.TotalGroups)/float64(rf.maxGroups))), 1)
|
||||
}
|
||||
return lr
|
||||
return strings.Compare(a.File, b.File)
|
||||
})
|
||||
return groups
|
||||
}
|
||||
|
||||
func (rh *requestHandler) listGroups(rf *rulesFilter) ([]byte, *httpserver.ErrorWithStatusCode) {
|
||||
lr := rh.groups(rf)
|
||||
if rf.pageNum > 1 && len(lr.Data.Groups) == 0 {
|
||||
return nil, errResponse(fmt.Errorf(`page_num exceeds total amount of pages`), http.StatusBadRequest)
|
||||
}
|
||||
if lr.Page > lr.TotalPages {
|
||||
return nil, errResponse(fmt.Errorf(`page_num=%d exceeds total amount of pages in result=%d`, lr.Page, lr.TotalPages), http.StatusBadRequest)
|
||||
}
|
||||
func (rh *requestHandler) listGroups(rf *rulesFilter) ([]byte, error) {
|
||||
lr := listGroupsResponse{Status: "success"}
|
||||
lr.Data.Groups = rh.groups(rf)
|
||||
b, err := json.Marshal(lr)
|
||||
if err != nil {
|
||||
return nil, errResponse(fmt.Errorf(`error encoding list of groups: %w`, err), http.StatusInternalServerError)
|
||||
return nil, &httpserver.ErrorWithStatusCode{
|
||||
Err: fmt.Errorf(`error encoding list of active alerts: %w`, err),
|
||||
StatusCode: http.StatusInternalServerError,
|
||||
}
|
||||
}
|
||||
return b, nil
|
||||
}
|
||||
@@ -521,18 +412,18 @@ func (rh *requestHandler) groupAlerts() []rule.GroupAlerts {
|
||||
defer rh.m.groupsMu.RUnlock()
|
||||
|
||||
var gAlerts []rule.GroupAlerts
|
||||
for _, group := range rh.m.groups {
|
||||
for _, g := range rh.m.groups {
|
||||
var alerts []*rule.ApiAlert
|
||||
g := group.ToAPI()
|
||||
for _, r := range g.Rules {
|
||||
if r.Type != rule.TypeAlerting {
|
||||
a, ok := r.(*rule.AlertingRule)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
alerts = append(alerts, r.Alerts...)
|
||||
alerts = append(alerts, a.AlertsToAPI()...)
|
||||
}
|
||||
if len(alerts) > 0 {
|
||||
gAlerts = append(gAlerts, rule.GroupAlerts{
|
||||
Group: g,
|
||||
Group: g.ToAPI(),
|
||||
Alerts: alerts,
|
||||
})
|
||||
}
|
||||
@@ -543,22 +434,22 @@ func (rh *requestHandler) groupAlerts() []rule.GroupAlerts {
|
||||
return gAlerts
|
||||
}
|
||||
|
||||
func (rh *requestHandler) listAlerts(gf *groupsFilter) ([]byte, *httpserver.ErrorWithStatusCode) {
|
||||
func (rh *requestHandler) listAlerts(rf *rulesFilter) ([]byte, error) {
|
||||
rh.m.groupsMu.RLock()
|
||||
defer rh.m.groupsMu.RUnlock()
|
||||
|
||||
lr := listAlertsResponse{Status: "success"}
|
||||
lr.Data.Alerts = make([]*rule.ApiAlert, 0)
|
||||
for _, group := range rh.m.groups {
|
||||
if !gf.matches(group) {
|
||||
if !rf.matchesGroup(group) {
|
||||
continue
|
||||
}
|
||||
g := group.ToAPI()
|
||||
for _, r := range g.Rules {
|
||||
if r.Type != rule.TypeAlerting {
|
||||
for _, r := range group.Rules {
|
||||
a, ok := r.(*rule.AlertingRule)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
lr.Data.Alerts = append(lr.Data.Alerts, r.Alerts...)
|
||||
lr.Data.Alerts = append(lr.Data.Alerts, a.AlertsToAPI()...)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -569,7 +460,10 @@ func (rh *requestHandler) listAlerts(gf *groupsFilter) ([]byte, *httpserver.Erro
|
||||
|
||||
b, err := json.Marshal(lr)
|
||||
if err != nil {
|
||||
return nil, errResponse(fmt.Errorf(`error encoding list of active alerts: %w`, err), http.StatusInternalServerError)
|
||||
return nil, &httpserver.ErrorWithStatusCode{
|
||||
Err: fmt.Errorf(`error encoding list of active alerts: %w`, err),
|
||||
StatusCode: http.StatusInternalServerError,
|
||||
}
|
||||
}
|
||||
return b, nil
|
||||
}
|
||||
@@ -581,7 +475,7 @@ type listNotifiersResponse struct {
|
||||
} `json:"data"`
|
||||
}
|
||||
|
||||
func (rh *requestHandler) listNotifiers() ([]byte, *httpserver.ErrorWithStatusCode) {
|
||||
func (rh *requestHandler) listNotifiers() ([]byte, error) {
|
||||
targets := notifier.GetTargets()
|
||||
|
||||
lr := listNotifiersResponse{Status: "success"}
|
||||
@@ -603,7 +497,10 @@ func (rh *requestHandler) listNotifiers() ([]byte, *httpserver.ErrorWithStatusCo
|
||||
|
||||
b, err := json.Marshal(lr)
|
||||
if err != nil {
|
||||
return nil, errResponse(fmt.Errorf(`error encoding list of notifiers: %w`, err), http.StatusInternalServerError)
|
||||
return nil, &httpserver.ErrorWithStatusCode{
|
||||
Err: fmt.Errorf(`error encoding list of notifiers: %w`, err),
|
||||
StatusCode: http.StatusInternalServerError,
|
||||
}
|
||||
}
|
||||
return b, nil
|
||||
}
|
||||
@@ -614,8 +511,3 @@ func errResponse(err error, sc int) *httpserver.ErrorWithStatusCode {
|
||||
StatusCode: sc,
|
||||
}
|
||||
}
|
||||
|
||||
func errJson(w http.ResponseWriter, r *http.Request, err *httpserver.ErrorWithStatusCode) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
httpserver.Errorf(w, r, `{"error":%q,"errorType":%d}`, err, err.StatusCode)
|
||||
}
|
||||
|
||||
@@ -9,10 +9,9 @@
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/vmalertutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/rule"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
|
||||
) %}
|
||||
|
||||
{% func Controls(prefix, currentIcon, currentText string, icons, states map[string]string, search bool) %}
|
||||
{% func Controls(prefix, currentIcon, currentText string, icons, filters map[string]string, search bool) %}
|
||||
<div class="btn-toolbar mb-3" role="toolbar">
|
||||
<div class="d-flex gap-2 justify-content-between w-100">
|
||||
<div class="d-flex gap-2 align-items-center">
|
||||
@@ -28,10 +27,10 @@
|
||||
<use href="{%s prefix %}static/icons/icons.svg#expand"/>
|
||||
</svg>
|
||||
</a>
|
||||
{% if len(states) > 0 %}
|
||||
{% if len(filters) > 0 %}
|
||||
<span class="d-none d-md-inline-block">Filter by status:</span>
|
||||
<svg class="d-md-none" width="20" height="20">
|
||||
<use href="{%s prefix %}static/icons/icons.svg#state">
|
||||
<use href="{%s prefix %}static/icons/icons.svg#filter">
|
||||
</svg>
|
||||
<div class="dropdown">
|
||||
<button
|
||||
@@ -46,10 +45,10 @@
|
||||
</svg>
|
||||
</button>
|
||||
<ul class="dropdown-menu">
|
||||
{% for key, title := range states %}
|
||||
{% for key, title := range filters %}
|
||||
{% if title != currentText %}
|
||||
<li>
|
||||
<a class="dropdown-item" onclick="groupForState('{%s key %}')">
|
||||
<a class="dropdown-item" onclick="groupFilter('{%s key %}')">
|
||||
<span class="d-none d-md-inline-block">{%s title %}</span>
|
||||
<svg class="d-md-none" width="22" height="22">
|
||||
<use href="{%s prefix %}static/icons/icons.svg#{%s icons[key] %}"/>
|
||||
@@ -79,8 +78,6 @@
|
||||
{% func Welcome(r *http.Request) %}
|
||||
{%= tpl.Header(r, navItems, "vmalert", getLastConfigError()) %}
|
||||
<p>
|
||||
Version {%s buildinfo.Version %} <br>
|
||||
|
||||
API:<br>
|
||||
{% for _, p := range apiLinks %}
|
||||
{%code p, doc := p[0], p[1] %}
|
||||
@@ -97,10 +94,10 @@
|
||||
{%= tpl.Footer(r) %}
|
||||
{% endfunc %}
|
||||
|
||||
{% func ListGroups(r *http.Request, groups []*rule.ApiGroup, state string) %}
|
||||
{% func ListGroups(r *http.Request, groups []*rule.ApiGroup, filter string) %}
|
||||
{%code
|
||||
prefix := vmalertutil.Prefix(r.URL.Path)
|
||||
states := map[string]string{
|
||||
filters := map[string]string{
|
||||
"": "All",
|
||||
"unhealthy": "Unhealthy",
|
||||
"nomatch": "No Match",
|
||||
@@ -110,14 +107,14 @@
|
||||
"unhealthy": "unhealthy",
|
||||
"nomatch": "nomatch",
|
||||
}
|
||||
currentText := states[state]
|
||||
currentIcon := icons[state]
|
||||
currentText := filters[filter]
|
||||
currentIcon := icons[filter]
|
||||
%}
|
||||
{%= tpl.Header(r, navItems, "Groups", getLastConfigError()) %}
|
||||
{%= Controls(prefix, currentIcon, currentText, icons, states, true) %}
|
||||
{%= Controls(prefix, currentIcon, currentText, icons, filters, true) %}
|
||||
{% if len(groups) > 0 %}
|
||||
{% for _, g := range groups %}
|
||||
<div id="group-{%s g.ID %}" class="w-100 border-0 flex-column vm-group{% if g.States["unhealthy"] > 0 %} alert-danger{% endif %}">
|
||||
<div id="group-{%s g.ID %}" class="w-100 border-0 flex-column vm-group{% if g.Unhealthy > 0 %} alert-danger{% endif %}">
|
||||
<span class="d-flex justify-content-between">
|
||||
<a
|
||||
class="vm-group-search"
|
||||
@@ -130,9 +127,9 @@
|
||||
data-bs-target="#item-{%s g.ID %}"
|
||||
>
|
||||
<span class="d-flex gap-2">
|
||||
{% if g.States["unhealthy"] > 0 %}<span class="badge bg-danger" title="Number of rules with status Error">{%d g.States["unhealthy"] %}</span> {% endif %}
|
||||
{% if g.States["nomatch"] > 0 %}<span class="badge bg-warning" title="Number of rules with status NoMatch">{%d g.States["nomatch"] %}</span> {% endif %}
|
||||
<span class="badge bg-success" title="Number of rules with status Ok">{%d g.States["ok"] %}</span>
|
||||
{% if g.Unhealthy > 0 %}<span class="badge bg-danger" title="Number of rules with status Error">{%d g.Unhealthy %}</span> {% endif %}
|
||||
{% if g.NoMatch > 0 %}<span class="badge bg-warning" title="Number of rules with status NoMatch">{%d g.NoMatch %}</span> {% endif %}
|
||||
<span class="badge bg-success" title="Number of rules with status Ok">{%d g.Healthy %}</span>
|
||||
</span>
|
||||
</span>
|
||||
</span>
|
||||
@@ -189,7 +186,7 @@
|
||||
<b>record:</b> {%s r.Name %}
|
||||
{% endif %}
|
||||
|
|
||||
{%= seriesFetchedWarn(prefix, &r) %}
|
||||
{%= seriesFetchedWarn(prefix, r) %}
|
||||
<span><a target="_blank" href="{%s prefix+r.WebLink() %}">Details</a></span>
|
||||
</div>
|
||||
<div class="col-12">
|
||||
@@ -476,7 +473,7 @@
|
||||
{% endfunc %}
|
||||
|
||||
|
||||
{% func Rule(r *http.Request, rule rule.ApiRule) %}
|
||||
{% func RuleDetails(r *http.Request, rule rule.ApiRule) %}
|
||||
{%code prefix := vmalertutil.Prefix(r.URL.Path) %}
|
||||
{%= tpl.Header(r, navItems, "", getLastConfigError()) %}
|
||||
{%code
|
||||
@@ -605,11 +602,11 @@
|
||||
<table class="table table-striped table-hover table-sm">
|
||||
<thead>
|
||||
<tr>
|
||||
<th scope="col" title="The time when the rule was executed">Updated at</th>
|
||||
<th scope="col" title="The time when event was created">Updated at</th>
|
||||
<th scope="col" class="w-10 text-center" title="How many series expression returns. Each series will represent an alert.">Series returned</th>
|
||||
{% if seriesFetchedEnabled %}<th scope="col" class="w-10 text-center" title="How many series were scanned by datasource during the evaluation">Series fetched</th>{% endif %}
|
||||
<th scope="col" class="w-10 text-center" title="How many seconds request took">Duration</th>
|
||||
<th scope="col" class="text-center" title="The time used in execution query request">Execution timestamp</th>
|
||||
<th scope="col" class="text-center" title="Time used for rule execution">Executed at</th>
|
||||
<th scope="col" class="text-center" title="cURL command with request example">cURL</th>
|
||||
</tr>
|
||||
</thead>
|
||||
@@ -661,8 +658,8 @@
|
||||
<span class="badge bg-warning text-dark" title="This firing state is kept because of `keep_firing_for`">stabilizing</span>
|
||||
{% endfunc %}
|
||||
|
||||
{% func seriesFetchedWarn(prefix string, r *rule.ApiRule) %}
|
||||
{% if r.IsNoMatch() %}
|
||||
{% func seriesFetchedWarn(prefix string, r rule.ApiRule) %}
|
||||
{% if isNoMatch(r) %}
|
||||
<svg
|
||||
data-bs-toggle="tooltip"
|
||||
title="No match! This rule's last evaluation hasn't selected any time series from the datasource.
|
||||
@@ -673,3 +670,9 @@
|
||||
</svg>
|
||||
{% endif %}
|
||||
{% endfunc %}
|
||||
|
||||
{%code
|
||||
func isNoMatch (r rule.ApiRule) bool {
|
||||
return r.LastSamples == 0 && r.LastSeriesFetched != nil && *r.LastSeriesFetched == 0
|
||||
}
|
||||
%}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -210,7 +210,7 @@ func TestHandler(t *testing.T) {
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("/api/v1/rules&states", func(t *testing.T) {
|
||||
t.Run("/api/v1/rules&filters", func(t *testing.T) {
|
||||
check := func(url string, statusCode, expGroups, expRules int) {
|
||||
t.Helper()
|
||||
lr := listGroupsResponse{}
|
||||
@@ -252,15 +252,9 @@ func TestHandler(t *testing.T) {
|
||||
check("/api/v1/rules?rule_group[]=group&file[]=foo", 200, 0, 0)
|
||||
check("/api/v1/rules?rule_group[]=group&file[]=rules.yaml", 200, 3, 6)
|
||||
|
||||
check("/api/v1/rules?rule_group[]=group&file[]=rules.yaml&rule_name[]=foo", 200, 0, 0)
|
||||
check("/api/v1/rules?rule_group[]=group&file[]=rules.yaml&rule_name[]=foo", 200, 3, 0)
|
||||
check("/api/v1/rules?rule_group[]=group&file[]=rules.yaml&rule_name[]=alert", 200, 3, 3)
|
||||
check("/api/v1/rules?rule_group[]=group&file[]=rules.yaml&rule_name[]=alert&rule_name[]=record", 200, 3, 6)
|
||||
|
||||
check("/api/v1/rules?group_limit=1", 200, 1, 2)
|
||||
check("/api/v1/rules?group_limit=1&type=alert", 200, 1, 1)
|
||||
check("/api/v1/rules?group_limit=1&type=record", 200, 1, 1)
|
||||
check("/api/v1/rules?group_limit=2", 200, 2, 4)
|
||||
check(fmt.Sprintf("/api/v1/rules?group_limit=1&page_num=%d", 1), 200, 1, 2)
|
||||
})
|
||||
t.Run("/api/v1/rules&exclude_alerts=true", func(t *testing.T) {
|
||||
// check if response returns active alerts by default
|
||||
|
||||
@@ -4,7 +4,6 @@ import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/base64"
|
||||
"errors"
|
||||
"flag"
|
||||
"fmt"
|
||||
"math"
|
||||
@@ -13,7 +12,6 @@ import (
|
||||
"net/url"
|
||||
"os"
|
||||
"regexp"
|
||||
"slices"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
@@ -29,7 +27,6 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs/fscore"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
|
||||
@@ -67,11 +64,10 @@ type AuthConfig struct {
|
||||
type UserInfo struct {
|
||||
Name string `yaml:"name,omitempty"`
|
||||
|
||||
BearerToken string `yaml:"bearer_token,omitempty"`
|
||||
JWT *JWTConfig `yaml:"jwt,omitempty"`
|
||||
AuthToken string `yaml:"auth_token,omitempty"`
|
||||
Username string `yaml:"username,omitempty"`
|
||||
Password string `yaml:"password,omitempty"`
|
||||
BearerToken string `yaml:"bearer_token,omitempty"`
|
||||
AuthToken string `yaml:"auth_token,omitempty"`
|
||||
Username string `yaml:"username,omitempty"`
|
||||
Password string `yaml:"password,omitempty"`
|
||||
|
||||
URLPrefix *URLPrefix `yaml:"url_prefix,omitempty"`
|
||||
DiscoverBackendIPs *bool `yaml:"discover_backend_ips,omitempty"`
|
||||
@@ -92,79 +88,30 @@ type UserInfo struct {
|
||||
|
||||
MetricLabels map[string]string `yaml:"metric_labels,omitempty"`
|
||||
|
||||
AccessLog *AccessLog `yaml:"access_log,omitempty"`
|
||||
|
||||
concurrencyLimitCh chan struct{}
|
||||
concurrencyLimitReached *metrics.Counter
|
||||
|
||||
rt http.RoundTripper
|
||||
|
||||
requests *metrics.Counter
|
||||
requestErrors *metrics.Counter
|
||||
backendRequests *metrics.Counter
|
||||
backendErrors *metrics.Counter
|
||||
requestsDuration *metrics.Summary
|
||||
}
|
||||
|
||||
// AccessLog represents configuration for access log settings.
|
||||
type AccessLog struct {
|
||||
Filters *AccessLogFilters `yaml:"filters"`
|
||||
}
|
||||
|
||||
// AccessLogFilters represents list of filters for access logs printing
|
||||
type AccessLogFilters struct {
|
||||
// SkipStatusCodes is a list of HTTP status codes for which access logs will be skipped
|
||||
SkipStatusCodes []int `yaml:"skip_status_codes"`
|
||||
}
|
||||
|
||||
func (ui *UserInfo) logRequest(r *http.Request, userName string, statusCode int, duration time.Duration) {
|
||||
if ui.AccessLog == nil {
|
||||
return
|
||||
}
|
||||
filters := ui.AccessLog.Filters
|
||||
if filters != nil && len(filters.SkipStatusCodes) > 0 {
|
||||
if slices.Contains(filters.SkipStatusCodes, statusCode) {
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
remoteAddr := httpserver.GetQuotedRemoteAddr(r)
|
||||
requestURI := httpserver.GetRequestURI(r)
|
||||
logger.Infof("access_log request_host=%q request_uri=%q status_code=%d remote_addr=%s user_agent=%q referer=%q duration_ms=%d username=%q",
|
||||
r.Host, requestURI, statusCode, remoteAddr, r.UserAgent(), r.Referer(), duration.Milliseconds(), userName)
|
||||
}
|
||||
|
||||
// HeadersConf represents config for request and response headers.
|
||||
type HeadersConf struct {
|
||||
RequestHeaders []*Header `yaml:"headers,omitempty"`
|
||||
ResponseHeaders []*Header `yaml:"response_headers,omitempty"`
|
||||
KeepOriginalHost *bool `yaml:"keep_original_host,omitempty"`
|
||||
hasAnyPlaceHolders bool
|
||||
RequestHeaders []*Header `yaml:"headers,omitempty"`
|
||||
ResponseHeaders []*Header `yaml:"response_headers,omitempty"`
|
||||
KeepOriginalHost *bool `yaml:"keep_original_host,omitempty"`
|
||||
}
|
||||
|
||||
func (ui *UserInfo) beginConcurrencyLimit(ctx context.Context) error {
|
||||
func (ui *UserInfo) beginConcurrencyLimit() error {
|
||||
select {
|
||||
case ui.concurrencyLimitCh <- struct{}{}:
|
||||
return nil
|
||||
default:
|
||||
// The number of concurrently executed requests for the given user equals the limit.
|
||||
// Wait until some of the currently executed requests are finished, so the current request could be executed.
|
||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10078
|
||||
select {
|
||||
case ui.concurrencyLimitCh <- struct{}{}:
|
||||
return nil
|
||||
case <-ctx.Done():
|
||||
err := ctx.Err()
|
||||
if errors.Is(err, context.DeadlineExceeded) {
|
||||
// The current request couldn't be executed until the request timeout.
|
||||
ui.concurrencyLimitReached.Inc()
|
||||
return fmt.Errorf("cannot start executing the request during -maxQueueDuration=%s because %d concurrent requests from the user %s are executed",
|
||||
*maxQueueDuration, ui.getMaxConcurrentRequests(), ui.name())
|
||||
}
|
||||
|
||||
return fmt.Errorf("cannot start executing the request because %d concurrent requests from the user %s are executed: %w",
|
||||
ui.getMaxConcurrentRequests(), ui.name(), err)
|
||||
}
|
||||
ui.concurrencyLimitReached.Inc()
|
||||
return fmt.Errorf("cannot handle more than %d concurrent requests from user %s", ui.getMaxConcurrentRequests(), ui.name())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -180,28 +127,6 @@ func (ui *UserInfo) getMaxConcurrentRequests() int {
|
||||
return mcr
|
||||
}
|
||||
|
||||
func (ui *UserInfo) stopHealthChecks() {
|
||||
if ui == nil {
|
||||
return
|
||||
}
|
||||
|
||||
if ui.URLPrefix != nil {
|
||||
bus := ui.URLPrefix.bus.Load()
|
||||
bus.stopHealthChecks()
|
||||
}
|
||||
if ui.DefaultURL != nil {
|
||||
bus := ui.DefaultURL.bus.Load()
|
||||
bus.stopHealthChecks()
|
||||
}
|
||||
for i := range ui.URLMaps {
|
||||
um := &ui.URLMaps[i]
|
||||
if um.URLPrefix != nil {
|
||||
bus := um.URLPrefix.bus.Load()
|
||||
bus.stopHealthChecks()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Header is `Name: Value` http header, which must be added to the proxied request.
|
||||
type Header struct {
|
||||
Name string
|
||||
@@ -337,7 +262,7 @@ type URLPrefix struct {
|
||||
// the list of backend urls
|
||||
//
|
||||
// the list can be dynamically updated if `discover_backend_ips` option is set.
|
||||
bus atomic.Pointer[backendURLs]
|
||||
bus atomic.Pointer[[]*backendURL]
|
||||
|
||||
// if this option is set, then backend ips for busOriginal are periodically re-discovered and put to bus.
|
||||
discoverBackendIPs bool
|
||||
@@ -361,116 +286,21 @@ func (up *URLPrefix) setLoadBalancingPolicy(loadBalancingPolicy string) error {
|
||||
}
|
||||
}
|
||||
|
||||
type backendURLs struct {
|
||||
bhc backendHealthCheck
|
||||
bus []*backendURL
|
||||
}
|
||||
|
||||
type backendHealthCheck struct {
|
||||
ctx context.Context
|
||||
// mu protects fields below
|
||||
cancel func()
|
||||
mu sync.Mutex
|
||||
isStopped bool
|
||||
wg sync.WaitGroup
|
||||
}
|
||||
|
||||
func (bhc *backendHealthCheck) run(hc func()) {
|
||||
bhc.mu.Lock()
|
||||
defer bhc.mu.Unlock()
|
||||
if bhc.isStopped {
|
||||
return
|
||||
}
|
||||
bhc.wg.Go(hc)
|
||||
}
|
||||
|
||||
func (bhc *backendHealthCheck) stop() {
|
||||
bhc.mu.Lock()
|
||||
bhc.cancel()
|
||||
bhc.isStopped = true
|
||||
bhc.mu.Unlock()
|
||||
bhc.wg.Wait()
|
||||
}
|
||||
|
||||
func newBackendURLs() *backendURLs {
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
return &backendURLs{
|
||||
bhc: backendHealthCheck{
|
||||
ctx: ctx,
|
||||
cancel: cancel,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func (bus *backendURLs) add(u *url.URL) {
|
||||
bus.bus = append(bus.bus, &backendURL{
|
||||
url: u,
|
||||
bhc: &bus.bhc,
|
||||
hasPlaceHolders: hasAnyPlaceholders(u),
|
||||
})
|
||||
}
|
||||
|
||||
func (bus *backendURLs) stopHealthChecks() {
|
||||
bus.bhc.stop()
|
||||
}
|
||||
|
||||
type backendURL struct {
|
||||
broken atomic.Bool
|
||||
|
||||
bhc *backendHealthCheck
|
||||
|
||||
brokenDeadline atomic.Uint64
|
||||
concurrentRequests atomic.Int32
|
||||
|
||||
url *url.URL
|
||||
|
||||
hasPlaceHolders bool
|
||||
}
|
||||
|
||||
func (bu *backendURL) isBroken() bool {
|
||||
return bu.broken.Load()
|
||||
ct := fasttime.UnixTimestamp()
|
||||
return ct < bu.brokenDeadline.Load()
|
||||
}
|
||||
|
||||
func (bu *backendURL) setBroken() {
|
||||
if bu.broken.CompareAndSwap(false, true) {
|
||||
bu.bhc.run(func() {
|
||||
bu.runHealthCheck()
|
||||
bu.broken.Store(false)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func (bu *backendURL) runHealthCheck() {
|
||||
port := bu.url.Port()
|
||||
if port == "" {
|
||||
port = "80"
|
||||
}
|
||||
addr := net.JoinHostPort(bu.url.Hostname(), port)
|
||||
|
||||
t := time.NewTicker(*failTimeout)
|
||||
defer t.Stop()
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-t.C:
|
||||
// Verify network connectivity via TCP dial before marking backend healthy.
|
||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9997
|
||||
ctx, cancel := context.WithTimeout(bu.bhc.ctx, time.Second)
|
||||
c, err := netutil.Dialer.DialContext(ctx, "tcp", addr)
|
||||
cancel()
|
||||
if err != nil {
|
||||
if errors.Is(bu.bhc.ctx.Err(), context.Canceled) {
|
||||
return
|
||||
}
|
||||
logger.Warnf("ignoring the backend at %s for %s because of dial error: %s", addr, *failTimeout, err)
|
||||
continue
|
||||
}
|
||||
|
||||
_ = c.Close()
|
||||
return
|
||||
case <-bu.bhc.ctx.Done():
|
||||
return
|
||||
}
|
||||
}
|
||||
deadline := fasttime.UnixTimestamp() + uint64((*failTimeout).Seconds())
|
||||
bu.brokenDeadline.Store(deadline)
|
||||
}
|
||||
|
||||
func (bu *backendURL) get() {
|
||||
@@ -482,8 +312,8 @@ func (bu *backendURL) put() {
|
||||
}
|
||||
|
||||
func (up *URLPrefix) getBackendsCount() int {
|
||||
bus := up.bus.Load()
|
||||
return len(bus.bus)
|
||||
pbus := up.bus.Load()
|
||||
return len(*pbus)
|
||||
}
|
||||
|
||||
// getBackendURL returns the backendURL depending on the load balance policy.
|
||||
@@ -494,15 +324,16 @@ func (up *URLPrefix) getBackendsCount() int {
|
||||
func (up *URLPrefix) getBackendURL() *backendURL {
|
||||
up.discoverBackendAddrsIfNeeded()
|
||||
|
||||
bus := up.bus.Load()
|
||||
if len(bus.bus) == 0 {
|
||||
pbus := up.bus.Load()
|
||||
bus := *pbus
|
||||
if len(bus) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
if up.loadBalancingPolicy == "first_available" {
|
||||
return getFirstAvailableBackendURL(bus.bus)
|
||||
return getFirstAvailableBackendURL(bus)
|
||||
}
|
||||
return getLeastLoadedBackendURL(bus.bus, &up.n)
|
||||
return getLeastLoadedBackendURL(bus, &up.n)
|
||||
}
|
||||
|
||||
func (up *URLPrefix) discoverBackendAddrsIfNeeded() {
|
||||
@@ -576,24 +407,25 @@ func (up *URLPrefix) discoverBackendAddrsIfNeeded() {
|
||||
cancel()
|
||||
|
||||
// generate new backendURLs for the resolved IPs
|
||||
busNew := newBackendURLs()
|
||||
var busNew []*backendURL
|
||||
for _, bu := range up.busOriginal {
|
||||
host := bu.Hostname()
|
||||
for _, addr := range hostToAddrs[host] {
|
||||
buCopy := *bu
|
||||
buCopy.Host = addr
|
||||
busNew.add(&buCopy)
|
||||
busNew = append(busNew, &backendURL{
|
||||
url: &buCopy,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
bus := up.bus.Load()
|
||||
if areEqualBackendURLs(bus.bus, busNew.bus) {
|
||||
pbus := up.bus.Load()
|
||||
if areEqualBackendURLs(*pbus, busNew) {
|
||||
return
|
||||
}
|
||||
|
||||
// Store new backend urls
|
||||
up.bus.Store(busNew)
|
||||
bus.stopHealthChecks()
|
||||
up.bus.Store(&busNew)
|
||||
}
|
||||
|
||||
func areEqualBackendURLs(a, b []*backendURL) bool {
|
||||
@@ -610,7 +442,6 @@ func areEqualBackendURLs(a, b []*backendURL) bool {
|
||||
}
|
||||
|
||||
// getFirstAvailableBackendURL returns the first available backendURL, which isn't broken.
|
||||
// If all backendURLs are broken, then returns the first backendURL.
|
||||
//
|
||||
// backendURL.put() must be called on the returned backendURL after the request is complete.
|
||||
func getFirstAvailableBackendURL(bus []*backendURL) *backendURL {
|
||||
@@ -625,31 +456,27 @@ func getFirstAvailableBackendURL(bus []*backendURL) *backendURL {
|
||||
for i := 1; i < len(bus); i++ {
|
||||
if !bus[i].isBroken() {
|
||||
bu = bus[i]
|
||||
bu.get()
|
||||
return bu
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// All backend urls are unavailable, then returning a first one, it could help increase the success rate of the requests。
|
||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10837#issuecomment-4307050980.
|
||||
bu.get()
|
||||
return bu
|
||||
}
|
||||
|
||||
// getLeastLoadedBackendURL returns a non-broken backendURL with the lowest number of concurrent requests.
|
||||
// If all backendURLs are broken, then returns the first backendURL.
|
||||
// getLeastLoadedBackendURL returns the backendURL with the minimum number of concurrent requests.
|
||||
//
|
||||
// backendURL.put() must be called on the returned backendURL after the request is complete.
|
||||
func getLeastLoadedBackendURL(bus []*backendURL, atomicCounter *atomic.Uint32) *backendURL {
|
||||
firstBu := bus[0]
|
||||
if len(bus) == 1 {
|
||||
firstBu.get()
|
||||
return firstBu
|
||||
// Fast path - return the only backend url.
|
||||
bu := bus[0]
|
||||
bu.get()
|
||||
return bu
|
||||
}
|
||||
|
||||
// Slow path - select other backend urls.
|
||||
n := atomicCounter.Add(1) - 1
|
||||
for i := range uint32(len(bus)) {
|
||||
for i := uint32(0); i < uint32(len(bus)); i++ {
|
||||
idx := (n + i) % uint32(len(bus))
|
||||
bu := bus[idx]
|
||||
if bu.isBroken() {
|
||||
@@ -659,7 +486,7 @@ func getLeastLoadedBackendURL(bus []*backendURL, atomicCounter *atomic.Uint32) *
|
||||
// The Load() in front of CompareAndSwap() avoids CAS overhead for items with values bigger than 0.
|
||||
if bu.concurrentRequests.Load() == 0 && bu.concurrentRequests.CompareAndSwap(0, 1) {
|
||||
atomicCounter.CompareAndSwap(n+1, idx+1)
|
||||
// There is no need in the call bu.get(), because we already incremented bu.concurrentRequests above.
|
||||
// There is no need in the call bu.get(), because we already incremented bu.concrrentRequests above.
|
||||
return bu
|
||||
}
|
||||
}
|
||||
@@ -667,7 +494,7 @@ func getLeastLoadedBackendURL(bus []*backendURL, atomicCounter *atomic.Uint32) *
|
||||
// Slow path - return the backend with the minimum number of concurrently executed requests.
|
||||
buMinIdx := n % uint32(len(bus))
|
||||
minRequests := bus[buMinIdx].concurrentRequests.Load()
|
||||
for i := uint32(1); i < uint32(len(bus)); i++ {
|
||||
for i := uint32(0); i < uint32(len(bus)); i++ {
|
||||
idx := (n + i) % uint32(len(bus))
|
||||
bu := bus[idx]
|
||||
if bu.isBroken() {
|
||||
@@ -681,12 +508,6 @@ func getLeastLoadedBackendURL(bus []*backendURL, atomicCounter *atomic.Uint32) *
|
||||
}
|
||||
}
|
||||
buMin := bus[buMinIdx]
|
||||
if buMin.isBroken() {
|
||||
// If all backendURLs are broken, then returns the first backendURL.
|
||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10837#issuecomment-4307050980.
|
||||
firstBu.get()
|
||||
return firstBu
|
||||
}
|
||||
buMin.get()
|
||||
atomicCounter.CompareAndSwap(n+1, buMinIdx+1)
|
||||
return buMin
|
||||
@@ -805,9 +626,11 @@ func initAuthConfig() {
|
||||
configTimestamp.Set(fasttime.UnixTimestamp())
|
||||
|
||||
stopCh = make(chan struct{})
|
||||
authConfigWG.Go(func() {
|
||||
authConfigWG.Add(1)
|
||||
go func() {
|
||||
defer authConfigWG.Done()
|
||||
authConfigReloader(sighupCh)
|
||||
})
|
||||
}()
|
||||
}
|
||||
|
||||
func stopAuthConfig() {
|
||||
@@ -863,9 +686,6 @@ var (
|
||||
// authUsers contains the currently loaded auth users
|
||||
authUsers atomic.Pointer[map[string]*UserInfo]
|
||||
|
||||
// jwt authentication cache
|
||||
jwtAuthCache atomic.Pointer[jwtCache]
|
||||
|
||||
authConfigWG sync.WaitGroup
|
||||
stopCh chan struct{}
|
||||
)
|
||||
@@ -882,7 +702,7 @@ func reloadAuthConfig() (bool, error) {
|
||||
|
||||
ok, err := reloadAuthConfigData(data)
|
||||
if err != nil {
|
||||
return false, fmt.Errorf("failed to parse -auth.config=%q: %w", *authConfigPath, err)
|
||||
return false, fmt.Errorf("failed to pars -auth.config=%q: %w", *authConfigPath, err)
|
||||
}
|
||||
if !ok {
|
||||
return false, nil
|
||||
@@ -905,16 +725,6 @@ func reloadAuthConfigData(data []byte) (bool, error) {
|
||||
return false, fmt.Errorf("failed to parse auth config: %w", err)
|
||||
}
|
||||
|
||||
jui, oidcDP, err := parseJWTUsers(ac)
|
||||
if err != nil {
|
||||
return false, fmt.Errorf("failed to parse JWT users from auth config: %w", err)
|
||||
}
|
||||
oidcDP.startDiscovery()
|
||||
jwtc := &jwtCache{
|
||||
users: jui,
|
||||
oidcDP: oidcDP,
|
||||
}
|
||||
|
||||
m, err := parseAuthConfigUsers(ac)
|
||||
if err != nil {
|
||||
return false, fmt.Errorf("failed to parse users from auth config: %w", err)
|
||||
@@ -922,24 +732,13 @@ func reloadAuthConfigData(data []byte) (bool, error) {
|
||||
|
||||
acPrev := authConfig.Load()
|
||||
if acPrev != nil {
|
||||
acPrev.UnauthorizedUser.stopHealthChecks()
|
||||
for i := range acPrev.Users {
|
||||
acPrev.Users[i].stopHealthChecks()
|
||||
}
|
||||
|
||||
metrics.UnregisterSet(acPrev.ms, true)
|
||||
}
|
||||
metrics.RegisterSet(ac.ms)
|
||||
|
||||
jwtcPrev := jwtAuthCache.Load()
|
||||
if jwtcPrev != nil {
|
||||
jwtcPrev.oidcDP.stopDiscovery()
|
||||
}
|
||||
|
||||
authConfig.Store(ac)
|
||||
authConfigData.Store(&data)
|
||||
authUsers.Store(&m)
|
||||
jwtAuthCache.Store(jwtc)
|
||||
|
||||
return true, nil
|
||||
}
|
||||
@@ -964,18 +763,12 @@ func parseAuthConfig(data []byte) (*AuthConfig, error) {
|
||||
if ui.BearerToken != "" {
|
||||
return nil, fmt.Errorf("field bearer_token can't be specified for unauthorized_user section")
|
||||
}
|
||||
if ui.JWT != nil {
|
||||
return nil, fmt.Errorf("field jwt can't be specified for unauthorized_user section")
|
||||
}
|
||||
if ui.AuthToken != "" {
|
||||
return nil, fmt.Errorf("field auth_token can't be specified for unauthorized_user section")
|
||||
}
|
||||
if ui.Name != "" {
|
||||
return nil, fmt.Errorf("field name can't be specified for unauthorized_user section")
|
||||
}
|
||||
if err := parseJWTPlaceholdersForUserInfo(ui, false); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if err := ui.initURLs(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -985,8 +778,6 @@ func parseAuthConfig(data []byte) (*AuthConfig, error) {
|
||||
return nil, fmt.Errorf("cannot parse metric_labels for unauthorized_user: %w", err)
|
||||
}
|
||||
ui.requests = ac.ms.NewCounter(`vmauth_unauthorized_user_requests_total` + metricLabels)
|
||||
ui.requestErrors = ac.ms.NewCounter(`vmauth_unauthorized_user_request_errors_total` + metricLabels)
|
||||
ui.backendRequests = ac.ms.NewCounter(`vmauth_unauthorized_user_request_backend_requests_total` + metricLabels)
|
||||
ui.backendErrors = ac.ms.NewCounter(`vmauth_unauthorized_user_request_backend_errors_total` + metricLabels)
|
||||
ui.requestsDuration = ac.ms.NewSummary(`vmauth_unauthorized_user_request_duration_seconds` + metricLabels)
|
||||
ui.concurrencyLimitCh = make(chan struct{}, ui.getMaxConcurrentRequests())
|
||||
@@ -1016,27 +807,16 @@ func parseAuthConfigUsers(ac *AuthConfig) (map[string]*UserInfo, error) {
|
||||
}
|
||||
for i := range uis {
|
||||
ui := &uis[i]
|
||||
// users with jwt tokens are parsed by parseJWTUsers function.
|
||||
// the function also checks that users with jwt tokens do not have auth tokens, bearer tokens, usernames and passwords.
|
||||
if ui.JWT != nil {
|
||||
continue
|
||||
}
|
||||
|
||||
ats, err := getAuthTokens(ui.AuthToken, ui.BearerToken, ui.Username, ui.Password)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
for _, at := range ats {
|
||||
if uiOld := byAuthToken[at]; uiOld != nil {
|
||||
return nil, fmt.Errorf("duplicate auth token=%q found for username=%q, name=%q; the previous one is set for username=%q, name=%q",
|
||||
at, ui.Username, ui.Name, uiOld.Username, uiOld.Name)
|
||||
}
|
||||
}
|
||||
|
||||
if err := parseJWTPlaceholdersForUserInfo(ui, false); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if err := ui.initURLs(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -1046,8 +826,6 @@ func parseAuthConfigUsers(ac *AuthConfig) (map[string]*UserInfo, error) {
|
||||
return nil, fmt.Errorf("cannot parse metric_labels: %w", err)
|
||||
}
|
||||
ui.requests = ac.ms.GetOrCreateCounter(`vmauth_user_requests_total` + metricLabels)
|
||||
ui.requestErrors = ac.ms.GetOrCreateCounter(`vmauth_user_request_errors_total` + metricLabels)
|
||||
ui.backendRequests = ac.ms.GetOrCreateCounter(`vmauth_user_request_backend_requests_total` + metricLabels)
|
||||
ui.backendErrors = ac.ms.GetOrCreateCounter(`vmauth_user_request_backend_errors_total` + metricLabels)
|
||||
ui.requestsDuration = ac.ms.GetOrCreateSummary(`vmauth_user_request_duration_seconds` + metricLabels)
|
||||
mcr := ui.getMaxConcurrentRequests()
|
||||
@@ -1136,7 +914,6 @@ func (ui *UserInfo) initURLs() error {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
for _, e := range ui.URLMaps {
|
||||
if len(e.SrcPaths) == 0 && len(e.SrcHosts) == 0 && len(e.SrcQueryArgs) == 0 && len(e.SrcHeaders) == 0 {
|
||||
return fmt.Errorf("missing `src_paths`, `src_hosts`, `src_query_args` and `src_headers` in `url_map`")
|
||||
@@ -1196,9 +973,6 @@ func (ui *UserInfo) name() string {
|
||||
h := xxhash.Sum64([]byte(ui.AuthToken))
|
||||
return fmt.Sprintf("auth_token:hash:%016X", h)
|
||||
}
|
||||
if ui.JWT != nil {
|
||||
return `jwt`
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
@@ -1286,11 +1060,13 @@ func (up *URLPrefix) sanitizeAndInitialize() error {
|
||||
}
|
||||
|
||||
// Initialize up.bus
|
||||
bus := newBackendURLs()
|
||||
for _, bu := range up.busOriginal {
|
||||
bus.add(bu)
|
||||
bus := make([]*backendURL, len(up.busOriginal))
|
||||
for i, bu := range up.busOriginal {
|
||||
bus[i] = &backendURL{
|
||||
url: bu,
|
||||
}
|
||||
}
|
||||
up.bus.Store(bus)
|
||||
up.bus.Store(&bus)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -4,11 +4,8 @@ import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"net"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"gopkg.in/yaml.v2"
|
||||
|
||||
@@ -279,50 +276,6 @@ users:
|
||||
url_prefix: http://foo.bar
|
||||
metric_labels:
|
||||
not-prometheus-compatible: value
|
||||
`)
|
||||
// placeholder in url_prefix
|
||||
f(`
|
||||
users:
|
||||
- username: foo
|
||||
password: bar
|
||||
url_prefix: 'http://ahost/{{a_placeholder}}/foobar'
|
||||
`)
|
||||
// placeholder in a header
|
||||
f(`
|
||||
users:
|
||||
- username: foo
|
||||
password: bar
|
||||
headers:
|
||||
- 'X-Foo: {{a_placeholder}}'
|
||||
url_prefix: 'http://ahost'
|
||||
`)
|
||||
// placeholder in url_prefix
|
||||
f(`
|
||||
users:
|
||||
- username: foo
|
||||
password: bar
|
||||
url_prefix: 'http://ahost/{{a_placeholder}}/foobar'
|
||||
`)
|
||||
// placeholder in a header in url_map
|
||||
f(`
|
||||
users:
|
||||
- username: foo
|
||||
password: bar
|
||||
url_map:
|
||||
- src_paths: ["/select/.*"]
|
||||
headers:
|
||||
- 'X-Foo: {{a_placeholder}}'
|
||||
url_prefix: 'http://ahost'
|
||||
`)
|
||||
|
||||
// placeholder in a header in url_map
|
||||
f(`
|
||||
users:
|
||||
- username: foo
|
||||
password: bar
|
||||
url_map:
|
||||
- src_paths: ["/select/.*"]
|
||||
url_prefix: 'http://ahost/{{a_placeholder}}/foobar'
|
||||
`)
|
||||
}
|
||||
|
||||
@@ -425,7 +378,7 @@ users:
|
||||
RetryStatusCodes: []int{500, 501},
|
||||
LoadBalancingPolicy: "first_available",
|
||||
MergeQueryArgs: []string{"foo", "bar"},
|
||||
DropSrcPathPrefixParts: new(1),
|
||||
DropSrcPathPrefixParts: intp(1),
|
||||
DiscoverBackendIPs: &discoverBackendIPsTrue,
|
||||
},
|
||||
}, nil)
|
||||
@@ -668,47 +621,6 @@ unauthorized_user:
|
||||
},
|
||||
},
|
||||
})
|
||||
|
||||
// skip user info with jwt, it is parsed by parseJWTUsers
|
||||
f(`
|
||||
users:
|
||||
- username: foo
|
||||
password: bar
|
||||
url_prefix: http://aaa:343/bbb
|
||||
- jwt: {skip_verify: true}
|
||||
url_prefix: http://aaa:343/bbb
|
||||
`, map[string]*UserInfo{
|
||||
getHTTPAuthBasicToken("foo", "bar"): {
|
||||
Username: "foo",
|
||||
Password: "bar",
|
||||
URLPrefix: mustParseURL("http://aaa:343/bbb"),
|
||||
},
|
||||
}, nil)
|
||||
|
||||
// Multiple users with access logs enabled
|
||||
f(`
|
||||
users:
|
||||
- username: foo
|
||||
url_prefix: http://foo
|
||||
access_log: {}
|
||||
- username: bar
|
||||
url_prefix: https://bar/x/
|
||||
access_log:
|
||||
filters:
|
||||
skip_status_codes: [404]
|
||||
`, map[string]*UserInfo{
|
||||
getHTTPAuthBasicToken("foo", ""): {
|
||||
Username: "foo",
|
||||
URLPrefix: mustParseURL("http://foo"),
|
||||
AccessLog: &AccessLog{},
|
||||
},
|
||||
getHTTPAuthBasicToken("bar", ""): {
|
||||
Username: "bar",
|
||||
URLPrefix: mustParseURL("https://bar/x/"),
|
||||
AccessLog: &AccessLog{Filters: &AccessLogFilters{SkipStatusCodes: []int{404}}},
|
||||
},
|
||||
}, nil)
|
||||
|
||||
}
|
||||
|
||||
func TestParseAuthConfigPassesTLSVerificationConfig(t *testing.T) {
|
||||
@@ -841,7 +753,7 @@ func TestGetLeastLoadedBackendURL(t *testing.T) {
|
||||
up.loadBalancingPolicy = "least_loaded"
|
||||
|
||||
pbus := up.bus.Load()
|
||||
bus := pbus.bus
|
||||
bus := *pbus
|
||||
|
||||
fn := func(ns ...int) {
|
||||
t.Helper()
|
||||
@@ -913,13 +825,13 @@ func TestBrokenBackend(t *testing.T) {
|
||||
})
|
||||
up.loadBalancingPolicy = "least_loaded"
|
||||
pbus := up.bus.Load()
|
||||
bus := pbus.bus
|
||||
bus := *pbus
|
||||
|
||||
// explicitly mark one of the backends as broken
|
||||
bus[1].setBroken()
|
||||
|
||||
// broken backend should never return while there are healthy backends
|
||||
for range int(1e3) {
|
||||
for i := 0; i < 1e3; i++ {
|
||||
b := up.getBackendURL()
|
||||
if b.isBroken() {
|
||||
t.Fatalf("unexpected broken backend %q", b.url)
|
||||
@@ -936,7 +848,7 @@ func TestDiscoverBackendIPsWithIPV6(t *testing.T) {
|
||||
|
||||
up.discoverBackendAddrsIfNeeded()
|
||||
pbus := up.bus.Load()
|
||||
bus := pbus.bus
|
||||
bus := *pbus
|
||||
|
||||
if len(bus) != 1 {
|
||||
t.Fatalf("expected url list to be of size 1; got %d instead", len(bus))
|
||||
@@ -996,68 +908,6 @@ func TestDiscoverBackendIPsWithIPV6(t *testing.T) {
|
||||
|
||||
}
|
||||
|
||||
func TestLogRequest(t *testing.T) {
|
||||
ui := &UserInfo{AccessLog: &AccessLog{}}
|
||||
|
||||
testOutput := &bytes.Buffer{}
|
||||
logger.SetOutputForTests(testOutput)
|
||||
defer logger.ResetOutputForTest()
|
||||
|
||||
req, err := http.NewRequest("GET", "http://localhost:8080/select/0/prometheus", nil)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
|
||||
f := func(user string, status int, duration time.Duration, expectedLog string) {
|
||||
t.Helper()
|
||||
|
||||
testOutput.Reset()
|
||||
ui.logRequest(req, user, status, duration)
|
||||
|
||||
got := testOutput.String()
|
||||
if expectedLog == "" && got != "" {
|
||||
t.Fatalf("expected empty log, got %q", got)
|
||||
}
|
||||
if !strings.Contains(got, expectedLog) {
|
||||
t.Fatalf("output \n%q \nshould contain \n%q", testOutput.String(), expectedLog)
|
||||
}
|
||||
}
|
||||
|
||||
f("foo", 200, 10*time.Millisecond, `access_log request_host="localhost:8080" request_uri="" status_code=200 remote_addr="" user_agent="" referer="" duration_ms=10 username="foo"`)
|
||||
f("foo", 404, time.Second, `access_log request_host="localhost:8080" request_uri="" status_code=404 remote_addr="" user_agent="" referer="" duration_ms=1000 username="foo"`)
|
||||
|
||||
ui.AccessLog.Filters = &AccessLogFilters{SkipStatusCodes: []int{200}}
|
||||
f("foo", 200, 10*time.Millisecond, ``)
|
||||
f("foo", 404, 10*time.Millisecond, `access_log request_host="localhost:8080" request_uri="" status_code=404 remote_addr="" user_agent="" referer="" duration_ms=10 username="foo"`)
|
||||
}
|
||||
|
||||
func TestGetFirstAvailableBackend(t *testing.T) {
|
||||
f := func(broken []bool, expectedIdx int) {
|
||||
t.Helper()
|
||||
bus := make([]*backendURL, len(broken))
|
||||
for i := range broken {
|
||||
bus[i] = &backendURL{
|
||||
url: &url.URL{Host: fmt.Sprintf("server-%d", i)},
|
||||
}
|
||||
bus[i].broken.Store(broken[i])
|
||||
}
|
||||
bu := getFirstAvailableBackendURL(bus)
|
||||
if bu == nil {
|
||||
t.Fatalf("unexpected nil backend")
|
||||
}
|
||||
if bu.url.Host != fmt.Sprintf("server-%d", expectedIdx) {
|
||||
t.Fatalf("unexpected backend, expected server-%d, got %s", expectedIdx, bu.url.Host)
|
||||
}
|
||||
}
|
||||
|
||||
f([]bool{false, false, false}, 0)
|
||||
f([]bool{true, true, false}, 2)
|
||||
// all backend are broken, then return the first one.
|
||||
f([]bool{true, true, true}, 0)
|
||||
f([]bool{true}, 0)
|
||||
|
||||
}
|
||||
|
||||
func getRegexs(paths []string) []*Regex {
|
||||
var sps []*Regex
|
||||
for _, path := range paths {
|
||||
@@ -1092,14 +942,16 @@ func mustParseURL(u string) *URLPrefix {
|
||||
}
|
||||
|
||||
func mustParseURLs(us []string) *URLPrefix {
|
||||
bus := newBackendURLs()
|
||||
bus := make([]*backendURL, len(us))
|
||||
urls := make([]*url.URL, len(us))
|
||||
for i, u := range us {
|
||||
pu, err := url.Parse(u)
|
||||
if err != nil {
|
||||
panic(fmt.Errorf("BUG: cannot parse %q: %w", u, err))
|
||||
}
|
||||
bus.add(pu)
|
||||
bus[i] = &backendURL{
|
||||
url: pu,
|
||||
}
|
||||
urls[i] = pu
|
||||
}
|
||||
up := &URLPrefix{}
|
||||
@@ -1108,11 +960,15 @@ func mustParseURLs(us []string) *URLPrefix {
|
||||
} else {
|
||||
up.vOriginal = us
|
||||
}
|
||||
up.bus.Store(bus)
|
||||
up.bus.Store(&bus)
|
||||
up.busOriginal = urls
|
||||
return up
|
||||
}
|
||||
|
||||
func intp(n int) *int {
|
||||
return &n
|
||||
}
|
||||
|
||||
func mustNewRegex(s string) *Regex {
|
||||
var re Regex
|
||||
if err := yaml.Unmarshal([]byte(s), &re); err != nil {
|
||||
|
||||
@@ -116,20 +116,6 @@ users:
|
||||
- "http://default1:8888/unsupported_url_handler"
|
||||
- "http://default2:8888/unsupported_url_handler"
|
||||
|
||||
# A JWT token based routing:
|
||||
# - Requests with JWT token that has the following structure:
|
||||
# {"team": "ops", "security": {"read_access": "1"}, "vm_access": {"metrics_account_id": 1000,"metrics_project_id":5}}
|
||||
# is routed to vmselect nodes and request url placeholder replaced with metrics tenant identificators
|
||||
- name: jwt-opts-team
|
||||
jwt:
|
||||
match_claims:
|
||||
team: ops
|
||||
security.read_access: "1"
|
||||
skip_verify: true
|
||||
url_prefix:
|
||||
- "http://vmselect1:8481/select/{{.MetricsTenant}}/prometheus"
|
||||
- "http://vmselect2:8481/select/{{.MetricsTenant}}/prometheus"
|
||||
|
||||
# Requests without Authorization header are proxied according to `unauthorized_user` section.
|
||||
# Requests are proxied in round-robin fashion between `url_prefix` backends.
|
||||
# The deny_partial_response query arg is added to all the proxied requests.
|
||||
@@ -139,8 +125,3 @@ unauthorized_user:
|
||||
- http://vmselect-az1/?deny_partial_response=1
|
||||
- http://vmselect-az2/?deny_partial_response=1
|
||||
retry_status_codes: [503, 500]
|
||||
# log access for requests routed to this user
|
||||
access_log:
|
||||
filters:
|
||||
# except requests with Status Codes below
|
||||
skip_status_codes: [200, 202]
|
||||
|
||||
@@ -1,486 +0,0 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"net/url"
|
||||
"os"
|
||||
"slices"
|
||||
"sort"
|
||||
"strings"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/jwt"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
)
|
||||
|
||||
const (
|
||||
metricsTenantPlaceholder = `{{.MetricsTenant}}`
|
||||
metricsExtraLabelsPlaceholder = `{{.MetricsExtraLabels}}`
|
||||
metricsExtraFiltersPlaceholder = `{{.MetricsExtraFilters}}`
|
||||
|
||||
logsAccountIDPlaceholder = `{{.LogsAccountID}}`
|
||||
logsProjectIDPlaceholder = `{{.LogsProjectID}}`
|
||||
logsExtraFiltersPlaceholder = `{{.LogsExtraFilters}}`
|
||||
logsExtraStreamFiltersPlaceholder = `{{.LogsExtraStreamFilters}}`
|
||||
|
||||
placeholderPrefix = `{{`
|
||||
)
|
||||
|
||||
var allPlaceholders = []string{
|
||||
metricsTenantPlaceholder,
|
||||
metricsExtraLabelsPlaceholder,
|
||||
metricsExtraFiltersPlaceholder,
|
||||
logsAccountIDPlaceholder,
|
||||
logsProjectIDPlaceholder,
|
||||
logsExtraFiltersPlaceholder,
|
||||
logsExtraStreamFiltersPlaceholder,
|
||||
}
|
||||
|
||||
var urlPathPlaceHolders = []string{
|
||||
metricsTenantPlaceholder,
|
||||
logsAccountIDPlaceholder,
|
||||
logsProjectIDPlaceholder,
|
||||
}
|
||||
|
||||
type jwtCache struct {
|
||||
// users contain UserInfo`s from AuthConfig with JWTConfig set
|
||||
users []*UserInfo
|
||||
|
||||
oidcDP *oidcDiscovererPool
|
||||
}
|
||||
|
||||
type JWTConfig struct {
|
||||
PublicKeys []string `yaml:"public_keys,omitempty"`
|
||||
PublicKeyFiles []string `yaml:"public_key_files,omitempty"`
|
||||
SkipVerify bool `yaml:"skip_verify,omitempty"`
|
||||
OIDC *oidcConfig `yaml:"oidc,omitempty"`
|
||||
MatchClaims map[string]string `yaml:"match_claims,omitempty"`
|
||||
parsedMatchClaims []*jwt.Claim
|
||||
|
||||
// verifierPool is used to verify JWT tokens.
|
||||
// It is initialized from PublicKeys and/or PublicKeyFiles.
|
||||
// In this case, it is initialized once at config reload and never updated until next reload
|
||||
// In case of OIDC, it is initialized on config reload and periodically updated by discovery process.
|
||||
verifierPool atomic.Pointer[jwt.VerifierPool]
|
||||
}
|
||||
|
||||
func parseJWTUsers(ac *AuthConfig) ([]*UserInfo, *oidcDiscovererPool, error) {
|
||||
jui := make([]*UserInfo, 0, len(ac.Users))
|
||||
oidcDP := &oidcDiscovererPool{}
|
||||
|
||||
uniqClaims := make(map[string]*UserInfo)
|
||||
var sortedClaims []string
|
||||
for idx, ui := range ac.Users {
|
||||
jwtToken := ui.JWT
|
||||
if jwtToken == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
if ui.AuthToken != "" || ui.BearerToken != "" || ui.Username != "" || ui.Password != "" {
|
||||
return nil, nil, fmt.Errorf("auth_token, bearer_token, username and password cannot be specified if jwt is set")
|
||||
}
|
||||
if len(jwtToken.PublicKeys) == 0 && len(jwtToken.PublicKeyFiles) == 0 && !jwtToken.SkipVerify && jwtToken.OIDC == nil {
|
||||
return nil, nil, fmt.Errorf("jwt must contain at least a single public key, public_key_files, oidc or have skip_verify=true")
|
||||
}
|
||||
var claimsString string
|
||||
sortedClaims = sortedClaims[:0]
|
||||
parsedClaims := make([]*jwt.Claim, 0, len(jwtToken.MatchClaims))
|
||||
for ck, cv := range jwtToken.MatchClaims {
|
||||
sortedClaims = append(sortedClaims, fmt.Sprintf("%s=%s", ck, cv))
|
||||
pc, err := jwt.NewClaim(ck, cv)
|
||||
if err != nil {
|
||||
return nil, nil, fmt.Errorf("incorrect match claim, key=%q, value regex=%q: %w", ck, cv, err)
|
||||
}
|
||||
parsedClaims = append(parsedClaims, pc)
|
||||
}
|
||||
ui.JWT.parsedMatchClaims = parsedClaims
|
||||
sort.Strings(sortedClaims)
|
||||
claimsString = strings.Join(sortedClaims, ",")
|
||||
|
||||
if oldUI, ok := uniqClaims[claimsString]; ok {
|
||||
return nil, nil, fmt.Errorf("duplicate match claims=%q found for name=%q at idx=%d; the previous one is set for name=%q", claimsString, ui.Name, idx, oldUI.Name)
|
||||
}
|
||||
uniqClaims[claimsString] = &ui
|
||||
if len(jwtToken.PublicKeys) > 0 || len(jwtToken.PublicKeyFiles) > 0 {
|
||||
keys := make([]any, 0, len(jwtToken.PublicKeys)+len(jwtToken.PublicKeyFiles))
|
||||
|
||||
for i := range jwtToken.PublicKeys {
|
||||
k, err := jwt.ParseKey([]byte(jwtToken.PublicKeys[i]))
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
keys = append(keys, k)
|
||||
}
|
||||
|
||||
for _, filePath := range jwtToken.PublicKeyFiles {
|
||||
keyData, err := os.ReadFile(filePath)
|
||||
if err != nil {
|
||||
return nil, nil, fmt.Errorf("cannot read public key from file %q: %w", filePath, err)
|
||||
}
|
||||
k, err := jwt.ParseKey(keyData)
|
||||
if err != nil {
|
||||
return nil, nil, fmt.Errorf("cannot parse public key from file %q: %w", filePath, err)
|
||||
}
|
||||
keys = append(keys, k)
|
||||
}
|
||||
|
||||
vp, err := jwt.NewVerifierPool(keys)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
jwtToken.verifierPool.Store(vp)
|
||||
}
|
||||
if jwtToken.OIDC != nil {
|
||||
if len(jwtToken.PublicKeys) > 0 || len(jwtToken.PublicKeyFiles) > 0 || jwtToken.SkipVerify {
|
||||
return nil, nil, fmt.Errorf("jwt with oidc cannot contain public keys or have skip_verify=true")
|
||||
}
|
||||
|
||||
if jwtToken.OIDC.Issuer == "" {
|
||||
return nil, nil, fmt.Errorf("oidc issuer cannot be empty")
|
||||
}
|
||||
isserURL, err := url.Parse(jwtToken.OIDC.Issuer)
|
||||
if err != nil {
|
||||
return nil, nil, fmt.Errorf("oidc issuer %q must be a valid URL", jwtToken.OIDC.Issuer)
|
||||
}
|
||||
if isserURL.Scheme != "https" && isserURL.Scheme != "http" {
|
||||
return nil, nil, fmt.Errorf("oidc issuer %q must have http or https scheme", jwtToken.OIDC.Issuer)
|
||||
}
|
||||
|
||||
oidcDP.createOrAdd(ui.JWT.OIDC.Issuer, &ui.JWT.verifierPool)
|
||||
}
|
||||
|
||||
if err := parseJWTPlaceholdersForUserInfo(&ui, true); err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
if err := ui.initURLs(); err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
metricLabels, err := ui.getMetricLabels()
|
||||
if err != nil {
|
||||
return nil, nil, fmt.Errorf("cannot parse metric_labels: %w", err)
|
||||
}
|
||||
ui.requests = ac.ms.GetOrCreateCounter(`vmauth_user_requests_total` + metricLabels)
|
||||
ui.requestErrors = ac.ms.GetOrCreateCounter(`vmauth_user_request_errors_total` + metricLabels)
|
||||
ui.backendRequests = ac.ms.GetOrCreateCounter(`vmauth_user_request_backend_requests_total` + metricLabels)
|
||||
ui.backendErrors = ac.ms.GetOrCreateCounter(`vmauth_user_request_backend_errors_total` + metricLabels)
|
||||
ui.requestsDuration = ac.ms.GetOrCreateSummary(`vmauth_user_request_duration_seconds` + metricLabels)
|
||||
mcr := ui.getMaxConcurrentRequests()
|
||||
ui.concurrencyLimitCh = make(chan struct{}, mcr)
|
||||
ui.concurrencyLimitReached = ac.ms.GetOrCreateCounter(`vmauth_user_concurrent_requests_limit_reached_total` + metricLabels)
|
||||
_ = ac.ms.GetOrCreateGauge(`vmauth_user_concurrent_requests_capacity`+metricLabels, func() float64 {
|
||||
return float64(cap(ui.concurrencyLimitCh))
|
||||
})
|
||||
_ = ac.ms.GetOrCreateGauge(`vmauth_user_concurrent_requests_current`+metricLabels, func() float64 {
|
||||
return float64(len(ui.concurrencyLimitCh))
|
||||
})
|
||||
|
||||
rt, err := newRoundTripper(ui.TLSCAFile, ui.TLSCertFile, ui.TLSKeyFile, ui.TLSServerName, ui.TLSInsecureSkipVerify)
|
||||
if err != nil {
|
||||
return nil, nil, fmt.Errorf("cannot initialize HTTP RoundTripper: %w", err)
|
||||
}
|
||||
ui.rt = rt
|
||||
|
||||
jui = append(jui, &ui)
|
||||
}
|
||||
|
||||
// sort by amount of matching claims
|
||||
// it allows to more specific claim win in case of clash
|
||||
sort.SliceStable(jui, func(i, j int) bool {
|
||||
return len(jui[i].JWT.MatchClaims) > len(jui[j].JWT.MatchClaims)
|
||||
})
|
||||
|
||||
return jui, oidcDP, nil
|
||||
}
|
||||
|
||||
var tokenPool sync.Pool
|
||||
|
||||
func getToken() *jwt.Token {
|
||||
tkn := tokenPool.Get()
|
||||
if tkn == nil {
|
||||
return &jwt.Token{}
|
||||
}
|
||||
return tkn.(*jwt.Token)
|
||||
}
|
||||
|
||||
func putToken(tkn *jwt.Token) {
|
||||
tkn.Reset()
|
||||
tokenPool.Put(tkn)
|
||||
}
|
||||
|
||||
func getJWTUserInfo(ats []string) (*UserInfo, *jwt.Token) {
|
||||
js := *jwtAuthCache.Load()
|
||||
if len(js.users) == 0 {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
tkn := getToken()
|
||||
|
||||
for _, at := range ats {
|
||||
if strings.Count(at, ".") != 2 {
|
||||
continue
|
||||
}
|
||||
|
||||
at, _ = strings.CutPrefix(at, `http_auth:`)
|
||||
tkn.Reset()
|
||||
if err := tkn.Parse(at, true); err != nil {
|
||||
if *logInvalidAuthTokens {
|
||||
logger.Infof("cannot parse jwt token: %s", err)
|
||||
}
|
||||
continue
|
||||
}
|
||||
if tkn.IsExpired(time.Now()) {
|
||||
if *logInvalidAuthTokens {
|
||||
// TODO: add more context:
|
||||
// token claims with issuer
|
||||
logger.Infof("jwt token is expired")
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
if ui := getUserInfoByJWTToken(tkn, js.users); ui != nil {
|
||||
return ui, tkn
|
||||
}
|
||||
}
|
||||
|
||||
putToken(tkn)
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func getUserInfoByJWTToken(tkn *jwt.Token, users []*UserInfo) *UserInfo {
|
||||
for _, ui := range users {
|
||||
if !tkn.MatchClaims(ui.JWT.parsedMatchClaims) {
|
||||
continue
|
||||
}
|
||||
|
||||
if ui.JWT.SkipVerify {
|
||||
return ui
|
||||
}
|
||||
|
||||
if ui.JWT.OIDC != nil {
|
||||
// OIDC requires iss claim.
|
||||
// It must match the discovery issuer URL set in OIDC config.
|
||||
// https://openid.net/specs/openid-connect-discovery-1_0.html#ProviderMetadata
|
||||
if tkn.Issuer() == "" {
|
||||
if *logInvalidAuthTokens {
|
||||
logger.Infof("jwt token must have issuer filed")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
if tkn.Issuer() != ui.JWT.OIDC.Issuer {
|
||||
if *logInvalidAuthTokens {
|
||||
logger.Infof("jwt token issuer: %q does not match oidc issuer: %q", tkn.Issuer(), ui.JWT.OIDC.Issuer)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
vp := ui.JWT.verifierPool.Load()
|
||||
if vp == nil {
|
||||
if *logInvalidAuthTokens {
|
||||
logger.Infof("jwt verifier not initialed")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
if err := vp.Verify(tkn); err != nil {
|
||||
if *logInvalidAuthTokens {
|
||||
logger.Infof("cannot verify jwt token: %s", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
return ui
|
||||
}
|
||||
|
||||
if *logInvalidAuthTokens {
|
||||
logger.Infof("no user match jwt token")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func replaceJWTPlaceholders(bu *backendURL, hc HeadersConf, vma *jwt.VMAccessClaim) (*url.URL, HeadersConf) {
|
||||
if !bu.hasPlaceHolders && !hc.hasAnyPlaceHolders {
|
||||
return bu.url, hc
|
||||
}
|
||||
targetURL := bu.url
|
||||
data := jwtClaimsData(vma)
|
||||
if bu.hasPlaceHolders {
|
||||
// template url params and request path
|
||||
// make a copy of url
|
||||
uCopy := *bu.url
|
||||
for _, uph := range urlPathPlaceHolders {
|
||||
replacement := data[uph]
|
||||
uCopy.Path = strings.ReplaceAll(uCopy.Path, uph, replacement[0])
|
||||
}
|
||||
query := uCopy.Query()
|
||||
var foundAnyQueryPlaceholder bool
|
||||
var templatedValues []string
|
||||
for param, values := range query {
|
||||
templatedValues = templatedValues[:0]
|
||||
// filter in-place values with placeholders
|
||||
// and accumulate replacements
|
||||
// it will change the order of param values
|
||||
// but it's not guaranteed
|
||||
// and will be changed in any way with multiple arg templates
|
||||
var cnt int
|
||||
for _, value := range values {
|
||||
if dv, ok := data[value]; ok {
|
||||
foundAnyQueryPlaceholder = true
|
||||
templatedValues = append(templatedValues, dv...)
|
||||
continue
|
||||
}
|
||||
values[cnt] = value
|
||||
cnt++
|
||||
}
|
||||
values = values[:cnt]
|
||||
values = append(values, templatedValues...)
|
||||
query[param] = values
|
||||
}
|
||||
if foundAnyQueryPlaceholder {
|
||||
uCopy.RawQuery = query.Encode()
|
||||
}
|
||||
targetURL = &uCopy
|
||||
}
|
||||
if hc.hasAnyPlaceHolders {
|
||||
// make a copy of headers and update only values with placeholder
|
||||
rhs := make([]*Header, 0, len(hc.RequestHeaders))
|
||||
for _, rh := range hc.RequestHeaders {
|
||||
if dv, ok := data[rh.Value]; ok {
|
||||
rh := &Header{
|
||||
Name: rh.Name,
|
||||
Value: strings.Join(dv, ","),
|
||||
}
|
||||
rhs = append(rhs, rh)
|
||||
continue
|
||||
}
|
||||
rhs = append(rhs, rh)
|
||||
}
|
||||
hc.RequestHeaders = rhs
|
||||
}
|
||||
|
||||
return targetURL, hc
|
||||
}
|
||||
|
||||
func jwtClaimsData(vma *jwt.VMAccessClaim) map[string][]string {
|
||||
data := map[string][]string{
|
||||
// TODO: optimize at parsing stage
|
||||
metricsTenantPlaceholder: {fmt.Sprintf("%d:%d", vma.MetricsAccountID, vma.MetricsProjectID)},
|
||||
metricsExtraLabelsPlaceholder: vma.MetricsExtraLabels,
|
||||
metricsExtraFiltersPlaceholder: vma.MetricsExtraFilters,
|
||||
|
||||
// TODO: optimize at parsing stage
|
||||
logsAccountIDPlaceholder: {fmt.Sprintf("%d", vma.LogsAccountID)},
|
||||
logsProjectIDPlaceholder: {fmt.Sprintf("%d", vma.LogsProjectID)},
|
||||
logsExtraFiltersPlaceholder: vma.LogsExtraFilters,
|
||||
logsExtraStreamFiltersPlaceholder: vma.LogsExtraStreamFilters,
|
||||
}
|
||||
return data
|
||||
}
|
||||
|
||||
func parseJWTPlaceholdersForUserInfo(ui *UserInfo, isAllowed bool) error {
|
||||
if ui.URLPrefix != nil {
|
||||
if err := validateJWTPlaceholdersForURL(ui.URLPrefix, isAllowed); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if err := parsePlaceholdersForHC(&ui.HeadersConf, isAllowed); err != nil {
|
||||
return err
|
||||
}
|
||||
if ui.DefaultURL != nil {
|
||||
if err := validateJWTPlaceholdersForURL(ui.DefaultURL, isAllowed); err != nil {
|
||||
return fmt.Errorf("invalid `default_url` placeholders: %w", err)
|
||||
}
|
||||
}
|
||||
for i := range ui.URLMaps {
|
||||
e := &ui.URLMaps[i]
|
||||
if e.URLPrefix != nil {
|
||||
if err := validateJWTPlaceholdersForURL(e.URLPrefix, isAllowed); err != nil {
|
||||
return fmt.Errorf("invalid `url_map` `url_prefix` placeholders: %w", err)
|
||||
}
|
||||
}
|
||||
if err := parsePlaceholdersForHC(&e.HeadersConf, isAllowed); err != nil {
|
||||
return fmt.Errorf("invalid `url_map` headers placeholders: %w", err)
|
||||
}
|
||||
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func validateJWTPlaceholdersForURL(up *URLPrefix, isAllowed bool) error {
|
||||
for _, bu := range up.busOriginal {
|
||||
ok := strings.Contains(bu.Path, placeholderPrefix)
|
||||
if ok && !isAllowed {
|
||||
return fmt.Errorf("placeholder: %q is only allowed at JWT token context", bu.Path)
|
||||
}
|
||||
if ok {
|
||||
p := bu.Path
|
||||
for _, ph := range allPlaceholders {
|
||||
p = strings.ReplaceAll(p, ph, ``)
|
||||
}
|
||||
if strings.Contains(p, placeholderPrefix) {
|
||||
return fmt.Errorf("invalid placeholder found in URL request path: %q, supported values are: %s", bu.Path, strings.Join(allPlaceholders, ", "))
|
||||
|
||||
}
|
||||
}
|
||||
for param, values := range bu.Query() {
|
||||
for _, value := range values {
|
||||
ok := strings.Contains(value, placeholderPrefix)
|
||||
if ok && !isAllowed {
|
||||
return fmt.Errorf("query param: %q with placeholder: %q is only allowed at JWT token context", param, value)
|
||||
}
|
||||
if ok {
|
||||
// possible placeholder
|
||||
if !slices.Contains(allPlaceholders, value) {
|
||||
return fmt.Errorf("query param: %q has unsupported placeholder string: %q, supported values are: %s", param, value, strings.Join(allPlaceholders, ", "))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func parsePlaceholdersForHC(hc *HeadersConf, isAllowed bool) error {
|
||||
for _, rhs := range hc.RequestHeaders {
|
||||
ok := strings.Contains(rhs.Value, placeholderPrefix)
|
||||
if ok && !isAllowed {
|
||||
return fmt.Errorf("request header: %q placeholder: %q is only supported at JWT context", rhs.Name, rhs.Value)
|
||||
}
|
||||
if ok {
|
||||
if !slices.Contains(allPlaceholders, rhs.Value) {
|
||||
return fmt.Errorf("request header: %q has unsupported placeholder: %q, supported values are: %s", rhs.Name, rhs.Value, strings.Join(allPlaceholders, ", "))
|
||||
}
|
||||
hc.hasAnyPlaceHolders = true
|
||||
}
|
||||
}
|
||||
for _, rhs := range hc.ResponseHeaders {
|
||||
if strings.Contains(rhs.Value, placeholderPrefix) {
|
||||
return fmt.Errorf("response header placeholders are not supported; found placeholder prefix at header: %q with value: %q", rhs.Name, rhs.Value)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func hasAnyPlaceholders(u *url.URL) bool {
|
||||
if strings.Contains(u.Path, placeholderPrefix) {
|
||||
return true
|
||||
}
|
||||
if len(u.Query()) == 0 {
|
||||
return false
|
||||
}
|
||||
for _, values := range u.Query() {
|
||||
for _, value := range values {
|
||||
if strings.HasPrefix(value, placeholderPrefix) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
return false
|
||||
}
|
||||
@@ -1,503 +0,0 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestJWTParseAuthConfigFailure(t *testing.T) {
|
||||
validRSAPublicKey := `-----BEGIN PUBLIC KEY-----
|
||||
MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAiX7oPWKOWRQsGFEWvwZO
|
||||
mL2PYsdYUsu9nr0qtPCjxQHUJgLfT3rdKlvKpPFYv7ZmKnqTncg36Wz9uiYmWJ7e
|
||||
IB5Z+fko8kVIMzarCqVvpAJDzYF/pUii68xvuYoK3L9TIOAeyCXv+prwnr2IH+Mw
|
||||
9AONzWbRrYoO74XyTE9vMU5qmI/L1VPk+PR8lqPOSptLvzsfoaIk2ED4yK2nRB+6
|
||||
st+k4nccPqbErqHc8aiXnXfugfnr6b+NPFYUzKsDqkymGOokVijrI8B3jNw6c6Do
|
||||
zphk+D3wgLsXYHfMcZbXIMqffqm/aB8Qg88OpFOkQ3rd2p6R9+hacnZkfkn3Phiw
|
||||
yQIDAQAB
|
||||
-----END PUBLIC KEY-----
|
||||
`
|
||||
// ECDSA with the P-521 curve
|
||||
validECDSAPublicKey := `-----BEGIN PUBLIC KEY-----
|
||||
MIGbMBAGByqGSM49AgEGBSuBBAAjA4GGAAQAU9RmtkCRuYTKCyvLlDn5DtBZOHSe
|
||||
QTa5j9q/oQVpCKqcXVFrH5dgh0GL+P/ZhkeuowPzCZqntGf0+7wPt9OxSJcADVJm
|
||||
dv92m540MXss8zdHf5qtE0gsu2Ved0R7Z8a8QwGZ/1mYZ+kFGGbdQTlSvRqDySTq
|
||||
XOtclIk1uhc03oL9nOQ=
|
||||
-----END PUBLIC KEY-----
|
||||
`
|
||||
|
||||
f := func(s string, expErr string) {
|
||||
t.Helper()
|
||||
ac, err := parseAuthConfig([]byte(s))
|
||||
if err != nil {
|
||||
if expErr != err.Error() {
|
||||
t.Fatalf("unexpected error; got\n%q\nwant\n%q", err.Error(), expErr)
|
||||
}
|
||||
return
|
||||
}
|
||||
users, oidcDP, err := parseJWTUsers(ac)
|
||||
if err == nil {
|
||||
t.Fatalf("expecting non-nil error; got %v", users)
|
||||
}
|
||||
if expErr != err.Error() {
|
||||
t.Fatalf("unexpected error; got\n%q\nwant \n%q", err.Error(), expErr)
|
||||
}
|
||||
if oidcDP != nil {
|
||||
t.Fatalf("expecting nil oidcDP; got %v", oidcDP)
|
||||
}
|
||||
}
|
||||
|
||||
// unauthorized_user cannot be used with jwt
|
||||
f(`
|
||||
unauthorized_user:
|
||||
jwt: {skip_verify: true}
|
||||
url_prefix: http://foo.bar
|
||||
`, `field jwt can't be specified for unauthorized_user section`)
|
||||
|
||||
// username and jwt in a single config
|
||||
f(`
|
||||
users:
|
||||
- username: foo
|
||||
jwt: {skip_verify: true}
|
||||
url_prefix: http://foo.bar
|
||||
`, `auth_token, bearer_token, username and password cannot be specified if jwt is set`)
|
||||
// bearer_token and jwt in a single config
|
||||
f(`
|
||||
users:
|
||||
- bearer_token: foo
|
||||
jwt: {skip_verify: true}
|
||||
url_prefix: http://foo.bar
|
||||
`, `auth_token, bearer_token, username and password cannot be specified if jwt is set`)
|
||||
// bearer_token and jwt in a single config
|
||||
f(`
|
||||
users:
|
||||
- auth_token: "Foo token"
|
||||
jwt: {skip_verify: true}
|
||||
url_prefix: http://foo.bar
|
||||
`, `auth_token, bearer_token, username and password cannot be specified if jwt is set`)
|
||||
|
||||
// jwt public_keys or skip_verify must be set, part 1
|
||||
f(`
|
||||
users:
|
||||
- jwt: {}
|
||||
url_prefix: http://foo.bar
|
||||
`, `jwt must contain at least a single public key, public_key_files, oidc or have skip_verify=true`)
|
||||
|
||||
// jwt public_keys or skip_verify must be set, part 2
|
||||
f(`
|
||||
users:
|
||||
- jwt: {public_keys: null}
|
||||
url_prefix: http://foo.bar
|
||||
`, `jwt must contain at least a single public key, public_key_files, oidc or have skip_verify=true`)
|
||||
|
||||
// jwt public_keys or skip_verify must be set, part 3
|
||||
f(`
|
||||
users:
|
||||
- jwt: {public_keys: []}
|
||||
url_prefix: http://foo.bar
|
||||
`, `jwt must contain at least a single public key, public_key_files, oidc or have skip_verify=true`)
|
||||
|
||||
// jwt public_keys, public_key_files or skip_verify must be set
|
||||
f(`
|
||||
users:
|
||||
- jwt: {public_key_files: []}
|
||||
url_prefix: http://foo.bar
|
||||
`, `jwt must contain at least a single public key, public_key_files, oidc or have skip_verify=true`)
|
||||
|
||||
// invalid public key, part 1
|
||||
f(`
|
||||
users:
|
||||
- jwt: {public_keys: [""]}
|
||||
url_prefix: http://foo.bar
|
||||
`, `failed to parse key "": failed to decode PEM block containing public key`)
|
||||
|
||||
// invalid public key, part 2
|
||||
f(`
|
||||
users:
|
||||
- jwt: {public_keys: ["invalid"]}
|
||||
url_prefix: http://foo.bar
|
||||
`, `failed to parse key "invalid": failed to decode PEM block containing public key`)
|
||||
|
||||
// invalid public key, part 2
|
||||
f(fmt.Sprintf(`
|
||||
users:
|
||||
- jwt:
|
||||
public_keys:
|
||||
- %q
|
||||
- %q
|
||||
- "invalid"
|
||||
url_prefix: http://foo.bar
|
||||
`, validRSAPublicKey, validECDSAPublicKey), `failed to parse key "invalid": failed to decode PEM block containing public key`)
|
||||
|
||||
// several jwt users
|
||||
// invalid public key, part 2
|
||||
f(fmt.Sprintf(`
|
||||
users:
|
||||
- jwt:
|
||||
public_keys:
|
||||
- %q
|
||||
url_prefix: http://foo.bar
|
||||
- jwt:
|
||||
public_keys:
|
||||
- %q
|
||||
url_prefix: http://foo.bar
|
||||
`, validRSAPublicKey, validECDSAPublicKey), `duplicate match claims="" found for name="" at idx=1; the previous one is set for name=""`)
|
||||
|
||||
// public key file doesn't exist
|
||||
f(`
|
||||
users:
|
||||
- jwt:
|
||||
public_key_files:
|
||||
- /path/to/nonexistent/file.pem
|
||||
url_prefix: http://foo.bar
|
||||
`, "cannot read public key from file \"/path/to/nonexistent/file.pem\": open /path/to/nonexistent/file.pem: no such file or directory")
|
||||
|
||||
// public key file invalid
|
||||
// auth with key from file
|
||||
publicKeyFile := filepath.Join(t.TempDir(), "a_public_key.pem")
|
||||
if err := os.WriteFile(publicKeyFile, []byte(`invalidPEM`), 0o644); err != nil {
|
||||
t.Fatalf("failed to write public key file: %s", err)
|
||||
}
|
||||
f(`
|
||||
users:
|
||||
- jwt:
|
||||
public_key_files:
|
||||
- `+publicKeyFile+`
|
||||
url_prefix: http://foo.bar
|
||||
`, "cannot parse public key from file \""+publicKeyFile+"\": failed to parse key \"invalidPEM\": failed to decode PEM block containing public key")
|
||||
|
||||
// unsupported placeholder in a header
|
||||
f(`
|
||||
users:
|
||||
- jwt:
|
||||
skip_verify: true
|
||||
url_prefix: http://foo.bar/{{.UnsupportedPlaceholder}}/foo`,
|
||||
"invalid placeholder found in URL request path: \"/{{.UnsupportedPlaceholder}}/foo\", supported values are: {{.MetricsTenant}}, {{.MetricsExtraLabels}}, {{.MetricsExtraFilters}}, {{.LogsAccountID}}, {{.LogsProjectID}}, {{.LogsExtraFilters}}, {{.LogsExtraStreamFilters}}",
|
||||
)
|
||||
// unsupported placeholder in a header
|
||||
f(`
|
||||
users:
|
||||
- jwt:
|
||||
skip_verify: true
|
||||
headers:
|
||||
- "AccountID: {{.UnsupportedPlaceholder}}"
|
||||
url_prefix: http://foo.bar
|
||||
`,
|
||||
"request header: \"AccountID\" has unsupported placeholder: \"{{.UnsupportedPlaceholder}}\", supported values are: {{.MetricsTenant}}, {{.MetricsExtraLabels}}, {{.MetricsExtraFilters}}, {{.LogsAccountID}}, {{.LogsProjectID}}, {{.LogsExtraFilters}}, {{.LogsExtraStreamFilters}}",
|
||||
)
|
||||
|
||||
// spaces in templating not allowed
|
||||
f(`
|
||||
users:
|
||||
- jwt:
|
||||
skip_verify: true
|
||||
headers:
|
||||
- "AccountID: {{ .LogsAccountID }}"
|
||||
url_prefix: http://foo.bar
|
||||
`,
|
||||
"request header: \"AccountID\" has unsupported placeholder: \"{{ .LogsAccountID }}\", supported values are: {{.MetricsTenant}}, {{.MetricsExtraLabels}}, {{.MetricsExtraFilters}}, {{.LogsAccountID}}, {{.LogsProjectID}}, {{.LogsExtraFilters}}, {{.LogsExtraStreamFilters}}",
|
||||
)
|
||||
|
||||
// oidc is not an object
|
||||
f(`
|
||||
users:
|
||||
- jwt:
|
||||
oidc: "not an object"
|
||||
url_prefix: http://foo.bar
|
||||
`,
|
||||
"cannot unmarshal AuthConfig data: yaml: unmarshal errors:\n line 4: cannot unmarshal !!str `not an ...` into main.oidcConfig",
|
||||
)
|
||||
|
||||
// oidc issuer empty
|
||||
f(`
|
||||
users:
|
||||
- jwt:
|
||||
oidc: {}
|
||||
url_prefix: http://foo.bar
|
||||
`,
|
||||
"oidc issuer cannot be empty",
|
||||
)
|
||||
|
||||
// oidc issuer invalid urls
|
||||
f(`
|
||||
users:
|
||||
- jwt:
|
||||
oidc:
|
||||
issuer: "::invalid-url"
|
||||
url_prefix: http://foo.bar
|
||||
`,
|
||||
"oidc issuer \"::invalid-url\" must be a valid URL",
|
||||
)
|
||||
|
||||
// oidc issuer invalid urls
|
||||
f(`
|
||||
users:
|
||||
- jwt:
|
||||
oidc:
|
||||
issuer: "invalid-url"
|
||||
url_prefix: http://foo.bar
|
||||
`,
|
||||
"oidc issuer \"invalid-url\" must have http or https scheme",
|
||||
)
|
||||
|
||||
// oidc and public_keys are not allowed
|
||||
f(fmt.Sprintf(`
|
||||
users:
|
||||
- jwt:
|
||||
public_keys:
|
||||
- %q
|
||||
oidc:
|
||||
issuer: https://example.com
|
||||
url_prefix: http://foo.bar
|
||||
`, validRSAPublicKey),
|
||||
"jwt with oidc cannot contain public keys or have skip_verify=true",
|
||||
)
|
||||
|
||||
// oidc and skip_verify are not allowed
|
||||
f(`
|
||||
users:
|
||||
- jwt:
|
||||
skip_verify: true
|
||||
oidc:
|
||||
issuer: https://example.com
|
||||
url_prefix: http://foo.bar
|
||||
`,
|
||||
"jwt with oidc cannot contain public keys or have skip_verify=true",
|
||||
)
|
||||
// duplicate claims
|
||||
f(`
|
||||
users:
|
||||
- jwt:
|
||||
skip_verify: true
|
||||
match_claims:
|
||||
team: ops
|
||||
name: user-1
|
||||
url_prefix: http://foo.bar
|
||||
- jwt:
|
||||
skip_verify: true
|
||||
match_claims:
|
||||
team: ops
|
||||
name: user-2
|
||||
url_prefix: http://foo.bar`,
|
||||
"duplicate match claims=\"team=ops\" found for name=\"user-2\" at idx=1; the previous one is set for name=\"user-1\"",
|
||||
)
|
||||
}
|
||||
|
||||
func TestJWTParseAuthConfigSuccess(t *testing.T) {
|
||||
validRSAPublicKey := `-----BEGIN PUBLIC KEY-----
|
||||
MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAiX7oPWKOWRQsGFEWvwZO
|
||||
mL2PYsdYUsu9nr0qtPCjxQHUJgLfT3rdKlvKpPFYv7ZmKnqTncg36Wz9uiYmWJ7e
|
||||
IB5Z+fko8kVIMzarCqVvpAJDzYF/pUii68xvuYoK3L9TIOAeyCXv+prwnr2IH+Mw
|
||||
9AONzWbRrYoO74XyTE9vMU5qmI/L1VPk+PR8lqPOSptLvzsfoaIk2ED4yK2nRB+6
|
||||
st+k4nccPqbErqHc8aiXnXfugfnr6b+NPFYUzKsDqkymGOokVijrI8B3jNw6c6Do
|
||||
zphk+D3wgLsXYHfMcZbXIMqffqm/aB8Qg88OpFOkQ3rd2p6R9+hacnZkfkn3Phiw
|
||||
yQIDAQAB
|
||||
-----END PUBLIC KEY-----
|
||||
`
|
||||
// ECDSA with the P-521 curve
|
||||
validECDSAPublicKey := `-----BEGIN PUBLIC KEY-----
|
||||
MIGbMBAGByqGSM49AgEGBSuBBAAjA4GGAAQAU9RmtkCRuYTKCyvLlDn5DtBZOHSe
|
||||
QTa5j9q/oQVpCKqcXVFrH5dgh0GL+P/ZhkeuowPzCZqntGf0+7wPt9OxSJcADVJm
|
||||
dv92m540MXss8zdHf5qtE0gsu2Ved0R7Z8a8QwGZ/1mYZ+kFGGbdQTlSvRqDySTq
|
||||
XOtclIk1uhc03oL9nOQ=
|
||||
-----END PUBLIC KEY-----
|
||||
`
|
||||
|
||||
f := func(s string) {
|
||||
t.Helper()
|
||||
ac, err := parseAuthConfig([]byte(s))
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
|
||||
jui, oidcDP, err := parseJWTUsers(ac)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
oidcDP.startDiscovery()
|
||||
defer oidcDP.stopDiscovery()
|
||||
|
||||
for _, ui := range jui {
|
||||
if ui.JWT == nil {
|
||||
t.Fatalf("unexpected nil JWTConfig")
|
||||
}
|
||||
|
||||
if ui.JWT.SkipVerify {
|
||||
if ui.JWT.verifierPool.Load() != nil {
|
||||
t.Fatalf("unexpected non-nil verifier pool for skip_verify=true")
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
if ui.JWT.verifierPool.Load() == nil {
|
||||
t.Fatalf("unexpected nil verifier pool for non-empty public keys")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
f(fmt.Sprintf(`
|
||||
users:
|
||||
- jwt:
|
||||
public_keys:
|
||||
- %q
|
||||
url_prefix: http://foo.bar
|
||||
`, validRSAPublicKey))
|
||||
|
||||
f(fmt.Sprintf(`
|
||||
users:
|
||||
- jwt:
|
||||
public_keys:
|
||||
- %q
|
||||
url_prefix: http://foo.bar
|
||||
`, validECDSAPublicKey))
|
||||
|
||||
f(fmt.Sprintf(`
|
||||
users:
|
||||
- jwt:
|
||||
public_keys:
|
||||
- %q
|
||||
- %q
|
||||
url_prefix: http://foo.bar
|
||||
`, validRSAPublicKey, validECDSAPublicKey))
|
||||
|
||||
f(`
|
||||
users:
|
||||
- jwt:
|
||||
skip_verify: true
|
||||
url_prefix: http://foo.bar
|
||||
`)
|
||||
|
||||
// combined with other auth methods
|
||||
f(`
|
||||
users:
|
||||
- username: foo
|
||||
password: bar
|
||||
url_prefix: http://foo.bar
|
||||
|
||||
- jwt:
|
||||
skip_verify: true
|
||||
url_prefix: http://foo.bar
|
||||
|
||||
- bearer_token: foo
|
||||
url_prefix: http://foo.bar
|
||||
`)
|
||||
|
||||
rsaKeyFile := filepath.Join(t.TempDir(), "rsa_public_key.pem")
|
||||
if err := os.WriteFile(rsaKeyFile, []byte(validRSAPublicKey), 0o644); err != nil {
|
||||
t.Fatalf("failed to write RSA key file: %s", err)
|
||||
}
|
||||
ecdsaKeyFile := filepath.Join(t.TempDir(), "ecdsa_public_key.pem")
|
||||
if err := os.WriteFile(ecdsaKeyFile, []byte(validECDSAPublicKey), 0o644); err != nil {
|
||||
t.Fatalf("failed to write ECDSA key file: %s", err)
|
||||
}
|
||||
|
||||
// Test single public key file
|
||||
f(fmt.Sprintf(`
|
||||
users:
|
||||
- jwt:
|
||||
public_key_files:
|
||||
- %q
|
||||
url_prefix: http://foo.bar
|
||||
`, rsaKeyFile))
|
||||
|
||||
// Test multiple public key files
|
||||
f(fmt.Sprintf(`
|
||||
users:
|
||||
- jwt:
|
||||
public_key_files:
|
||||
- %q
|
||||
- %q
|
||||
url_prefix: http://foo.bar
|
||||
`, rsaKeyFile, ecdsaKeyFile))
|
||||
|
||||
// Test combined inline keys and files
|
||||
f(fmt.Sprintf(`
|
||||
users:
|
||||
- jwt:
|
||||
public_keys:
|
||||
- %q
|
||||
public_key_files:
|
||||
- %q
|
||||
url_prefix: http://foo.bar
|
||||
`, validECDSAPublicKey, rsaKeyFile))
|
||||
|
||||
// oidc stub server
|
||||
var ipSrv *httptest.Server
|
||||
ipSrv = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
if r.URL.Path == "/.well-known/openid-configuration" {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
_ = json.NewEncoder(w).Encode(map[string]string{
|
||||
"issuer": ipSrv.URL,
|
||||
"jwks_uri": fmt.Sprintf("%s/jwks", ipSrv.URL),
|
||||
})
|
||||
return
|
||||
}
|
||||
if r.URL.Path == "/jwks" {
|
||||
// resp generated by https://jwkset.com/generate
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.Write([]byte(`
|
||||
{
|
||||
"keys": [
|
||||
{
|
||||
"kty": "RSA",
|
||||
"kid": "f13eee91-f566-4829-80fa-fca847c21f0e",
|
||||
"d": "Ua1llEFz3LZ05CrK5a2JxKMUEWJGXhBPPF20hHQjzxd1w0IEJK_mhPZQG8dNtBROBNIi1FC9l6QRw-RTnVIVat5Xy4yDFNKXXL3ZLXejOHY8SXrNEIDqQ-cSwIpK9cK7Umib0PcPeEeeAED5mqDH75D8_YssWFF18kLbNB5Z9pZmn6Fshiht7l2Sh4GN-KcReOW6eiQQwckDte3OGmZCRbtEriLWJt5TUGUvfZVIlcclqNMycNB6jGa9E1pO5Up7Ki3ZbI_-6XmRgZPtqnR9oLJ1zn3fj3hYpCXo-zcqLuOu3qxcslsq5igsfBzgGtfIJHY9LfWmHUsaDEa5cAX1gQ",
|
||||
"n": "xbLXXBTNREk70UCMiqZ53_mTzYh89W-UaPU61GZ-RZ5lYcLgyWOb5mdyRbvJpcgfZpsOeGAUWbk3GkQ4vqn8kUMnnWhUum2Qk9kGubOJGLW6yaURd00j3E-ilQ5xO2R_Hzz8bAojxV8GKdGTQ-iTf8z8nsSHH8kR2SERbNJCFFtwtFU7vyFWyoH4Lmvu2UpICTHFCR9RqwQVjyoKB1JjJ6Dh1L4zPTlsvQEnqoeFQHPYr0QcQSMYXdfPvlt_FiLOAOE89fX_9T2r9WbFAoda3uTRE5_aal0jxUU2cFyeVSIgauNtF07fp422XFb4XPkWQWrdNx0KX53laSIYQ9HOpw",
|
||||
"e": "AQAB",
|
||||
"p": "2JT57AD-Q2lamgjgyn0wL7DgYZ3OoCTTrDm5_NHg6h13uDvyIlXSukuUeWm4tzPSDedpstbS7dgXkLw5eQXBHwPYtByTcEZS8Z37CBnhMOOhfo_U1aNIPPanJACvWBgz47-TxHsxW1YhztZqghRoicBZPSSBAj49MgANJ4jF0zc",
|
||||
"q": "6a4MkeSXJI-ZzQ-bgP8hwJqpLFr0AiNGQcjZMH4Nn4CPGdnGiqqe6flhfLimgbNhbb67B0-8fLIji8zGhGKDL_JSIpAAdmfs2vzeEsY2hScrqVbd1VbfRcRh0J6lsn7obxkbvQthp9sX2DQbeDcEeaFEvd9gDKQSATYEqWo7eBE",
|
||||
"dp": "haL2yu6Z9RJuuxi7S3YPY33qFZF_y0St71j3L854zzw7gMxMTW9TRWwZQwk-1pv9AmNFzvnK0MNDVyUs-UXZsb932TrApshdqYRnPsppLvdl0GgDVYcYrbUr0IUzrFHSwraVAOlavRbaaXvX4EejcUvkRFvf1nh83fs2Iqy8E-U",
|
||||
"dq": "Cnf5qC-Ndd3ZDg688LJ9WJuVKJ-Kfu4Fn7zXvgxnn9Wqk4XmFyA9rk21yFidXQIkQz5gMpun3g48-W5bFmMzbVp1w4af_q35NnZNnJm0p5Jxqkxx87TIm9-IYkg5NB3rW87MJ1PzNAnkr5LmCCSu1qQa6Eaxjt9qzxMUcmKH94E",
|
||||
"qi": "saAeU11iaKHmye3cwCAYkegcyWbXV3xIXEVJtS9Af_yM19UhspwY2VhuwRaajcwYZwtvR9_ITmX9M-ea7uLdd7aDYO1fujC8NGbopeC4Hkr7yb5vTly3pfKf4h-3LwGGUucJUetdz1lmMIYiyuG4_gSf1yIEtPDLKzXiedgEMdI"
|
||||
}
|
||||
]
|
||||
}
|
||||
`))
|
||||
return
|
||||
}
|
||||
|
||||
http.NotFound(w, r)
|
||||
}))
|
||||
defer ipSrv.Close()
|
||||
|
||||
f(`
|
||||
users:
|
||||
- jwt:
|
||||
oidc:
|
||||
issuer: ` + ipSrv.URL + `
|
||||
url_prefix: http://foo.bar
|
||||
`)
|
||||
// multiple match claims
|
||||
f(fmt.Sprintf(`
|
||||
users:
|
||||
- jwt:
|
||||
match_claims:
|
||||
role: ro
|
||||
team: dev
|
||||
public_keys:
|
||||
- %q
|
||||
url_prefix: http://foo.bar
|
||||
- jwt:
|
||||
match_claims:
|
||||
role: admin
|
||||
team: dev
|
||||
public_key_files:
|
||||
- %q
|
||||
- %q
|
||||
url_prefix: http://foo.bar
|
||||
- jwt:
|
||||
match_claims:
|
||||
role: viewer
|
||||
team: dev
|
||||
department: ceo
|
||||
skip_verify: true
|
||||
url_prefix: http://foo.bar
|
||||
|
||||
|
||||
`, validRSAPublicKey, rsaKeyFile, ecdsaKeyFile))
|
||||
|
||||
}
|
||||
@@ -16,7 +16,6 @@ import (
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/jwt"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
|
||||
@@ -25,7 +24,6 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httputil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/ioutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
|
||||
@@ -42,38 +40,22 @@ var (
|
||||
useProxyProtocol = flagutil.NewArrayBool("httpListenAddr.useProxyProtocol", "Whether to use proxy protocol for connections accepted at the corresponding -httpListenAddr . "+
|
||||
"See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt . "+
|
||||
"With enabled proxy protocol http server cannot serve regular /metrics endpoint. Use -pushmetrics.url for metrics pushing")
|
||||
maxIdleConnsPerBackend = flag.Int("maxIdleConnsPerBackend", 100, "The maximum number of idle connections vmauth can open per each backend host")
|
||||
idleConnTimeout = flag.Duration("idleConnTimeout", 50*time.Second, "The timeout for HTTP keep-alive connections to backend services. "+
|
||||
maxIdleConnsPerBackend = flag.Int("maxIdleConnsPerBackend", 100, "The maximum number of idle connections vmauth can open per each backend host. "+
|
||||
"See also -maxConcurrentRequests")
|
||||
idleConnTimeout = flag.Duration("idleConnTimeout", 50*time.Second, "The timeout for HTTP keep-alive connections to backend services. "+
|
||||
"It is recommended setting this value to values smaller than -http.idleConnTimeout set at backend services")
|
||||
responseTimeout = flag.Duration("responseTimeout", 5*time.Minute, "The timeout for receiving a response from backend")
|
||||
|
||||
requestBufferSize = flagutil.NewBytes("requestBufferSize", 32*1024, "The size of the buffer for reading the request body before proxying the request to backends. "+
|
||||
"This allows reducing the consumption of backend resources when processing requests from clients connected via slow networks. "+
|
||||
"Set to 0 to disable request buffering. See https://docs.victoriametrics.com/victoriametrics/vmauth/#request-body-buffering")
|
||||
maxRequestBodySizeToRetry = flagutil.NewBytes("maxRequestBodySizeToRetry", 16*1024, "The maximum request body size to buffer in memory for potential retries at other backends. "+
|
||||
"Request bodies larger than this size cannot be retried if the backend fails. Zero or negative value disables retries. "+
|
||||
"See also -requestBufferSize")
|
||||
|
||||
maxConcurrentRequests = flag.Int("maxConcurrentRequests", 1000, "The maximum number of concurrent requests vmauth can process simultaneously. "+
|
||||
"Requests exceeding this limit are queued for up to -maxQueueDuration and then rejected with '429 Too Many Requests' http status code if the limit is still reached. "+
|
||||
"This protects vmauth itself from overloading and out-of-memory (OOM) failures. See also -maxConcurrentPerUserRequests "+
|
||||
"and https://docs.victoriametrics.com/victoriametrics/vmauth/#concurrency-limiting")
|
||||
maxConcurrentPerUserRequests = flag.Int("maxConcurrentPerUserRequests", 100, "The maximum number of concurrent requests vmauth can process per each configured user. "+
|
||||
"Requests exceeding this limit are queued for up to -maxQueueDuration and then rejected with '429 Too Many Requests' http status code if the limit is still reached. "+
|
||||
"This provides fairness and isolation between users, preventing a single user from consuming all the available resources. "+
|
||||
"It works in conjunction with -maxConcurrentRequests, which sets the global limit across all users. "+
|
||||
"This default can be overridden for individual users via max_concurrent_requests option in per-user config. "+
|
||||
"See https://docs.victoriametrics.com/victoriametrics/vmauth/#concurrency-limiting")
|
||||
maxQueueDuration = flag.Duration("maxQueueDuration", 10*time.Second, "The maximum duration to wait before rejecting incoming requests if concurrency limit "+
|
||||
"specified via -maxConcurrentRequests or -maxConcurrentPerUserRequests command-line flags is reached. "+
|
||||
"Requests are rejected with '429 Too Many Requests' http status code if the limit is still reached after the -maxQueueDuration duration. "+
|
||||
"This allows graceful handling of short spikes in concurrent requests. See https://docs.victoriametrics.com/victoriametrics/vmauth/#concurrency-limiting")
|
||||
|
||||
responseTimeout = flag.Duration("responseTimeout", 5*time.Minute, "The timeout for receiving a response from backend")
|
||||
maxConcurrentRequests = flag.Int("maxConcurrentRequests", 1000, "The maximum number of concurrent requests vmauth can process. Other requests are rejected with "+
|
||||
"'429 Too Many Requests' http status code. See also -maxConcurrentPerUserRequests and -maxIdleConnsPerBackend command-line options")
|
||||
maxConcurrentPerUserRequests = flag.Int("maxConcurrentPerUserRequests", 300, "The maximum number of concurrent requests vmauth can process per each configured user. "+
|
||||
"Other requests are rejected with '429 Too Many Requests' http status code. See also -maxConcurrentRequests command-line option and max_concurrent_requests option "+
|
||||
"in per-user config")
|
||||
reloadAuthKey = flagutil.NewPassword("reloadAuthKey", "Auth key for /-/reload http endpoint. It must be passed via authKey query arg. It overrides -httpAuth.*")
|
||||
logInvalidAuthTokens = flag.Bool("logInvalidAuthTokens", false, "Whether to log requests with invalid auth tokens. "+
|
||||
`Such requests are always counted at vmauth_http_request_errors_total{reason="invalid_auth_token"} metric, which is exposed at /metrics page`)
|
||||
failTimeout = flag.Duration("failTimeout", 3*time.Second, "Sets a delay period for load balancing to skip a malfunctioning backend")
|
||||
|
||||
failTimeout = flag.Duration("failTimeout", 3*time.Second, "Sets a delay period for load balancing to skip a malfunctioning backend")
|
||||
maxRequestBodySizeToRetry = flagutil.NewBytes("maxRequestBodySizeToRetry", 16*1024, "The maximum request body size, which can be cached and re-tried at other backends. "+
|
||||
"Bigger values may require more memory. Zero or negative value disables caching of request body. This may be useful when proxying data ingestion requests")
|
||||
backendTLSInsecureSkipVerify = flag.Bool("backend.tlsInsecureSkipVerify", false, "Whether to skip TLS verification when connecting to backends over HTTPS. "+
|
||||
"See https://docs.victoriametrics.com/victoriametrics/vmauth/#backend-tls-setup")
|
||||
backendTLSCAFile = flag.String("backend.TLSCAFile", "", "Optional path to TLS root CA file, which is used for TLS verification when connecting to backends over HTTPS. "+
|
||||
@@ -169,12 +151,13 @@ func requestHandlerWithInternalRoutes(w http.ResponseWriter, r *http.Request) bo
|
||||
}
|
||||
|
||||
func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
|
||||
ats := getAuthTokensFromRequest(r)
|
||||
if len(ats) == 0 {
|
||||
// Process requests for unauthorized users
|
||||
ui := authConfig.Load().UnauthorizedUser
|
||||
if ui != nil {
|
||||
processUserRequest(w, r, ui, nil)
|
||||
processUserRequest(w, r, ui)
|
||||
return true
|
||||
}
|
||||
|
||||
@@ -182,36 +165,29 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
return true
|
||||
}
|
||||
|
||||
if ui := getUserInfoByAuthTokens(ats); ui != nil {
|
||||
processUserRequest(w, r, ui, nil)
|
||||
return true
|
||||
}
|
||||
if ui, tkn := getJWTUserInfo(ats); ui != nil {
|
||||
if tkn == nil {
|
||||
logger.Panicf("BUG: unexpected nil jwt token for user %q", ui.name())
|
||||
ui := getUserInfoByAuthTokens(ats)
|
||||
if ui == nil {
|
||||
uu := authConfig.Load().UnauthorizedUser
|
||||
if uu != nil {
|
||||
processUserRequest(w, r, uu)
|
||||
return true
|
||||
}
|
||||
|
||||
invalidAuthTokenRequests.Inc()
|
||||
if *logInvalidAuthTokens {
|
||||
err := fmt.Errorf("cannot authorize request with auth tokens %q", ats)
|
||||
err = &httpserver.ErrorWithStatusCode{
|
||||
Err: err,
|
||||
StatusCode: http.StatusUnauthorized,
|
||||
}
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
} else {
|
||||
http.Error(w, "Unauthorized", http.StatusUnauthorized)
|
||||
}
|
||||
defer putToken(tkn)
|
||||
processUserRequest(w, r, ui, tkn)
|
||||
return true
|
||||
}
|
||||
|
||||
uu := authConfig.Load().UnauthorizedUser
|
||||
if uu != nil {
|
||||
processUserRequest(w, r, uu, nil)
|
||||
return true
|
||||
}
|
||||
|
||||
invalidAuthTokenRequests.Inc()
|
||||
if *logInvalidAuthTokens {
|
||||
err := fmt.Errorf("cannot authorize request with auth tokens %q", ats)
|
||||
err = &httpserver.ErrorWithStatusCode{
|
||||
Err: err,
|
||||
StatusCode: http.StatusUnauthorized,
|
||||
}
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
} else {
|
||||
http.Error(w, "Unauthorized", http.StatusUnauthorized)
|
||||
}
|
||||
processUserRequest(w, r, ui)
|
||||
return true
|
||||
}
|
||||
|
||||
@@ -226,172 +202,33 @@ func getUserInfoByAuthTokens(ats []string) *UserInfo {
|
||||
return nil
|
||||
}
|
||||
|
||||
// responseWriterWithStatus is a wrapper around http.ResponseWriter that captures the status code written to the response.
|
||||
type responseWriterWithStatus struct {
|
||||
http.ResponseWriter
|
||||
status int
|
||||
}
|
||||
|
||||
// WriteHeader records the status so it can be easily retrieved later
|
||||
func (rws *responseWriterWithStatus) WriteHeader(status int) {
|
||||
rws.status = status
|
||||
rws.ResponseWriter.WriteHeader(status)
|
||||
}
|
||||
|
||||
// Flush implements net/http.Flusher interface
|
||||
//
|
||||
// This is needed for the copyStreamToClient()
|
||||
func (rws *responseWriterWithStatus) Flush() {
|
||||
flusher, ok := rws.ResponseWriter.(http.Flusher)
|
||||
if !ok {
|
||||
logger.Panicf("BUG: it is expected http.ResponseWriter (%T) supports http.Flusher interface", rws.ResponseWriter)
|
||||
}
|
||||
flusher.Flush()
|
||||
}
|
||||
|
||||
// Unwrap returns the original ResponseWriter wrapped by rws.
|
||||
//
|
||||
// This is needed for the net/http.ResponseController - see https://pkg.go.dev/net/http#NewResponseController
|
||||
func (rws *responseWriterWithStatus) Unwrap() http.ResponseWriter {
|
||||
return rws.ResponseWriter
|
||||
}
|
||||
|
||||
func processUserRequest(w http.ResponseWriter, r *http.Request, ui *UserInfo, tkn *jwt.Token) {
|
||||
func processUserRequest(w http.ResponseWriter, r *http.Request, ui *UserInfo) {
|
||||
startTime := time.Now()
|
||||
defer ui.requestsDuration.UpdateDuration(startTime)
|
||||
|
||||
ui.requests.Inc()
|
||||
|
||||
ctx, cancel := context.WithTimeout(r.Context(), *maxQueueDuration)
|
||||
defer cancel()
|
||||
|
||||
userName := ui.name()
|
||||
if userName == "" {
|
||||
userName = "unauthorized"
|
||||
}
|
||||
|
||||
if ui.AccessLog != nil {
|
||||
w = &responseWriterWithStatus{ResponseWriter: w}
|
||||
defer func() {
|
||||
rws := w.(*responseWriterWithStatus)
|
||||
duration := time.Since(startTime)
|
||||
ui.logRequest(r, userName, rws.status, duration)
|
||||
}()
|
||||
}
|
||||
|
||||
// Acquire global concurrency limit.
|
||||
if err := beginConcurrencyLimit(ctx); err != nil {
|
||||
handleConcurrencyLimitError(w, r, err)
|
||||
return
|
||||
}
|
||||
defer endConcurrencyLimit()
|
||||
|
||||
// Set read deadline for reading the initial chunk for the request body.
|
||||
rc := http.NewResponseController(w)
|
||||
deadline, ok := ctx.Deadline()
|
||||
if !ok {
|
||||
logger.Panicf("BUG: expecting valid deadline for the context")
|
||||
}
|
||||
if err := rc.SetReadDeadline(deadline); err != nil {
|
||||
logger.Panicf("BUG: cannot set read deadline: %s", err)
|
||||
}
|
||||
|
||||
// Read the initial chunk for the request body.
|
||||
bb, err := bufferRequestBody(ctx, r.Body, userName)
|
||||
if err != nil {
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return
|
||||
}
|
||||
r.Body = bb
|
||||
|
||||
// Disable the read deadline for the rest of the request body.
|
||||
if err := rc.SetReadDeadline(time.Time{}); err != nil {
|
||||
logger.Panicf("BUG: cannot reset read deadline: %s", err)
|
||||
}
|
||||
|
||||
// Acquire concurrency limit for the given user.
|
||||
if err := ui.beginConcurrencyLimit(ctx); err != nil {
|
||||
handleConcurrencyLimitError(w, r, err)
|
||||
return
|
||||
}
|
||||
defer ui.endConcurrencyLimit()
|
||||
|
||||
// Process the request.
|
||||
processRequest(w, r, ui, tkn)
|
||||
}
|
||||
|
||||
func beginConcurrencyLimit(ctx context.Context) error {
|
||||
// Limit the concurrency of requests to backends
|
||||
concurrencyLimitOnce.Do(concurrencyLimitInit)
|
||||
select {
|
||||
case concurrencyLimitCh <- struct{}{}:
|
||||
return nil
|
||||
default:
|
||||
// The -maxConcurrentRequests are executed. Wait until some of the requests are finished,
|
||||
// so the current request could be executed.
|
||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10078
|
||||
select {
|
||||
case concurrencyLimitCh <- struct{}{}:
|
||||
return nil
|
||||
case <-ctx.Done():
|
||||
err := ctx.Err()
|
||||
if errors.Is(err, context.DeadlineExceeded) {
|
||||
// The current request couldn't be executed until the request timeout.
|
||||
concurrentRequestsLimitReached.Inc()
|
||||
return fmt.Errorf("cannot start executing the request during -maxQueueDuration=%s because -maxConcurrentRequests=%d concurrent requests are executed",
|
||||
*maxQueueDuration, cap(concurrencyLimitCh))
|
||||
}
|
||||
return fmt.Errorf("cannot start executing the request because -maxConcurrentRequests=%d concurrent requests are executed: %w", cap(concurrencyLimitCh), err)
|
||||
if err := ui.beginConcurrencyLimit(); err != nil {
|
||||
handleConcurrencyLimitError(w, r, err)
|
||||
<-concurrencyLimitCh
|
||||
return
|
||||
}
|
||||
default:
|
||||
concurrentRequestsLimitReached.Inc()
|
||||
err := fmt.Errorf("cannot serve more than -maxConcurrentRequests=%d concurrent requests", cap(concurrencyLimitCh))
|
||||
handleConcurrencyLimitError(w, r, err)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
func endConcurrencyLimit() {
|
||||
processRequest(w, r, ui)
|
||||
ui.endConcurrencyLimit()
|
||||
<-concurrencyLimitCh
|
||||
}
|
||||
|
||||
func bufferRequestBody(ctx context.Context, r io.ReadCloser, userName string) (io.ReadCloser, error) {
|
||||
if r == nil {
|
||||
// This is a GET request with nil reader.
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
maxBufSize := max(requestBufferSize.IntN(), maxRequestBodySizeToRetry.IntN())
|
||||
if maxBufSize <= 0 {
|
||||
// Request buffering is disabled.
|
||||
return r, nil
|
||||
}
|
||||
|
||||
lr := ioutil.GetLimitedReader(r, int64(maxBufSize))
|
||||
defer ioutil.PutLimitedReader(lr)
|
||||
|
||||
start := time.Now()
|
||||
buf, err := io.ReadAll(lr)
|
||||
bufferRequestBodyDuration.UpdateDuration(start)
|
||||
|
||||
if err != nil {
|
||||
if errors.Is(ctx.Err(), context.DeadlineExceeded) {
|
||||
rejectSlowClientRequests.Inc()
|
||||
|
||||
d := time.Since(start)
|
||||
|
||||
return nil, &httpserver.ErrorWithStatusCode{
|
||||
Err: fmt.Errorf("reject request from the user %s because the request body couldn't be read in -maxQueueDuration=%s; read %d bytes in %s",
|
||||
userName, *maxQueueDuration, len(buf), d.Truncate(time.Second)),
|
||||
StatusCode: http.StatusBadRequest,
|
||||
}
|
||||
}
|
||||
|
||||
return nil, &httpserver.ErrorWithStatusCode{
|
||||
Err: fmt.Errorf("cannot read request body: %w", err),
|
||||
StatusCode: http.StatusBadRequest,
|
||||
}
|
||||
}
|
||||
|
||||
bb := newBufferedBody(r, buf, maxBufSize)
|
||||
return bb, nil
|
||||
}
|
||||
|
||||
func processRequest(w http.ResponseWriter, r *http.Request, ui *UserInfo, tkn *jwt.Token) {
|
||||
func processRequest(w http.ResponseWriter, r *http.Request, ui *UserInfo) {
|
||||
u := normalizeURL(r.URL)
|
||||
up, hc := ui.getURLPrefixAndHeaders(u, r.Host, r.Header)
|
||||
isDefault := false
|
||||
@@ -416,31 +253,28 @@ func processRequest(w http.ResponseWriter, r *http.Request, ui *UserInfo, tkn *j
|
||||
isDefault = true
|
||||
}
|
||||
|
||||
rtb := newReadTrackingBody(r.Body, maxRequestBodySizeToRetry.IntN())
|
||||
r.Body = rtb
|
||||
|
||||
maxAttempts := up.getBackendsCount()
|
||||
for range maxAttempts {
|
||||
for i := 0; i < maxAttempts; i++ {
|
||||
bu := up.getBackendURL()
|
||||
if bu == nil {
|
||||
break
|
||||
}
|
||||
targetURL := bu.url
|
||||
if tkn != nil {
|
||||
// for security reasons allow templating only for configured url values and headers
|
||||
targetURL, hc = replaceJWTPlaceholders(bu, hc, tkn.VMAccess())
|
||||
}
|
||||
// Don't change path and add request_path query param for default route.
|
||||
if isDefault {
|
||||
// Don't change path and add request_path query param for default route.
|
||||
targetURLCopy := *targetURL
|
||||
query := targetURL.Query()
|
||||
query.Set("request_path", u.String())
|
||||
targetURLCopy.RawQuery = query.Encode()
|
||||
targetURL = &targetURLCopy
|
||||
} else {
|
||||
// Update path for regular routes.
|
||||
targetURL.RawQuery = query.Encode()
|
||||
} else { // Update path for regular routes.
|
||||
targetURL = mergeURLs(targetURL, u, up.dropSrcPathPrefixParts, up.mergeQueryArgs)
|
||||
}
|
||||
|
||||
wasLocalRetry := false
|
||||
again:
|
||||
ok, needLocalRetry := tryProcessingRequest(w, r, targetURL, hc, up.retryStatusCodes, ui, bu)
|
||||
ok, needLocalRetry := tryProcessingRequest(w, r, targetURL, hc, up.retryStatusCodes, ui)
|
||||
if needLocalRetry && !wasLocalRetry {
|
||||
wasLocalRetry = true
|
||||
goto again
|
||||
@@ -450,20 +284,17 @@ func processRequest(w http.ResponseWriter, r *http.Request, ui *UserInfo, tkn *j
|
||||
if ok {
|
||||
return
|
||||
}
|
||||
|
||||
bu.setBroken()
|
||||
ui.backendErrors.Inc()
|
||||
}
|
||||
err := &httpserver.ErrorWithStatusCode{
|
||||
Err: fmt.Errorf("all the %d backends for the user %q are unavailable for proxying the request - check previous WARN logs to see the exact error for each failed backend", up.getBackendsCount(), ui.name()),
|
||||
Err: fmt.Errorf("all the %d backends for the user %q are unavailable", up.getBackendsCount(), ui.name()),
|
||||
StatusCode: http.StatusBadGateway,
|
||||
}
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
ui.requestErrors.Inc()
|
||||
ui.backendErrors.Inc()
|
||||
}
|
||||
|
||||
func tryProcessingRequest(w http.ResponseWriter, r *http.Request, targetURL *url.URL, hc HeadersConf, retryStatusCodes []int, ui *UserInfo, bu *backendURL) (bool, bool) {
|
||||
ui.backendRequests.Inc()
|
||||
func tryProcessingRequest(w http.ResponseWriter, r *http.Request, targetURL *url.URL, hc HeadersConf, retryStatusCodes []int, ui *UserInfo) (bool, bool) {
|
||||
req := sanitizeRequestHeaders(r)
|
||||
|
||||
req.URL = targetURL
|
||||
@@ -477,22 +308,28 @@ func tryProcessingRequest(w http.ResponseWriter, r *http.Request, targetURL *url
|
||||
}
|
||||
}
|
||||
|
||||
bb, bbOK := req.Body.(*bufferedBody)
|
||||
canRetry := !bbOK || bb.canRetry()
|
||||
|
||||
rtb, rtbOK := req.Body.(*readTrackingBody)
|
||||
res, err := ui.rt.RoundTrip(req)
|
||||
if err == nil {
|
||||
defer func() { _ = res.Body.Close() }()
|
||||
}
|
||||
|
||||
if errors.Is(r.Context().Err(), context.Canceled) {
|
||||
// Do not retry canceled requests.
|
||||
clientCanceledRequests.Inc()
|
||||
return true, false
|
||||
if ctxErr := r.Context().Err(); ctxErr != nil {
|
||||
// Override the error returned by the RoundTrip with the context error if it isn't non-nil
|
||||
// This makes sure the proper logging for canceled and timed out requests - log the real cause of the error
|
||||
// instead of the random error, which could be returned from RoundTrip because of canceled or timed out request.
|
||||
err = ctxErr
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
if !canRetry {
|
||||
if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) {
|
||||
// Do not retry canceled or timed out requests
|
||||
remoteAddr := httpserver.GetQuotedRemoteAddr(r)
|
||||
requestURI := httpserver.GetRequestURI(r)
|
||||
if errors.Is(err, context.DeadlineExceeded) {
|
||||
// Timed out request must be counted as errors, since this usually means that the backend is slow.
|
||||
logger.Warnf("remoteAddr: %s; requestURI: %s; timeout while proxying the response from %s: %s", remoteAddr, requestURI, targetURL, err)
|
||||
ui.backendErrors.Inc()
|
||||
}
|
||||
return false, false
|
||||
}
|
||||
if !rtbOK || !rtb.canRetry() {
|
||||
// Request body cannot be re-sent to another backend. Return the error to the client then.
|
||||
err = &httpserver.ErrorWithStatusCode{
|
||||
Err: fmt.Errorf("cannot proxy the request to %s: %w", targetURL, err),
|
||||
@@ -500,51 +337,41 @@ func tryProcessingRequest(w http.ResponseWriter, r *http.Request, targetURL *url
|
||||
}
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
ui.backendErrors.Inc()
|
||||
ui.requestErrors.Inc()
|
||||
bu.setBroken()
|
||||
return true, false
|
||||
}
|
||||
if netutil.IsTrivialNetworkError(err) {
|
||||
// Retry request at the same backend on trivial network errors, such as proxy idle timeout misconfiguration or socket close by OS
|
||||
if bbOK {
|
||||
bb.resetReader()
|
||||
}
|
||||
return false, true
|
||||
}
|
||||
|
||||
// Retry the request at another backend
|
||||
// Retry the request if its body wasn't read yet. This usually means that the backend isn't reachable.
|
||||
remoteAddr := httpserver.GetQuotedRemoteAddr(r)
|
||||
requestURI := httpserver.GetRequestURI(r)
|
||||
logger.Warnf("remoteAddr: %s; requestURI: %s; request to %s failed: %s, retrying the request at another backend", remoteAddr, requestURI, targetURL, err)
|
||||
if bbOK {
|
||||
bb.resetReader()
|
||||
}
|
||||
// NOTE: do not use httpserver.GetRequestURI
|
||||
// it explicitly reads request body, which may fail retries.
|
||||
logger.Warnf("remoteAddr: %s; requestURI: %s; retrying the request to %s because of response error: %s", remoteAddr, req.URL, targetURL, err)
|
||||
return false, false
|
||||
}
|
||||
if slices.Contains(retryStatusCodes, res.StatusCode) {
|
||||
if !canRetry {
|
||||
_ = res.Body.Close()
|
||||
if !rtbOK || !rtb.canRetry() {
|
||||
// If we get an error from the retry_status_codes list, but cannot execute retry,
|
||||
// we consider such a request an error as well.
|
||||
err := &httpserver.ErrorWithStatusCode{
|
||||
Err: fmt.Errorf("got response status code=%d from %s, but cannot retry the request at another backend, because the request body has been already consumed",
|
||||
Err: fmt.Errorf("got response status code=%d from %s, but cannot retry the request on another backend, because the request has been already consumed",
|
||||
res.StatusCode, targetURL),
|
||||
StatusCode: http.StatusServiceUnavailable,
|
||||
}
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
ui.backendErrors.Inc()
|
||||
ui.requestErrors.Inc()
|
||||
return true, false
|
||||
}
|
||||
|
||||
// Retry requests at other backends if it matches retryStatusCodes.
|
||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4893
|
||||
remoteAddr := httpserver.GetQuotedRemoteAddr(r)
|
||||
requestURI := httpserver.GetRequestURI(r)
|
||||
logger.Warnf("remoteAddr: %s; requestURI: %s; request to %s failed, retrying the request at another backend because response status code=%d belongs to retry_status_codes=%d",
|
||||
remoteAddr, requestURI, targetURL, res.StatusCode, retryStatusCodes)
|
||||
if bbOK {
|
||||
bb.resetReader()
|
||||
}
|
||||
// NOTE: do not use httpserver.GetRequestURI
|
||||
// it explicitly reads request body, which may fail retries.
|
||||
logger.Warnf("remoteAddr: %s; requestURI: %s; retrying the request to %s because response status code=%d belongs to retry_status_codes=%d",
|
||||
remoteAddr, req.URL, targetURL, res.StatusCode, retryStatusCodes)
|
||||
return false, false
|
||||
}
|
||||
removeHopHeaders(res.Header)
|
||||
@@ -553,18 +380,12 @@ func tryProcessingRequest(w http.ResponseWriter, r *http.Request, targetURL *url
|
||||
w.WriteHeader(res.StatusCode)
|
||||
|
||||
err = copyStreamToClient(w, res.Body)
|
||||
|
||||
if errors.Is(r.Context().Err(), context.Canceled) {
|
||||
// Do not retry canceled requests.
|
||||
clientCanceledRequests.Inc()
|
||||
return true, false
|
||||
}
|
||||
|
||||
if err != nil && !netutil.IsTrivialNetworkError(err) {
|
||||
_ = res.Body.Close()
|
||||
if err != nil && !netutil.IsTrivialNetworkError(err) && !errors.Is(err, context.Canceled) {
|
||||
remoteAddr := httpserver.GetQuotedRemoteAddr(r)
|
||||
requestURI := httpserver.GetRequestURI(r)
|
||||
|
||||
logger.Warnf("remoteAddr: %s; requestURI: %s; error when proxying response body from %s: %s", remoteAddr, requestURI, targetURL, err)
|
||||
ui.requestErrors.Inc()
|
||||
return true, false
|
||||
}
|
||||
return true, false
|
||||
@@ -692,10 +513,6 @@ var (
|
||||
configReloadRequests = metrics.NewCounter(`vmauth_http_requests_total{path="/-/reload"}`)
|
||||
invalidAuthTokenRequests = metrics.NewCounter(`vmauth_http_request_errors_total{reason="invalid_auth_token"}`)
|
||||
missingRouteRequests = metrics.NewCounter(`vmauth_http_request_errors_total{reason="missing_route"}`)
|
||||
clientCanceledRequests = metrics.NewCounter(`vmauth_http_request_errors_total{reason="client_canceled"}`)
|
||||
rejectSlowClientRequests = metrics.NewCounter(`vmauth_http_request_errors_total{reason="reject_slow_client"}`)
|
||||
|
||||
bufferRequestBodyDuration = metrics.NewSummary(`vmauth_buffer_request_body_duration_seconds`)
|
||||
)
|
||||
|
||||
func newRoundTripper(caFileOpt, certFileOpt, keyFileOpt, serverNameOpt string, insecureSkipVerifyP *bool) (http.RoundTripper, error) {
|
||||
@@ -766,7 +583,7 @@ var concurrentRequestsLimitReached = metrics.NewCounter("vmauth_concurrent_reque
|
||||
|
||||
func usage() {
|
||||
const s = `
|
||||
vmauth authenticates and authorizes incoming requests and proxies them to VictoriaMetrics components or any other HTTP backends.
|
||||
vmauth authenticates and authorizes incoming requests and proxies them to VictoriaMetrics.
|
||||
|
||||
See the docs at https://docs.victoriametrics.com/victoriametrics/vmauth/ .
|
||||
`
|
||||
@@ -779,13 +596,6 @@ func handleMissingAuthorizationError(w http.ResponseWriter) {
|
||||
}
|
||||
|
||||
func handleConcurrencyLimitError(w http.ResponseWriter, r *http.Request, err error) {
|
||||
if errors.Is(r.Context().Err(), context.Canceled) {
|
||||
// Do not return any response for the request canceled by the client,
|
||||
// since the connection to the client is already closed.
|
||||
clientCanceledRequests.Inc()
|
||||
return
|
||||
}
|
||||
|
||||
w.Header().Add("Retry-After", "10")
|
||||
err = &httpserver.ErrorWithStatusCode{
|
||||
Err: err,
|
||||
@@ -794,81 +604,120 @@ func handleConcurrencyLimitError(w http.ResponseWriter, r *http.Request, err err
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
}
|
||||
|
||||
// bufferedBody serves two purposes:
|
||||
//
|
||||
// 1. It enables request retries when the request body size does not exceed maxBufSize
|
||||
// by fully buffering the request body in memory.
|
||||
// 2. It prevents slow clients from reducing effective server capacity
|
||||
// by buffering the request body before acquiring a per-user concurrency slot.
|
||||
//
|
||||
// See bufferRequestBody for details on how bufferedBody is used.
|
||||
type bufferedBody struct {
|
||||
// r contains reader for reading the data after buf is read.
|
||||
// readTrackingBody must be obtained via getReadTrackingBody()
|
||||
type readTrackingBody struct {
|
||||
// maxBodySize is the maximum body size to cache in buf.
|
||||
//
|
||||
// r is nil if buf contains all the data.
|
||||
// Bigger bodies cannot be retried.
|
||||
maxBodySize int
|
||||
|
||||
// r contains reader for initial data reading
|
||||
r io.ReadCloser
|
||||
|
||||
// buf contains the initial buffer read from r.
|
||||
// buf is a buffer for data read from r. Buf size is limited by maxBodySize.
|
||||
// If more than maxBodySize is read from r, then cannotRetry is set to true.
|
||||
buf []byte
|
||||
|
||||
// bufOffset is the offset at buf for already read bytes.
|
||||
bufOffset int
|
||||
// readBuf points to the cached data at buf, which must be read in the next call to Read().
|
||||
readBuf []byte
|
||||
|
||||
// cannotRetry is set to true after Close() call on non-nil r.
|
||||
// cannotRetry is set to true when more than maxBodySize bytes are read from r.
|
||||
// In this case the read data cannot fit buf, so it cannot be re-read from buf.
|
||||
cannotRetry bool
|
||||
|
||||
// bufComplete is set to true when buf contains complete request body read from r.
|
||||
bufComplete bool
|
||||
}
|
||||
|
||||
func newBufferedBody(r io.ReadCloser, buf []byte, maxBufSize int) *bufferedBody {
|
||||
// Do not use sync.Pool here, since http.RoundTrip may still use request body after return.
|
||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/8051
|
||||
|
||||
if len(buf) < maxBufSize {
|
||||
// The full request body has been already read into buf.
|
||||
r = nil
|
||||
func newReadTrackingBody(r io.ReadCloser, maxBodySize int) *readTrackingBody {
|
||||
// do not use sync.Pool there
|
||||
// since http.RoundTrip may still use request body after return
|
||||
// See this issue for details https://github.com/VictoriaMetrics/VictoriaMetrics/issues/8051
|
||||
rtb := &readTrackingBody{}
|
||||
if maxBodySize < 0 {
|
||||
maxBodySize = 0
|
||||
}
|
||||
rtb.maxBodySize = maxBodySize
|
||||
|
||||
return &bufferedBody{
|
||||
r: r,
|
||||
buf: buf,
|
||||
if r == nil {
|
||||
// This is GET request without request body
|
||||
r = (*zeroReader)(nil)
|
||||
}
|
||||
rtb.r = r
|
||||
return rtb
|
||||
}
|
||||
|
||||
// Read implements io.Reader interface.
|
||||
func (bb *bufferedBody) Read(p []byte) (int, error) {
|
||||
if bb.cannotRetry {
|
||||
return 0, fmt.Errorf("cannot read already closed request body")
|
||||
}
|
||||
if bb.bufOffset < len(bb.buf) {
|
||||
n := copy(p, bb.buf[bb.bufOffset:])
|
||||
bb.bufOffset += n
|
||||
return n, nil
|
||||
}
|
||||
if bb.r == nil {
|
||||
return 0, io.EOF
|
||||
}
|
||||
return bb.r.Read(p)
|
||||
}
|
||||
type zeroReader struct{}
|
||||
|
||||
func (bb *bufferedBody) canRetry() bool {
|
||||
if bb.r != nil {
|
||||
return false
|
||||
}
|
||||
maxRetrySize := maxRequestBodySizeToRetry.IntN()
|
||||
return len(bb.buf) == 0 || (maxRetrySize > 0 && len(bb.buf) <= maxRetrySize)
|
||||
func (r *zeroReader) Read(_ []byte) (int, error) {
|
||||
return 0, io.EOF
|
||||
}
|
||||
|
||||
// Close implements io.Closer interface.
|
||||
func (bb *bufferedBody) Close() error {
|
||||
bb.resetReader()
|
||||
bb.cannotRetry = !bb.canRetry()
|
||||
if bb.r != nil {
|
||||
return bb.r.Close()
|
||||
}
|
||||
func (r *zeroReader) Close() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (bb *bufferedBody) resetReader() {
|
||||
bb.bufOffset = 0
|
||||
// Read implements io.Reader interface.
|
||||
func (rtb *readTrackingBody) Read(p []byte) (int, error) {
|
||||
if len(rtb.readBuf) > 0 {
|
||||
n := copy(p, rtb.readBuf)
|
||||
rtb.readBuf = rtb.readBuf[n:]
|
||||
return n, nil
|
||||
}
|
||||
|
||||
if rtb.r == nil {
|
||||
if rtb.bufComplete {
|
||||
return 0, io.EOF
|
||||
}
|
||||
return 0, fmt.Errorf("cannot read client request body after closing client reader")
|
||||
}
|
||||
|
||||
n, err := rtb.r.Read(p)
|
||||
if rtb.cannotRetry {
|
||||
return n, err
|
||||
}
|
||||
|
||||
if len(rtb.buf)+n > rtb.maxBodySize {
|
||||
rtb.cannotRetry = true
|
||||
return n, err
|
||||
}
|
||||
rtb.buf = append(rtb.buf, p[:n]...)
|
||||
if err == io.EOF {
|
||||
rtb.bufComplete = true
|
||||
}
|
||||
return n, err
|
||||
}
|
||||
|
||||
func (rtb *readTrackingBody) canRetry() bool {
|
||||
if rtb.cannotRetry {
|
||||
return false
|
||||
}
|
||||
if rtb.bufComplete {
|
||||
return true
|
||||
}
|
||||
return rtb.r != nil
|
||||
}
|
||||
|
||||
// Close implements io.Closer interface.
|
||||
func (rtb *readTrackingBody) Close() error {
|
||||
if !rtb.cannotRetry {
|
||||
rtb.readBuf = rtb.buf
|
||||
} else {
|
||||
rtb.readBuf = nil
|
||||
}
|
||||
|
||||
// Close rtb.r only if the request body is completely read or if it is too big.
|
||||
// http.Roundtrip performs body.Close call even without any Read calls,
|
||||
// so this hack allows us to reuse request body.
|
||||
if rtb.bufComplete || rtb.cannotRetry {
|
||||
if rtb.r == nil {
|
||||
return nil
|
||||
}
|
||||
err := rtb.r.Close()
|
||||
rtb.r = nil
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func debugInfo(u *url.URL, r *http.Request) string {
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,194 +0,0 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"crypto"
|
||||
"crypto/rand"
|
||||
"crypto/rsa"
|
||||
"crypto/x509"
|
||||
"encoding/base64"
|
||||
"encoding/json"
|
||||
"encoding/pem"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func BenchmarkJWTRequestHandler(b *testing.B) {
|
||||
// Generate RSA key pair for testing
|
||||
privateKey, err := rsa.GenerateKey(rand.Reader, 2048)
|
||||
if err != nil {
|
||||
b.Fatalf("cannot generate RSA key: %s", err)
|
||||
}
|
||||
|
||||
// Generate public key PEM
|
||||
publicKeyBytes, err := x509.MarshalPKIXPublicKey(&privateKey.PublicKey)
|
||||
if err != nil {
|
||||
b.Fatalf("cannot marshal public key: %s", err)
|
||||
}
|
||||
publicKeyPEM := pem.EncodeToMemory(&pem.Block{
|
||||
Type: "PUBLIC KEY",
|
||||
Bytes: publicKeyBytes,
|
||||
})
|
||||
|
||||
genToken := func(t *testing.B, body map[string]any, valid bool) string {
|
||||
t.Helper()
|
||||
|
||||
headerJSON, err := json.Marshal(map[string]any{
|
||||
"alg": "RS256",
|
||||
"typ": "JWT",
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("cannot marshal header: %s", err)
|
||||
}
|
||||
headerB64 := base64.RawURLEncoding.EncodeToString(headerJSON)
|
||||
|
||||
bodyJSON, err := json.Marshal(body)
|
||||
if err != nil {
|
||||
t.Fatalf("cannot marshal body: %s", err)
|
||||
}
|
||||
bodyB64 := base64.RawURLEncoding.EncodeToString(bodyJSON)
|
||||
|
||||
payload := headerB64 + "." + bodyB64
|
||||
|
||||
var signatureB64 string
|
||||
if valid {
|
||||
// Create real RSA signature
|
||||
hash := crypto.SHA256
|
||||
h := hash.New()
|
||||
h.Write([]byte(payload))
|
||||
digest := h.Sum(nil)
|
||||
|
||||
signature, err := rsa.SignPKCS1v15(rand.Reader, privateKey, hash, digest)
|
||||
if err != nil {
|
||||
t.Fatalf("cannot sign token: %s", err)
|
||||
}
|
||||
signatureB64 = base64.RawURLEncoding.EncodeToString(signature)
|
||||
} else {
|
||||
signatureB64 = base64.RawURLEncoding.EncodeToString([]byte("invalid_signature"))
|
||||
}
|
||||
|
||||
return payload + "." + signatureB64
|
||||
}
|
||||
|
||||
f := func(name string, cfgStr string, r *http.Request, statusCodeExpected int) {
|
||||
b.Helper()
|
||||
|
||||
b.ReportAllocs()
|
||||
b.ResetTimer()
|
||||
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.WriteHeader(http.StatusOK)
|
||||
if _, err := w.Write([]byte("path: " + r.URL.Path + "\n")); err != nil {
|
||||
panic(fmt.Errorf("cannot write response: %w", err))
|
||||
}
|
||||
}))
|
||||
defer ts.Close()
|
||||
|
||||
cfgStr = strings.ReplaceAll(cfgStr, "{BACKEND}", ts.URL)
|
||||
|
||||
cfgOrigP := authConfigData.Load()
|
||||
if _, err := reloadAuthConfigData([]byte(cfgStr)); err != nil {
|
||||
b.Fatalf("cannot load config data: %s", err)
|
||||
}
|
||||
defer func() {
|
||||
cfgOrig := []byte("unauthorized_user:\n url_prefix: http://foo/bar")
|
||||
if cfgOrigP != nil {
|
||||
cfgOrig = *cfgOrigP
|
||||
}
|
||||
_, err := reloadAuthConfigData(cfgOrig)
|
||||
if err != nil {
|
||||
b.Fatalf("cannot load the original config: %s", err)
|
||||
}
|
||||
}()
|
||||
|
||||
b.Run(name, func(b *testing.B) {
|
||||
b.ResetTimer()
|
||||
b.ReportAllocs()
|
||||
b.RunParallel(func(pb *testing.PB) {
|
||||
w := &fakeResponseWriter{}
|
||||
for pb.Next() {
|
||||
w.reset()
|
||||
if !requestHandlerWithInternalRoutes(w, r) {
|
||||
b.Fatalf("unexpected false is returned from requestHandler")
|
||||
}
|
||||
if w.statusCode != statusCodeExpected {
|
||||
b.Fatalf("unexpected response code (-%d;+%d)", statusCodeExpected, w.statusCode)
|
||||
}
|
||||
|
||||
}
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
simpleCfgStr := fmt.Sprintf(`
|
||||
users:
|
||||
- jwt:
|
||||
public_keys:
|
||||
- %q
|
||||
url_prefix: {BACKEND}/foo`, string(publicKeyPEM))
|
||||
noVMAccessClaimToken := genToken(b, nil, true)
|
||||
expiredToken := genToken(b, map[string]any{
|
||||
"exp": 10,
|
||||
"vm_access": map[string]any{},
|
||||
}, true)
|
||||
|
||||
fullToken := genToken(b, map[string]any{
|
||||
"exp": time.Now().Add(10 * time.Minute).Unix(),
|
||||
"scope": "email id",
|
||||
"vm_access": map[string]any{
|
||||
"extra_labels": map[string]string{
|
||||
"label": "value1",
|
||||
"label2": "value3",
|
||||
},
|
||||
"extra_filters": []string{"stream_filter1", "stream_filter2"},
|
||||
"metrics_account_id": 123,
|
||||
"metrics_project_id": 234,
|
||||
"metrics_extra_labels": []string{
|
||||
"label1=value1",
|
||||
"label2=value2",
|
||||
},
|
||||
"metrics_extra_filters": []string{
|
||||
`{label3="value3"}`,
|
||||
`{label4="value4"}`,
|
||||
},
|
||||
"logs_account_id": 345,
|
||||
"logs_project_id": 456,
|
||||
"logs_extra_filters": []string{
|
||||
`{"namespace":"my-app","env":"prod"}`,
|
||||
},
|
||||
"logs_extra_stream_filters": []string{
|
||||
`{"team":"dev"}`,
|
||||
},
|
||||
},
|
||||
}, true)
|
||||
|
||||
// tenant headers are overwritten if set as placeholders
|
||||
// extra_filters extra_stream_filters from vm_access claim merged with statically defined
|
||||
request := httptest.NewRequest(`GET`, "http://some-host.com/query", nil)
|
||||
request.Header.Set(`Authorization`, `Bearer `+fullToken)
|
||||
f("full_template",
|
||||
fmt.Sprintf(`
|
||||
users:
|
||||
- jwt:
|
||||
public_keys:
|
||||
- %q
|
||||
headers:
|
||||
- "AccountID: {{.LogsAccountID}}"
|
||||
- "ProjectID: {{.LogsProjectID}}"
|
||||
url_prefix: {BACKEND}/select/logsql/?extra_filters=aStaticFilter&extra_stream_filters=aStaticStreamFilter&extra_filters={{.LogsExtraFilters}}&extra_stream_filters={{.LogsExtraStreamFilters}}`, string(publicKeyPEM)),
|
||||
request,
|
||||
http.StatusOK,
|
||||
)
|
||||
|
||||
// token without vm_access claim
|
||||
request = httptest.NewRequest(`GET`, "http://some-host.com/abc", nil)
|
||||
request.Header.Set(`Authorization`, `Bearer `+noVMAccessClaimToken)
|
||||
f("token_without_claim", simpleCfgStr, request, http.StatusUnauthorized)
|
||||
|
||||
// expired token
|
||||
request = httptest.NewRequest(`GET`, "http://some-host.com/abc", nil)
|
||||
request.Header.Set(`Authorization`, `Bearer `+expiredToken)
|
||||
f("expired_token", simpleCfgStr, request, http.StatusUnauthorized)
|
||||
}
|
||||
@@ -1,195 +0,0 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"strings"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/jwt"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timeutil"
|
||||
)
|
||||
|
||||
type oidcConfig struct {
|
||||
Issuer string `yaml:"issuer"`
|
||||
}
|
||||
|
||||
type oidcDiscovererPool struct {
|
||||
ds map[string]*oidcDiscoverer
|
||||
|
||||
context context.Context
|
||||
cancel func()
|
||||
wg *sync.WaitGroup
|
||||
}
|
||||
|
||||
func (dp *oidcDiscovererPool) createOrAdd(issuer string, vp *atomic.Pointer[jwt.VerifierPool]) {
|
||||
if dp.ds == nil {
|
||||
dp.ds = make(map[string]*oidcDiscoverer)
|
||||
dp.context, dp.cancel = context.WithCancel(context.Background())
|
||||
dp.wg = &sync.WaitGroup{}
|
||||
}
|
||||
|
||||
ds, found := dp.ds[issuer]
|
||||
if !found {
|
||||
ds = &oidcDiscoverer{
|
||||
issuer: issuer,
|
||||
}
|
||||
dp.ds[issuer] = ds
|
||||
}
|
||||
|
||||
ds.vps = append(ds.vps, vp)
|
||||
}
|
||||
|
||||
func (dp *oidcDiscovererPool) startDiscovery() {
|
||||
if len(dp.ds) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
for _, d := range dp.ds {
|
||||
dp.wg.Go(func() {
|
||||
if err := d.refreshVerifierPools(dp.context); err != nil {
|
||||
logger.Errorf("failed to initialize OIDC verifier pool at start for issuer %q: %s", d.issuer, err)
|
||||
}
|
||||
})
|
||||
}
|
||||
dp.wg.Wait()
|
||||
|
||||
for _, d := range dp.ds {
|
||||
dp.wg.Go(func() {
|
||||
d.run(dp.context)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func (dp *oidcDiscovererPool) stopDiscovery() {
|
||||
if len(dp.ds) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
dp.cancel()
|
||||
dp.wg.Wait()
|
||||
}
|
||||
|
||||
type oidcDiscoverer struct {
|
||||
issuer string
|
||||
vps []*atomic.Pointer[jwt.VerifierPool]
|
||||
}
|
||||
|
||||
func (d *oidcDiscoverer) run(ctx context.Context) {
|
||||
t := time.NewTimer(timeutil.AddJitterToDuration(time.Second * 10))
|
||||
defer t.Stop()
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-t.C:
|
||||
if err := d.refreshVerifierPools(ctx); errors.Is(err, context.Canceled) {
|
||||
return
|
||||
} else if err != nil {
|
||||
t.Reset(timeutil.AddJitterToDuration(time.Second * 10))
|
||||
logger.Errorf("failed to refresh OIDC verifier pool for issuer %q: %v", d.issuer, err)
|
||||
continue
|
||||
}
|
||||
// OIDC may return Cache-Control header with max-age directive.
|
||||
// It could be used as time range for next refresh.
|
||||
// https://openid.net/specs/openid-connect-core-1_0.html#RotateEncKeys
|
||||
t.Reset(timeutil.AddJitterToDuration(time.Minute * 5))
|
||||
case <-ctx.Done():
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (d *oidcDiscoverer) refreshVerifierPools(ctx context.Context) error {
|
||||
cfg, err := getOpenIDConfiguration(ctx, d.issuer)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// The issuer in the OIDC configuration must match the expected issuer.
|
||||
// https://openid.net/specs/openid-connect-core-1_0.html#RotateEncKeys
|
||||
if cfg.Issuer != d.issuer {
|
||||
return fmt.Errorf("openid configuration issuer %q does not match expected issuer %q", cfg.Issuer, d.issuer)
|
||||
}
|
||||
|
||||
verifierPool, err := fetchAndParseJWKs(ctx, cfg.JWKsURI)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, vp := range d.vps {
|
||||
vp.Store(verifierPool)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// See https://openid.net/specs/openid-connect-discovery-1_0.html#ProviderMetadata for details.
|
||||
type openidConfig struct {
|
||||
Issuer string `json:"issuer"`
|
||||
JWKsURI string `json:"jwks_uri"`
|
||||
}
|
||||
|
||||
var oidcHTTPClient = &http.Client{
|
||||
Timeout: time.Second * 5,
|
||||
}
|
||||
|
||||
func fetchAndParseJWKs(ctx context.Context, jwksURI string) (*jwt.VerifierPool, error) {
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, jwksURI, nil)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create request for fetching jwks keys from %q: %w", jwksURI, err)
|
||||
}
|
||||
|
||||
resp, err := oidcHTTPClient.Do(req)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to fetch jwks keys from %q: %w", jwksURI, err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return nil, fmt.Errorf("unexpected status code %d when fetching jwks keys from %q", resp.StatusCode, jwksURI)
|
||||
}
|
||||
|
||||
b, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to read response body from %q: %w", jwksURI, err)
|
||||
}
|
||||
|
||||
vp, err := jwt.ParseJWKs(b)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to parse jwks keys from %q: %v", jwksURI, err)
|
||||
}
|
||||
|
||||
return vp, nil
|
||||
}
|
||||
|
||||
func getOpenIDConfiguration(ctx context.Context, issuer string) (openidConfig, error) {
|
||||
issuer, _ = strings.CutSuffix(issuer, "/")
|
||||
configURL := fmt.Sprintf("%s/.well-known/openid-configuration", issuer)
|
||||
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, configURL, nil)
|
||||
if err != nil {
|
||||
return openidConfig{}, fmt.Errorf("failed to create request for fetching openid config from %q: %w", configURL, err)
|
||||
}
|
||||
|
||||
resp, err := oidcHTTPClient.Do(req)
|
||||
if err != nil {
|
||||
return openidConfig{}, fmt.Errorf("failed to fetch openid config from %q: %w", configURL, err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return openidConfig{}, fmt.Errorf("unexpected status code %d when fetching openid config from %q", resp.StatusCode, configURL)
|
||||
}
|
||||
|
||||
var cfg openidConfig
|
||||
if err := json.NewDecoder(resp.Body).Decode(&cfg); err != nil {
|
||||
return openidConfig{}, fmt.Errorf("failed to decode openid config from %q: %s", configURL, err)
|
||||
}
|
||||
|
||||
return cfg, nil
|
||||
}
|
||||
@@ -174,7 +174,7 @@ func TestCreateTargetURLSuccess(t *testing.T) {
|
||||
},
|
||||
RetryStatusCodes: []int{503, 501},
|
||||
LoadBalancingPolicy: "first_available",
|
||||
DropSrcPathPrefixParts: new(2),
|
||||
DropSrcPathPrefixParts: intp(2),
|
||||
}, "/a/b/c", "http://foo.bar/c", `bb: aaa`, `x: y`, []int{503, 501}, "first_available", 2)
|
||||
f(&UserInfo{
|
||||
URLPrefix: mustParseURL("http://foo.bar/federate"),
|
||||
@@ -219,13 +219,13 @@ func TestCreateTargetURLSuccess(t *testing.T) {
|
||||
},
|
||||
RetryStatusCodes: []int{503, 500, 501},
|
||||
LoadBalancingPolicy: "first_available",
|
||||
DropSrcPathPrefixParts: new(1),
|
||||
DropSrcPathPrefixParts: intp(1),
|
||||
},
|
||||
{
|
||||
SrcPaths: getRegexs([]string{"/api/v1/write"}),
|
||||
URLPrefix: mustParseURL("http://vminsert/0/prometheus"),
|
||||
RetryStatusCodes: []int{},
|
||||
DropSrcPathPrefixParts: new(0),
|
||||
DropSrcPathPrefixParts: intp(0),
|
||||
},
|
||||
{
|
||||
SrcPaths: getRegexs([]string{"/metrics"}),
|
||||
@@ -242,7 +242,7 @@ func TestCreateTargetURLSuccess(t *testing.T) {
|
||||
},
|
||||
},
|
||||
RetryStatusCodes: []int{502},
|
||||
DropSrcPathPrefixParts: new(2),
|
||||
DropSrcPathPrefixParts: intp(2),
|
||||
}
|
||||
f(ui, "http://host42/vmsingle/api/v1/query?query=up&db=foo", "http://vmselect/0/prometheus/api/v1/query?db=foo&query=up",
|
||||
"xx: aa\nyy: asdf", "qwe: rty", []int{503, 500, 501}, "first_available", 1)
|
||||
@@ -259,7 +259,7 @@ func TestCreateTargetURLSuccess(t *testing.T) {
|
||||
SrcPaths: getRegexs([]string{"/api/v1/write"}),
|
||||
URLPrefix: mustParseURL("http://vminsert/0/prometheus"),
|
||||
RetryStatusCodes: []int{},
|
||||
DropSrcPathPrefixParts: new(0),
|
||||
DropSrcPathPrefixParts: intp(0),
|
||||
},
|
||||
{
|
||||
SrcPaths: getRegexs([]string{"/metrics/a/b"}),
|
||||
@@ -275,7 +275,7 @@ func TestCreateTargetURLSuccess(t *testing.T) {
|
||||
},
|
||||
},
|
||||
RetryStatusCodes: []int{502},
|
||||
DropSrcPathPrefixParts: new(2),
|
||||
DropSrcPathPrefixParts: intp(2),
|
||||
}
|
||||
f(ui, "https://foo-host/api/v1/write", "http://vminsert/0/prometheus/api/v1/write", "", "", []int{}, "least_loaded", 0)
|
||||
f(ui, "https://foo-host/metrics/a/b", "http://metrics-server/b", "", "", []int{502}, "least_loaded", 2)
|
||||
|
||||
@@ -21,7 +21,6 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/pushmetrics"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/snapshot"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/snapshot/snapshotutil"
|
||||
)
|
||||
@@ -213,7 +212,7 @@ func newSrcFS() (*fslocal.FS, error) {
|
||||
}
|
||||
|
||||
func newDstFS(ctx context.Context) (common.RemoteFS, error) {
|
||||
fs, err := actions.NewRemoteFS(ctx, *dst, nil)
|
||||
fs, err := actions.NewRemoteFS(ctx, *dst)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse `-dst`=%q: %w", *dst, err)
|
||||
}
|
||||
@@ -256,7 +255,7 @@ func newOriginFS(ctx context.Context) (common.OriginFS, error) {
|
||||
if len(*origin) == 0 {
|
||||
return &fsnil.FS{}, nil
|
||||
}
|
||||
fs, err := actions.NewRemoteFS(ctx, *origin, nil)
|
||||
fs, err := actions.NewRemoteFS(ctx, *origin)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse `-origin`=%q: %w", *origin, err)
|
||||
}
|
||||
@@ -267,7 +266,7 @@ func newRemoteOriginFS(ctx context.Context) (common.RemoteFS, error) {
|
||||
if len(*origin) == 0 {
|
||||
return nil, fmt.Errorf("-origin cannot be empty when -snapshotName and -snapshot.createURL aren't set")
|
||||
}
|
||||
fs, err := actions.NewRemoteFS(ctx, *origin, nil)
|
||||
fs, err := actions.NewRemoteFS(ctx, *origin)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse `-origin`=%q: %w", *origin, err)
|
||||
}
|
||||
|
||||
@@ -7,8 +7,6 @@ import (
|
||||
"math"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
)
|
||||
|
||||
@@ -47,7 +45,7 @@ func New(retries int, factor float64, minDuration time.Duration) (*Backoff, erro
|
||||
// Retry process retries until all attempts are completed
|
||||
func (b *Backoff) Retry(ctx context.Context, cb retryableFunc) (uint64, error) {
|
||||
var attempt uint64
|
||||
for i := range b.retries {
|
||||
for i := 0; i < b.retries; i++ {
|
||||
err := cb()
|
||||
if err == nil {
|
||||
return attempt, nil
|
||||
@@ -57,7 +55,6 @@ func (b *Backoff) Retry(ctx context.Context, cb retryableFunc) (uint64, error) {
|
||||
return attempt, err // fail fast if not recoverable
|
||||
}
|
||||
attempt++
|
||||
retriesTotal.Inc()
|
||||
backoff := float64(b.minDuration) * math.Pow(b.factor, float64(i))
|
||||
dur := time.Duration(backoff)
|
||||
logger.Errorf("got error: %s on attempt: %d; will retry in %v", err, attempt, dur)
|
||||
@@ -77,7 +74,3 @@ func (b *Backoff) Retry(ctx context.Context, cb retryableFunc) (uint64, error) {
|
||||
}
|
||||
return attempt, fmt.Errorf("execution failed after %d retry attempts", b.retries)
|
||||
}
|
||||
|
||||
var (
|
||||
retriesTotal = metrics.NewCounter(`vmctl_backoff_retries_total`)
|
||||
)
|
||||
|
||||
@@ -14,12 +14,6 @@ const (
|
||||
globalSilent = "s"
|
||||
globalVerbose = "verbose"
|
||||
globalDisableProgressBar = "disable-progress-bar"
|
||||
|
||||
globalPushMetricsURL = "pushmetrics.url"
|
||||
globalPushMetricsInterval = "pushmetrics.interval"
|
||||
globalPushExtraLabels = "pushmetrics.extraLabel"
|
||||
globalPushHeaders = "pushmetrics.header"
|
||||
globalPushDisableCompression = "pushmetrics.disableCompression"
|
||||
)
|
||||
|
||||
var (
|
||||
@@ -39,29 +33,6 @@ var (
|
||||
Value: false,
|
||||
Usage: "Whether to disable progress bar during the import.",
|
||||
},
|
||||
&cli.StringSliceFlag{
|
||||
Name: globalPushMetricsURL,
|
||||
Usage: "Optional URL to push metrics. See https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#push-metrics",
|
||||
},
|
||||
&cli.DurationFlag{
|
||||
Name: globalPushMetricsInterval,
|
||||
Value: 10 * time.Second,
|
||||
Usage: "Interval for pushing metrics to every -pushmetrics.url",
|
||||
},
|
||||
&cli.StringSliceFlag{
|
||||
Name: globalPushExtraLabels,
|
||||
Usage: "Extra labels to add to pushed metrics. In case of collision, label value defined by flag will have priority. " +
|
||||
"Flag can be set multiple times, to add few additional labels. " +
|
||||
"For example, -pushmetrics.extraLabel='instance=\"foo\"' adds instance=\"foo\" label to all the metrics pushed to every -pushmetrics.url",
|
||||
},
|
||||
&cli.StringSliceFlag{
|
||||
Name: globalPushHeaders,
|
||||
Usage: "Optional HTTP headers to add to pushed metrics. Flag can be set multiple times, to add few additional headers.",
|
||||
},
|
||||
&cli.BoolFlag{
|
||||
Name: globalPushDisableCompression,
|
||||
Usage: "Whether to disable compression when pushing metrics.",
|
||||
},
|
||||
}
|
||||
)
|
||||
|
||||
@@ -152,32 +123,32 @@ var (
|
||||
Name: vmExtraLabel,
|
||||
Value: nil,
|
||||
Usage: "Extra labels, that will be added to imported timeseries. In case of collision, label value defined by flag" +
|
||||
" will have priority. Flag can be set multiple times, to add few additional labels.",
|
||||
"will have priority. Flag can be set multiple times, to add few additional labels.",
|
||||
},
|
||||
&cli.Int64Flag{
|
||||
Name: vmRateLimit,
|
||||
Usage: "Optional data transfer rate limit in bytes per second.\n" +
|
||||
"By default, the rate limit is disabled. It can be useful for limiting load on configured via '--vm-addr' destination.",
|
||||
"By default, the rate limit is disabled. It can be useful for limiting load on configured via '--vmAddr' destination.",
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: vmCertFile,
|
||||
Usage: "Optional path to client-side TLS certificate file to use when connecting to '--vm-addr'",
|
||||
Usage: "Optional path to client-side TLS certificate file to use when connecting to '--vmAddr'",
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: vmKeyFile,
|
||||
Usage: "Optional path to client-side TLS key to use when connecting to '--vm-addr'",
|
||||
Usage: "Optional path to client-side TLS key to use when connecting to '--vmAddr'",
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: vmCAFile,
|
||||
Usage: "Optional path to TLS CA file to use for verifying connections to '--vm-addr'. By default, system CA is used",
|
||||
Usage: "Optional path to TLS CA file to use for verifying connections to '--vmAddr'. By default, system CA is used",
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: vmServerName,
|
||||
Usage: "Optional TLS server name to use for connections to '--vm-addr'. By default, the server name from '--vm-addr' is used",
|
||||
Usage: "Optional TLS server name to use for connections to '--vmAddr'. By default, the server name from '--vmAddr' is used",
|
||||
},
|
||||
&cli.BoolFlag{
|
||||
Name: vmInsecureSkipVerify,
|
||||
Usage: "Whether to skip tls verification when connecting to '--vm-addr'",
|
||||
Usage: "Whether to skip tls verification when connecting to '--vmAddr'",
|
||||
Value: false,
|
||||
},
|
||||
&cli.IntFlag{
|
||||
@@ -416,16 +387,6 @@ const (
|
||||
promTemporaryDirPath = "prom-tmp-dir-path"
|
||||
)
|
||||
|
||||
const (
|
||||
thanosSnapshot = "thanos-snapshot"
|
||||
thanosConcurrency = "thanos-concurrency"
|
||||
thanosFilterTimeStart = "thanos-filter-time-start"
|
||||
thanosFilterTimeEnd = "thanos-filter-time-end"
|
||||
thanosFilterLabel = "thanos-filter-label"
|
||||
thanosFilterLabelValue = "thanos-filter-label-value"
|
||||
thanosAggrTypes = "thanos-aggr-types"
|
||||
)
|
||||
|
||||
var (
|
||||
promFlags = []cli.Flag{
|
||||
&cli.StringFlag{
|
||||
@@ -461,43 +422,6 @@ var (
|
||||
Value: os.TempDir(),
|
||||
},
|
||||
}
|
||||
|
||||
thanosFlags = []cli.Flag{
|
||||
&cli.StringFlag{
|
||||
Name: thanosSnapshot,
|
||||
Usage: "Path to Thanos snapshot directory containing raw and/or downsampled blocks.",
|
||||
Required: true,
|
||||
},
|
||||
&cli.IntFlag{
|
||||
Name: thanosConcurrency,
|
||||
Usage: "Number of concurrently running snapshot readers",
|
||||
Value: 1,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: thanosFilterTimeStart,
|
||||
Usage: "The time filter in RFC3339 format to select timeseries with timestamp equal or higher than provided value. E.g. '2020-01-01T20:07:00Z'",
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: thanosFilterTimeEnd,
|
||||
Usage: "The time filter in RFC3339 format to select timeseries with timestamp equal or lower than provided value. E.g. '2020-01-01T20:07:00Z'",
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: thanosFilterLabel,
|
||||
Usage: "Thanos label name to filter timeseries by. E.g. '__name__' will filter timeseries by name.",
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: thanosFilterLabelValue,
|
||||
Usage: fmt.Sprintf("Thanos regular expression to filter label from %q flag.", thanosFilterLabel),
|
||||
Value: ".*",
|
||||
},
|
||||
&cli.StringSliceFlag{
|
||||
Name: thanosAggrTypes,
|
||||
Usage: "Aggregate types to import from Thanos downsampled blocks. Supported values: count, sum, min, max, counter. " +
|
||||
"Each aggregate will be imported as a separate metric with the aggregate type as suffix (e.g., metric_name:5m:count). " +
|
||||
"If not specified, all aggregate types will be imported from downsampled blocks.",
|
||||
Value: nil,
|
||||
},
|
||||
}
|
||||
)
|
||||
|
||||
const (
|
||||
@@ -544,7 +468,7 @@ var (
|
||||
Name: vmNativeFilterMatch,
|
||||
Usage: "Time series selector to match series for export. For example, select {instance!=\"localhost\"} will " +
|
||||
"match all series with \"instance\" label different to \"localhost\".\n" +
|
||||
" See more details here https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-export-data-in-native-format",
|
||||
" See more details here https://github.com/VictoriaMetrics/VictoriaMetrics#how-to-export-data-in-native-format",
|
||||
Value: `{__name__!=""}`,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
@@ -674,7 +598,7 @@ var (
|
||||
Name: vmExtraLabel,
|
||||
Value: nil,
|
||||
Usage: "Extra labels, that will be added to imported timeseries. In case of collision, label value defined by flag" +
|
||||
" will have priority. Flag can be set multiple times, to add few additional labels.",
|
||||
"will have priority. Flag can be set multiple times, to add few additional labels.",
|
||||
},
|
||||
&cli.Int64Flag{
|
||||
Name: vmRateLimit,
|
||||
@@ -701,8 +625,8 @@ var (
|
||||
&cli.BoolFlag{
|
||||
Name: vmNativeDisableBinaryProtocol,
|
||||
Usage: "Whether to use https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-export-data-in-json-line-format " +
|
||||
"instead of https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-export-data-in-native-format API. " +
|
||||
"Binary export/import API protocol implies less network and resource usage, as it transfers compressed binary data blocks. " +
|
||||
"instead of https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-export-data-in-native-format API." +
|
||||
"Binary export/import API protocol implies less network and resource usage, as it transfers compressed binary data blocks." +
|
||||
"Non-binary export/import API is less efficient, but supports deduplication if it is configured on vm-native-src-addr side.",
|
||||
Value: false,
|
||||
},
|
||||
|
||||
@@ -1,14 +1,11 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"sync"
|
||||
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/barpool"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/influx"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/vm"
|
||||
@@ -40,7 +37,7 @@ func newInfluxProcessor(ic *influx.Client, im *vm.Importer, cc int, separator st
|
||||
}
|
||||
}
|
||||
|
||||
func (ip *influxProcessor) run(ctx context.Context) error {
|
||||
func (ip *influxProcessor) run() error {
|
||||
series, err := ip.ic.Explore()
|
||||
if err != nil {
|
||||
return fmt.Errorf("explore query failed: %s", err)
|
||||
@@ -50,11 +47,10 @@ func (ip *influxProcessor) run(ctx context.Context) error {
|
||||
}
|
||||
|
||||
question := fmt.Sprintf("Found %d timeseries to import. Continue?", len(series))
|
||||
if !prompt(ctx, question) {
|
||||
if !prompt(question) {
|
||||
return nil
|
||||
}
|
||||
|
||||
influxSeriesTotal.Add(len(series))
|
||||
bar := barpool.AddWithTemplate(fmt.Sprintf(barTpl, "Processing series"), len(series))
|
||||
if err := barpool.Start(); err != nil {
|
||||
return err
|
||||
@@ -66,18 +62,18 @@ func (ip *influxProcessor) run(ctx context.Context) error {
|
||||
ip.im.ResetStats()
|
||||
|
||||
var wg sync.WaitGroup
|
||||
for range ip.cc {
|
||||
wg.Go(func() {
|
||||
wg.Add(ip.cc)
|
||||
for i := 0; i < ip.cc; i++ {
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
for s := range seriesCh {
|
||||
if err := ip.do(s); err != nil {
|
||||
influxErrorsTotal.Inc()
|
||||
errCh <- fmt.Errorf("request failed for %q.%q: %s", s.Measurement, s.Field, err)
|
||||
return
|
||||
}
|
||||
influxSeriesProcessed.Inc()
|
||||
bar.Increment()
|
||||
}
|
||||
})
|
||||
}()
|
||||
}
|
||||
|
||||
// any error breaks the import
|
||||
@@ -86,7 +82,6 @@ func (ip *influxProcessor) run(ctx context.Context) error {
|
||||
case infErr := <-errCh:
|
||||
return fmt.Errorf("influx error: %s", infErr)
|
||||
case vmErr := <-ip.im.Errors():
|
||||
influxErrorsTotal.Inc()
|
||||
return fmt.Errorf("import process failed: %s", wrapErr(vmErr, ip.isVerbose))
|
||||
case seriesCh <- s:
|
||||
}
|
||||
@@ -99,7 +94,6 @@ func (ip *influxProcessor) run(ctx context.Context) error {
|
||||
// drain import errors channel
|
||||
for vmErr := range ip.im.Errors() {
|
||||
if vmErr.Err != nil {
|
||||
influxErrorsTotal.Inc()
|
||||
return fmt.Errorf("import process failed: %s", wrapErr(vmErr, ip.isVerbose))
|
||||
}
|
||||
}
|
||||
@@ -174,9 +168,3 @@ func (ip *influxProcessor) do(s *influx.Series) error {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var (
|
||||
influxSeriesTotal = metrics.NewCounter(`vmctl_influx_migration_series_total`)
|
||||
influxSeriesProcessed = metrics.NewCounter(`vmctl_influx_migration_series_processed`)
|
||||
influxErrorsTotal = metrics.NewCounter(`vmctl_influx_migration_errors_total`)
|
||||
)
|
||||
|
||||
@@ -4,8 +4,6 @@ import (
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timerpool"
|
||||
)
|
||||
|
||||
@@ -47,16 +45,9 @@ func (l *Limiter) Register(dataLen int) {
|
||||
t := timerpool.Get(d)
|
||||
<-t.C
|
||||
timerpool.Put(t)
|
||||
limiterThrottleEventsTotal.Inc()
|
||||
}
|
||||
l.budget += limit
|
||||
l.deadline = time.Now().Add(time.Second)
|
||||
}
|
||||
l.budget -= int64(dataLen)
|
||||
limiterBytesProcessed.Add(dataLen)
|
||||
}
|
||||
|
||||
var (
|
||||
limiterBytesProcessed = metrics.NewCounter(`vmctl_limiter_bytes_processed_total`)
|
||||
limiterThrottleEventsTotal = metrics.NewCounter(`vmctl_limiter_throttle_events_total`)
|
||||
)
|
||||
|
||||
@@ -2,7 +2,6 @@ package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"flag"
|
||||
"fmt"
|
||||
"log"
|
||||
"net/http"
|
||||
@@ -20,14 +19,11 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/barpool"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/native"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/remoteread"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/pushmetrics"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/influx"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/opentsdb"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/prometheus"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/thanos"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/vm"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httputil"
|
||||
@@ -45,20 +41,11 @@ func main() {
|
||||
ctx, cancelCtx := context.WithCancel(context.Background())
|
||||
start := time.Now()
|
||||
beforeFn := func(c *cli.Context) error {
|
||||
flag.Parse()
|
||||
logger.Init()
|
||||
isSilent = c.Bool(globalSilent)
|
||||
if c.Bool(globalDisableProgressBar) {
|
||||
barpool.Disable(true)
|
||||
}
|
||||
netutil.EnableIPv6()
|
||||
pushmetrics.InitWith(&pushmetrics.Config{
|
||||
URLs: c.StringSlice(globalPushMetricsURL),
|
||||
Interval: c.Duration(globalPushMetricsInterval),
|
||||
ExtraLabels: c.StringSlice(globalPushExtraLabels),
|
||||
DisableCompression: c.Bool(globalPushDisableCompression),
|
||||
Headers: c.StringSlice(globalPushHeaders),
|
||||
})
|
||||
return nil
|
||||
}
|
||||
app := &cli.App{
|
||||
@@ -116,7 +103,7 @@ func main() {
|
||||
}
|
||||
|
||||
otsdbProcessor := newOtsdbProcessor(otsdbClient, importer, c.Int(otsdbConcurrency), c.Bool(globalVerbose))
|
||||
return otsdbProcessor.run(ctx)
|
||||
return otsdbProcessor.run()
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -177,7 +164,7 @@ func main() {
|
||||
c.Bool(influxSkipDatabaseLabel),
|
||||
c.Bool(influxPrometheusMode),
|
||||
c.Bool(globalVerbose))
|
||||
return processor.run(ctx)
|
||||
return processor.run()
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -286,67 +273,13 @@ func main() {
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create prometheus client: %s", err)
|
||||
}
|
||||
|
||||
pp := prometheusProcessor{
|
||||
cl: cl,
|
||||
im: importer,
|
||||
cc: c.Int(promConcurrency),
|
||||
isVerbose: c.Bool(globalVerbose),
|
||||
}
|
||||
return pp.run(ctx)
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "thanos",
|
||||
Usage: "Migrate time series from Thanos blocks (supports raw and downsampled data)",
|
||||
Flags: mergeFlags(globalFlags, thanosFlags, vmFlags),
|
||||
Before: beforeFn,
|
||||
Action: func(c *cli.Context) error {
|
||||
fmt.Println("Thanos import mode")
|
||||
|
||||
vmCfg, err := initConfigVM(c)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to init VM configuration: %s", err)
|
||||
}
|
||||
|
||||
importer, err = vm.NewImporter(ctx, vmCfg)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create VM importer: %s", err)
|
||||
}
|
||||
|
||||
thanosCfg := thanos.Config{
|
||||
Snapshot: c.String(thanosSnapshot),
|
||||
Filter: thanos.Filter{
|
||||
TimeMin: c.String(thanosFilterTimeStart),
|
||||
TimeMax: c.String(thanosFilterTimeEnd),
|
||||
Label: c.String(thanosFilterLabel),
|
||||
LabelValue: c.String(thanosFilterLabelValue),
|
||||
},
|
||||
}
|
||||
cl, err := thanos.NewClient(thanosCfg)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create thanos client: %s", err)
|
||||
}
|
||||
|
||||
var aggrTypes []thanos.AggrType
|
||||
if aggrTypesStr := c.StringSlice(thanosAggrTypes); len(aggrTypesStr) > 0 {
|
||||
for _, typeStr := range aggrTypesStr {
|
||||
aggrType, err := thanos.ParseAggrType(typeStr)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse aggregate type %q: %s", typeStr, err)
|
||||
}
|
||||
aggrTypes = append(aggrTypes, aggrType)
|
||||
}
|
||||
}
|
||||
|
||||
tp := thanosProcessor{
|
||||
cl: cl,
|
||||
im: importer,
|
||||
cc: c.Int(thanosConcurrency),
|
||||
isVerbose: c.Bool(globalVerbose),
|
||||
aggrTypes: aggrTypes,
|
||||
}
|
||||
return tp.run(ctx)
|
||||
return pp.run()
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -518,7 +451,6 @@ func main() {
|
||||
log.Fatalln(err)
|
||||
}
|
||||
log.Printf("Total time: %v", time.Since(start))
|
||||
pushmetrics.StopAndPush()
|
||||
}
|
||||
|
||||
func initConfigVM(c *cli.Context) (vm.Config, error) {
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user