mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2026-05-17 16:59:40 +03:00
Compare commits
1 Commits
v1.135.0
...
weakpointe
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
e9261be945 |
2
.github/ISSUE_TEMPLATE/question.yml
vendored
2
.github/ISSUE_TEMPLATE/question.yml
vendored
@@ -5,7 +5,7 @@ body:
|
||||
- type: textarea
|
||||
id: describe-the-component
|
||||
attributes:
|
||||
label: Is your question related to a specific component?
|
||||
label: Is your question request related to a specific component?
|
||||
placeholder: |
|
||||
VictoriaMetrics, vmagent, vmalert, vmui, etc...
|
||||
validations:
|
||||
|
||||
48
.github/scripts/lint-changelog-tip.sh
vendored
48
.github/scripts/lint-changelog-tip.sh
vendored
@@ -1,48 +0,0 @@
|
||||
#!/usr/bin/env sh
|
||||
|
||||
set -e
|
||||
|
||||
CHANGELOG_FILE="docs/victoriametrics/changelog/CHANGELOG.md"
|
||||
|
||||
GITHUB_BASE_REF=${GITHUB_BASE_REF:-"master"}
|
||||
GIT_REMOTE=${GIT_REMOTE:-"origin"}
|
||||
|
||||
git diff "${GIT_REMOTE}/${GITHUB_BASE_REF}"...HEAD -- $CHANGELOG_FILE > diff.txt
|
||||
if ! grep -q "^+" diff.txt; then
|
||||
echo "No additions in CHANGELOG.md"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
ADDED_LINES=$(grep "^+\S" diff.txt | sed 's/^+//')
|
||||
|
||||
START_TIP=$(grep -n "^## tip" "$CHANGELOG_FILE" | head -1 | cut -d: -f1)
|
||||
if [ -z "$START_TIP" ]; then
|
||||
echo "ERROR: ${CHANGELOG_FILE} does not contain a ## tip section"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
END_TIP=$(awk "NR>$START_TIP && /^## / {print NR; exit}" "${CHANGELOG_FILE}")
|
||||
if [ -z "$END_TIP" ]; then
|
||||
END_TIP=$(wc -l < "$CHANGELOG_FILE")
|
||||
fi
|
||||
|
||||
BAD=0
|
||||
while IFS= read -r line; do
|
||||
# Grep exact line inside the file and get line numbers
|
||||
MATCHES=$(grep -n -F "$line" "$CHANGELOG_FILE" | cut -d: -f1)
|
||||
for m in $MATCHES; do
|
||||
if [ "$m" -lt "$START_TIP" ] || [ "$m" -gt "$END_TIP" ]; then
|
||||
echo "'$line' on line ${m} is outside ## tip section (lines ${START_TIP}-${END_TIP})"
|
||||
BAD=1
|
||||
fi
|
||||
done
|
||||
done << EOF
|
||||
$ADDED_LINES
|
||||
EOF
|
||||
|
||||
if [ "$BAD" -ne 0 ]; then
|
||||
echo "CHANGELOG modifications must be placed inside the ## tip section."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "CHANGELOG modifications are valid."
|
||||
6
.github/workflows/build.yml
vendored
6
.github/workflows/build.yml
vendored
@@ -47,8 +47,6 @@ jobs:
|
||||
arch: arm
|
||||
- os: linux
|
||||
arch: ppc64le
|
||||
- os: linux
|
||||
arch: s390x
|
||||
- os: darwin
|
||||
arch: amd64
|
||||
- os: darwin
|
||||
@@ -61,11 +59,11 @@ jobs:
|
||||
arch: amd64
|
||||
steps:
|
||||
- name: Code checkout
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v5
|
||||
|
||||
- name: Setup Go
|
||||
id: go
|
||||
uses: actions/setup-go@v6
|
||||
uses: actions/setup-go@v5
|
||||
with:
|
||||
cache-dependency-path: |
|
||||
go.sum
|
||||
|
||||
19
.github/workflows/changelog-linter.yml
vendored
19
.github/workflows/changelog-linter.yml
vendored
@@ -1,19 +0,0 @@
|
||||
name: 'changelog-linter'
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
paths:
|
||||
- "docs/victoriametrics/changelog/CHANGELOG.md"
|
||||
|
||||
jobs:
|
||||
tip-lint:
|
||||
runs-on: 'ubuntu-latest'
|
||||
steps:
|
||||
- uses: 'actions/checkout@v6'
|
||||
with:
|
||||
# needed for proper diff
|
||||
fetch-depth: 0
|
||||
|
||||
- name: 'Validate that changelog changes are under ## tip'
|
||||
run: |
|
||||
GITHUB_BASE_REF=${{ github.base_ref }} ./.github/scripts/lint-changelog-tip.sh
|
||||
37
.github/workflows/check-commit-signed.yml
vendored
37
.github/workflows/check-commit-signed.yml
vendored
@@ -1,37 +0,0 @@
|
||||
name: check-commit-signed
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
|
||||
jobs:
|
||||
check-commit-signed:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v6
|
||||
with:
|
||||
fetch-depth: 0 # we need full history for commit verification
|
||||
|
||||
- name: Check commit signatures
|
||||
run: |
|
||||
if [ "${{ github.event_name }}" != "pull_request" ]; then
|
||||
echo "Not a PR event, skipping signature check"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
RANGE="${{ github.event.pull_request.base.sha }}..${{ github.event.pull_request.head.sha }}"
|
||||
echo "Checking commits in PR range: $RANGE"
|
||||
|
||||
if [ -z "$(git rev-list $RANGE)" ]; then
|
||||
echo "No new commits in this PR, skipping signature check"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
unsigned=$(git log --pretty="%H %G?" $RANGE | grep -vE " (G|E)$" || true)
|
||||
if [ -n "$unsigned" ]; then
|
||||
echo "Found unsigned commits:"
|
||||
echo "$unsigned"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "All commits in PR are signed (G or E)"
|
||||
2
.github/workflows/check-licenses.yml
vendored
2
.github/workflows/check-licenses.yml
vendored
@@ -19,7 +19,7 @@ jobs:
|
||||
|
||||
- name: Setup Go
|
||||
id: go
|
||||
uses: actions/setup-go@v6
|
||||
uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version: stable
|
||||
cache: false
|
||||
|
||||
10
.github/workflows/codeql-analysis-go.yml
vendored
10
.github/workflows/codeql-analysis-go.yml
vendored
@@ -29,11 +29,11 @@ jobs:
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v5
|
||||
|
||||
- name: Set up Go
|
||||
id: go
|
||||
uses: actions/setup-go@v6
|
||||
uses: actions/setup-go@v5
|
||||
with:
|
||||
cache: false
|
||||
go-version: stable
|
||||
@@ -49,14 +49,14 @@ jobs:
|
||||
restore-keys: go-artifacts-${{ runner.os }}-codeql-analyze-
|
||||
|
||||
- name: Initialize CodeQL
|
||||
uses: github/codeql-action/init@v4
|
||||
uses: github/codeql-action/init@v3
|
||||
with:
|
||||
languages: go
|
||||
|
||||
- name: Autobuild
|
||||
uses: github/codeql-action/autobuild@v4
|
||||
uses: github/codeql-action/autobuild@v3
|
||||
|
||||
- name: Perform CodeQL Analysis
|
||||
uses: github/codeql-action/analyze@v4
|
||||
uses: github/codeql-action/analyze@v3
|
||||
with:
|
||||
category: 'language:go'
|
||||
|
||||
4
.github/workflows/docs.yaml
vendored
4
.github/workflows/docs.yaml
vendored
@@ -16,12 +16,12 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Code checkout
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v5
|
||||
with:
|
||||
path: __vm
|
||||
|
||||
- name: Checkout private code
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v5
|
||||
with:
|
||||
repository: VictoriaMetrics/vmdocs
|
||||
token: ${{ secrets.VM_BOT_GH_TOKEN }}
|
||||
|
||||
12
.github/workflows/test.yml
vendored
12
.github/workflows/test.yml
vendored
@@ -32,11 +32,11 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Code checkout
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v5
|
||||
|
||||
- name: Setup Go
|
||||
id: go
|
||||
uses: actions/setup-go@v6
|
||||
uses: actions/setup-go@v5
|
||||
with:
|
||||
cache-dependency-path: |
|
||||
go.sum
|
||||
@@ -71,11 +71,11 @@ jobs:
|
||||
|
||||
steps:
|
||||
- name: Code checkout
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v5
|
||||
|
||||
- name: Setup Go
|
||||
id: go
|
||||
uses: actions/setup-go@v6
|
||||
uses: actions/setup-go@v5
|
||||
with:
|
||||
cache-dependency-path: |
|
||||
go.sum
|
||||
@@ -97,11 +97,11 @@ jobs:
|
||||
|
||||
steps:
|
||||
- name: Code checkout
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v5
|
||||
|
||||
- name: Setup Go
|
||||
id: go
|
||||
uses: actions/setup-go@v6
|
||||
uses: actions/setup-go@v5
|
||||
with:
|
||||
cache-dependency-path: |
|
||||
go.sum
|
||||
|
||||
28
.github/workflows/vmui.yml
vendored
28
.github/workflows/vmui.yml
vendored
@@ -32,41 +32,35 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Code checkout
|
||||
uses: actions/checkout@v6
|
||||
uses: actions/checkout@v5
|
||||
|
||||
- name: Cache node_modules
|
||||
id: cache
|
||||
uses: actions/cache@v5
|
||||
- name: Setup Node
|
||||
uses: actions/setup-node@v4
|
||||
with:
|
||||
path: app/vmui/packages/vmui/node_modules
|
||||
key: vmui-deps-${{ runner.os }}-${{ hashFiles('app/vmui/packages/vmui/package-lock.json', 'app/vmui/Dockerfile-build') }}
|
||||
restore-keys: |
|
||||
vmui-deps-${{ runner.os }}-
|
||||
node-version: '24.x'
|
||||
|
||||
- name: Install dependencies
|
||||
if: steps.cache.outputs.cache-hit != 'true'
|
||||
run: make vmui-install
|
||||
- name: Cache node-modules
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: |
|
||||
app/vmui/packages/vmui/node_modules
|
||||
key: vmui-artifacts-${{ runner.os }}-${{ hashFiles('package-lock.json') }}
|
||||
restore-keys: vmui-artifacts-${{ runner.os }}-
|
||||
|
||||
- name: Run lint
|
||||
id: lint
|
||||
run: make vmui-lint
|
||||
continue-on-error: true
|
||||
env:
|
||||
VMUI_SKIP_INSTALL: true
|
||||
|
||||
- name: Run tests
|
||||
id: test
|
||||
run: make vmui-test
|
||||
continue-on-error: true
|
||||
env:
|
||||
VMUI_SKIP_INSTALL: true
|
||||
|
||||
- name: Run typecheck
|
||||
id: typecheck
|
||||
run: make vmui-typecheck
|
||||
continue-on-error: true
|
||||
env:
|
||||
VMUI_SKIP_INSTALL: true
|
||||
|
||||
- name: Annotate Code Linting Results
|
||||
uses: ataylorme/eslint-annotate-action@v3
|
||||
|
||||
2
LICENSE
2
LICENSE
@@ -175,7 +175,7 @@
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
Copyright 2019-2026 VictoriaMetrics, Inc.
|
||||
Copyright 2019-2025 VictoriaMetrics, Inc.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
|
||||
42
Makefile
42
Makefile
@@ -17,7 +17,7 @@ EXTRA_GO_BUILD_TAGS ?=
|
||||
GO_BUILDINFO = -X '$(PKG_PREFIX)/lib/buildinfo.Version=$(APP_NAME)-$(DATEINFO_TAG)-$(BUILDINFO_TAG)'
|
||||
TAR_OWNERSHIP ?= --owner=1000 --group=1000
|
||||
|
||||
GOLANGCI_LINT_VERSION := 2.7.2
|
||||
GOLANGCI_LINT_VERSION := 2.4.0
|
||||
|
||||
.PHONY: $(MAKECMDGOALS)
|
||||
|
||||
@@ -125,15 +125,6 @@ vmutils-linux-ppc64le: \
|
||||
vmrestore-linux-ppc64le \
|
||||
vmctl-linux-ppc64le
|
||||
|
||||
vmutils-linux-s390x: \
|
||||
vmagent-linux-s390x \
|
||||
vmalert-linux-s390x \
|
||||
vmalert-tool-linux-s390x \
|
||||
vmauth-linux-s390x \
|
||||
vmbackup-linux-s390x \
|
||||
vmrestore-linux-s390x \
|
||||
vmctl-linux-s390x
|
||||
|
||||
vmutils-darwin-amd64: \
|
||||
vmagent-darwin-amd64 \
|
||||
vmalert-darwin-amd64 \
|
||||
@@ -266,7 +257,6 @@ release-victoria-metrics: \
|
||||
release-victoria-metrics-linux-amd64 \
|
||||
release-victoria-metrics-linux-arm \
|
||||
release-victoria-metrics-linux-arm64 \
|
||||
release-victoria-metrics-linux-s390x \
|
||||
release-victoria-metrics-darwin-amd64 \
|
||||
release-victoria-metrics-darwin-arm64 \
|
||||
release-victoria-metrics-freebsd-amd64 \
|
||||
@@ -285,9 +275,6 @@ release-victoria-metrics-linux-arm:
|
||||
release-victoria-metrics-linux-arm64:
|
||||
GOOS=linux GOARCH=arm64 $(MAKE) release-victoria-metrics-goos-goarch
|
||||
|
||||
release-victoria-metrics-linux-s390x:
|
||||
GOOS=linux GOARCH=s390x $(MAKE) release-victoria-metrics-goos-goarch
|
||||
|
||||
release-victoria-metrics-darwin-amd64:
|
||||
GOOS=darwin GOARCH=amd64 $(MAKE) release-victoria-metrics-goos-goarch
|
||||
|
||||
@@ -327,7 +314,6 @@ release-vmutils: \
|
||||
release-vmutils-linux-amd64 \
|
||||
release-vmutils-linux-arm64 \
|
||||
release-vmutils-linux-arm \
|
||||
release-vmutils-linux-s390x \
|
||||
release-vmutils-darwin-amd64 \
|
||||
release-vmutils-darwin-arm64 \
|
||||
release-vmutils-freebsd-amd64 \
|
||||
@@ -346,9 +332,6 @@ release-vmutils-linux-arm64:
|
||||
release-vmutils-linux-arm:
|
||||
GOOS=linux GOARCH=arm $(MAKE) release-vmutils-goos-goarch
|
||||
|
||||
release-vmutils-linux-s390x:
|
||||
GOOS=linux GOARCH=s390x $(MAKE) release-vmutils-goos-goarch
|
||||
|
||||
release-vmutils-darwin-amd64:
|
||||
GOOS=darwin GOARCH=amd64 $(MAKE) release-vmutils-goos-goarch
|
||||
|
||||
@@ -435,7 +418,7 @@ release-vmutils-windows-goarch: \
|
||||
vmctl-windows-$(GOARCH)-prod.exe
|
||||
|
||||
pprof-cpu:
|
||||
go tool pprof -trim_path=github.com/VictoriaMetrics/VictoriaMetrics $(PPROF_FILE)
|
||||
go tool pprof -trim_path=github.com/VictoriaMetrics/VictoriaMetrics@ $(PPROF_FILE)
|
||||
|
||||
fmt:
|
||||
gofmt -l -w -s ./lib
|
||||
@@ -471,23 +454,7 @@ integration-test:
|
||||
|
||||
apptest:
|
||||
$(MAKE) victoria-metrics vmagent vmalert vmauth vmctl vmbackup vmrestore
|
||||
go test ./apptest/... -skip="^Test(Cluster|Legacy).*"
|
||||
|
||||
integration-test-legacy: victoria-metrics vmbackup vmrestore
|
||||
OS=$$(uname | tr '[:upper:]' '[:lower:]'); \
|
||||
ARCH=$$(uname -m | tr '[:upper:]' '[:lower:]' | sed 's/x86_64/amd64/'); \
|
||||
VERSION=v1.132.0; \
|
||||
VMSINGLE=victoria-metrics-$${OS}-$${ARCH}-$${VERSION}.tar.gz; \
|
||||
VMCLUSTER=victoria-metrics-$${OS}-$${ARCH}-$${VERSION}-cluster.tar.gz; \
|
||||
URL=https://github.com/VictoriaMetrics/VictoriaMetrics/releases/download/$${VERSION}; \
|
||||
DIR=/tmp/$${VERSION}; \
|
||||
test -d $${DIR} || (mkdir $${DIR} && \
|
||||
curl --output-dir /tmp -LO $${URL}/$${VMSINGLE} && tar xzf /tmp/$${VMSINGLE} -C $${DIR} && \
|
||||
curl --output-dir /tmp -LO $${URL}/$${VMCLUSTER} && tar xzf /tmp/$${VMCLUSTER} -C $${DIR} \
|
||||
); \
|
||||
VM_LEGACY_VMSINGLE_PATH=$${DIR}/victoria-metrics-prod \
|
||||
VM_LEGACY_VMSTORAGE_PATH=$${DIR}/vmstorage-prod \
|
||||
go test ./apptest/tests -run="^TestLegacySingle.*"
|
||||
go test ./apptest/... -skip="^TestCluster.*"
|
||||
|
||||
benchmark:
|
||||
GOEXPERIMENT=synctest go test -bench=. ./lib/...
|
||||
@@ -516,8 +483,7 @@ app-local-windows-goarch:
|
||||
CGO_ENABLED=0 GOOS=windows GOARCH=$(GOARCH) go build $(RACE) -ldflags "$(GO_BUILDINFO)" -tags "$(EXTRA_GO_BUILD_TAGS)" -o bin/$(APP_NAME)-windows-$(GOARCH)$(RACE).exe $(PKG_PREFIX)/app/$(APP_NAME)
|
||||
|
||||
quicktemplate-gen: install-qtc
|
||||
qtc -dir=lib
|
||||
qtc -dir=app
|
||||
qtc
|
||||
|
||||
install-qtc:
|
||||
which qtc || go install github.com/valyala/quicktemplate/qtc@latest
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
[](https://github.com/VictoriaMetrics/VictoriaMetrics/releases)
|
||||

|
||||
[](https://goreportcard.com/report/github.com/VictoriaMetrics/VictoriaMetrics)
|
||||
[](https://github.com/VictoriaMetrics/VictoriaMetrics/actions/workflows/build.yml)
|
||||
[](https://github.com/VictoriaMetrics/VictoriaMetrics/actions/workflows/main.yml)
|
||||
[](https://app.codecov.io/gh/VictoriaMetrics/VictoriaMetrics)
|
||||
[](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/LICENSE)
|
||||

|
||||
|
||||
11
SECURITY.md
11
SECURITY.md
@@ -4,11 +4,12 @@
|
||||
|
||||
The following versions of VictoriaMetrics receive regular security fixes:
|
||||
|
||||
| Version | Supported |
|
||||
|--------------------------------------------------------------------------------|--------------------|
|
||||
| [Latest release](https://docs.victoriametrics.com/victoriametrics/changelog/) | :white_check_mark: |
|
||||
| [LTS releases](https://docs.victoriametrics.com/victoriametrics/lts-releases/) | :white_check_mark: |
|
||||
| other releases | :x: |
|
||||
| Version | Supported |
|
||||
|---------|--------------------|
|
||||
| [latest release](https://docs.victoriametrics.com/victoriametrics/changelog/) | :white_check_mark: |
|
||||
| v1.102.x [LTS line](https://docs.victoriametrics.com/victoriametrics/lts-releases/) | :white_check_mark: |
|
||||
| v1.110.x [LTS line](https://docs.victoriametrics.com/victoriametrics/lts-releases/) | :white_check_mark: |
|
||||
| other releases | :x: |
|
||||
|
||||
See [this page](https://victoriametrics.com/security/) for more details.
|
||||
|
||||
|
||||
@@ -27,9 +27,6 @@ victoria-metrics-linux-ppc64le-prod:
|
||||
victoria-metrics-linux-386-prod:
|
||||
APP_NAME=victoria-metrics $(MAKE) app-via-docker-linux-386
|
||||
|
||||
victoria-metrics-linux-s390x-prod:
|
||||
APP_NAME=victoria-metrics $(MAKE) app-via-docker-linux-s390x
|
||||
|
||||
victoria-metrics-darwin-amd64-prod:
|
||||
APP_NAME=victoria-metrics $(MAKE) app-via-docker-darwin-amd64
|
||||
|
||||
|
||||
@@ -134,7 +134,6 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
}
|
||||
w.Header().Add("Content-Type", "text/html; charset=utf-8")
|
||||
fmt.Fprintf(w, "<h2>Single-node VictoriaMetrics</h2></br>")
|
||||
fmt.Fprintf(w, "Version %s<br>", buildinfo.Version)
|
||||
fmt.Fprintf(w, "See docs at <a href='https://docs.victoriametrics.com/'>https://docs.victoriametrics.com/</a></br>")
|
||||
fmt.Fprintf(w, "Useful endpoints:</br>")
|
||||
httpserver.WriteAPIHelp(w, [][2]string{
|
||||
@@ -170,7 +169,7 @@ func usage() {
|
||||
const s = `
|
||||
victoria-metrics is a time series database and monitoring solution.
|
||||
|
||||
See the docs at https://docs.victoriametrics.com/victoriametrics/
|
||||
See the docs at https://docs.victoriametrics.com/
|
||||
`
|
||||
flagutil.Usage(s)
|
||||
}
|
||||
|
||||
@@ -10,11 +10,9 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/decimal"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prommetadata"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/prometheus"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage/metricsmetadata"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timeserieslimits"
|
||||
)
|
||||
|
||||
@@ -29,9 +27,11 @@ var selfScraperWG sync.WaitGroup
|
||||
|
||||
func startSelfScraper() {
|
||||
selfScraperStopCh = make(chan struct{})
|
||||
selfScraperWG.Go(func() {
|
||||
selfScraperWG.Add(1)
|
||||
go func() {
|
||||
defer selfScraperWG.Done()
|
||||
selfScraper(*selfScrapeInterval)
|
||||
})
|
||||
}()
|
||||
}
|
||||
|
||||
func stopSelfScraper() {
|
||||
@@ -48,7 +48,6 @@ func selfScraper(scrapeInterval time.Duration) {
|
||||
|
||||
var bb bytesutil.ByteBuffer
|
||||
var rows prometheus.Rows
|
||||
var metadataRows prometheus.MetadataRows
|
||||
var mrs []storage.MetricRow
|
||||
var labels []prompb.Label
|
||||
t := time.NewTicker(scrapeInterval)
|
||||
@@ -58,12 +57,8 @@ func selfScraper(scrapeInterval time.Duration) {
|
||||
appmetrics.WritePrometheusMetrics(&bb)
|
||||
s := bytesutil.ToUnsafeString(bb.B)
|
||||
rows.Reset()
|
||||
// Parse metrics and optionally metadata when enabled
|
||||
if prommetadata.IsEnabled() {
|
||||
rows, metadataRows = prometheus.UnmarshalWithMetadata(rows, metadataRows, s, nil)
|
||||
} else {
|
||||
rows.UnmarshalWithErrLogger(s, nil)
|
||||
}
|
||||
// VictoriaMetrics components don't expose metadata yet, only need to parse samples
|
||||
rows.UnmarshalWithErrLogger(s, nil)
|
||||
mrs = mrs[:0]
|
||||
for i := range rows.Rows {
|
||||
r := &rows.Rows[i]
|
||||
@@ -96,19 +91,6 @@ func selfScraper(scrapeInterval time.Duration) {
|
||||
if err := vmstorage.AddRows(mrs); err != nil {
|
||||
logger.Errorf("cannot store self-scraped metrics: %s", err)
|
||||
}
|
||||
if len(metadataRows.Rows) > 0 {
|
||||
mms := make([]metricsmetadata.Row, 0, len(metadataRows.Rows))
|
||||
for _, mm := range metadataRows.Rows {
|
||||
mms = append(mms, metricsmetadata.Row{
|
||||
MetricFamilyName: bytesutil.ToUnsafeBytes(mm.Metric),
|
||||
Help: bytesutil.ToUnsafeBytes(mm.Help),
|
||||
Type: mm.Type,
|
||||
})
|
||||
}
|
||||
if err := vmstorage.AddMetadataRows(mms); err != nil {
|
||||
logger.Errorf("cannot store self-scraped metrics metadata: %s", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
for {
|
||||
select {
|
||||
|
||||
@@ -27,9 +27,6 @@ vmagent-linux-ppc64le-prod:
|
||||
vmagent-linux-386-prod:
|
||||
APP_NAME=vmagent $(MAKE) app-via-docker-linux-386
|
||||
|
||||
vmagent-linux-s390x-prod:
|
||||
APP_NAME=vmagent $(MAKE) app-via-docker-linux-s390x
|
||||
|
||||
vmagent-darwin-amd64-prod:
|
||||
APP_NAME=vmagent $(MAKE) app-via-docker-darwin-amd64
|
||||
|
||||
|
||||
@@ -27,7 +27,6 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/promremotewrite"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/vmimport"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/zabbixconnector"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
@@ -75,7 +74,7 @@ var (
|
||||
"See also -opentsdbHTTPListenAddr.useProxyProtocol")
|
||||
opentsdbHTTPUseProxyProtocol = flag.Bool("opentsdbHTTPListenAddr.useProxyProtocol", false, "Whether to use proxy protocol for connections accepted "+
|
||||
"at -opentsdbHTTPListenAddr . See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt")
|
||||
configAuthKey = flagutil.NewPassword("configAuthKey", "Authorization key for accessing /config and /remotewrite-.*-config pages. It must be passed via authKey query arg. It overrides -httpAuth.*")
|
||||
configAuthKey = flagutil.NewPassword("configAuthKey", "Authorization key for accessing /config page. It must be passed via authKey query arg. It overrides -httpAuth.*")
|
||||
reloadAuthKey = flagutil.NewPassword("reloadAuthKey", "Auth key for /-/reload http endpoint. It must be passed via authKey query arg. It overrides -httpAuth.*")
|
||||
dryRun = flag.Bool("dryRun", false, "Whether to check config files without running vmagent. The following files are checked: "+
|
||||
"-promscrape.config, -remoteWrite.relabelConfig, -remoteWrite.urlRelabelConfig, -remoteWrite.streamAggr.config . "+
|
||||
@@ -245,7 +244,6 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
}
|
||||
w.Header().Add("Content-Type", "text/html; charset=utf-8")
|
||||
fmt.Fprintf(w, "<h2>vmagent</h2>")
|
||||
fmt.Fprintf(w, "Version %s<br>", buildinfo.Version)
|
||||
fmt.Fprintf(w, "See docs at <a href='https://docs.victoriametrics.com/victoriametrics/vmagent/'>https://docs.victoriametrics.com/victoriametrics/vmagent/</a></br>")
|
||||
fmt.Fprintf(w, "Useful endpoints:</br>")
|
||||
httpserver.WriteAPIHelp(w, [][2]string{
|
||||
@@ -254,8 +252,6 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
{"metric-relabel-debug", "debug metric relabeling"},
|
||||
{"api/v1/targets", "advanced information about discovered targets in JSON format"},
|
||||
{"config", "-promscrape.config contents"},
|
||||
{"remotewrite-relabel-config", "-remoteWrite.relabelConfig contents"},
|
||||
{"remotewrite-url-relabel-config", "-remoteWrite.urlRelabelConfig contents"},
|
||||
{"metrics", "available service metrics"},
|
||||
{"flags", "command-line flags"},
|
||||
{"-/reload", "reload configuration"},
|
||||
@@ -352,17 +348,6 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
}
|
||||
firehose.WriteSuccessResponse(w, r)
|
||||
return true
|
||||
case "/zabbixconnector/api/v1/history":
|
||||
zabbixconnectorHistoryRequests.Inc()
|
||||
if err := zabbixconnector.InsertHandlerForHTTP(nil, r); err != nil {
|
||||
zabbixconnectorHistoryErrors.Inc()
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.WriteHeader(http.StatusBadRequest)
|
||||
fmt.Fprintf(w, `{"error":%q}`, err.Error())
|
||||
return true
|
||||
}
|
||||
w.WriteHeader(http.StatusOK)
|
||||
return true
|
||||
case "/newrelic":
|
||||
newrelicCheckRequest.Inc()
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
@@ -492,42 +477,6 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
promscrape.WriteConfigData(&bb)
|
||||
fmt.Fprintf(w, `{"status":"success","data":{"yaml":%s}}`, stringsutil.JSONString(string(bb.B)))
|
||||
return true
|
||||
case "/remotewrite-relabel-config":
|
||||
if !httpserver.CheckAuthFlag(w, r, configAuthKey) {
|
||||
return true
|
||||
}
|
||||
remoteWriteRelabelConfigRequests.Inc()
|
||||
w.Header().Set("Content-Type", "text/plain; charset=utf-8")
|
||||
remotewrite.WriteRelabelConfigData(w)
|
||||
return true
|
||||
case "/api/v1/status/remotewrite-relabel-config":
|
||||
if !httpserver.CheckAuthFlag(w, r, configAuthKey) {
|
||||
return true
|
||||
}
|
||||
remoteWriteStatusRelabelConfigRequests.Inc()
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
var bb bytesutil.ByteBuffer
|
||||
remotewrite.WriteRelabelConfigData(&bb)
|
||||
fmt.Fprintf(w, `{"status":"success","data":{"yaml":%s}}`, stringsutil.JSONString(string(bb.B)))
|
||||
return true
|
||||
case "/remotewrite-url-relabel-config":
|
||||
if !httpserver.CheckAuthFlag(w, r, configAuthKey) {
|
||||
return true
|
||||
}
|
||||
remoteWriteURLRelabelConfigRequests.Inc()
|
||||
w.Header().Set("Content-Type", "text/plain; charset=utf-8")
|
||||
remotewrite.WriteURLRelabelConfigData(w)
|
||||
return true
|
||||
case "/api/v1/status/remotewrite-url-relabel-config":
|
||||
if !httpserver.CheckAuthFlag(w, r, configAuthKey) {
|
||||
return true
|
||||
}
|
||||
remoteWriteStatusURLRelabelConfigRequests.Inc()
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
var bb bytesutil.ByteBuffer
|
||||
remotewrite.WriteURLRelabelConfigData(&bb)
|
||||
fmt.Fprintf(w, `{"status":"success","data":{"yaml":%s}}`, stringsutil.JSONString(string(bb.B)))
|
||||
return true
|
||||
case "/prometheus/-/reload", "/-/reload":
|
||||
if !httpserver.CheckAuthFlag(w, r, reloadAuthKey) {
|
||||
return true
|
||||
@@ -657,17 +606,6 @@ func processMultitenantRequest(w http.ResponseWriter, r *http.Request, path stri
|
||||
}
|
||||
firehose.WriteSuccessResponse(w, r)
|
||||
return true
|
||||
case "zabbixconnector/api/v1/history":
|
||||
zabbixconnectorHistoryRequests.Inc()
|
||||
if err := zabbixconnector.InsertHandlerForHTTP(at, r); err != nil {
|
||||
zabbixconnectorHistoryErrors.Inc()
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.WriteHeader(http.StatusBadRequest)
|
||||
fmt.Fprintf(w, `{"error":%q}`, err.Error())
|
||||
return true
|
||||
}
|
||||
w.WriteHeader(http.StatusOK)
|
||||
return true
|
||||
case "newrelic":
|
||||
newrelicCheckRequest.Inc()
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
@@ -789,9 +727,6 @@ var (
|
||||
opentelemetryPushRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/opentelemetry/v1/metrics", protocol="opentelemetry"}`)
|
||||
opentelemetryPushErrors = metrics.NewCounter(`vmagent_http_request_errors_total{path="/opentelemetry/v1/metrics", protocol="opentelemetry"}`)
|
||||
|
||||
zabbixconnectorHistoryRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/zabbixconnector/api/v1/history", protocol="zabbixconnector"}`)
|
||||
zabbixconnectorHistoryErrors = metrics.NewCounter(`vmagent_http_request_errors_total{path="/zabbixconnector/api/v1/history", protocol="zabbixconnector"}`)
|
||||
|
||||
newrelicWriteRequests = metrics.NewCounter(`vm_http_requests_total{path="/newrelic/infra/v2/metrics/events/bulk", protocol="newrelic"}`)
|
||||
newrelicWriteErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/newrelic/infra/v2/metrics/events/bulk", protocol="newrelic"}`)
|
||||
|
||||
@@ -812,12 +747,6 @@ var (
|
||||
promscrapeConfigRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/config"}`)
|
||||
promscrapeStatusConfigRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/api/v1/status/config"}`)
|
||||
|
||||
remoteWriteRelabelConfigRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/remotewrite-relabel-config"}`)
|
||||
remoteWriteStatusRelabelConfigRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/api/v1/status/remotewrite-relabel-config"}`)
|
||||
|
||||
remoteWriteURLRelabelConfigRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/remotewrite-url-relabel-config"}`)
|
||||
remoteWriteStatusURLRelabelConfigRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/api/v1/status/remotewrite-url-relabel-config"}`)
|
||||
|
||||
promscrapeConfigReloadRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/-/reload"}`)
|
||||
)
|
||||
|
||||
|
||||
@@ -78,7 +78,7 @@ func insertRows(at *auth.Token, rows []newrelic.Row, extraLabels []prompb.Label)
|
||||
if !remotewrite.TryPush(at, &ctx.WriteRequest) {
|
||||
return remotewrite.ErrQueueFullHTTPRetry
|
||||
}
|
||||
rowsInserted.Add(samplesCount)
|
||||
rowsInserted.Add(len(rows))
|
||||
if at != nil {
|
||||
rowsTenantInserted.Get(at).Add(samplesCount)
|
||||
}
|
||||
|
||||
@@ -2,14 +2,13 @@ package opentelemetry
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prommetadata"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/opentelemetry/firehose"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/opentelemetry/stream"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/protoparserutil"
|
||||
@@ -25,13 +24,6 @@ var (
|
||||
rowsPerInsert = metrics.NewHistogram(`vmagent_rows_per_insert{type="opentelemetry"}`)
|
||||
)
|
||||
|
||||
// InsertHandlerForReader processes metrics from given reader.
|
||||
func InsertHandlerForReader(at *auth.Token, r io.Reader, encoding string) error {
|
||||
return stream.ParseStream(r, encoding, nil, func(tss []prompb.TimeSeries, mms []prompb.MetricMetadata) error {
|
||||
return insertRows(at, tss, mms, nil)
|
||||
})
|
||||
}
|
||||
|
||||
// InsertHandler processes opentelemetry metrics.
|
||||
func InsertHandler(at *auth.Token, req *http.Request) error {
|
||||
extraLabels, err := protoparserutil.GetExtraLabels(req)
|
||||
@@ -76,7 +68,7 @@ func insertRows(at *auth.Token, tss []prompb.TimeSeries, mms []prompb.MetricMeta
|
||||
ctx.WriteRequest.Timeseries = tssDst
|
||||
|
||||
var metadataTotal int
|
||||
if prommetadata.IsEnabled() {
|
||||
if promscrape.IsMetadataEnabled() {
|
||||
var accountID, projectID uint32
|
||||
if at != nil {
|
||||
accountID = at.AccountID
|
||||
|
||||
@@ -7,8 +7,8 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prommetadata"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/prometheus"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/prometheus/stream"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/protoparserutil"
|
||||
@@ -36,7 +36,7 @@ func InsertHandler(at *auth.Token, req *http.Request) error {
|
||||
return err
|
||||
}
|
||||
encoding := req.Header.Get("Content-Encoding")
|
||||
return stream.Parse(req.Body, defaultTimestamp, encoding, true, prommetadata.IsEnabled(), func(rows []prometheus.Row, mms []prometheus.Metadata) error {
|
||||
return stream.Parse(req.Body, defaultTimestamp, encoding, true, promscrape.IsMetadataEnabled(), func(rows []prometheus.Row, mms []prometheus.Metadata) error {
|
||||
return insertRows(at, rows, mms, extraLabels)
|
||||
}, func(s string) {
|
||||
httpserver.LogError(req, s)
|
||||
|
||||
@@ -6,8 +6,8 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prommetadata"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/promremotewrite/stream"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/protoparserutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
|
||||
@@ -71,7 +71,7 @@ func insertRows(at *auth.Token, timeseries []prompb.TimeSeries, mms []prompb.Met
|
||||
ctx.WriteRequest.Timeseries = tssDst
|
||||
|
||||
var metadataTotal int
|
||||
if prommetadata.IsEnabled() {
|
||||
if promscrape.IsMetadataEnabled() {
|
||||
var accountID, projectID uint32
|
||||
if at != nil {
|
||||
accountID = at.AccountID
|
||||
|
||||
@@ -15,6 +15,7 @@ import (
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/awsapi"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding/zstd"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httputil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
@@ -202,10 +203,14 @@ func (c *client) init(argIdx, concurrency int, sanitizedURL string) {
|
||||
c.retriesCount = metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_retries_count_total{url=%q}`, c.sanitizedURL))
|
||||
c.sendDuration = metrics.GetOrCreateFloatCounter(fmt.Sprintf(`vmagent_remotewrite_send_duration_seconds_total{url=%q}`, c.sanitizedURL))
|
||||
metrics.GetOrCreateGauge(fmt.Sprintf(`vmagent_remotewrite_queues{url=%q}`, c.sanitizedURL), func() float64 {
|
||||
return float64(concurrency)
|
||||
return float64(*queues)
|
||||
})
|
||||
for range concurrency {
|
||||
c.wg.Go(c.runWorker)
|
||||
for i := 0; i < concurrency; i++ {
|
||||
c.wg.Add(1)
|
||||
go func() {
|
||||
defer c.wg.Done()
|
||||
c.runWorker()
|
||||
}()
|
||||
}
|
||||
logger.Infof("initialized client for -remoteWrite.url=%q", c.sanitizedURL)
|
||||
}
|
||||
@@ -458,6 +463,12 @@ again:
|
||||
// - Real-world implementations of v1 use both 400 and 415 status codes.
|
||||
// See more in research: https://github.com/VictoriaMetrics/VictoriaMetrics/pull/8462#issuecomment-2786918054
|
||||
case 415, 400:
|
||||
if c.canDowngradeVMProto.Swap(false) {
|
||||
logger.Infof("received unsupported media type or bad request from remote storage at %q. Downgrading protocol from VictoriaMetrics to Prometheus remote write for all future requests. "+
|
||||
"See https://docs.victoriametrics.com/victoriametrics/vmagent/#victoriametrics-remote-write-protocol", c.sanitizedURL)
|
||||
c.useVMProto.Store(false)
|
||||
}
|
||||
|
||||
if encoding.IsZstd(block) {
|
||||
logger.Infof("received unsupported media type or bad request from remote storage at %q. Re-packing the block to Prometheus remote write and retrying."+
|
||||
"See https://docs.victoriametrics.com/victoriametrics/vmagent/#victoriametrics-remote-write-protocol", c.sanitizedURL)
|
||||
@@ -549,9 +560,9 @@ func getRetryDuration(retryAfterDuration, retryDuration, maxRetryDuration time.D
|
||||
// For more details, see: https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9417
|
||||
func repackBlockFromZstdToSnappy(zstdBlock []byte) ([]byte, error) {
|
||||
plainBlock := make([]byte, 0, len(zstdBlock)*2)
|
||||
plainBlock, err := encoding.DecompressZSTD(plainBlock, zstdBlock)
|
||||
plainBlock, err := zstd.Decompress(plainBlock, zstdBlock)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
return nil, fmt.Errorf("zstd: decompress: %s", err)
|
||||
}
|
||||
|
||||
return snappy.Encode(nil, plainBlock), nil
|
||||
|
||||
@@ -93,7 +93,10 @@ func TestParseRetryAfterHeader(t *testing.T) {
|
||||
|
||||
// helper calculate the max possible time duration calculated by timeutil.AddJitterToDuration.
|
||||
func helper(d time.Duration) time.Duration {
|
||||
dv := min(d/10, 10*time.Second)
|
||||
dv := d / 10
|
||||
if dv > 10*time.Second {
|
||||
dv = 10 * time.Second
|
||||
}
|
||||
|
||||
return d + dv
|
||||
}
|
||||
|
||||
@@ -48,7 +48,11 @@ func newPendingSeries(fq *persistentqueue.FastQueue, isVMRemoteWrite *atomic.Boo
|
||||
ps.wr.significantFigures = significantFigures
|
||||
ps.wr.roundDigits = roundDigits
|
||||
ps.stopCh = make(chan struct{})
|
||||
ps.periodicFlusherWG.Go(ps.periodicFlusher)
|
||||
ps.periodicFlusherWG.Add(1)
|
||||
go func() {
|
||||
defer ps.periodicFlusherWG.Done()
|
||||
ps.periodicFlusher()
|
||||
}()
|
||||
return &ps
|
||||
}
|
||||
|
||||
|
||||
@@ -3,20 +3,17 @@ package remotewrite
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"io"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
"gopkg.in/yaml.v2"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
|
||||
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
var (
|
||||
@@ -35,12 +32,9 @@ var (
|
||||
"See https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels")
|
||||
)
|
||||
|
||||
var labelsGlobal []prompb.Label
|
||||
|
||||
var (
|
||||
labelsGlobal []prompb.Label
|
||||
|
||||
remoteWriteRelabelConfigData atomic.Pointer[[]byte]
|
||||
remoteWriteURLRelabelConfigData atomic.Pointer[[]interface{}]
|
||||
|
||||
relabelConfigReloads *metrics.Counter
|
||||
relabelConfigReloadErrors *metrics.Counter
|
||||
relabelConfigSuccess *metrics.Gauge
|
||||
@@ -73,42 +67,6 @@ func initRelabelConfigs() {
|
||||
}
|
||||
}
|
||||
|
||||
// WriteRelabelConfigData writes -remoteWrite.relabelConfig contents to w
|
||||
func WriteRelabelConfigData(w io.Writer) {
|
||||
p := remoteWriteRelabelConfigData.Load()
|
||||
if p == nil {
|
||||
// Nothing to write to w
|
||||
return
|
||||
}
|
||||
_, _ = w.Write(*p)
|
||||
}
|
||||
|
||||
// WriteURLRelabelConfigData writes -remoteWrite.urlRelabelConfig contents to w
|
||||
func WriteURLRelabelConfigData(w io.Writer) {
|
||||
p := remoteWriteURLRelabelConfigData.Load()
|
||||
if p == nil {
|
||||
// Nothing to write to w
|
||||
return
|
||||
}
|
||||
type urlRelabelCfg struct {
|
||||
Url string `yaml:"url"`
|
||||
RelabelConfig interface{} `yaml:"relabel_config"`
|
||||
}
|
||||
var cs []urlRelabelCfg
|
||||
for i, url := range *remoteWriteURLs {
|
||||
cfgData := (*p)[i]
|
||||
if !*showRemoteWriteURL {
|
||||
url = fmt.Sprintf("%d:secret-url", i+1)
|
||||
}
|
||||
cs = append(cs, urlRelabelCfg{
|
||||
Url: url,
|
||||
RelabelConfig: cfgData,
|
||||
})
|
||||
}
|
||||
d, _ := yaml.Marshal(cs)
|
||||
_, _ = w.Write(d)
|
||||
}
|
||||
|
||||
func reloadRelabelConfigs() {
|
||||
rcs := allRelabelConfigs.Load()
|
||||
if !rcs.isSet() {
|
||||
@@ -132,43 +90,28 @@ func reloadRelabelConfigs() {
|
||||
func loadRelabelConfigs() (*relabelConfigs, error) {
|
||||
var rcs relabelConfigs
|
||||
if *relabelConfigPathGlobal != "" {
|
||||
global, rawCfg, err := promrelabel.LoadRelabelConfigs(*relabelConfigPathGlobal)
|
||||
global, err := promrelabel.LoadRelabelConfigs(*relabelConfigPathGlobal)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot load -remoteWrite.relabelConfig=%q: %w", *relabelConfigPathGlobal, err)
|
||||
}
|
||||
remoteWriteRelabelConfigData.Store(&rawCfg)
|
||||
rcs.global = global
|
||||
}
|
||||
|
||||
if len(*relabelConfigPaths) > len(*remoteWriteURLs) {
|
||||
return nil, fmt.Errorf("too many -remoteWrite.urlRelabelConfig args: %d; it mustn't exceed the number of -remoteWrite.url args: %d",
|
||||
len(*relabelConfigPaths), (len(*remoteWriteURLs)))
|
||||
}
|
||||
|
||||
var urlRelabelCfgs []interface{}
|
||||
rcs.perURL = make([]*promrelabel.ParsedConfigs, len(*remoteWriteURLs))
|
||||
for i, path := range *relabelConfigPaths {
|
||||
if len(path) == 0 {
|
||||
urlRelabelCfgs = append(urlRelabelCfgs, nil)
|
||||
// Skip empty relabel config.
|
||||
continue
|
||||
}
|
||||
prc, rawCfg, err := promrelabel.LoadRelabelConfigs(path)
|
||||
prc, err := promrelabel.LoadRelabelConfigs(path)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot load relabel configs from -remoteWrite.urlRelabelConfig=%q: %w", path, err)
|
||||
}
|
||||
rcs.perURL[i] = prc
|
||||
|
||||
var parsedCfg interface{}
|
||||
_ = yaml.Unmarshal(rawCfg, &parsedCfg)
|
||||
urlRelabelCfgs = append(urlRelabelCfgs, parsedCfg)
|
||||
}
|
||||
if len(*remoteWriteURLs) > len(*relabelConfigPaths) {
|
||||
// fill the urlRelabelCfgs with empty relabel configs if not set
|
||||
for i := len(*relabelConfigPaths); i < len(*remoteWriteURLs); i++ {
|
||||
urlRelabelCfgs = append(urlRelabelCfgs, nil)
|
||||
}
|
||||
}
|
||||
remoteWriteURLRelabelConfigData.Store(&urlRelabelCfgs)
|
||||
return &rcs, nil
|
||||
}
|
||||
|
||||
@@ -177,9 +120,19 @@ type relabelConfigs struct {
|
||||
perURL []*promrelabel.ParsedConfigs
|
||||
}
|
||||
|
||||
// isSet indicates whether (global or per-URL) command-line flags is set
|
||||
func (rcs *relabelConfigs) isSet() bool {
|
||||
return *relabelConfigPathGlobal != "" || len(*relabelConfigPaths) > 0
|
||||
if rcs == nil {
|
||||
return false
|
||||
}
|
||||
if rcs.global.Len() > 0 {
|
||||
return true
|
||||
}
|
||||
for _, pc := range rcs.perURL {
|
||||
if pc.Len() > 0 {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// initLabelsGlobal must be called after parsing command-line flags.
|
||||
|
||||
@@ -27,7 +27,6 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/ratelimiter"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/slicesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/streamaggr"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timeserieslimits"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
@@ -59,7 +58,7 @@ var (
|
||||
"See also -remoteWrite.maxDiskUsagePerURL and -remoteWrite.disableOnDiskQueue")
|
||||
keepDanglingQueues = flag.Bool("remoteWrite.keepDanglingQueues", false, "Keep persistent queues contents at -remoteWrite.tmpDataPath in case there are no matching -remoteWrite.url. "+
|
||||
"Useful when -remoteWrite.url is changed temporarily and persistent queue files will be needed later on.")
|
||||
queues = flagutil.NewArrayInt("remoteWrite.queues", cgroup.AvailableCPUs()*2, "The number of concurrent queues to each -remoteWrite.url. Set more queues if default number of queues "+
|
||||
queues = flag.Int("remoteWrite.queues", cgroup.AvailableCPUs()*2, "The number of concurrent queues to each -remoteWrite.url. Set more queues if default number of queues "+
|
||||
"isn't enough for sending high volume of collected data to remote storage. "+
|
||||
"Default value depends on the number of available CPU cores. It should work fine in most cases since it minimizes resource usage")
|
||||
showRemoteWriteURL = flag.Bool("remoteWrite.showURL", false, "Whether to show -remoteWrite.url in the exported metrics. "+
|
||||
@@ -176,6 +175,13 @@ func Init() {
|
||||
})
|
||||
}
|
||||
|
||||
if *queues > maxQueues {
|
||||
*queues = maxQueues
|
||||
}
|
||||
if *queues <= 0 {
|
||||
*queues = 1
|
||||
}
|
||||
|
||||
if len(*shardByURLLabels) > 0 && len(*shardByURLIgnoreLabels) > 0 {
|
||||
logger.Fatalf("-remoteWrite.shardByURL.labels and -remoteWrite.shardByURL.ignoreLabels cannot be set simultaneously; " +
|
||||
"see https://docs.victoriametrics.com/victoriametrics/vmagent/#sharding-among-remote-storages")
|
||||
@@ -208,7 +214,9 @@ func Init() {
|
||||
dropDanglingQueues()
|
||||
|
||||
// Start config reloader.
|
||||
configReloaderWG.Go(func() {
|
||||
configReloaderWG.Add(1)
|
||||
go func() {
|
||||
defer configReloaderWG.Done()
|
||||
for {
|
||||
select {
|
||||
case <-configReloaderStopCh:
|
||||
@@ -218,7 +226,7 @@ func Init() {
|
||||
reloadRelabelConfigs()
|
||||
reloadStreamAggrConfigs()
|
||||
}
|
||||
})
|
||||
}()
|
||||
}
|
||||
|
||||
func dropDanglingQueues() {
|
||||
@@ -258,6 +266,17 @@ func initRemoteWriteCtxs(urls []string) {
|
||||
if len(urls) == 0 {
|
||||
logger.Panicf("BUG: urls must be non-empty")
|
||||
}
|
||||
|
||||
maxInmemoryBlocks := memory.Allowed() / len(urls) / *maxRowsPerBlock / 100
|
||||
if maxInmemoryBlocks / *queues > 100 {
|
||||
// There is no much sense in keeping higher number of blocks in memory,
|
||||
// since this means that the producer outperforms consumer and the queue
|
||||
// will continue growing. It is better storing the queue to file.
|
||||
maxInmemoryBlocks = 100 * *queues
|
||||
}
|
||||
if maxInmemoryBlocks < 2 {
|
||||
maxInmemoryBlocks = 2
|
||||
}
|
||||
rwctxs := make([]*remoteWriteCtx, len(urls))
|
||||
rwctxIdx := make([]int, len(urls))
|
||||
if retryMaxTime.String() != "" {
|
||||
@@ -272,7 +291,7 @@ func initRemoteWriteCtxs(urls []string) {
|
||||
if *showRemoteWriteURL {
|
||||
sanitizedURL = fmt.Sprintf("%d:%s", i+1, remoteWriteURL)
|
||||
}
|
||||
rwctxs[i] = newRemoteWriteCtx(i, remoteWriteURL, sanitizedURL)
|
||||
rwctxs[i] = newRemoteWriteCtx(i, remoteWriteURL, maxInmemoryBlocks, sanitizedURL)
|
||||
rwctxIdx[i] = i
|
||||
}
|
||||
|
||||
@@ -466,9 +485,6 @@ func tryPush(at *auth.Token, wr *prompb.WriteRequest, forceDropSamplesOnFailure
|
||||
matchIdxs.B = sas.Push(tssBlock, matchIdxs.B)
|
||||
if !*streamAggrGlobalKeepInput {
|
||||
tssBlock = dropAggregatedSeries(tssBlock, matchIdxs.B, *streamAggrGlobalDropInput)
|
||||
} else if *streamAggrGlobalDropInput {
|
||||
// if both keep_input and drop_input are true, we keep only the aggregated series
|
||||
tssBlock = dropUnaggregatedSeries(tssBlock, matchIdxs.B)
|
||||
}
|
||||
matchIdxsPool.Put(matchIdxs)
|
||||
}
|
||||
@@ -538,9 +554,11 @@ func tryPushMetadataToRemoteStorages(rwctxs []*remoteWriteCtx, mms []prompb.Metr
|
||||
// Push metadata to remote storage systems in parallel to reduce
|
||||
// the time needed for sending the data to multiple remote storage systems.
|
||||
var wg sync.WaitGroup
|
||||
wg.Add(len(rwctxs))
|
||||
var anyPushFailed atomic.Bool
|
||||
for _, rwctx := range rwctxs {
|
||||
wg.Go(func() {
|
||||
go func(rwctx *remoteWriteCtx) {
|
||||
defer wg.Done()
|
||||
if !rwctx.tryPushMetadataInternal(mms) {
|
||||
rwctx.pushFailures.Inc()
|
||||
if forceDropSamplesOnFailure {
|
||||
@@ -549,7 +567,7 @@ func tryPushMetadataToRemoteStorages(rwctxs []*remoteWriteCtx, mms []prompb.Metr
|
||||
}
|
||||
anyPushFailed.Store(true)
|
||||
}
|
||||
})
|
||||
}(rwctx)
|
||||
}
|
||||
wg.Wait()
|
||||
return !anyPushFailed.Load()
|
||||
@@ -581,13 +599,15 @@ func tryPushTimeSeriesToRemoteStorages(rwctxs []*remoteWriteCtx, tssBlock []prom
|
||||
// Push tssBlock to remote storage systems in parallel to reduce
|
||||
// the time needed for sending the data to multiple remote storage systems.
|
||||
var wg sync.WaitGroup
|
||||
wg.Add(len(rwctxs))
|
||||
var anyPushFailed atomic.Bool
|
||||
for _, rwctx := range rwctxs {
|
||||
wg.Go(func() {
|
||||
go func(rwctx *remoteWriteCtx) {
|
||||
defer wg.Done()
|
||||
if !rwctx.TryPushTimeSeries(tssBlock, forceDropSamplesOnFailure) {
|
||||
anyPushFailed.Store(true)
|
||||
}
|
||||
})
|
||||
}(rwctx)
|
||||
}
|
||||
wg.Wait()
|
||||
return !anyPushFailed.Load()
|
||||
@@ -609,11 +629,13 @@ func tryShardingTimeSeriesAmongRemoteStorages(rwctxs []*remoteWriteCtx, tssBlock
|
||||
if len(shard) == 0 {
|
||||
continue
|
||||
}
|
||||
wg.Go(func() {
|
||||
if !rwctx.TryPushTimeSeries(shard, forceDropSamplesOnFailure) {
|
||||
wg.Add(1)
|
||||
go func(rwctx *remoteWriteCtx, tss []prompb.TimeSeries) {
|
||||
defer wg.Done()
|
||||
if !rwctx.TryPushTimeSeries(tss, forceDropSamplesOnFailure) {
|
||||
anyPushFailed.Store(true)
|
||||
}
|
||||
})
|
||||
}(rwctx, shard)
|
||||
}
|
||||
wg.Wait()
|
||||
return !anyPushFailed.Load()
|
||||
@@ -822,7 +844,7 @@ type remoteWriteCtx struct {
|
||||
rowsDroppedOnPushFailure *metrics.Counter
|
||||
}
|
||||
|
||||
func newRemoteWriteCtx(argIdx int, remoteWriteURL *url.URL, sanitizedURL string) *remoteWriteCtx {
|
||||
func newRemoteWriteCtx(argIdx int, remoteWriteURL *url.URL, maxInmemoryBlocks int, sanitizedURL string) *remoteWriteCtx {
|
||||
// strip query params, otherwise changing params resets pq
|
||||
pqURL := *remoteWriteURL
|
||||
pqURL.RawQuery = ""
|
||||
@@ -837,23 +859,6 @@ func newRemoteWriteCtx(argIdx int, remoteWriteURL *url.URL, sanitizedURL string)
|
||||
}
|
||||
|
||||
isPQDisabled := disableOnDiskQueue.GetOptionalArg(argIdx)
|
||||
queuesSize := queues.GetOptionalArg(argIdx)
|
||||
if queuesSize > maxQueues {
|
||||
queuesSize = maxQueues
|
||||
} else if queuesSize <= 0 {
|
||||
queuesSize = 1
|
||||
}
|
||||
|
||||
maxInmemoryBlocks := memory.Allowed() / len(*remoteWriteURLs) / *maxRowsPerBlock / 100
|
||||
if maxInmemoryBlocks/queuesSize > 100 {
|
||||
// There is no much sense in keeping higher number of blocks in memory,
|
||||
// since this means that the producer outperforms consumer and the queue
|
||||
// will continue growing. It is better storing the queue to file.
|
||||
maxInmemoryBlocks = 100 * queuesSize
|
||||
}
|
||||
if maxInmemoryBlocks < 2 {
|
||||
maxInmemoryBlocks = 2
|
||||
}
|
||||
fq := persistentqueue.MustOpenFastQueue(queuePath, sanitizedURL, maxInmemoryBlocks, maxPendingBytes, isPQDisabled)
|
||||
_ = metrics.GetOrCreateGauge(fmt.Sprintf(`vmagent_remotewrite_pending_data_bytes{path=%q, url=%q}`, queuePath, sanitizedURL), func() float64 {
|
||||
return float64(fq.GetPendingBytes())
|
||||
@@ -871,16 +876,16 @@ func newRemoteWriteCtx(argIdx int, remoteWriteURL *url.URL, sanitizedURL string)
|
||||
var c *client
|
||||
switch remoteWriteURL.Scheme {
|
||||
case "http", "https":
|
||||
c = newHTTPClient(argIdx, remoteWriteURL.String(), sanitizedURL, fq, queuesSize)
|
||||
c = newHTTPClient(argIdx, remoteWriteURL.String(), sanitizedURL, fq, *queues)
|
||||
default:
|
||||
logger.Fatalf("unsupported scheme: %s for remoteWriteURL: %s, want `http`, `https`", remoteWriteURL.Scheme, sanitizedURL)
|
||||
}
|
||||
c.init(argIdx, queuesSize, sanitizedURL)
|
||||
c.init(argIdx, *queues, sanitizedURL)
|
||||
|
||||
// Initialize pss
|
||||
sf := significantFigures.GetOptionalArg(argIdx)
|
||||
rd := roundDigits.GetOptionalArg(argIdx)
|
||||
pssLen := queuesSize
|
||||
pssLen := *queues
|
||||
if n := cgroup.AvailableCPUs(); pssLen > n {
|
||||
// There is no sense in running more than availableCPUs concurrent pendingSeries,
|
||||
// since every pendingSeries can saturate up to a single CPU.
|
||||
@@ -983,17 +988,7 @@ func (rwctx *remoteWriteCtx) TryPushTimeSeries(tss []prompb.TimeSeries, forceDro
|
||||
tss = append(*v, tss...)
|
||||
}
|
||||
tss = dropAggregatedSeries(tss, matchIdxs.B, rwctx.streamAggrDropInput)
|
||||
} else if rwctx.streamAggrDropInput {
|
||||
// if both keep_input and drop_input are true, we keep only the aggregated series
|
||||
if rctx == nil {
|
||||
rctx = getRelabelCtx()
|
||||
// Make a copy of tss before dropping aggregated series
|
||||
v = tssPool.Get().(*[]prompb.TimeSeries)
|
||||
tss = append(*v, tss...)
|
||||
}
|
||||
tss = dropUnaggregatedSeries(tss, matchIdxs.B)
|
||||
}
|
||||
|
||||
matchIdxsPool.Put(matchIdxs)
|
||||
}
|
||||
if rwctx.deduplicator != nil {
|
||||
@@ -1016,10 +1011,9 @@ func (rwctx *remoteWriteCtx) TryPushTimeSeries(tss []prompb.TimeSeries, forceDro
|
||||
return false
|
||||
}
|
||||
|
||||
var matchIdxsPool slicesutil.BufferPool[uint32]
|
||||
var matchIdxsPool bytesutil.ByteBufferPool
|
||||
|
||||
// dropAggregatedSeries drops matched series, also the unmatched if dropInput is true.
|
||||
func dropAggregatedSeries(src []prompb.TimeSeries, matchIdxs []uint32, dropInput bool) []prompb.TimeSeries {
|
||||
func dropAggregatedSeries(src []prompb.TimeSeries, matchIdxs []byte, dropInput bool) []prompb.TimeSeries {
|
||||
dst := src[:0]
|
||||
if !dropInput {
|
||||
for i, match := range matchIdxs {
|
||||
@@ -1034,20 +1028,6 @@ func dropAggregatedSeries(src []prompb.TimeSeries, matchIdxs []uint32, dropInput
|
||||
return dst
|
||||
}
|
||||
|
||||
// dropUnaggregatedSeries drops unmatched series.
|
||||
func dropUnaggregatedSeries(src []prompb.TimeSeries, matchIdxs []uint32) []prompb.TimeSeries {
|
||||
dst := src[:0]
|
||||
for i, match := range matchIdxs {
|
||||
if match == 0 {
|
||||
continue
|
||||
}
|
||||
dst = append(dst, src[i])
|
||||
}
|
||||
tail := src[len(dst):]
|
||||
clear(tail)
|
||||
return dst
|
||||
}
|
||||
|
||||
func (rwctx *remoteWriteCtx) pushInternalTrackDropped(tss []prompb.TimeSeries) {
|
||||
if rwctx.tryPushTimeSeriesInternal(tss) {
|
||||
return
|
||||
|
||||
@@ -10,8 +10,6 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/consistenthash"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/persistentqueue"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/prometheus"
|
||||
@@ -59,8 +57,8 @@ func TestGetLabelsHash_Distribution(t *testing.T) {
|
||||
f(10)
|
||||
}
|
||||
|
||||
func TestRemoteWriteContext_TryPushTimeSeries(t *testing.T) {
|
||||
f := func(streamAggrConfig, relabelConfig string, enableWindows bool, dedupInterval time.Duration, keepInput, dropInput bool, input string, expectedRowsPushedAfterRelabel, expectedPushedSample int) {
|
||||
func TestRemoteWriteContext_TryPush_ImmutableTimeseries(t *testing.T) {
|
||||
f := func(streamAggrConfig, relabelConfig string, enableWindows bool, dedupInterval time.Duration, keepInput, dropInput bool, input string) {
|
||||
t.Helper()
|
||||
perURLRelabel, err := promrelabel.ParseRelabelConfigsData([]byte(relabelConfig))
|
||||
if err != nil {
|
||||
@@ -73,16 +71,10 @@ func TestRemoteWriteContext_TryPushTimeSeries(t *testing.T) {
|
||||
}
|
||||
allRelabelConfigs.Store(rcs)
|
||||
|
||||
path := "fast-queue-write-test"
|
||||
fs.MustRemoveDir(path)
|
||||
fq := persistentqueue.MustOpenFastQueue(path, "test", 100, 0, false)
|
||||
defer fs.MustRemoveDir(path)
|
||||
defer fq.MustClose()
|
||||
|
||||
pss := make([]*pendingSeries, 1)
|
||||
isVMProto := &atomic.Bool{}
|
||||
isVMProto.Store(true)
|
||||
pss[0] = newPendingSeries(fq, isVMProto, 0, 100)
|
||||
pss[0] = newPendingSeries(nil, isVMProto, 0, 100)
|
||||
rwctx := &remoteWriteCtx{
|
||||
idx: 0,
|
||||
streamAggrKeepInput: keepInput,
|
||||
@@ -91,8 +83,6 @@ func TestRemoteWriteContext_TryPushTimeSeries(t *testing.T) {
|
||||
rowsPushedAfterRelabel: metrics.GetOrCreateCounter(`foo`),
|
||||
rowsDroppedByRelabel: metrics.GetOrCreateCounter(`bar`),
|
||||
}
|
||||
defer metrics.UnregisterAllMetrics()
|
||||
|
||||
if dedupInterval > 0 {
|
||||
rwctx.deduplicator = streamaggr.NewDeduplicator(nil, enableWindows, dedupInterval, nil, "dedup-global")
|
||||
}
|
||||
@@ -114,27 +104,23 @@ func TestRemoteWriteContext_TryPushTimeSeries(t *testing.T) {
|
||||
inputTss := prometheus.MustParsePromMetrics(input, offsetMsecs)
|
||||
expectedTss := make([]prompb.TimeSeries, len(inputTss))
|
||||
|
||||
// check inputTss is not modified after TryPushTimeSeries
|
||||
// copy inputTss to make sure it is not mutated during TryPush call
|
||||
copy(expectedTss, inputTss)
|
||||
if !rwctx.TryPushTimeSeries(inputTss, false) {
|
||||
t.Fatalf("cannot push samples to rwctx")
|
||||
}
|
||||
|
||||
if int(rwctx.rowsPushedAfterRelabel.Get()) != expectedRowsPushedAfterRelabel {
|
||||
t.Fatalf("unexpected number of rows after relabel; got %d; want %d", rwctx.rowsPushedAfterRelabel.Get(), expectedRowsPushedAfterRelabel)
|
||||
}
|
||||
|
||||
if len(pss[0].wr.tss) != expectedPushedSample {
|
||||
t.Fatalf("unexpected number of pushed samples; got %d; want %d", len(pss[0].wr.tss), expectedPushedSample)
|
||||
}
|
||||
|
||||
if !reflect.DeepEqual(expectedTss, inputTss) {
|
||||
t.Fatalf("unexpected samples;\ngot\n%v\nwant\n%v", inputTss, expectedTss)
|
||||
}
|
||||
}
|
||||
|
||||
// relabeling
|
||||
f(``, `
|
||||
f(`
|
||||
- interval: 1m
|
||||
outputs: [sum_samples]
|
||||
- interval: 2m
|
||||
outputs: [count_series]
|
||||
`, `
|
||||
- action: keep
|
||||
source_labels: [env]
|
||||
regex: "dev"
|
||||
@@ -143,66 +129,53 @@ metric{env="dev"} 10
|
||||
metric{env="bar"} 20
|
||||
metric{env="dev"} 15
|
||||
metric{env="bar"} 25
|
||||
`, 2, 2)
|
||||
|
||||
// relabeling + aggregation
|
||||
f(`
|
||||
- match: '{env="dev"}'
|
||||
interval: 1m
|
||||
outputs: [sum_samples]
|
||||
`, `
|
||||
- action: keep
|
||||
source_labels: [env]
|
||||
regex: ".*"
|
||||
`, false, 0, false, false, `
|
||||
metric{env="dev"} 10
|
||||
metric{env="bar"} 20
|
||||
metric{env="dev"} 15
|
||||
metric{env="bar"} 25
|
||||
`, 4, 2)
|
||||
|
||||
// aggregation + keepInput
|
||||
f(`
|
||||
- match: '{env="dev"}'
|
||||
interval: 1m
|
||||
outputs: [sum_samples]
|
||||
`, ``, false, 0, true, false, `
|
||||
metric{env="dev"} 10
|
||||
metric{env="bar"} 20
|
||||
metric{env="dev"} 15
|
||||
metric{env="bar"} 25
|
||||
`, 4, 4)
|
||||
|
||||
// aggregation + dropInput
|
||||
f(`
|
||||
- match: '{env="dev"}'
|
||||
interval: 1m
|
||||
outputs: [sum_samples]
|
||||
`, ``, false, 0, false, true, `
|
||||
metric{env="dev"} 10
|
||||
metric{env="bar"} 20
|
||||
metric{env="dev"} 15
|
||||
metric{env="bar"} 25
|
||||
`, 4, 0)
|
||||
|
||||
// aggregation + keepInput + dropInput
|
||||
f(`
|
||||
- match: '{env="dev"}'
|
||||
interval: 1m
|
||||
outputs: [sum_samples]
|
||||
`, ``, false, 0, true, true, `
|
||||
metric{env="dev"} 10
|
||||
metric{env="bar"} 20
|
||||
metric{env="bar"} 25
|
||||
`, 3, 1)
|
||||
|
||||
// aggregation + deduplication
|
||||
`)
|
||||
f(``, ``, true, time.Hour, false, false, `
|
||||
metric{env="dev"} 10
|
||||
metric{env="foo"} 20
|
||||
metric{env="dev"} 15
|
||||
metric{env="foo"} 25
|
||||
`, 4, 0)
|
||||
`)
|
||||
f(``, `
|
||||
- action: keep
|
||||
source_labels: [env]
|
||||
regex: "dev"
|
||||
`, true, time.Hour, false, false, `
|
||||
metric{env="dev"} 10
|
||||
metric{env="bar"} 20
|
||||
metric{env="dev"} 15
|
||||
metric{env="bar"} 25
|
||||
`)
|
||||
f(``, `
|
||||
- action: keep
|
||||
source_labels: [env]
|
||||
regex: "dev"
|
||||
`, true, time.Hour, true, false, `
|
||||
metric{env="test"} 10
|
||||
metric{env="dev"} 20
|
||||
metric{env="foo"} 15
|
||||
metric{env="dev"} 25
|
||||
`)
|
||||
f(``, `
|
||||
- action: keep
|
||||
source_labels: [env]
|
||||
regex: "dev"
|
||||
`, true, time.Hour, false, true, `
|
||||
metric{env="foo"} 10
|
||||
metric{env="dev"} 20
|
||||
metric{env="foo"} 15
|
||||
metric{env="dev"} 25
|
||||
`)
|
||||
f(``, `
|
||||
- action: keep
|
||||
source_labels: [env]
|
||||
regex: "dev"
|
||||
`, true, time.Hour, true, true, `
|
||||
metric{env="dev"} 10
|
||||
metric{env="test"} 20
|
||||
metric{env="dev"} 15
|
||||
metric{env="bar"} 25
|
||||
`)
|
||||
}
|
||||
|
||||
func TestShardAmountRemoteWriteCtx(t *testing.T) {
|
||||
|
||||
@@ -18,12 +18,12 @@ var (
|
||||
streamAggrGlobalConfig = flag.String("streamAggr.config", "", "Optional path to file with stream aggregation config. "+
|
||||
"See https://docs.victoriametrics.com/victoriametrics/stream-aggregation/ . "+
|
||||
"See also -streamAggr.keepInput, -streamAggr.dropInput and -streamAggr.dedupInterval")
|
||||
streamAggrGlobalKeepInput = flag.Bool("streamAggr.keepInput", false, "Whether to keep input samples that match any rule in "+
|
||||
"-streamAggr.config. By default, matched raw samples are aggregated and dropped, while unmatched samples "+
|
||||
"are written to the remote storage. See also -streamAggr.dropInput and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/")
|
||||
streamAggrGlobalDropInput = flag.Bool("streamAggr.dropInput", false, "Whether to drop input samples that not matching any rule in "+
|
||||
"-streamAggr.config. By default, only matched raw samples are dropped, while unmatched samples "+
|
||||
"are written to the remote storage. See also -streamAggr.keepInput and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/")
|
||||
streamAggrGlobalKeepInput = flag.Bool("streamAggr.keepInput", false, "Whether to keep all the input samples after the aggregation "+
|
||||
"with -streamAggr.config. By default, only aggregates samples are dropped, while the remaining samples "+
|
||||
"are written to remote storages write. See also -streamAggr.dropInput and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/")
|
||||
streamAggrGlobalDropInput = flag.Bool("streamAggr.dropInput", false, "Whether to drop all the input samples after the aggregation "+
|
||||
"with -remoteWrite.streamAggr.config. By default, only aggregates samples are dropped, while the remaining samples "+
|
||||
"are written to remote storages write. See also -streamAggr.keepInput and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/")
|
||||
streamAggrGlobalDedupInterval = flag.Duration("streamAggr.dedupInterval", 0, "Input samples are de-duplicated with this interval on "+
|
||||
"aggregator before optional aggregation with -streamAggr.config . "+
|
||||
"See also -dedup.minScrapeInterval and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/#deduplication")
|
||||
@@ -43,11 +43,11 @@ var (
|
||||
streamAggrConfig = flagutil.NewArrayString("remoteWrite.streamAggr.config", "Optional path to file with stream aggregation config for the corresponding -remoteWrite.url. "+
|
||||
"See https://docs.victoriametrics.com/victoriametrics/stream-aggregation/ . "+
|
||||
"See also -remoteWrite.streamAggr.keepInput, -remoteWrite.streamAggr.dropInput and -remoteWrite.streamAggr.dedupInterval")
|
||||
streamAggrDropInput = flagutil.NewArrayBool("remoteWrite.streamAggr.dropInput", "Whether to drop input samples that not matching any rule in "+
|
||||
"the corresponding -remoteWrite.streamAggr.config. By default, only matched raw samples are dropped, while unmatched samples "+
|
||||
streamAggrDropInput = flagutil.NewArrayBool("remoteWrite.streamAggr.dropInput", "Whether to drop all the input samples after the aggregation "+
|
||||
"with -remoteWrite.streamAggr.config at the corresponding -remoteWrite.url. By default, only aggregates samples are dropped, while the remaining samples "+
|
||||
"are written to the corresponding -remoteWrite.url . See also -remoteWrite.streamAggr.keepInput and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/")
|
||||
streamAggrKeepInput = flagutil.NewArrayBool("remoteWrite.streamAggr.keepInput", "Whether to keep input samples that match any rule in "+
|
||||
"the corresponding -remoteWrite.streamAggr.config. By default, matched raw samples are aggregated and dropped, while unmatched samples "+
|
||||
streamAggrKeepInput = flagutil.NewArrayBool("remoteWrite.streamAggr.keepInput", "Whether to keep all the input samples after the aggregation "+
|
||||
"with -remoteWrite.streamAggr.config at the corresponding -remoteWrite.url. By default, only aggregates samples are dropped, while the remaining samples "+
|
||||
"are written to the corresponding -remoteWrite.url . See also -remoteWrite.streamAggr.dropInput and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/")
|
||||
streamAggrDedupInterval = flagutil.NewArrayDuration("remoteWrite.streamAggr.dedupInterval", 0, "Input samples are de-duplicated with this interval before optional aggregation "+
|
||||
"with -remoteWrite.streamAggr.config at the corresponding -remoteWrite.url. See also -dedup.minScrapeInterval and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/#deduplication")
|
||||
|
||||
@@ -1,80 +0,0 @@
|
||||
package zabbixconnector
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/protoparserutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/zabbixconnector"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/zabbixconnector/stream"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
var (
|
||||
rowsInserted = metrics.NewCounter(`vmagent_rows_inserted_total{type="zabbixconnector"}`)
|
||||
rowsTenantInserted = tenantmetrics.NewCounterMap(`vmagent_tenant_inserted_rows_total{type="zabbixconnector"}`)
|
||||
rowsPerInsert = metrics.NewHistogram(`vmagent_rows_per_insert{type="zabbixconnector"}`)
|
||||
)
|
||||
|
||||
// InsertHandlerForHTTP processes remote write for ZabbixConnector POST /zabbixconnector/v1/history request.
|
||||
func InsertHandlerForHTTP(at *auth.Token, req *http.Request) error {
|
||||
extraLabels, err := protoparserutil.GetExtraLabels(req)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
encoding := req.Header.Get("Content-Encoding")
|
||||
return stream.Parse(req.Body, encoding, func(rows []zabbixconnector.Row) error {
|
||||
return insertRows(at, rows, extraLabels)
|
||||
})
|
||||
}
|
||||
|
||||
func insertRows(at *auth.Token, rows []zabbixconnector.Row, extraLabels []prompb.Label) error {
|
||||
ctx := common.GetPushCtx()
|
||||
defer common.PutPushCtx(ctx)
|
||||
|
||||
rowsTotal := len(rows)
|
||||
tssDst := ctx.WriteRequest.Timeseries[:0]
|
||||
labels := ctx.Labels[:0]
|
||||
samples := ctx.Samples[:0]
|
||||
for i := range rows {
|
||||
r := &rows[i]
|
||||
|
||||
labelsLen := len(labels)
|
||||
for j := range r.Tags {
|
||||
tag := &r.Tags[j]
|
||||
labels = append(labels, prompb.Label{
|
||||
Name: bytesutil.ToUnsafeString(tag.Key),
|
||||
Value: bytesutil.ToUnsafeString(tag.Value),
|
||||
})
|
||||
}
|
||||
labels = append(labels, extraLabels...)
|
||||
|
||||
samplesLen := len(samples)
|
||||
samples = append(samples, prompb.Sample{
|
||||
Value: r.Value,
|
||||
Timestamp: r.Timestamp,
|
||||
})
|
||||
|
||||
tssDst = append(tssDst, prompb.TimeSeries{
|
||||
Labels: labels[labelsLen:],
|
||||
Samples: samples[samplesLen:],
|
||||
})
|
||||
}
|
||||
ctx.WriteRequest.Timeseries = tssDst
|
||||
ctx.Labels = labels
|
||||
ctx.Samples = samples
|
||||
if !remotewrite.TryPush(at, &ctx.WriteRequest) {
|
||||
return remotewrite.ErrQueueFullHTTPRetry
|
||||
}
|
||||
rowsInserted.Add(rowsTotal)
|
||||
if at != nil {
|
||||
rowsTenantInserted.Get(at).Add(rowsTotal)
|
||||
}
|
||||
rowsPerInsert.Update(float64(rowsTotal))
|
||||
return nil
|
||||
}
|
||||
@@ -27,9 +27,6 @@ vmalert-tool-linux-ppc64le-prod:
|
||||
vmalert-tool-linux-386-prod:
|
||||
APP_NAME=vmalert-tool $(MAKE) app-via-docker-linux-386
|
||||
|
||||
vmalert-tool-linux-s390x-prod:
|
||||
APP_NAME=vmalert-tool $(MAKE) app-via-docker-linux-s390x
|
||||
|
||||
vmalert-tool-darwin-amd64-prod:
|
||||
APP_NAME=vmalert-tool $(MAKE) app-via-docker-darwin-amd64
|
||||
|
||||
|
||||
@@ -34,7 +34,6 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutil"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
@@ -85,8 +84,7 @@ func UnitTest(files []string, disableGroupLabel bool, externalLabels []string, e
|
||||
defer server.Close()
|
||||
} else {
|
||||
httpListenAddr = httpListenPort
|
||||
|
||||
ln, err := net.Listen(netutil.GetTCPNetwork(), fmt.Sprintf(":%s", httpListenPort))
|
||||
ln, err := net.Listen("tcp", fmt.Sprintf(":%s", httpListenPort))
|
||||
if err != nil {
|
||||
logger.Fatalf("cannot listen on port %s: %v", httpListenPort, err)
|
||||
}
|
||||
@@ -132,7 +130,7 @@ func UnitTest(files []string, disableGroupLabel bool, externalLabels []string, e
|
||||
}
|
||||
labels[s[:n]] = s[n+1:]
|
||||
}
|
||||
err = notifier.Init(labels, externalURL)
|
||||
_, err = notifier.Init(nil, labels, externalURL)
|
||||
if err != nil {
|
||||
logger.Fatalf("failed to init notifier: %v", err)
|
||||
}
|
||||
@@ -379,7 +377,7 @@ func (tg *testGroup) test(evalInterval time.Duration, groupOrderMap map[string]i
|
||||
if len(g.Rules) == 0 {
|
||||
continue
|
||||
}
|
||||
errs := g.ExecOnce(context.Background(), rw, ts)
|
||||
errs := g.ExecOnce(context.Background(), func() []notifier.Notifier { return nil }, rw, ts)
|
||||
for err := range errs {
|
||||
if err != nil {
|
||||
checkErrs = append(checkErrs, fmt.Errorf("\nfailed to exec group: %q, time: %s, err: %w", g.Name,
|
||||
|
||||
@@ -27,9 +27,6 @@ vmalert-linux-ppc64le-prod:
|
||||
vmalert-linux-386-prod:
|
||||
APP_NAME=vmalert $(MAKE) app-via-docker-linux-386
|
||||
|
||||
vmalert-linux-s390x-prod:
|
||||
APP_NAME=vmalert $(MAKE) app-via-docker-linux-s390x
|
||||
|
||||
vmalert-darwin-amd64-prod:
|
||||
APP_NAME=vmalert $(MAKE) app-via-docker-darwin-amd64
|
||||
|
||||
|
||||
@@ -31,7 +31,7 @@ type Group struct {
|
||||
// EvalDelay will adjust the `time` parameter of rule evaluation requests to compensate intentional query delay from datasource.
|
||||
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5155
|
||||
EvalDelay *promutil.Duration `yaml:"eval_delay,omitempty"`
|
||||
Limit *int `yaml:"limit,omitempty"`
|
||||
Limit int `yaml:"limit,omitempty"`
|
||||
Rules []Rule `yaml:"rules"`
|
||||
Concurrency int `yaml:"concurrency"`
|
||||
// Labels is a set of label value pairs, that will be added to every rule.
|
||||
@@ -91,8 +91,8 @@ func (g *Group) Validate(validateTplFn ValidateTplFn, validateExpressions bool)
|
||||
if g.EvalOffset != nil && g.EvalDelay != nil {
|
||||
return fmt.Errorf("eval_offset cannot be used with eval_delay")
|
||||
}
|
||||
if g.Limit != nil && *g.Limit < 0 {
|
||||
return fmt.Errorf("invalid limit %d, shouldn't be less than 0", *g.Limit)
|
||||
if g.Limit < 0 {
|
||||
return fmt.Errorf("invalid limit %d, shouldn't be less than 0", g.Limit)
|
||||
}
|
||||
if g.Concurrency < 0 {
|
||||
return fmt.Errorf("invalid concurrency %d, shouldn't be less than 0", g.Concurrency)
|
||||
|
||||
@@ -116,7 +116,7 @@ func TestParse_Failure(t *testing.T) {
|
||||
|
||||
f([]string{"testdata/rules/rules_interval_bad.rules"}, "eval_offset should be smaller than interval")
|
||||
f([]string{"testdata/rules/rules0-bad.rules"}, "unexpected token")
|
||||
f([]string{"testdata/dir/rules0-bad.rules"}, "invalid annotations")
|
||||
f([]string{"testdata/dir/rules0-bad.rules"}, "error parsing annotation")
|
||||
f([]string{"testdata/dir/rules1-bad.rules"}, "duplicate in file")
|
||||
f([]string{"testdata/dir/rules2-bad.rules"}, "function \"unknown\" not defined")
|
||||
f([]string{"testdata/dir/rules3-bad.rules"}, "either `record` or `alert` must be set")
|
||||
@@ -181,10 +181,9 @@ func TestGroupValidate_Failure(t *testing.T) {
|
||||
EvalOffset: promutil.NewDuration(2 * time.Minute),
|
||||
}, false, "eval_offset should be smaller than interval")
|
||||
|
||||
limit := -1
|
||||
f(&Group{
|
||||
Name: "wrong limit",
|
||||
Limit: &limit,
|
||||
Limit: -1,
|
||||
}, false, "invalid limit")
|
||||
|
||||
f(&Group{
|
||||
@@ -343,6 +342,7 @@ func TestGroupValidate_Failure(t *testing.T) {
|
||||
},
|
||||
},
|
||||
}, true, "bad prometheus expr")
|
||||
|
||||
}
|
||||
|
||||
func TestGroupValidate_Success(t *testing.T) {
|
||||
|
||||
@@ -76,14 +76,11 @@ func (t *Type) ValidateExpr(expr string) error {
|
||||
if err != nil {
|
||||
return fmt.Errorf("bad LogsQL expr: %q, err: %w", expr, err)
|
||||
}
|
||||
labels, err := q.GetStatsLabels()
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot obtain labels from LogsQL expr: %q, err: %w", expr, err)
|
||||
}
|
||||
for i := range labels {
|
||||
fields, _ := q.GetStatsByFields()
|
||||
for i := range fields {
|
||||
// VictoriaLogs inserts `_time` field as a label in result when query with `stats by (_time:step)`,
|
||||
// making the result meaningless and may lead to cardinality issues.
|
||||
if labels[i] == "_time" {
|
||||
if fields[i] == "_time" {
|
||||
return fmt.Errorf("bad LogsQL expr: %q, err: cannot contain time buckets stats pipe `stats by (_time:step)`", expr)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -173,26 +173,22 @@ func (c *Client) Query(ctx context.Context, query string, ts time.Time) (Result,
|
||||
return Result{}, nil, fmt.Errorf("second attempt: %w", err)
|
||||
}
|
||||
}
|
||||
defer func() { _ = resp.Body.Close() }()
|
||||
|
||||
// Process the received response.
|
||||
var parseFn func(resp *http.Response) (Result, error)
|
||||
var parseFn func(req *http.Request, resp *http.Response) (Result, error)
|
||||
switch c.dataSourceType {
|
||||
case datasourcePrometheus:
|
||||
parseFn = parsePrometheusInstantResponse
|
||||
parseFn = parsePrometheusResponse
|
||||
case datasourceGraphite:
|
||||
parseFn = parseGraphiteResponse
|
||||
case datasourceVLogs:
|
||||
parseFn = parseVLogsInstantResponse
|
||||
parseFn = parseVLogsResponse
|
||||
default:
|
||||
logger.Panicf("BUG: unsupported datasource type %q to parse query response", c.dataSourceType)
|
||||
}
|
||||
|
||||
result, err := parseFn(resp)
|
||||
if err != nil {
|
||||
return Result{}, nil, fmt.Errorf("error parsing response from %q: %w", req.URL.Redacted(), err)
|
||||
}
|
||||
return result, req, nil
|
||||
result, err := parseFn(req, resp)
|
||||
_ = resp.Body.Close()
|
||||
return result, req, err
|
||||
}
|
||||
|
||||
// QueryRange executes the given query on the given time range.
|
||||
@@ -233,23 +229,19 @@ func (c *Client) QueryRange(ctx context.Context, query string, start, end time.T
|
||||
return res, fmt.Errorf("second attempt: %w", err)
|
||||
}
|
||||
}
|
||||
defer func() { _ = resp.Body.Close() }()
|
||||
|
||||
// Process the received response.
|
||||
var parseFn func(resp *http.Response) (Result, error)
|
||||
var parseFn func(req *http.Request, resp *http.Response) (Result, error)
|
||||
switch c.dataSourceType {
|
||||
case datasourcePrometheus:
|
||||
parseFn = parsePrometheusRangeResponse
|
||||
parseFn = parsePrometheusResponse
|
||||
case datasourceVLogs:
|
||||
parseFn = parseVLogsRangeResponse
|
||||
parseFn = parseVLogsResponse
|
||||
default:
|
||||
logger.Panicf("BUG: unsupported datasource type %q to parse query range response", c.dataSourceType)
|
||||
}
|
||||
|
||||
res, err = parseFn(resp)
|
||||
if err != nil {
|
||||
return Result{}, fmt.Errorf("error parsing response from %q: %w", req.URL.Redacted(), err)
|
||||
}
|
||||
res, err = parseFn(req, resp)
|
||||
_ = resp.Body.Close()
|
||||
return res, err
|
||||
}
|
||||
|
||||
|
||||
@@ -33,10 +33,10 @@ func (r graphiteResponse) metrics() []Metric {
|
||||
return ms
|
||||
}
|
||||
|
||||
func parseGraphiteResponse(resp *http.Response) (Result, error) {
|
||||
func parseGraphiteResponse(req *http.Request, resp *http.Response) (Result, error) {
|
||||
r := &graphiteResponse{}
|
||||
if err := json.NewDecoder(resp.Body).Decode(r); err != nil {
|
||||
return Result{}, fmt.Errorf("error parsing graphite metrics: %w", err)
|
||||
return Result{}, fmt.Errorf("error parsing graphite metrics for %s: %w", req.URL.Redacted(), err)
|
||||
}
|
||||
return Result{Data: r.metrics()}, nil
|
||||
}
|
||||
|
||||
@@ -172,26 +172,17 @@ const (
|
||||
rtVector, rtMatrix, rScalar = "vector", "matrix", "scalar"
|
||||
)
|
||||
|
||||
func parsePromResponse(resp *http.Response) (*promResponse, error) {
|
||||
func parsePrometheusResponse(req *http.Request, resp *http.Response) (res Result, err error) {
|
||||
r := &promResponse{}
|
||||
if err := json.NewDecoder(resp.Body).Decode(r); err != nil {
|
||||
return nil, fmt.Errorf("failed to decode response: %w", err)
|
||||
if err = json.NewDecoder(resp.Body).Decode(r); err != nil {
|
||||
return res, fmt.Errorf("error parsing response from %s: %w", req.URL.Redacted(), err)
|
||||
}
|
||||
if r.Status == statusError {
|
||||
return nil, fmt.Errorf("response error %q: %s", r.ErrorType, r.Error)
|
||||
return res, fmt.Errorf("response error, query: %s, errorType: %s, error: %s", req.URL.Redacted(), r.ErrorType, r.Error)
|
||||
}
|
||||
if r.Status != statusSuccess {
|
||||
return nil, fmt.Errorf("unknown response status %q", r.Status)
|
||||
return res, fmt.Errorf("unknown status: %s, Expected success or error", r.Status)
|
||||
}
|
||||
return r, nil
|
||||
}
|
||||
|
||||
func parsePrometheusInstantResponse(resp *http.Response) (res Result, err error) {
|
||||
r, err := parsePromResponse(resp)
|
||||
if err != nil {
|
||||
return res, fmt.Errorf("failed to parse response: %w", err)
|
||||
}
|
||||
|
||||
var parseFn func() ([]Metric, error)
|
||||
switch r.Data.ResultType {
|
||||
case rtVector:
|
||||
@@ -200,6 +191,12 @@ func parsePrometheusInstantResponse(resp *http.Response) (res Result, err error)
|
||||
return res, fmt.Errorf("unmarshal err %w; \n %#v", err, string(r.Data.Result))
|
||||
}
|
||||
parseFn = pi.metrics
|
||||
case rtMatrix:
|
||||
var pr promRange
|
||||
if err := json.Unmarshal(r.Data.Result, &pr.Result); err != nil {
|
||||
return res, err
|
||||
}
|
||||
parseFn = pr.metrics
|
||||
case rScalar:
|
||||
var ps promScalar
|
||||
if err := json.Unmarshal(r.Data.Result, &ps); err != nil {
|
||||
@@ -209,6 +206,7 @@ func parsePrometheusInstantResponse(resp *http.Response) (res Result, err error)
|
||||
default:
|
||||
return res, fmt.Errorf("unknown result type %q", r.Data.ResultType)
|
||||
}
|
||||
|
||||
ms, err := parseFn()
|
||||
if err != nil {
|
||||
return res, err
|
||||
@@ -224,34 +222,6 @@ func parsePrometheusInstantResponse(resp *http.Response) (res Result, err error)
|
||||
return res, nil
|
||||
}
|
||||
|
||||
func parsePrometheusRangeResponse(resp *http.Response) (res Result, err error) {
|
||||
r, err := parsePromResponse(resp)
|
||||
if err != nil {
|
||||
return res, fmt.Errorf("failed to parse response: %w", err)
|
||||
}
|
||||
if r.Data.ResultType != rtMatrix {
|
||||
return res, fmt.Errorf("unexpected result type %q; expected result type %q", r.Data.ResultType, rtMatrix)
|
||||
}
|
||||
|
||||
var pr promRange
|
||||
if err := json.Unmarshal(r.Data.Result, &pr.Result); err != nil {
|
||||
return res, err
|
||||
}
|
||||
ms, err := pr.metrics()
|
||||
if err != nil {
|
||||
return res, err
|
||||
}
|
||||
res = Result{Data: ms, IsPartial: r.IsPartial}
|
||||
if r.Stats.SeriesFetched != nil {
|
||||
intV, err := strconv.Atoi(*r.Stats.SeriesFetched)
|
||||
if err != nil {
|
||||
return res, fmt.Errorf("failed to convert stats.seriesFetched to int: %w", err)
|
||||
}
|
||||
res.SeriesFetched = &intV
|
||||
}
|
||||
return res, nil
|
||||
}
|
||||
|
||||
func (c *Client) setPrometheusInstantReqParams(r *http.Request, query string, timestamp time.Time) {
|
||||
if c.appendTypePrefix {
|
||||
r.URL.Path += "/prometheus"
|
||||
|
||||
@@ -65,23 +65,21 @@ func TestVMInstantQuery(t *testing.T) {
|
||||
case 3:
|
||||
w.Write([]byte(`{"status":"unknown"}`))
|
||||
case 4:
|
||||
w.Write([]byte(`{"status":"success","data":{"resultType":"vector"}}`))
|
||||
w.Write([]byte(`{"status":"success","data":{"resultType":"matrix"}}`))
|
||||
case 5:
|
||||
w.Write([]byte(`{"status":"success","data":{"resultType":"matrix","result":[{"metric":{"__name__":"vm_rows"},"values":[[1583786142,"13763"]]}]}}`))
|
||||
case 6:
|
||||
w.Write([]byte(`{"status":"success","data":{"resultType":"vector","result":[{"metric":{"__name__":"vm_rows","foo":"bar"},"value":[1583786142,"13763"]},{"metric":{"__name__":"vm_requests","foo":"baz"},"value":[1583786140,"2000"]}]}}`))
|
||||
case 7:
|
||||
case 6:
|
||||
w.Write([]byte(`{"status":"success","data":{"resultType":"scalar","result":[1583786142, "1"]}}`))
|
||||
case 8:
|
||||
case 7:
|
||||
w.Write([]byte(`{"status":"success","data":{"resultType":"scalar","result":[1583786142, "1"]},"stats":{"seriesFetched": "42"}}`))
|
||||
case 9:
|
||||
case 8:
|
||||
w.Write([]byte(`{"status":"success", "isPartial":true, "data":{"resultType":"scalar","result":[1583786142, "1"]}}`))
|
||||
}
|
||||
})
|
||||
mux.HandleFunc("/render", func(w http.ResponseWriter, _ *http.Request) {
|
||||
c++
|
||||
switch c {
|
||||
case 10:
|
||||
case 9:
|
||||
w.Write([]byte(`[{"target":"constantLine(10)","tags":{"name":"constantLine(10)"},"datapoints":[[10,1611758343],[10,1611758373],[10,1611758403]]}]`))
|
||||
}
|
||||
})
|
||||
@@ -104,9 +102,9 @@ func TestVMInstantQuery(t *testing.T) {
|
||||
t.Fatalf("failed to parse 'time' query param %q: %s", timeParam, err)
|
||||
}
|
||||
switch c {
|
||||
case 11:
|
||||
case 10:
|
||||
w.Write([]byte("[]"))
|
||||
case 12:
|
||||
case 11:
|
||||
w.Write([]byte(`{"status":"success","data":{"resultType":"vector","result":[{"metric":{"__name__":"total","foo":"bar"},"value":[1583786142,"13763"]},{"metric":{"__name__":"total","foo":"baz"},"value":[1583786140,"2000"]}]}}`))
|
||||
}
|
||||
})
|
||||
@@ -125,7 +123,6 @@ func TestVMInstantQuery(t *testing.T) {
|
||||
ts := time.Now()
|
||||
|
||||
expErr := func(query, err string) {
|
||||
t.Helper()
|
||||
_, _, gotErr := pq.Query(ctx, query, ts)
|
||||
if gotErr == nil {
|
||||
t.Fatalf("expected %q got nil", err)
|
||||
@@ -138,11 +135,10 @@ func TestVMInstantQuery(t *testing.T) {
|
||||
expErr(vmQuery, "500") // 0
|
||||
expErr(vmQuery, "error parsing response") // 1
|
||||
expErr(vmQuery, "response error") // 2
|
||||
expErr(vmQuery, "unknown response status") // 3
|
||||
expErr(vmQuery, "unknown status") // 3
|
||||
expErr(vmQuery, "unexpected end of JSON input") // 4
|
||||
expErr(vmQuery, "unknown result type") // 5
|
||||
|
||||
res, _, err := pq.Query(ctx, vmQuery, ts) // 6 - vector
|
||||
res, _, err := pq.Query(ctx, vmQuery, ts) // 5 - vector
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected %s", err)
|
||||
}
|
||||
@@ -163,7 +159,7 @@ func TestVMInstantQuery(t *testing.T) {
|
||||
}
|
||||
metricsEqual(t, res.Data, expected)
|
||||
|
||||
res, req, err := pq.Query(ctx, vmQuery, ts) // 7 - scalar
|
||||
res, req, err := pq.Query(ctx, vmQuery, ts) // 6 - scalar
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected %s", err)
|
||||
}
|
||||
@@ -188,7 +184,7 @@ func TestVMInstantQuery(t *testing.T) {
|
||||
res.SeriesFetched)
|
||||
}
|
||||
|
||||
res, _, err = pq.Query(ctx, vmQuery, ts) // 8 - scalar with stats
|
||||
res, _, err = pq.Query(ctx, vmQuery, ts) // 7 - scalar with stats
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected %s", err)
|
||||
}
|
||||
@@ -209,7 +205,7 @@ func TestVMInstantQuery(t *testing.T) {
|
||||
*res.SeriesFetched)
|
||||
}
|
||||
|
||||
res, _, err = pq.Query(ctx, vmQuery, ts) // 9
|
||||
res, _, err = pq.Query(ctx, vmQuery, ts) // 8
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected %s", err)
|
||||
}
|
||||
@@ -220,7 +216,7 @@ func TestVMInstantQuery(t *testing.T) {
|
||||
// test graphite
|
||||
gq := s.BuildWithParams(QuerierParams{DataSourceType: string(datasourceGraphite)})
|
||||
|
||||
res, _, err = gq.Query(ctx, queryRender, ts) // 10 - graphite
|
||||
res, _, err = gq.Query(ctx, queryRender, ts) // 9 - graphite
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected %s", err)
|
||||
}
|
||||
@@ -240,9 +236,9 @@ func TestVMInstantQuery(t *testing.T) {
|
||||
vlogs := datasourceVLogs
|
||||
pq = s.BuildWithParams(QuerierParams{DataSourceType: string(vlogs), EvaluationInterval: 15 * time.Second})
|
||||
|
||||
expErr(vlogsQuery, "error parsing response") // 11
|
||||
expErr(vlogsQuery, "error parsing response") // 10
|
||||
|
||||
res, _, err = pq.Query(ctx, vlogsQuery, ts) // 12
|
||||
res, _, err = pq.Query(ctx, vlogsQuery, ts) // 11
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected %s", err)
|
||||
}
|
||||
@@ -394,8 +390,6 @@ func TestVMRangeQuery(t *testing.T) {
|
||||
switch c {
|
||||
case 0:
|
||||
w.Write([]byte(`{"status":"success","data":{"resultType":"matrix","result":[{"metric":{"__name__":"vm_rows"},"values":[[1583786142,"13763"]]}]}}`))
|
||||
case 1:
|
||||
w.Write([]byte(`{"status":"success","data":{"resultType":"vector","result":[1583786142, "1"]}}`))
|
||||
}
|
||||
})
|
||||
mux.HandleFunc("/select/logsql/stats_query_range", func(w http.ResponseWriter, r *http.Request) {
|
||||
@@ -428,7 +422,7 @@ func TestVMRangeQuery(t *testing.T) {
|
||||
t.Fatalf("expected 'step' query param to be 60s; got %q instead", step)
|
||||
}
|
||||
switch c {
|
||||
case 2:
|
||||
case 1:
|
||||
w.Write([]byte(`{"status":"success","data":{"resultType":"matrix","result":[{"metric":{"__name__":"total"},"values":[[1583786142,"10"]]}]}}`))
|
||||
}
|
||||
})
|
||||
@@ -452,13 +446,13 @@ func TestVMRangeQuery(t *testing.T) {
|
||||
|
||||
start, end := time.Now().Add(-time.Minute), time.Now()
|
||||
|
||||
res, err := pq.QueryRange(ctx, vmQuery, start, end) // case 0
|
||||
res, err := pq.QueryRange(ctx, vmQuery, start, end)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected %s", err)
|
||||
}
|
||||
m := res.Data
|
||||
if len(m) != 1 {
|
||||
t.Fatalf("expected 1 metric got %d in %+v", len(m), m)
|
||||
t.Fatalf("expected 1 metric got %d in %+v", len(m), m)
|
||||
}
|
||||
expected := Metric{
|
||||
Labels: []prompb.Label{{Value: "vm_rows", Name: "__name__"}},
|
||||
@@ -469,9 +463,6 @@ func TestVMRangeQuery(t *testing.T) {
|
||||
t.Fatalf("unexpected metric %+v want %+v", m[0], expected)
|
||||
}
|
||||
|
||||
_, err = pq.QueryRange(ctx, vmQuery, start, end) // case 1
|
||||
expectError(t, err, "unexpected result type")
|
||||
|
||||
// test unsupported graphite
|
||||
gq := s.BuildWithParams(QuerierParams{DataSourceType: string(datasourceGraphite)})
|
||||
|
||||
|
||||
@@ -40,28 +40,8 @@ func (c *Client) setVLogsRangeReqParams(r *http.Request, query string, start, en
|
||||
c.setReqParams(r, query)
|
||||
}
|
||||
|
||||
func parseVLogsInstantResponse(resp *http.Response) (res Result, err error) {
|
||||
res, err = parsePrometheusInstantResponse(resp)
|
||||
if err != nil {
|
||||
return Result{}, err
|
||||
}
|
||||
for i := range res.Data {
|
||||
m := &res.Data[i]
|
||||
for j := range m.Labels {
|
||||
// reserve the stats func result name with a new label `stats_result` instead of dropping it,
|
||||
// since there could be multiple stats results in a single query, for instance:
|
||||
// _time:5m | stats quantile(0.5, request_duration_seconds) p50, quantile(0.9, request_duration_seconds) p90
|
||||
if m.Labels[j].Name == "__name__" {
|
||||
m.Labels[j].Name = "stats_result"
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func parseVLogsRangeResponse(resp *http.Response) (res Result, err error) {
|
||||
res, err = parsePrometheusRangeResponse(resp)
|
||||
func parseVLogsResponse(req *http.Request, resp *http.Response) (res Result, err error) {
|
||||
res, err = parsePrometheusResponse(req, resp)
|
||||
if err != nil {
|
||||
return Result{}, err
|
||||
}
|
||||
|
||||
@@ -132,7 +132,10 @@ func (ls Labels) String() string {
|
||||
// a=[]Label{{Name: "a", Value: "2"}},b=[]Label{{Name: "a", Value: "1"}}, return 1
|
||||
// a=[]Label{{Name: "a", Value: "1"}},b=[]Label{{Name: "a", Value: "1"}}, return 0
|
||||
func LabelCompare(a, b Labels) int {
|
||||
l := min(len(b), len(a))
|
||||
l := len(a)
|
||||
if len(b) < l {
|
||||
l = len(b)
|
||||
}
|
||||
|
||||
for i := 0; i < l; i++ {
|
||||
if a[i].Name != b[i].Name {
|
||||
|
||||
@@ -7,6 +7,7 @@ import (
|
||||
"net/url"
|
||||
"os"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
@@ -76,12 +77,15 @@ absolute path to all .tpl files in root.
|
||||
`Link to VMUI: -external.alert.source='vmui/#/?g0.expr={{.Expr|queryEscape}}'. `+
|
||||
`If empty 'vmalert/alert?group_id={{.GroupID}}&alert_id={{.AlertID}}' is used.`)
|
||||
externalLabels = flagutil.NewArrayString("external.label", "Optional label in the form 'Name=value' to add to all generated recording rules and alerts. "+
|
||||
"In case of conflicts, original labels are kept with prefix 'exported_'.")
|
||||
"In case of conflicts, original labels are kept with prefix `exported_`.")
|
||||
|
||||
dryRun = flag.Bool("dryRun", false, "Whether to check only config files without running vmalert. The rules file are validated. The -rule flag must be specified.")
|
||||
)
|
||||
|
||||
var extURL *url.URL
|
||||
var (
|
||||
alertURLGeneratorFn notifier.AlertURLGenerator
|
||||
extURL *url.URL
|
||||
)
|
||||
|
||||
func main() {
|
||||
// Write flags and help message to stdout, since it is easier to grep or pipe.
|
||||
@@ -117,7 +121,7 @@ func main() {
|
||||
return
|
||||
}
|
||||
|
||||
err = notifier.InitAlertURLGeneratorFn(extURL, *externalAlertSource, *validateTemplates)
|
||||
alertURLGeneratorFn, err = getAlertURLGenerator(extURL, *externalAlertSource, *validateTemplates)
|
||||
if err != nil {
|
||||
logger.Fatalf("failed to init `external.alert.source`: %s", err)
|
||||
}
|
||||
@@ -159,7 +163,7 @@ func main() {
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
manager, err := newManager(ctx)
|
||||
if err != nil {
|
||||
logger.Fatalf("failed to create manager: %s", err)
|
||||
logger.Fatalf("failed to init: %s", err)
|
||||
}
|
||||
logger.Infof("reading rules configuration file from %q", strings.Join(*rulePath, ";"))
|
||||
groupsCfg, err := config.Parse(*rulePath, validateTplFn, *validateExpressions)
|
||||
@@ -224,13 +228,14 @@ func newManager(ctx context.Context) (*manager, error) {
|
||||
labels[s[:n]] = s[n+1:]
|
||||
}
|
||||
|
||||
err = notifier.Init(labels, *externalURL)
|
||||
nts, err := notifier.Init(alertURLGeneratorFn, labels, *externalURL)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to init notifier: %w", err)
|
||||
}
|
||||
manager := &manager{
|
||||
groups: make(map[uint64]*rule.Group),
|
||||
querierBuilder: q,
|
||||
notifiers: nts,
|
||||
labels: labels,
|
||||
}
|
||||
rw, err := remotewrite.Init(ctx)
|
||||
@@ -287,6 +292,35 @@ func getHostnameAsExternalURL(addr string, isSecure bool) (*url.URL, error) {
|
||||
return url.Parse(fmt.Sprintf("%s%s%s", schema, hname, port))
|
||||
}
|
||||
|
||||
func getAlertURLGenerator(externalURL *url.URL, externalAlertSource string, validateTemplate bool) (notifier.AlertURLGenerator, error) {
|
||||
if externalAlertSource == "" {
|
||||
return func(a notifier.Alert) string {
|
||||
gID, aID := strconv.FormatUint(a.GroupID, 10), strconv.FormatUint(a.ID, 10)
|
||||
return fmt.Sprintf("%s/vmalert/alert?%s=%s&%s=%s", externalURL, paramGroupID, gID, paramAlertID, aID)
|
||||
}, nil
|
||||
}
|
||||
if validateTemplate {
|
||||
if err := notifier.ValidateTemplates(map[string]string{
|
||||
"tpl": externalAlertSource,
|
||||
}); err != nil {
|
||||
return nil, fmt.Errorf("error validating source template %s: %w", externalAlertSource, err)
|
||||
}
|
||||
}
|
||||
m := map[string]string{
|
||||
"tpl": externalAlertSource,
|
||||
}
|
||||
return func(alert notifier.Alert) string {
|
||||
qFn := func(_ string) ([]datasource.Metric, error) {
|
||||
return nil, fmt.Errorf("`query` template isn't supported for alert source template")
|
||||
}
|
||||
templated, err := alert.ExecTemplate(qFn, alert.Labels, m)
|
||||
if err != nil {
|
||||
logger.Errorf("cannot template alert source: %s", err)
|
||||
}
|
||||
return fmt.Sprintf("%s/%s", externalURL, templated["tpl"])
|
||||
}, nil
|
||||
}
|
||||
|
||||
func usage() {
|
||||
const s = `
|
||||
vmalert processes alerts and recording rules.
|
||||
|
||||
@@ -49,6 +49,30 @@ func TestGetExternalURL(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetAlertURLGenerator(t *testing.T) {
|
||||
testAlert := notifier.Alert{GroupID: 42, ID: 2, Value: 4, Labels: map[string]string{"tenant": "baz"}}
|
||||
u, _ := url.Parse("https://victoriametrics.com/path")
|
||||
fn, err := getAlertURLGenerator(u, "", false)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error %s", err)
|
||||
}
|
||||
exp := fmt.Sprintf("https://victoriametrics.com/path/vmalert/alert?%s=42&%s=2", paramGroupID, paramAlertID)
|
||||
if exp != fn(testAlert) {
|
||||
t.Fatalf("unexpected url want %s, got %s", exp, fn(testAlert))
|
||||
}
|
||||
_, err = getAlertURLGenerator(nil, "foo?{{invalid}}", true)
|
||||
if err == nil {
|
||||
t.Fatalf("expected template validation error got nil")
|
||||
}
|
||||
fn, err = getAlertURLGenerator(u, "foo?query={{$value}}&ds={{ $labels.tenant }}", true)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error %s", err)
|
||||
}
|
||||
if exp := "https://victoriametrics.com/path/foo?query=4&ds=baz"; exp != fn(testAlert) {
|
||||
t.Fatalf("unexpected url want %s, got %s", exp, fn(testAlert))
|
||||
}
|
||||
}
|
||||
|
||||
func TestConfigReload(t *testing.T) {
|
||||
originalRulePath := *rulePath
|
||||
originalExternalURL := extURL
|
||||
@@ -96,10 +120,9 @@ groups:
|
||||
querierBuilder: &datasource.FakeQuerier{},
|
||||
groups: make(map[uint64]*rule.Group),
|
||||
labels: map[string]string{},
|
||||
notifiers: func() []notifier.Notifier { return []notifier.Notifier{¬ifier.FakeNotifier{}} },
|
||||
rw: &remotewrite.Client{},
|
||||
}
|
||||
_, cleanup := notifier.InitFakeNotifier()
|
||||
defer cleanup()
|
||||
|
||||
syncCh := make(chan struct{})
|
||||
sighupCh := procutil.NewSighupChan()
|
||||
|
||||
@@ -3,7 +3,6 @@ package main
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"strconv"
|
||||
"sync"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config"
|
||||
@@ -17,6 +16,7 @@ import (
|
||||
// manager controls group states
|
||||
type manager struct {
|
||||
querierBuilder datasource.QuerierBuilder
|
||||
notifiers func() []notifier.Notifier
|
||||
|
||||
rw remotewrite.RWClient
|
||||
// remote read builder.
|
||||
@@ -29,8 +29,25 @@ type manager struct {
|
||||
groups map[uint64]*rule.Group
|
||||
}
|
||||
|
||||
// groupAPI generates apiGroup object from group by its ID(hash)
|
||||
func (m *manager) groupAPI(gID uint64) (*rule.ApiGroup, error) {
|
||||
// ruleAPI generates apiRule object from alert by its ID(hash)
|
||||
func (m *manager) ruleAPI(gID, rID uint64) (apiRule, error) {
|
||||
m.groupsMu.RLock()
|
||||
defer m.groupsMu.RUnlock()
|
||||
|
||||
g, ok := m.groups[gID]
|
||||
if !ok {
|
||||
return apiRule{}, fmt.Errorf("can't find group with id %d", gID)
|
||||
}
|
||||
for _, rule := range g.Rules {
|
||||
if rule.ID() == rID {
|
||||
return ruleToAPI(rule), nil
|
||||
}
|
||||
}
|
||||
return apiRule{}, fmt.Errorf("can't find rule with id %d in group %q", rID, g.Name)
|
||||
}
|
||||
|
||||
// alertAPI generates apiAlert object from alert by its ID(hash)
|
||||
func (m *manager) alertAPI(gID, aID uint64) (*apiAlert, error) {
|
||||
m.groupsMu.RLock()
|
||||
defer m.groupsMu.RUnlock()
|
||||
|
||||
@@ -38,47 +55,13 @@ func (m *manager) groupAPI(gID uint64) (*rule.ApiGroup, error) {
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("can't find group with id %d", gID)
|
||||
}
|
||||
return g.ToAPI(), nil
|
||||
}
|
||||
|
||||
// ruleAPI generates apiRule object from alert by its ID(hash)
|
||||
func (m *manager) ruleAPI(gID, rID uint64) (rule.ApiRule, error) {
|
||||
m.groupsMu.RLock()
|
||||
defer m.groupsMu.RUnlock()
|
||||
|
||||
group, ok := m.groups[gID]
|
||||
if !ok {
|
||||
return rule.ApiRule{}, fmt.Errorf("can't find group with id %d", gID)
|
||||
}
|
||||
g := group.ToAPI()
|
||||
ruleID := strconv.FormatUint(rID, 10)
|
||||
for _, r := range g.Rules {
|
||||
if r.ID == ruleID {
|
||||
return r, nil
|
||||
}
|
||||
}
|
||||
return rule.ApiRule{}, fmt.Errorf("can't find rule with id %d in group %q", rID, g.Name)
|
||||
}
|
||||
|
||||
// alertAPI generates apiAlert object from alert by its ID(hash)
|
||||
func (m *manager) alertAPI(gID, aID uint64) (*rule.ApiAlert, error) {
|
||||
m.groupsMu.RLock()
|
||||
defer m.groupsMu.RUnlock()
|
||||
|
||||
group, ok := m.groups[gID]
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("can't find group with id %d", gID)
|
||||
}
|
||||
g := group.ToAPI()
|
||||
for _, r := range g.Rules {
|
||||
if r.Type != rule.TypeAlerting {
|
||||
ar, ok := r.(*rule.AlertingRule)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
alertID := strconv.FormatUint(aID, 10)
|
||||
for _, a := range r.Alerts {
|
||||
if a.ID == alertID {
|
||||
return a, nil
|
||||
}
|
||||
if apiAlert := alertToAPI(ar, aID); apiAlert != nil {
|
||||
return apiAlert, nil
|
||||
}
|
||||
}
|
||||
return nil, fmt.Errorf("can't find alert with id %d in group %q", aID, g.Name)
|
||||
@@ -99,16 +82,17 @@ func (m *manager) close() {
|
||||
}
|
||||
|
||||
func (m *manager) startGroup(ctx context.Context, g *rule.Group, restore bool) error {
|
||||
m.wg.Add(1)
|
||||
id := g.GetID()
|
||||
g.Init()
|
||||
m.wg.Go(func() {
|
||||
go func() {
|
||||
defer m.wg.Done()
|
||||
if restore {
|
||||
g.Start(ctx, m.rw, m.rr)
|
||||
g.Start(ctx, m.notifiers, m.rw, m.rr)
|
||||
} else {
|
||||
g.Start(ctx, m.rw, nil)
|
||||
g.Start(ctx, m.notifiers, m.rw, nil)
|
||||
}
|
||||
})
|
||||
|
||||
}()
|
||||
m.groups[id] = g
|
||||
return nil
|
||||
}
|
||||
@@ -135,7 +119,7 @@ func (m *manager) update(ctx context.Context, groupsCfg []config.Group, restore
|
||||
if rrPresent && m.rw == nil {
|
||||
return fmt.Errorf("config contains recording rules but `-remoteWrite.url` isn't set")
|
||||
}
|
||||
if arPresent && notifier.GetTargets() == nil {
|
||||
if arPresent && m.notifiers == nil {
|
||||
return fmt.Errorf("config contains alerting rules but neither `-notifier.url` nor `-notifier.config` nor `-notifier.blackhole` aren't set")
|
||||
}
|
||||
|
||||
@@ -172,15 +156,15 @@ func (m *manager) update(ctx context.Context, groupsCfg []config.Group, restore
|
||||
if len(toUpdate) > 0 {
|
||||
var wg sync.WaitGroup
|
||||
for _, item := range toUpdate {
|
||||
oldG := item.old
|
||||
newG := item.new
|
||||
wg.Go(func() {
|
||||
// cancel evaluation so the Update will be applied as fast as possible.
|
||||
// it is important to call InterruptEval before the update, because cancel fn
|
||||
// can be re-assigned during the update.
|
||||
oldG.InterruptEval()
|
||||
oldG.UpdateWith(newG)
|
||||
})
|
||||
wg.Add(1)
|
||||
// cancel evaluation so the Update will be applied as fast as possible.
|
||||
// it is important to call InterruptEval before the update, because cancel fn
|
||||
// can be re-assigned during the update.
|
||||
item.old.InterruptEval()
|
||||
go func(oldGroup *rule.Group, newGroup *rule.Group) {
|
||||
oldGroup.UpdateWith(newGroup)
|
||||
wg.Done()
|
||||
}(item.old, item.new)
|
||||
}
|
||||
wg.Wait()
|
||||
}
|
||||
|
||||
@@ -40,11 +40,10 @@ func TestManagerEmptyRulesDir(t *testing.T) {
|
||||
// execution of configuration update.
|
||||
// Should be executed with -race flag
|
||||
func TestManagerUpdateConcurrent(t *testing.T) {
|
||||
_, cleanup := notifier.InitFakeNotifier()
|
||||
defer cleanup()
|
||||
m := &manager{
|
||||
groups: make(map[uint64]*rule.Group),
|
||||
querierBuilder: &datasource.FakeQuerier{},
|
||||
notifiers: func() []notifier.Notifier { return []notifier.Notifier{¬ifier.FakeNotifier{}} },
|
||||
}
|
||||
paths := []string{
|
||||
"config/testdata/dir/rules0-good.rules",
|
||||
@@ -65,9 +64,11 @@ func TestManagerUpdateConcurrent(t *testing.T) {
|
||||
|
||||
const workers = 500
|
||||
const iterations = 10
|
||||
var wg sync.WaitGroup
|
||||
for n := range workers {
|
||||
wg.Go(func() {
|
||||
wg := sync.WaitGroup{}
|
||||
wg.Add(workers)
|
||||
for i := 0; i < workers; i++ {
|
||||
go func(n int) {
|
||||
defer wg.Done()
|
||||
r := rand.New(rand.NewSource(int64(n)))
|
||||
for i := 0; i < iterations; i++ {
|
||||
rnd := r.Intn(len(paths))
|
||||
@@ -77,7 +78,7 @@ func TestManagerUpdateConcurrent(t *testing.T) {
|
||||
}
|
||||
_ = m.update(context.Background(), cfg, false)
|
||||
}
|
||||
})
|
||||
}(i)
|
||||
}
|
||||
wg.Wait()
|
||||
}
|
||||
@@ -126,9 +127,8 @@ func TestManagerUpdate_Success(t *testing.T) {
|
||||
m := &manager{
|
||||
groups: make(map[uint64]*rule.Group),
|
||||
querierBuilder: &datasource.FakeQuerier{},
|
||||
notifiers: func() []notifier.Notifier { return []notifier.Notifier{¬ifier.FakeNotifier{}} },
|
||||
}
|
||||
_, cleanup := notifier.InitFakeNotifier()
|
||||
defer cleanup()
|
||||
|
||||
cfgInit := loadCfg(t, []string{initPath}, true, true)
|
||||
if err := m.update(ctx, cfgInit, false); err != nil {
|
||||
@@ -277,8 +277,7 @@ func TestManagerUpdate_Failure(t *testing.T) {
|
||||
rw: rw,
|
||||
}
|
||||
if notifiers != nil {
|
||||
_, cleanup := notifier.InitFakeNotifier()
|
||||
defer cleanup()
|
||||
m.notifiers = func() []notifier.Notifier { return notifiers }
|
||||
}
|
||||
err := m.update(context.Background(), []config.Group{cfg}, false)
|
||||
if err == nil {
|
||||
|
||||
@@ -80,15 +80,14 @@ func (as AlertState) String() string {
|
||||
|
||||
// AlertTplData is used to execute templating
|
||||
type AlertTplData struct {
|
||||
Type string
|
||||
Labels map[string]string
|
||||
Value float64
|
||||
Expr string
|
||||
AlertID uint64
|
||||
GroupID uint64
|
||||
ActiveAt time.Time
|
||||
For time.Duration
|
||||
IsPartial bool
|
||||
Type string
|
||||
Labels map[string]string
|
||||
Value float64
|
||||
Expr string
|
||||
AlertID uint64
|
||||
GroupID uint64
|
||||
ActiveAt time.Time
|
||||
For time.Duration
|
||||
}
|
||||
|
||||
var tplHeaders = []string{
|
||||
@@ -102,7 +101,6 @@ var tplHeaders = []string{
|
||||
"{{ $groupID := .GroupID }}",
|
||||
"{{ $activeAt := .ActiveAt }}",
|
||||
"{{ $for := .For }}",
|
||||
"{{ $isPartial := .IsPartial }}",
|
||||
}
|
||||
|
||||
// ExecTemplate executes the Alert template for given
|
||||
@@ -168,8 +166,8 @@ func templateAnnotations(annotations map[string]string, data AlertTplData, tmpl
|
||||
ctmpl, _ := tmpl.Clone()
|
||||
ctmpl = ctmpl.Option("missingkey=zero")
|
||||
if err := templateAnnotation(&buf, builder.String(), tData, ctmpl, execute); err != nil {
|
||||
r[key] = err.Error()
|
||||
eg.Add(fmt.Errorf("(key: %q, value: %q): %w", key, text, err))
|
||||
r[key] = text
|
||||
eg.Add(fmt.Errorf("key %q, template %q: %w", key, text, err))
|
||||
continue
|
||||
}
|
||||
r[key] = buf.String()
|
||||
@@ -186,13 +184,13 @@ type tplData struct {
|
||||
func templateAnnotation(dst io.Writer, text string, data tplData, tpl *textTpl.Template, execute bool) error {
|
||||
tpl, err := tpl.Parse(text)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error parsing template: %w", err)
|
||||
return fmt.Errorf("error parsing annotation template: %w", err)
|
||||
}
|
||||
if !execute {
|
||||
return nil
|
||||
}
|
||||
if err = tpl.Execute(dst, data); err != nil {
|
||||
return fmt.Errorf("error evaluating template: %w", err)
|
||||
return fmt.Errorf("error evaluating annotation template: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -20,7 +20,7 @@ func TestAlertExecTemplate(t *testing.T) {
|
||||
)
|
||||
extLabels["cluster"] = extCluster
|
||||
extLabels["dc"] = extDC
|
||||
err := Init(extLabels, extURL)
|
||||
_, err := Init(nil, extLabels, extURL)
|
||||
checkErr(t, err)
|
||||
|
||||
f := func(alert *Alert, annotations map[string]string, tplExpected map[string]string) {
|
||||
|
||||
@@ -3,7 +3,6 @@ package notifier
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
@@ -14,6 +13,7 @@ import (
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/vmalertutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httputil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
|
||||
@@ -22,11 +22,10 @@ import (
|
||||
// AlertManager represents integration provider with Prometheus alert manager
|
||||
// https://github.com/prometheus/alertmanager
|
||||
type AlertManager struct {
|
||||
addr *url.URL
|
||||
argFunc AlertURLGenerator
|
||||
client *http.Client
|
||||
timeout time.Duration
|
||||
lastError string
|
||||
addr *url.URL
|
||||
argFunc AlertURLGenerator
|
||||
client *http.Client
|
||||
timeout time.Duration
|
||||
|
||||
authCfg *promauth.Config
|
||||
// stores already parsed RelabelConfigs object
|
||||
@@ -72,42 +71,24 @@ func (am AlertManager) Addr() string {
|
||||
return am.addr.Redacted()
|
||||
}
|
||||
|
||||
func (am *AlertManager) LastError() string {
|
||||
return am.lastError
|
||||
}
|
||||
|
||||
// Send an alert or resolve message
|
||||
func (am *AlertManager) Send(ctx context.Context, alerts []Alert, alertLabels [][]prompb.Label, headers map[string]string) error {
|
||||
if len(alerts) != len(alertLabels) {
|
||||
return fmt.Errorf("mismatched number of alerts and label sets after global alert relabeling")
|
||||
}
|
||||
func (am *AlertManager) Send(ctx context.Context, alerts []Alert, headers map[string]string) error {
|
||||
am.metrics.alertsSent.Add(len(alerts))
|
||||
startTime := time.Now()
|
||||
err := am.send(ctx, alerts, alertLabels, headers)
|
||||
err := am.send(ctx, alerts, headers)
|
||||
am.metrics.alertsSendDuration.UpdateDuration(startTime)
|
||||
if err != nil {
|
||||
// the context can be cancelled on graceful shutdown
|
||||
// or on group update. So no need to handle the error as usual.
|
||||
if errors.Is(err, context.Canceled) {
|
||||
return nil
|
||||
}
|
||||
am.metrics.alertsSendErrors.Add(len(alerts))
|
||||
am.lastError = err.Error()
|
||||
} else {
|
||||
am.lastError = ""
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
func (am *AlertManager) send(ctx context.Context, alerts []Alert, alertLabels [][]prompb.Label, headers map[string]string) error {
|
||||
func (am *AlertManager) send(ctx context.Context, alerts []Alert, headers map[string]string) error {
|
||||
b := &bytes.Buffer{}
|
||||
alertsToSend := make([]Alert, 0, len(alerts))
|
||||
lblss := make([][]prompb.Label, 0, len(alerts))
|
||||
for i, a := range alerts {
|
||||
lbls := alertLabels[i]
|
||||
if am.relabelConfigs != nil {
|
||||
lbls = am.relabelConfigs.Apply(lbls, 0)
|
||||
}
|
||||
for _, a := range alerts {
|
||||
lbls := a.applyRelabelingIfNeeded(am.relabelConfigs)
|
||||
if len(lbls) == 0 {
|
||||
continue
|
||||
}
|
||||
@@ -171,6 +152,11 @@ const alertManagerPath = "/api/v2/alerts"
|
||||
func NewAlertManager(alertManagerURL string, fn AlertURLGenerator, authCfg promauth.HTTPClientConfig,
|
||||
relabelCfg *promrelabel.ParsedConfigs, timeout time.Duration,
|
||||
) (*AlertManager, error) {
|
||||
|
||||
if err := httputil.CheckURL(alertManagerURL); err != nil {
|
||||
return nil, fmt.Errorf("invalid alertmanager URL: %w", err)
|
||||
}
|
||||
|
||||
tls := &promauth.TLSConfig{}
|
||||
if authCfg.TLSConfig != nil {
|
||||
tls = authCfg.TLSConfig
|
||||
|
||||
@@ -11,7 +11,6 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
|
||||
)
|
||||
|
||||
@@ -146,11 +145,11 @@ func TestAlertManager_Send(t *testing.T) {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
|
||||
if err := am.Send(context.Background(), []Alert{{Labels: map[string]string{"a": "b"}}}, [][]prompb.Label{{{Name: "a", Value: "b"}}}, nil); err == nil {
|
||||
if err := am.Send(context.Background(), []Alert{{Labels: map[string]string{"a": "b"}}}, nil); err == nil {
|
||||
t.Fatalf("expected connection error got nil")
|
||||
}
|
||||
|
||||
if err := am.Send(context.Background(), []Alert{{Labels: map[string]string{"a": "b"}}}, [][]prompb.Label{{{Name: "a", Value: "b"}}}, nil); err == nil {
|
||||
if err := am.Send(context.Background(), []Alert{{Labels: map[string]string{"a": "b"}}}, nil); err == nil {
|
||||
t.Fatalf("expected wrong http code error got nil")
|
||||
}
|
||||
|
||||
@@ -161,7 +160,7 @@ func TestAlertManager_Send(t *testing.T) {
|
||||
End: time.Now().UTC(),
|
||||
Labels: map[string]string{"alertname": "alert0"},
|
||||
Annotations: map[string]string{"a": "b", "c": "d"},
|
||||
}}, [][]prompb.Label{{{Name: "alertname", Value: "alert0"}}}, map[string]string{headerKey: "bar"}); err != nil {
|
||||
}}, map[string]string{headerKey: "bar"}); err != nil {
|
||||
t.Fatalf("unexpected error %s", err)
|
||||
}
|
||||
|
||||
@@ -175,7 +174,7 @@ func TestAlertManager_Send(t *testing.T) {
|
||||
Name: "alert2",
|
||||
Labels: map[string]string{"rule": "test", "tenant": "1"},
|
||||
},
|
||||
}, [][]prompb.Label{{{Name: "rule", Value: "test"}, {Name: "tenant", Value: "0"}}, {{Name: "rule", Value: "test"}, {Name: "tenant", Value: "1"}}}, map[string]string{headerKey: "bar"}); err != nil {
|
||||
}, map[string]string{headerKey: "bar"}); err != nil {
|
||||
t.Fatalf("unexpected error %s", err)
|
||||
}
|
||||
|
||||
@@ -188,7 +187,7 @@ func TestAlertManager_Send(t *testing.T) {
|
||||
Name: "alert2",
|
||||
Labels: map[string]string{},
|
||||
},
|
||||
}, [][]prompb.Label{{{Name: "rule", Value: "test"}}, {{}}}, map[string]string{}); err != nil {
|
||||
}, map[string]string{}); err != nil {
|
||||
t.Fatalf("unexpected error %s", err)
|
||||
}
|
||||
|
||||
|
||||
@@ -27,9 +27,15 @@ type Config struct {
|
||||
// PathPrefix is added to URL path before adding alertManagerPath value
|
||||
PathPrefix string `yaml:"path_prefix,omitempty"`
|
||||
|
||||
ConsulSDConfigs []ConsulSDConfigs `yaml:"consul_sd_configs,omitempty"`
|
||||
DNSSDConfigs []DNSSDConfigs `yaml:"dns_sd_configs,omitempty"`
|
||||
StaticConfigs []StaticConfig `yaml:"static_configs,omitempty"`
|
||||
// ConsulSDConfigs contains list of settings for service discovery via Consul
|
||||
// see https://prometheus.io/docs/prometheus/latest/configuration/configuration/#consul_sd_config
|
||||
ConsulSDConfigs []consul.SDConfig `yaml:"consul_sd_configs,omitempty"`
|
||||
// DNSSDConfigs contains list of settings for service discovery via DNS.
|
||||
// See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#dns_sd_config
|
||||
DNSSDConfigs []dns.SDConfig `yaml:"dns_sd_configs,omitempty"`
|
||||
|
||||
// StaticConfigs contains list of static targets
|
||||
StaticConfigs []StaticConfig `yaml:"static_configs,omitempty"`
|
||||
|
||||
// HTTPClientConfig contains HTTP configuration for Notifier clients
|
||||
HTTPClientConfig promauth.HTTPClientConfig `yaml:",inline"`
|
||||
@@ -56,29 +62,14 @@ type Config struct {
|
||||
parsedAlertRelabelConfigs *promrelabel.ParsedConfigs
|
||||
}
|
||||
|
||||
// staticConfig contains list of static targets in the following form:
|
||||
// StaticConfig contains list of static targets in the following form:
|
||||
//
|
||||
// targets:
|
||||
// [ - '<host>' ]
|
||||
type StaticConfig struct {
|
||||
Targets []string `yaml:"targets"`
|
||||
// HTTPClientConfig contains HTTP configuration for the Targets
|
||||
HTTPClientConfig promauth.HTTPClientConfig `yaml:",inline"`
|
||||
AlertRelabelConfigs []promrelabel.RelabelConfig `yaml:"alert_relabel_configs,omitempty"`
|
||||
}
|
||||
|
||||
// ConsulSDConfigs contains list of settings for service discovery via Consul,
|
||||
// see https://prometheus.io/docs/prometheus/latest/configuration/configuration/#consul_sd_config
|
||||
type ConsulSDConfigs struct {
|
||||
consul.SDConfig `yaml:",inline"`
|
||||
AlertRelabelConfigs []promrelabel.RelabelConfig `yaml:"alert_relabel_configs,omitempty"`
|
||||
}
|
||||
|
||||
// DNSSDConfigs contains list of settings for service discovery via DNS,
|
||||
// See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#dns_sd_config
|
||||
type DNSSDConfigs struct {
|
||||
dns.SDConfig `yaml:",inline"`
|
||||
AlertRelabelConfigs []promrelabel.RelabelConfig `yaml:"alert_relabel_configs,omitempty"`
|
||||
HTTPClientConfig promauth.HTTPClientConfig `yaml:",inline"`
|
||||
}
|
||||
|
||||
// UnmarshalYAML implements the yaml.Unmarshaler interface.
|
||||
@@ -104,31 +95,6 @@ func (cfg *Config) UnmarshalYAML(unmarshal func(any) error) error {
|
||||
}
|
||||
cfg.parsedAlertRelabelConfigs = arCfg
|
||||
|
||||
for _, s := range cfg.StaticConfigs {
|
||||
if len(s.AlertRelabelConfigs) > 0 {
|
||||
_, err := promrelabel.ParseRelabelConfigs(s.AlertRelabelConfigs)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse alert_relabel_configs in static_config: %w", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
for _, s := range cfg.ConsulSDConfigs {
|
||||
if len(s.AlertRelabelConfigs) > 0 {
|
||||
_, err := promrelabel.ParseRelabelConfigs(s.AlertRelabelConfigs)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse alert_relabel_configs in consul_sd_config: %w", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
for _, s := range cfg.DNSSDConfigs {
|
||||
if len(s.AlertRelabelConfigs) > 0 {
|
||||
_, err := promrelabel.ParseRelabelConfigs(s.AlertRelabelConfigs)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse alert_relabel_configs in dns_sd_config: %w", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
b, err := yaml.Marshal(cfg)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to marshal configuration for checksum: %w", err)
|
||||
|
||||
@@ -35,6 +35,4 @@ func TestParseConfig_Failure(t *testing.T) {
|
||||
|
||||
f("testdata/unknownFields.bad.yaml", "unknown field")
|
||||
f("non-existing-file", "error reading")
|
||||
f("testdata/consul.bad.yaml", "failed to parse alert_relabel_configs in consul_sd_config")
|
||||
f("testdata/dns.bad.yaml", "failed to parse alert relabeling config")
|
||||
}
|
||||
|
||||
@@ -8,7 +8,6 @@ import (
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discovery/consul"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discovery/dns"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutil"
|
||||
@@ -29,7 +28,11 @@ type configWatcher struct {
|
||||
targets map[TargetType][]Target
|
||||
}
|
||||
|
||||
func newWatcher(cfg *Config, gen AlertURLGenerator) (*configWatcher, error) {
|
||||
func newWatcher(path string, gen AlertURLGenerator) (*configWatcher, error) {
|
||||
cfg, err := parseConfig(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
cw := &configWatcher{
|
||||
cfg: cfg,
|
||||
wg: sync.WaitGroup{},
|
||||
@@ -85,15 +88,18 @@ func (cw *configWatcher) reload(path string) error {
|
||||
return cw.start()
|
||||
}
|
||||
|
||||
func (cw *configWatcher) add(typeK TargetType, interval time.Duration, targetsFn getTargets) error {
|
||||
targetMetadata, errors := getTargetMetadata(targetsFn, cw.cfg)
|
||||
func (cw *configWatcher) add(typeK TargetType, interval time.Duration, labelsFn getLabels) error {
|
||||
targetMetadata, errors := getTargetMetadata(labelsFn, cw.cfg)
|
||||
for _, err := range errors {
|
||||
return fmt.Errorf("failed to init notifier for %q: %w", typeK, err)
|
||||
}
|
||||
|
||||
cw.updateTargets(typeK, targetMetadata, cw.cfg, cw.genFn)
|
||||
|
||||
cw.wg.Go(func() {
|
||||
cw.wg.Add(1)
|
||||
go func() {
|
||||
defer cw.wg.Done()
|
||||
|
||||
ticker := time.NewTicker(interval)
|
||||
defer ticker.Stop()
|
||||
|
||||
@@ -103,77 +109,62 @@ func (cw *configWatcher) add(typeK TargetType, interval time.Duration, targetsFn
|
||||
return
|
||||
case <-ticker.C:
|
||||
}
|
||||
targetMetadata, errors := getTargetMetadata(targetsFn, cw.cfg)
|
||||
targetMetadata, errors := getTargetMetadata(labelsFn, cw.cfg)
|
||||
for _, err := range errors {
|
||||
logger.Errorf("failed to init notifier for %q: %w", typeK, err)
|
||||
}
|
||||
cw.updateTargets(typeK, targetMetadata, cw.cfg, cw.genFn)
|
||||
}
|
||||
})
|
||||
}()
|
||||
return nil
|
||||
}
|
||||
|
||||
type targetMetadata struct {
|
||||
*promutil.Labels
|
||||
alertRelabelConfigs *promrelabel.ParsedConfigs
|
||||
}
|
||||
|
||||
func getTargetMetadata(targetsFn getTargets, cfg *Config) (map[string]targetMetadata, []error) {
|
||||
metaLabelsList, alertRelabelCfgs, err := targetsFn()
|
||||
func getTargetMetadata(labelsFn getLabels, cfg *Config) (map[string]*promutil.Labels, []error) {
|
||||
metaLabels, err := labelsFn()
|
||||
if err != nil {
|
||||
return nil, []error{fmt.Errorf("failed to get labels: %w", err)}
|
||||
}
|
||||
targetMts := make(map[string]targetMetadata, len(metaLabelsList))
|
||||
targetMetadata := make(map[string]*promutil.Labels, len(metaLabels))
|
||||
var errors []error
|
||||
duplicates := make(map[string]struct{})
|
||||
for i := range metaLabelsList {
|
||||
metaLabels := metaLabelsList[i]
|
||||
alertRelabelCfg := alertRelabelCfgs[i]
|
||||
for _, labels := range metaLabels {
|
||||
target := labels.Get("__address__")
|
||||
u, processedLabels, err := parseLabels(target, labels, cfg)
|
||||
if err != nil {
|
||||
errors = append(errors, err)
|
||||
continue
|
||||
}
|
||||
if len(u) == 0 {
|
||||
continue
|
||||
}
|
||||
// check for duplicated targets
|
||||
// targets with same address but different alert_relabel_configs are still considered duplicates since it's mostly due to misconfiguration and could cause duplicated notifications.
|
||||
if _, ok := duplicates[u]; ok {
|
||||
if !*suppressDuplicateTargetErrors {
|
||||
logger.Errorf("skipping duplicate target with identical address %q; "+
|
||||
"make sure service discovery and relabeling is set up properly; "+
|
||||
"original labels: %s; resulting labels: %s",
|
||||
u, labels, processedLabels)
|
||||
}
|
||||
continue
|
||||
}
|
||||
duplicates[u] = struct{}{}
|
||||
targetMts[u] = targetMetadata{
|
||||
Labels: processedLabels,
|
||||
alertRelabelConfigs: alertRelabelCfg,
|
||||
}
|
||||
for _, labels := range metaLabels {
|
||||
target := labels.Get("__address__")
|
||||
u, processedLabels, err := parseLabels(target, labels, cfg)
|
||||
if err != nil {
|
||||
errors = append(errors, err)
|
||||
continue
|
||||
}
|
||||
if len(u) == 0 {
|
||||
continue
|
||||
}
|
||||
if _, ok := duplicates[u]; ok { // check for duplicates
|
||||
if !*suppressDuplicateTargetErrors {
|
||||
logger.Errorf("skipping duplicate target with identical address %q; "+
|
||||
"make sure service discovery and relabeling is set up properly; "+
|
||||
"original labels: %s; resulting labels: %s",
|
||||
u, labels, processedLabels)
|
||||
}
|
||||
continue
|
||||
}
|
||||
duplicates[u] = struct{}{}
|
||||
targetMetadata[u] = processedLabels
|
||||
}
|
||||
return targetMts, errors
|
||||
return targetMetadata, errors
|
||||
}
|
||||
|
||||
type getTargets func() ([][]*promutil.Labels, []*promrelabel.ParsedConfigs, error)
|
||||
type getLabels func() ([]*promutil.Labels, error)
|
||||
|
||||
func (cw *configWatcher) start() error {
|
||||
if len(cw.cfg.StaticConfigs) > 0 {
|
||||
var targets []Target
|
||||
for i, cfg := range cw.cfg.StaticConfigs {
|
||||
alertRelabelConfig, _ := promrelabel.ParseRelabelConfigs(cw.cfg.StaticConfigs[i].AlertRelabelConfigs)
|
||||
for _, cfg := range cw.cfg.StaticConfigs {
|
||||
httpCfg := mergeHTTPClientConfigs(cw.cfg.HTTPClientConfig, cfg.HTTPClientConfig)
|
||||
for _, target := range cfg.Targets {
|
||||
address, labels, err := parseLabels(target, nil, cw.cfg)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse labels for target %q: %w", target, err)
|
||||
}
|
||||
notifier, err := NewAlertManager(address, cw.genFn, httpCfg, alertRelabelConfig, cw.cfg.Timeout.Duration())
|
||||
notifier, err := NewAlertManager(address, cw.genFn, httpCfg, cw.cfg.parsedAlertRelabelConfigs, cw.cfg.Timeout.Duration())
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to init alertmanager for addr %q: %w", address, err)
|
||||
}
|
||||
@@ -187,20 +178,17 @@ func (cw *configWatcher) start() error {
|
||||
}
|
||||
|
||||
if len(cw.cfg.ConsulSDConfigs) > 0 {
|
||||
err := cw.add(TargetConsul, *consul.SDCheckInterval, func() ([][]*promutil.Labels, []*promrelabel.ParsedConfigs, error) {
|
||||
var labels [][]*promutil.Labels
|
||||
var alertRelabelConfigs []*promrelabel.ParsedConfigs
|
||||
err := cw.add(TargetConsul, *consul.SDCheckInterval, func() ([]*promutil.Labels, error) {
|
||||
var labels []*promutil.Labels
|
||||
for i := range cw.cfg.ConsulSDConfigs {
|
||||
alertRelabelConfig, _ := promrelabel.ParseRelabelConfigs(cw.cfg.ConsulSDConfigs[i].AlertRelabelConfigs)
|
||||
sdc := &cw.cfg.ConsulSDConfigs[i]
|
||||
targetLabels, err := sdc.GetLabels(cw.cfg.baseDir)
|
||||
if err != nil {
|
||||
return nil, nil, fmt.Errorf("got labels err: %w", err)
|
||||
return nil, fmt.Errorf("got labels err: %w", err)
|
||||
}
|
||||
labels = append(labels, targetLabels)
|
||||
alertRelabelConfigs = append(alertRelabelConfigs, alertRelabelConfig)
|
||||
labels = append(labels, targetLabels...)
|
||||
}
|
||||
return labels, alertRelabelConfigs, nil
|
||||
return labels, nil
|
||||
})
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to start consulSD discovery: %w", err)
|
||||
@@ -208,21 +196,17 @@ func (cw *configWatcher) start() error {
|
||||
}
|
||||
|
||||
if len(cw.cfg.DNSSDConfigs) > 0 {
|
||||
err := cw.add(TargetDNS, *dns.SDCheckInterval, func() ([][]*promutil.Labels, []*promrelabel.ParsedConfigs, error) {
|
||||
var labels [][]*promutil.Labels
|
||||
var alertRelabelConfigs []*promrelabel.ParsedConfigs
|
||||
err := cw.add(TargetDNS, *dns.SDCheckInterval, func() ([]*promutil.Labels, error) {
|
||||
var labels []*promutil.Labels
|
||||
for i := range cw.cfg.DNSSDConfigs {
|
||||
alertRelabelConfig, _ := promrelabel.ParseRelabelConfigs(cw.cfg.DNSSDConfigs[i].AlertRelabelConfigs)
|
||||
sdc := &cw.cfg.DNSSDConfigs[i]
|
||||
targetLabels, err := sdc.GetLabels(cw.cfg.baseDir)
|
||||
if err != nil {
|
||||
return nil, nil, fmt.Errorf("got labels err: %w", err)
|
||||
return nil, fmt.Errorf("got labels err: %w", err)
|
||||
}
|
||||
labels = append(labels, targetLabels)
|
||||
alertRelabelConfigs = append(alertRelabelConfigs, alertRelabelConfig)
|
||||
|
||||
labels = append(labels, targetLabels...)
|
||||
}
|
||||
return labels, alertRelabelConfigs, nil
|
||||
return labels, nil
|
||||
})
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to start DNSSD discovery: %w", err)
|
||||
@@ -256,30 +240,30 @@ func (cw *configWatcher) setTargets(key TargetType, targets []Target) {
|
||||
cw.targetsMu.Unlock()
|
||||
}
|
||||
|
||||
func (cw *configWatcher) updateTargets(key TargetType, targetMts map[string]targetMetadata, cfg *Config, genFn AlertURLGenerator) {
|
||||
func (cw *configWatcher) updateTargets(key TargetType, targetMetadata map[string]*promutil.Labels, cfg *Config, genFn AlertURLGenerator) {
|
||||
cw.targetsMu.Lock()
|
||||
defer cw.targetsMu.Unlock()
|
||||
oldTargets := cw.targets[key]
|
||||
var updatedTargets []Target
|
||||
for _, ot := range oldTargets {
|
||||
if _, ok := targetMts[ot.Addr()]; !ok {
|
||||
if _, ok := targetMetadata[ot.Addr()]; !ok {
|
||||
// if target not exists in currentTargets, close it
|
||||
ot.Close()
|
||||
} else {
|
||||
updatedTargets = append(updatedTargets, ot)
|
||||
delete(targetMts, ot.Addr())
|
||||
delete(targetMetadata, ot.Addr())
|
||||
}
|
||||
}
|
||||
// create new resources for the new targets
|
||||
for addr, metadata := range targetMts {
|
||||
am, err := NewAlertManager(addr, genFn, cfg.HTTPClientConfig, metadata.alertRelabelConfigs, cfg.Timeout.Duration())
|
||||
for addr, labels := range targetMetadata {
|
||||
am, err := NewAlertManager(addr, genFn, cfg.HTTPClientConfig, cfg.parsedAlertRelabelConfigs, cfg.Timeout.Duration())
|
||||
if err != nil {
|
||||
logger.Errorf("failed to init %s notifier with addr %q: %w", key, addr, err)
|
||||
continue
|
||||
}
|
||||
updatedTargets = append(updatedTargets, Target{
|
||||
Notifier: am,
|
||||
Labels: metadata.Labels,
|
||||
Labels: labels,
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
@@ -7,7 +7,6 @@ import (
|
||||
"net/http/httptest"
|
||||
"os"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
@@ -29,11 +28,7 @@ static_configs:
|
||||
- localhost:9093
|
||||
- localhost:9094
|
||||
`)
|
||||
cfg, err := parseConfig(f.Name())
|
||||
if err != nil {
|
||||
t.Fatalf("failed to parse config: %s", err)
|
||||
}
|
||||
cw, err := newWatcher(cfg, nil)
|
||||
cw, err := newWatcher(f.Name(), nil)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to start config watcher: %s", err)
|
||||
}
|
||||
@@ -88,64 +83,33 @@ consul_sd_configs:
|
||||
- server: %s
|
||||
services:
|
||||
- alertmanager
|
||||
- server: %s
|
||||
services:
|
||||
- alertmanager
|
||||
alert_relabel_configs:
|
||||
- target_label: "foo"
|
||||
replacement: "tar"
|
||||
`, consulSDServer.URL, consulSDServer.URL))
|
||||
`, consulSDServer.URL))
|
||||
|
||||
cfg, err := parseConfig(consulSDFile.Name())
|
||||
if err != nil {
|
||||
t.Fatalf("failed to parse config: %s", err)
|
||||
}
|
||||
cw, err := newWatcher(cfg, nil)
|
||||
cw, err := newWatcher(consulSDFile.Name(), nil)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to start config watcher: %s", err)
|
||||
}
|
||||
defer cw.mustStop()
|
||||
|
||||
if len(cw.notifiers()) != 3 {
|
||||
t.Fatalf("expected to get 3 notifiers; got %d", len(cw.notifiers()))
|
||||
if len(cw.notifiers()) != 2 {
|
||||
t.Fatalf("expected to get 2 notifiers; got %d", len(cw.notifiers()))
|
||||
}
|
||||
|
||||
expAddr1 := fmt.Sprintf("https://%s/proxy/api/v2/alerts", fakeConsulService1)
|
||||
expAddr2 := fmt.Sprintf("https://%s/proxy/api/v2/alerts", fakeConsulService2)
|
||||
expAddr3 := fmt.Sprintf("https://%s/proxy/api/v2/alerts", fakeConsulService3)
|
||||
|
||||
n1, n2, n3 := cw.notifiers()[0], cw.notifiers()[1], cw.notifiers()[2]
|
||||
n1, n2 := cw.notifiers()[0], cw.notifiers()[1]
|
||||
if n1.Addr() != expAddr1 {
|
||||
t.Fatalf("exp address %q; got %q", expAddr1, n1.Addr())
|
||||
}
|
||||
if n2.Addr() != expAddr2 {
|
||||
t.Fatalf("exp address %q; got %q", expAddr2, n2.Addr())
|
||||
}
|
||||
if n3.Addr() != expAddr3 {
|
||||
t.Fatalf("exp address %q; got %q", expAddr3, n3.Addr())
|
||||
}
|
||||
|
||||
if n1.(*AlertManager).relabelConfigs.String() != "" {
|
||||
t.Fatalf("unexpected relabel configs: %q", n1.(*AlertManager).relabelConfigs.String())
|
||||
}
|
||||
if n2.(*AlertManager).relabelConfigs.String() != "" {
|
||||
t.Fatalf("unexpected relabel configs: %q", n2.(*AlertManager).relabelConfigs.String())
|
||||
}
|
||||
if n3.(*AlertManager).relabelConfigs.String() != "- target_label: foo\n replacement: tar\n" {
|
||||
t.Fatalf("unexpected relabel configs: %q", n3.(*AlertManager).relabelConfigs.String())
|
||||
}
|
||||
|
||||
f := func() bool { return len(cw.notifiers()) == 1 }
|
||||
if !waitFor(f, time.Second) {
|
||||
t.Fatalf("expected to get 1 notifiers; got %d", len(cw.notifiers()))
|
||||
}
|
||||
n3 = cw.notifiers()[0]
|
||||
if n3.Addr() != expAddr3 {
|
||||
t.Fatalf("exp address %q; got %q", expAddr3, n3.Addr())
|
||||
}
|
||||
if n3.(*AlertManager).relabelConfigs.String() != "- target_label: foo\n replacement: tar\n" {
|
||||
t.Fatalf("unexpected relabel configs: %q", n3.(*AlertManager).relabelConfigs.String())
|
||||
}
|
||||
}
|
||||
|
||||
// TestConfigWatcherReloadConcurrent supposed to test concurrent
|
||||
@@ -200,11 +164,7 @@ consul_sd_configs:
|
||||
"unknownFields.bad.yaml",
|
||||
}
|
||||
|
||||
cfg, err := parseConfig(paths[0])
|
||||
if err != nil {
|
||||
t.Fatalf("failed to parse config: %s", err)
|
||||
}
|
||||
cw, err := newWatcher(cfg, nil)
|
||||
cw, err := newWatcher(paths[0], nil)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to start config watcher: %s", err)
|
||||
}
|
||||
@@ -212,16 +172,18 @@ consul_sd_configs:
|
||||
|
||||
const workers = 500
|
||||
const iterations = 10
|
||||
var wg sync.WaitGroup
|
||||
for n := range workers {
|
||||
wg.Go(func() {
|
||||
wg := sync.WaitGroup{}
|
||||
wg.Add(workers)
|
||||
for i := 0; i < workers; i++ {
|
||||
go func(n int) {
|
||||
defer wg.Done()
|
||||
r := rand.New(rand.NewSource(int64(n)))
|
||||
for i := 0; i < iterations; i++ {
|
||||
rnd := r.Intn(len(paths))
|
||||
_ = cw.reload(paths[rnd]) // update can fail and this is expected
|
||||
_ = cw.notifiers()
|
||||
}
|
||||
})
|
||||
}(i)
|
||||
}
|
||||
wg.Wait()
|
||||
}
|
||||
@@ -240,11 +202,10 @@ func checkErr(t *testing.T, err error) {
|
||||
const (
|
||||
fakeConsulService1 = "127.0.0.1:9093"
|
||||
fakeConsulService2 = "127.0.0.1:9095"
|
||||
fakeConsulService3 = "127.0.0.1:9097"
|
||||
)
|
||||
|
||||
func newFakeConsulServer() *httptest.Server {
|
||||
var requestCount atomic.Int32
|
||||
requestCount := 0
|
||||
mux := http.NewServeMux()
|
||||
mux.HandleFunc("/v1/agent/self", func(rw http.ResponseWriter, _ *http.Request) {
|
||||
rw.Write([]byte(`{"Config": {"Datacenter": "dc1"}}`))
|
||||
@@ -259,7 +220,7 @@ func newFakeConsulServer() *httptest.Server {
|
||||
}`))
|
||||
})
|
||||
mux.HandleFunc("/v1/health/service/alertmanager", func(rw http.ResponseWriter, _ *http.Request) {
|
||||
if requestCount.Load() == 0 {
|
||||
if requestCount == 0 {
|
||||
rw.Header().Set("X-Consul-Index", "1")
|
||||
rw.Write([]byte(`
|
||||
[
|
||||
@@ -399,7 +360,7 @@ func newFakeConsulServer() *httptest.Server {
|
||||
}
|
||||
]`))
|
||||
}
|
||||
requestCount.Add(1)
|
||||
requestCount++
|
||||
})
|
||||
|
||||
return httptest.NewServer(mux)
|
||||
|
||||
@@ -5,8 +5,6 @@ import (
|
||||
"fmt"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||
)
|
||||
|
||||
// FakeNotifier is a mock notifier
|
||||
@@ -17,32 +15,14 @@ type FakeNotifier struct {
|
||||
counter int
|
||||
}
|
||||
|
||||
// InitFakeNotifier initializes global notifier to FakeNotifier,
|
||||
// and returns a cleanup function to restore the original getActiveNotifiers.
|
||||
func InitFakeNotifier() (*FakeNotifier, func()) {
|
||||
originalGetActiveNotifiers := getActiveNotifiers
|
||||
fn := &FakeNotifier{}
|
||||
getActiveNotifiers = func() []Notifier {
|
||||
return []Notifier{fn}
|
||||
}
|
||||
return fn, func() {
|
||||
getActiveNotifiers = originalGetActiveNotifiers
|
||||
}
|
||||
}
|
||||
|
||||
// Close does nothing
|
||||
func (*FakeNotifier) Close() {}
|
||||
|
||||
// LastError returns last error message
|
||||
func (*FakeNotifier) LastError() string {
|
||||
return ""
|
||||
}
|
||||
|
||||
// Addr returns ""
|
||||
func (*FakeNotifier) Addr() string { return "" }
|
||||
|
||||
// Send sets alerts and increases counter
|
||||
func (fn *FakeNotifier) Send(_ context.Context, alerts []Alert, _ [][]prompb.Label, _ map[string]string) error {
|
||||
func (fn *FakeNotifier) Send(_ context.Context, alerts []Alert, _ map[string]string) error {
|
||||
fn.Lock()
|
||||
defer fn.Unlock()
|
||||
fn.counter += len(alerts)
|
||||
|
||||
@@ -1,22 +1,14 @@
|
||||
package notifier
|
||||
|
||||
import (
|
||||
"context"
|
||||
"flag"
|
||||
"fmt"
|
||||
"net/url"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httputil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutil"
|
||||
)
|
||||
|
||||
@@ -65,61 +57,11 @@ var (
|
||||
sendTimeout = flagutil.NewArrayDuration("notifier.sendTimeout", 10*time.Second, "Timeout when sending alerts to the corresponding -notifier.url")
|
||||
)
|
||||
|
||||
// AlertURLGeneratorFn returns a URL to the passed alert object.
|
||||
// Call InitAlertURLGeneratorFn before using this function.
|
||||
var AlertURLGeneratorFn AlertURLGenerator
|
||||
|
||||
// InitAlertURLGeneratorFn populates AlertURLGeneratorFn
|
||||
func InitAlertURLGeneratorFn(externalURL *url.URL, externalAlertSource string, validateTemplate bool) error {
|
||||
if externalAlertSource == "" {
|
||||
AlertURLGeneratorFn = func(a Alert) string {
|
||||
gID, aID := strconv.FormatUint(a.GroupID, 10), strconv.FormatUint(a.ID, 10)
|
||||
return fmt.Sprintf("%s/vmalert/alert?%s=%s&%s=%s", externalURL, "group_id", gID, "alert_id", aID)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
if validateTemplate {
|
||||
if err := ValidateTemplates(map[string]string{
|
||||
"tpl": externalAlertSource,
|
||||
}); err != nil {
|
||||
return fmt.Errorf("error validating source template %s: %w", externalAlertSource, err)
|
||||
}
|
||||
}
|
||||
m := map[string]string{
|
||||
"tpl": externalAlertSource,
|
||||
}
|
||||
AlertURLGeneratorFn = func(alert Alert) string {
|
||||
qFn := func(_ string) ([]datasource.Metric, error) {
|
||||
return nil, fmt.Errorf("`query` template isn't supported for alert source template")
|
||||
}
|
||||
templated, err := alert.ExecTemplate(qFn, alert.Labels, m)
|
||||
if err != nil {
|
||||
logger.Errorf("cannot template alert source: %s", err)
|
||||
}
|
||||
return fmt.Sprintf("%s/%s", externalURL, templated["tpl"])
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
var (
|
||||
// getActiveNotifiers returns the current list of Notifier objects.
|
||||
getActiveNotifiers func() []Notifier
|
||||
// globalRelabelCfg stores the parsed alert relabeling config from the config file if there is
|
||||
globalRelabelCfg *promrelabel.ParsedConfigs
|
||||
|
||||
// cw holds a configWatcher for configPath configuration file
|
||||
// configWatcher provides a list of Notifier objects discovered
|
||||
// from static config or via service discovery.
|
||||
// cw is not nil only if configPath is provided.
|
||||
cw *configWatcher
|
||||
|
||||
// externalLabels is a global variable for holding external labels configured via flags
|
||||
// It is supposed to be inited via Init function only.
|
||||
externalLabels map[string]string
|
||||
// externalURL is a global variable for holding external URL value configured via flag
|
||||
// It is supposed to be inited via Init function only.
|
||||
externalURL string
|
||||
)
|
||||
// cw holds a configWatcher for configPath configuration file
|
||||
// configWatcher provides a list of Notifier objects discovered
|
||||
// from static config or via service discovery.
|
||||
// cw is not nil only if configPath is provided.
|
||||
var cw *configWatcher
|
||||
|
||||
// Reload checks the changes in configPath configuration file
|
||||
// and applies changes if any.
|
||||
@@ -130,62 +72,66 @@ func Reload() error {
|
||||
return cw.reload(*configPath)
|
||||
}
|
||||
|
||||
var staticNotifiersFn func() []Notifier
|
||||
|
||||
var (
|
||||
// externalLabels is a global variable for holding external labels configured via flags
|
||||
// It is supposed to be inited via Init function only.
|
||||
externalLabels map[string]string
|
||||
// externalURL is a global variable for holding external URL value configured via flag
|
||||
// It is supposed to be inited via Init function only.
|
||||
externalURL string
|
||||
)
|
||||
|
||||
// Init returns a function for retrieving actual list of Notifier objects.
|
||||
// Init works in two mods:
|
||||
// - configuration via flags (for backward compatibility). Is always static
|
||||
// and don't support live reloads.
|
||||
// - configuration via file. Supports live reloads and service discovery.
|
||||
//
|
||||
// Init returns an error if both mods are used.
|
||||
func Init(extLabels map[string]string, extURL string) error {
|
||||
func Init(gen AlertURLGenerator, extLabels map[string]string, extURL string) (func() []Notifier, error) {
|
||||
externalURL = extURL
|
||||
externalLabels = extLabels
|
||||
_, err := url.Parse(externalURL)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse external URL: %w", err)
|
||||
return nil, fmt.Errorf("failed to parse external URL: %w", err)
|
||||
}
|
||||
|
||||
if *blackHole {
|
||||
if len(*addrs) > 0 || *configPath != "" {
|
||||
return fmt.Errorf("only one of -notifier.blackhole, -notifier.url and -notifier.config flags must be specified")
|
||||
return nil, fmt.Errorf("only one of -notifier.blackhole, -notifier.url and -notifier.config flags must be specified")
|
||||
}
|
||||
notifier := newBlackHoleNotifier()
|
||||
getActiveNotifiers = func() []Notifier {
|
||||
staticNotifiersFn = func() []Notifier {
|
||||
return []Notifier{notifier}
|
||||
}
|
||||
return nil
|
||||
return staticNotifiersFn, nil
|
||||
}
|
||||
|
||||
if *configPath == "" && len(*addrs) == 0 {
|
||||
return nil
|
||||
return nil, nil
|
||||
}
|
||||
if *configPath != "" && len(*addrs) > 0 {
|
||||
return fmt.Errorf("only one of -notifier.config or -notifier.url flags must be specified")
|
||||
return nil, fmt.Errorf("only one of -notifier.config or -notifier.url flags must be specified")
|
||||
}
|
||||
|
||||
if len(*addrs) > 0 {
|
||||
notifiers, err := notifiersFromFlags(AlertURLGeneratorFn)
|
||||
notifiers, err := notifiersFromFlags(gen)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create notifier from flag values: %w", err)
|
||||
return nil, fmt.Errorf("failed to create notifier from flag values: %w", err)
|
||||
}
|
||||
getActiveNotifiers = func() []Notifier {
|
||||
staticNotifiersFn = func() []Notifier {
|
||||
return notifiers
|
||||
}
|
||||
return nil
|
||||
return staticNotifiersFn, nil
|
||||
}
|
||||
|
||||
cfg, err := parseConfig(*configPath)
|
||||
cw, err = newWatcher(*configPath, gen)
|
||||
if err != nil {
|
||||
return err
|
||||
return nil, fmt.Errorf("failed to init config watcher: %w", err)
|
||||
}
|
||||
if cfg.AlertRelabelConfigs != nil {
|
||||
globalRelabelCfg = cfg.parsedAlertRelabelConfigs
|
||||
}
|
||||
cw, err = newWatcher(cfg, AlertURLGeneratorFn)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to init config watcher: %w", err)
|
||||
}
|
||||
getActiveNotifiers = cw.notifiers
|
||||
return nil
|
||||
return cw.notifiers, nil
|
||||
}
|
||||
|
||||
// InitSecretFlags must be called after flag.Parse and before any logging
|
||||
@@ -229,9 +175,6 @@ func notifiersFromFlags(gen AlertURLGenerator) ([]Notifier, error) {
|
||||
Headers: []string{headers.GetOptionalArg(i)},
|
||||
}
|
||||
|
||||
if err := httputil.CheckURL(addr); err != nil {
|
||||
return nil, fmt.Errorf("invalid notifier.url %q: %w", addr, err)
|
||||
}
|
||||
addr = strings.TrimSuffix(addr, "/")
|
||||
am, err := NewAlertManager(addr+alertManagerPath, gen, authCfg, nil, sendTimeout.GetOptionalArg(i))
|
||||
if err != nil {
|
||||
@@ -263,58 +206,23 @@ const (
|
||||
|
||||
// GetTargets returns list of static or discovered targets
|
||||
// via notifier configuration.
|
||||
//
|
||||
// Must be called after Init.
|
||||
func GetTargets() map[TargetType][]Target {
|
||||
if getActiveNotifiers == nil {
|
||||
return nil
|
||||
var targets = make(map[TargetType][]Target)
|
||||
|
||||
if staticNotifiersFn != nil {
|
||||
for _, ns := range staticNotifiersFn() {
|
||||
targets[TargetStatic] = append(targets[TargetStatic], Target{
|
||||
Notifier: ns,
|
||||
})
|
||||
}
|
||||
}
|
||||
targets := make(map[TargetType][]Target)
|
||||
// use cached targets from configWatcher instead of getActiveNotifiers for the extra target labels
|
||||
|
||||
if cw != nil {
|
||||
cw.targetsMu.RLock()
|
||||
for key, ns := range cw.targets {
|
||||
targets[key] = append(targets[key], ns...)
|
||||
}
|
||||
cw.targetsMu.RUnlock()
|
||||
return targets
|
||||
}
|
||||
|
||||
// static notifiers don't have labels
|
||||
for _, ns := range getActiveNotifiers() {
|
||||
targets[TargetStatic] = append(targets[TargetStatic], Target{
|
||||
Notifier: ns,
|
||||
})
|
||||
}
|
||||
return targets
|
||||
}
|
||||
|
||||
// Send sends alerts to all active notifiers
|
||||
func Send(ctx context.Context, alerts []Alert, notifierHeaders map[string]string) chan error {
|
||||
alertsToSend := make([]Alert, 0, len(alerts))
|
||||
lblss := make([][]prompb.Label, 0, len(alerts))
|
||||
// apply global relabel config first without modifying original alerts in alerts
|
||||
for _, a := range alerts {
|
||||
lbls := a.applyRelabelingIfNeeded(globalRelabelCfg)
|
||||
if len(lbls) == 0 {
|
||||
continue
|
||||
}
|
||||
alertsToSend = append(alertsToSend, a)
|
||||
lblss = append(lblss, lbls)
|
||||
}
|
||||
|
||||
wg := sync.WaitGroup{}
|
||||
activeNotifiers := getActiveNotifiers()
|
||||
errCh := make(chan error, len(activeNotifiers))
|
||||
defer close(errCh)
|
||||
for i := range activeNotifiers {
|
||||
nt := activeNotifiers[i]
|
||||
wg.Go(func() {
|
||||
if err := nt.Send(ctx, alertsToSend, lblss, notifierHeaders); err != nil {
|
||||
errCh <- fmt.Errorf("failed to send alerts to addr %q: %w", nt.Addr(), err)
|
||||
}
|
||||
})
|
||||
}
|
||||
wg.Wait()
|
||||
return errCh
|
||||
}
|
||||
|
||||
@@ -1,17 +1,9 @@
|
||||
package notifier
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"net/url"
|
||||
"os"
|
||||
"testing"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
|
||||
)
|
||||
|
||||
func TestInit(t *testing.T) {
|
||||
@@ -20,13 +12,14 @@ func TestInit(t *testing.T) {
|
||||
|
||||
*addrs = flagutil.ArrayString{"127.0.0.1", "127.0.0.2"}
|
||||
|
||||
err := Init(nil, "")
|
||||
fn, err := Init(nil, nil, "")
|
||||
if err != nil {
|
||||
t.Fatalf("%s", err)
|
||||
}
|
||||
|
||||
if len(getActiveNotifiers()) != 2 {
|
||||
t.Fatalf("expected to get 2 notifiers; got %d", len(getActiveNotifiers()))
|
||||
nfs := fn()
|
||||
if len(nfs) != 2 {
|
||||
t.Fatalf("expected to get 2 notifiers; got %d", len(nfs))
|
||||
}
|
||||
|
||||
targets := GetTargets()
|
||||
@@ -55,22 +48,19 @@ func TestInitNegative(t *testing.T) {
|
||||
*blackHole = oldBlackHole
|
||||
}()
|
||||
|
||||
f := func(path string, addr []string, bh bool) {
|
||||
f := func(path, addr string, bh bool) {
|
||||
*configPath = path
|
||||
*addrs = flagutil.ArrayString(addr)
|
||||
*addrs = flagutil.ArrayString{addr}
|
||||
*blackHole = bh
|
||||
if err := Init(nil, ""); err == nil {
|
||||
if _, err := Init(nil, nil, ""); err == nil {
|
||||
t.Fatalf("expected to get error; got nil instead")
|
||||
}
|
||||
}
|
||||
|
||||
// *configPath, *addrs and *blackhole are mutually exclusive
|
||||
f("/dummy/path", []string{"127.0.0.1"}, false)
|
||||
f("/dummy/path", []string{}, true)
|
||||
f("", []string{"127.0.0.1"}, true)
|
||||
// addr cannot be ""
|
||||
f("", []string{""}, false)
|
||||
f("", []string{"127.0.0.1", ""}, false)
|
||||
f("/dummy/path", "127.0.0.1", false)
|
||||
f("/dummy/path", "", true)
|
||||
f("", "127.0.0.1", true)
|
||||
}
|
||||
|
||||
func TestBlackHole(t *testing.T) {
|
||||
@@ -79,13 +69,14 @@ func TestBlackHole(t *testing.T) {
|
||||
|
||||
*blackHole = true
|
||||
|
||||
err := Init(nil, "")
|
||||
fn, err := Init(nil, nil, "")
|
||||
if err != nil {
|
||||
t.Fatalf("%s", err)
|
||||
}
|
||||
|
||||
if len(getActiveNotifiers()) != 1 {
|
||||
t.Fatalf("expected to get 1 notifier; got %d", len(getActiveNotifiers()))
|
||||
nfs := fn()
|
||||
if len(nfs) != 1 {
|
||||
t.Fatalf("expected to get 1 notifier; got %d", len(nfs))
|
||||
}
|
||||
|
||||
targets := GetTargets()
|
||||
@@ -100,114 +91,3 @@ func TestBlackHole(t *testing.T) {
|
||||
t.Fatalf("expected to get \"blackhole\"; got %q instead", nf1.Addr())
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetAlertURLGenerator(t *testing.T) {
|
||||
oldAlertURLGeneratorFn := AlertURLGeneratorFn
|
||||
defer func() { AlertURLGeneratorFn = oldAlertURLGeneratorFn }()
|
||||
|
||||
testAlert := Alert{GroupID: 42, ID: 2, Value: 4, Labels: map[string]string{"tenant": "baz"}}
|
||||
u, _ := url.Parse("https://victoriametrics.com/path")
|
||||
err := InitAlertURLGeneratorFn(u, "", false)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error %s", err)
|
||||
}
|
||||
exp := fmt.Sprintf("https://victoriametrics.com/path/vmalert/alert?%s=42&%s=2", "group_id", "alert_id")
|
||||
if exp != AlertURLGeneratorFn(testAlert) {
|
||||
t.Fatalf("unexpected url want %s, got %s", exp, AlertURLGeneratorFn(testAlert))
|
||||
}
|
||||
err = InitAlertURLGeneratorFn(nil, "foo?{{invalid}}", true)
|
||||
if err == nil {
|
||||
t.Fatalf("expected template validation error got nil")
|
||||
}
|
||||
err = InitAlertURLGeneratorFn(u, "foo?query={{$value}}&ds={{ $labels.tenant }}", true)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error %s", err)
|
||||
}
|
||||
if exp := "https://victoriametrics.com/path/foo?query=4&ds=baz"; exp != AlertURLGeneratorFn(testAlert) {
|
||||
t.Fatalf("unexpected url want %s, got %s", exp, AlertURLGeneratorFn(testAlert))
|
||||
}
|
||||
}
|
||||
|
||||
func TestSendAlerts(t *testing.T) {
|
||||
oldAlertURLGeneratorFn := AlertURLGeneratorFn
|
||||
defer func() { AlertURLGeneratorFn = oldAlertURLGeneratorFn }()
|
||||
AlertURLGeneratorFn = func(alert Alert) string {
|
||||
return ""
|
||||
}
|
||||
mux := http.NewServeMux()
|
||||
mux.HandleFunc("/", func(_ http.ResponseWriter, _ *http.Request) {
|
||||
t.Fatalf("should not be called")
|
||||
})
|
||||
mux.HandleFunc(alertManagerPath, func(w http.ResponseWriter, r *http.Request) {
|
||||
var a []struct {
|
||||
Labels map[string]string `json:"labels"`
|
||||
}
|
||||
if err := json.NewDecoder(r.Body).Decode(&a); err != nil {
|
||||
t.Fatalf("can not unmarshal data into alert %s", err)
|
||||
}
|
||||
if len(a) != 2 {
|
||||
t.Fatalf("expected 2 alert in array got %d", len(a))
|
||||
}
|
||||
if len(a[0].Labels) != 4 {
|
||||
t.Fatalf("expected 4 labels got %d", len(a[0].Labels))
|
||||
}
|
||||
if a[0].Labels["env"] != "prod" {
|
||||
t.Fatalf("expected env label to be prod during relabeling, got %s", a[0].Labels["env"])
|
||||
}
|
||||
if a[0].Labels["c"] != "baz" {
|
||||
t.Fatalf("expected c label to be baz during relabeling, got %s", a[0].Labels["c"])
|
||||
}
|
||||
if len(a[1].Labels) != 1 {
|
||||
t.Fatalf("expected 1 labels got %d", len(a[1].Labels))
|
||||
}
|
||||
})
|
||||
srv := httptest.NewServer(mux)
|
||||
defer srv.Close()
|
||||
|
||||
f, err := os.CreateTemp("", "")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer fs.MustRemovePath(f.Name())
|
||||
|
||||
rawConfig := `
|
||||
static_configs:
|
||||
- targets:
|
||||
- %s
|
||||
alert_relabel_configs:
|
||||
- source_labels: [b]
|
||||
target_label: "c"
|
||||
alert_relabel_configs:
|
||||
- source_labels: [a]
|
||||
target_label: "b"
|
||||
- target_label: "env"
|
||||
replacement: "prod"
|
||||
`
|
||||
config := fmt.Sprintf(rawConfig, srv.URL+alertManagerPath)
|
||||
writeToFile(f.Name(), config)
|
||||
|
||||
oldConfigPath := configPath
|
||||
defer func() { configPath = oldConfigPath }()
|
||||
*configPath = f.Name()
|
||||
err = Init(nil, "")
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error when parse notifier config: %s", err)
|
||||
}
|
||||
|
||||
firingAlerts := []Alert{
|
||||
{
|
||||
Name: "alert1",
|
||||
Labels: map[string]string{"a": "baz"},
|
||||
},
|
||||
{
|
||||
Name: "alert2",
|
||||
Labels: map[string]string{},
|
||||
},
|
||||
}
|
||||
errG := Send(context.Background(), firingAlerts, nil)
|
||||
for err := range errG {
|
||||
if err != nil {
|
||||
t.Errorf("unexpected error when sending alerts: %s", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,21 +1,15 @@
|
||||
package notifier
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||
)
|
||||
import "context"
|
||||
|
||||
// Notifier is a common interface for alert manager provider
|
||||
type Notifier interface {
|
||||
// Send sends the given list of alerts.
|
||||
// Returns an error if fails to send the alerts.
|
||||
// Must unblock if the given ctx is cancelled.
|
||||
Send(ctx context.Context, alerts []Alert, alertLabels [][]prompb.Label, notifierHeaders map[string]string) error
|
||||
Send(ctx context.Context, alerts []Alert, notifierHeaders map[string]string) error
|
||||
// Addr returns address where alerts are sent.
|
||||
Addr() string
|
||||
// LastError returns error, that occured during last attempt to send data
|
||||
LastError() string
|
||||
// Close is a destructor for the Notifier
|
||||
Close()
|
||||
}
|
||||
|
||||
@@ -1,10 +1,6 @@
|
||||
package notifier
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||
)
|
||||
import "context"
|
||||
|
||||
// blackHoleNotifier is a Notifier stub, used when no notifications need
|
||||
// to be sent.
|
||||
@@ -14,7 +10,7 @@ type blackHoleNotifier struct {
|
||||
}
|
||||
|
||||
// Send will send no notifications, but increase the metric.
|
||||
func (bh *blackHoleNotifier) Send(_ context.Context, alerts []Alert, _ [][]prompb.Label, _ map[string]string) error { //nolint:revive
|
||||
func (bh *blackHoleNotifier) Send(_ context.Context, alerts []Alert, _ map[string]string) error { //nolint:revive
|
||||
bh.metrics.alertsSent.Add(len(alerts))
|
||||
return nil
|
||||
}
|
||||
@@ -29,11 +25,6 @@ func (bh *blackHoleNotifier) Close() {
|
||||
bh.metrics.close()
|
||||
}
|
||||
|
||||
// LastError return last notifier's error
|
||||
func (bh *blackHoleNotifier) LastError() string {
|
||||
return ""
|
||||
}
|
||||
|
||||
// newBlackHoleNotifier creates a new blackHoleNotifier
|
||||
func newBlackHoleNotifier() *blackHoleNotifier {
|
||||
address := "blackhole"
|
||||
|
||||
@@ -5,7 +5,6 @@ import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||
metricset "github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
@@ -17,7 +16,7 @@ func TestBlackHoleNotifier_Send(t *testing.T) {
|
||||
Start: time.Now().UTC(),
|
||||
End: time.Now().UTC(),
|
||||
Annotations: map[string]string{"a": "b", "c": "d", "e": "f"},
|
||||
}}, [][]prompb.Label{{}}, nil); err != nil {
|
||||
}}, nil); err != nil {
|
||||
t.Fatalf("unexpected error %s", err)
|
||||
}
|
||||
|
||||
@@ -35,7 +34,7 @@ func TestBlackHoleNotifier_Close(t *testing.T) {
|
||||
Start: time.Now().UTC(),
|
||||
End: time.Now().UTC(),
|
||||
Annotations: map[string]string{"a": "b", "c": "d", "e": "f"},
|
||||
}}, [][]prompb.Label{{}}, nil); err != nil {
|
||||
}}, nil); err != nil {
|
||||
t.Fatalf("unexpected error %s", err)
|
||||
}
|
||||
|
||||
|
||||
19
app/vmalert/notifier/testdata/consul.bad.yaml
vendored
19
app/vmalert/notifier/testdata/consul.bad.yaml
vendored
@@ -1,19 +0,0 @@
|
||||
consul_sd_configs:
|
||||
- server: localhost:8500
|
||||
scheme: http
|
||||
services:
|
||||
- alertmanager
|
||||
alert_relabel_configs:
|
||||
- action: keep
|
||||
source_labels: [env]
|
||||
regex: "prod"
|
||||
- server: localhost:8500
|
||||
services:
|
||||
- consul
|
||||
alert_relabel_configs:
|
||||
- action: keep
|
||||
source_labels: [env]
|
||||
regex: "(abc"
|
||||
alert_relabel_configs:
|
||||
- target_label: "foo"
|
||||
replacement: "aaa"
|
||||
13
app/vmalert/notifier/testdata/dns.bad.yaml
vendored
13
app/vmalert/notifier/testdata/dns.bad.yaml
vendored
@@ -1,13 +0,0 @@
|
||||
dns_sd_configs:
|
||||
- names:
|
||||
- cloudflare.com
|
||||
type: 'A'
|
||||
port: 9093
|
||||
relabel_configs:
|
||||
- source_labels: [__meta_dns_name]
|
||||
replacement: '${1}'
|
||||
target_label: dns_name
|
||||
alert_relabel_configs:
|
||||
- action: keep
|
||||
source_labels: [env]
|
||||
regex: "(abc"
|
||||
15
app/vmalert/notifier/testdata/mixed.good.yaml
vendored
15
app/vmalert/notifier/testdata/mixed.good.yaml
vendored
@@ -2,19 +2,12 @@ static_configs:
|
||||
- targets:
|
||||
- localhost:9093
|
||||
- localhost:9095
|
||||
alert_relabel_configs:
|
||||
- action: keep
|
||||
source_labels: [env]
|
||||
regex: "static"
|
||||
|
||||
consul_sd_configs:
|
||||
- server: localhost:8500
|
||||
scheme: http
|
||||
services:
|
||||
- alertmanager
|
||||
alert_relabel_configs:
|
||||
- action: keep
|
||||
source_labels: [env]
|
||||
regex: "consul"
|
||||
- server: localhost:8500
|
||||
services:
|
||||
- consul
|
||||
@@ -24,10 +17,6 @@ dns_sd_configs:
|
||||
- cloudflare.com
|
||||
type: 'A'
|
||||
port: 9093
|
||||
alert_relabel_configs:
|
||||
- action: keep
|
||||
source_labels: [env]
|
||||
regex: "dns"
|
||||
|
||||
relabel_configs:
|
||||
- source_labels: [__meta_consul_tags]
|
||||
@@ -36,4 +25,4 @@ relabel_configs:
|
||||
target_label: __scheme__
|
||||
- source_labels: [__meta_dns_name]
|
||||
replacement: '${1}'
|
||||
target_label: dns_name
|
||||
target_label: dns_name
|
||||
26
app/vmalert/notifier/testdata/static.good.yaml
vendored
26
app/vmalert/notifier/testdata/static.good.yaml
vendored
@@ -1,14 +1,22 @@
|
||||
headers:
|
||||
- 'CustomHeader: foo'
|
||||
|
||||
static_configs:
|
||||
- targets:
|
||||
- http://192.168.0.101:9093
|
||||
alert_relabel_configs:
|
||||
- target_label: "foo"
|
||||
replacement: "aaa"
|
||||
- localhost:9093
|
||||
- localhost:9095
|
||||
- https://localhost:9093/test/api/v2/alerts
|
||||
basic_auth:
|
||||
username: foo
|
||||
password: bar
|
||||
|
||||
- targets:
|
||||
- http://192.168.0.101:9093
|
||||
alert_relabel_configs:
|
||||
- target_label: "foo"
|
||||
replacement: "ccc"
|
||||
|
||||
- localhost:9096
|
||||
- localhost:9097
|
||||
basic_auth:
|
||||
username: foo
|
||||
password: baz
|
||||
|
||||
alert_relabel_configs:
|
||||
- target_label: "foo"
|
||||
replacement: "aaa"
|
||||
|
||||
@@ -1,19 +0,0 @@
|
||||
package notifier
|
||||
|
||||
// ApiNotifier represents a Notifier configuration for WEB view
|
||||
type ApiNotifier struct {
|
||||
// Kind is a Notifier type
|
||||
Kind TargetType `json:"kind"`
|
||||
// Targets is a list of Notifier targets
|
||||
Targets []*ApiTarget `json:"targets"`
|
||||
}
|
||||
|
||||
// ApiTarget represents a specific Notifier target for WEB view
|
||||
type ApiTarget struct {
|
||||
// Address is a URL for sending notifications
|
||||
Address string `json:"address"`
|
||||
// Labels is a list of labels to add to each sent notification
|
||||
Labels map[string]string `json:"labels"`
|
||||
// LastError contains the error faced while sending to notifier.
|
||||
LastError string `json:"lastError"`
|
||||
}
|
||||
@@ -14,9 +14,9 @@ import (
|
||||
)
|
||||
|
||||
var (
|
||||
addr = flag.String("remoteRead.url", "", "Optional URL to datasource compatible with MetricsQL. It can be single node VictoriaMetrics or vmselect. "+
|
||||
"Remote read is used to restore alerts state. "+
|
||||
"This configuration makes sense only if vmalert was configured with '-remoteWrite.url' before and has been successfully persisted its state. "+
|
||||
addr = flag.String("remoteRead.url", "", "Optional URL to datasource compatible with MetricsQL. It can be single node VictoriaMetrics or vmselect."+
|
||||
"Remote read is used to restore alerts state."+
|
||||
"This configuration makes sense only if `vmalert` was configured with `remoteWrite.url` before and has been successfully persisted its state. "+
|
||||
"Supports address in the form of IP address with a port (e.g., http://127.0.0.1:8428) or DNS SRV record. "+
|
||||
"See also '-remoteRead.disablePathAppend', '-remoteRead.showURL'.")
|
||||
|
||||
|
||||
@@ -173,8 +173,9 @@ func (c *Client) run(ctx context.Context) {
|
||||
|
||||
cancel()
|
||||
}
|
||||
|
||||
c.wg.Go(func() {
|
||||
c.wg.Add(1)
|
||||
go func() {
|
||||
defer c.wg.Done()
|
||||
defer ticker.Stop()
|
||||
for {
|
||||
select {
|
||||
@@ -196,7 +197,7 @@ func (c *Client) run(ctx context.Context) {
|
||||
}
|
||||
}
|
||||
}
|
||||
})
|
||||
}()
|
||||
}
|
||||
|
||||
var (
|
||||
|
||||
@@ -2,7 +2,6 @@ package rule
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"hash/fnv"
|
||||
"math"
|
||||
@@ -188,54 +187,6 @@ func (ar *AlertingRule) ID() uint64 {
|
||||
return ar.RuleID
|
||||
}
|
||||
|
||||
// ToAPI returns ApiRule representation of ar
|
||||
func (ar *AlertingRule) ToAPI() ApiRule {
|
||||
state := ar.state
|
||||
lastState := state.getLast()
|
||||
r := ApiRule{
|
||||
Type: TypeAlerting,
|
||||
DatasourceType: ar.Type.String(),
|
||||
Name: ar.Name,
|
||||
Query: ar.Expr,
|
||||
Duration: ar.For.Seconds(),
|
||||
KeepFiringFor: ar.KeepFiringFor.Seconds(),
|
||||
Labels: ar.Labels,
|
||||
Annotations: ar.Annotations,
|
||||
LastEvaluation: lastState.Time,
|
||||
EvaluationTime: lastState.Duration.Seconds(),
|
||||
Health: "ok",
|
||||
State: "inactive",
|
||||
Alerts: ar.AlertsToAPI(),
|
||||
LastSamples: lastState.Samples,
|
||||
LastSeriesFetched: lastState.SeriesFetched,
|
||||
MaxUpdates: state.size(),
|
||||
Updates: state.getAll(),
|
||||
Debug: ar.Debug,
|
||||
|
||||
// encode as strings to avoid rounding in JSON
|
||||
ID: fmt.Sprintf("%d", ar.ID()),
|
||||
GroupID: fmt.Sprintf("%d", ar.GroupID),
|
||||
GroupName: ar.GroupName,
|
||||
File: ar.File,
|
||||
}
|
||||
if lastState.Err != nil {
|
||||
r.LastError = lastState.Err.Error()
|
||||
r.Health = "err"
|
||||
}
|
||||
// satisfy apiRule.State logic
|
||||
if len(r.Alerts) > 0 {
|
||||
r.State = notifier.StatePending.String()
|
||||
stateFiring := notifier.StateFiring.String()
|
||||
for _, a := range r.Alerts {
|
||||
if a.State == stateFiring {
|
||||
r.State = stateFiring
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
return r
|
||||
}
|
||||
|
||||
// GetAlerts returns active alerts of rule
|
||||
func (ar *AlertingRule) GetAlerts() []*notifier.Alert {
|
||||
ar.alertsMu.RLock()
|
||||
@@ -247,6 +198,16 @@ func (ar *AlertingRule) GetAlerts() []*notifier.Alert {
|
||||
return alerts
|
||||
}
|
||||
|
||||
// GetAlert returns alert if id exists
|
||||
func (ar *AlertingRule) GetAlert(id uint64) *notifier.Alert {
|
||||
ar.alertsMu.RLock()
|
||||
defer ar.alertsMu.RUnlock()
|
||||
if ar.alerts == nil {
|
||||
return nil
|
||||
}
|
||||
return ar.alerts[id]
|
||||
}
|
||||
|
||||
func (ar *AlertingRule) logDebugf(at time.Time, a *notifier.Alert, format string, args ...any) {
|
||||
if !ar.Debug {
|
||||
return
|
||||
@@ -312,11 +273,6 @@ type labelSet struct {
|
||||
// On k conflicts in origin set, the original value is preferred and copied
|
||||
// to processed with `exported_%k` key. The copy happens only if passed v isn't equal to origin[k] value.
|
||||
func (ls *labelSet) add(k, v string) {
|
||||
// do not add label with empty value, since it has no meaning.
|
||||
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9984
|
||||
if v == "" {
|
||||
return
|
||||
}
|
||||
ls.processed[k] = v
|
||||
ov, ok := ls.origin[k]
|
||||
if !ok {
|
||||
@@ -346,13 +302,14 @@ func (ar *AlertingRule) toLabels(m datasource.Metric, qFn templates.QueryFn) (*l
|
||||
ls.processed[l.Name] = l.Value
|
||||
}
|
||||
|
||||
// labels only support limited templating variables,
|
||||
// including `labels`, `value` and `expr`, to avoid breaking alert states or causing cardinality issue with results
|
||||
extraLabels, err := notifier.ExecTemplate(qFn, ar.Labels, notifier.AlertTplData{
|
||||
Labels: ls.origin,
|
||||
Value: m.Values[0],
|
||||
Expr: ar.Expr,
|
||||
})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to expand labels: %w", err)
|
||||
}
|
||||
for k, v := range extraLabels {
|
||||
ls.add(k, v)
|
||||
}
|
||||
@@ -363,7 +320,7 @@ func (ar *AlertingRule) toLabels(m datasource.Metric, qFn templates.QueryFn) (*l
|
||||
if !*disableAlertGroupLabel && ar.GroupName != "" {
|
||||
ls.add(alertGroupNameLabel, ar.GroupName)
|
||||
}
|
||||
return ls, err
|
||||
return ls, nil
|
||||
}
|
||||
|
||||
// execRange executes alerting rule on the given time range similarly to exec.
|
||||
@@ -384,12 +341,16 @@ func (ar *AlertingRule) execRange(ctx context.Context, start, end time.Time) ([]
|
||||
return []datasource.Metric{{Timestamps: []int64{0}, Values: []float64{math.NaN()}}}, nil
|
||||
}
|
||||
for _, s := range res.Data {
|
||||
ls, err := ar.expandLabelTemplates(s, qFn)
|
||||
ls, err := ar.expandLabelTemplates(s)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
alertID := hash(ls.processed)
|
||||
a := ar.newAlert(s, time.Time{}, ls.processed, nil) // initial alert
|
||||
as, err := ar.expandAnnotationTemplates(s, qFn, time.Time{}, ls)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
a := ar.newAlert(s, time.Time{}, ls.processed, as) // initial alert
|
||||
|
||||
prevT := time.Time{}
|
||||
for i := range s.Values {
|
||||
@@ -405,6 +366,8 @@ func (ar *AlertingRule) execRange(ctx context.Context, start, end time.Time) ([]
|
||||
// reset to Pending if there are gaps > EvalInterval between DPs
|
||||
a.State = notifier.StatePending
|
||||
a.ActiveAt = at
|
||||
// re-template the annotations as active timestamp is changed
|
||||
a.Annotations, _ = ar.expandAnnotationTemplates(s, qFn, at, ls)
|
||||
a.Start = time.Time{}
|
||||
} else if at.Sub(a.ActiveAt) >= ar.For && a.State != notifier.StateFiring {
|
||||
a.State = notifier.StateFiring
|
||||
@@ -450,7 +413,7 @@ func (ar *AlertingRule) exec(ctx context.Context, ts time.Time, limit int) ([]pr
|
||||
|
||||
defer func() {
|
||||
ar.state.add(curState)
|
||||
if curState.Err != nil && !errors.Is(curState.Err, context.Canceled) {
|
||||
if curState.Err != nil {
|
||||
ar.metrics.errors.Inc()
|
||||
}
|
||||
}()
|
||||
@@ -459,8 +422,7 @@ func (ar *AlertingRule) exec(ctx context.Context, ts time.Time, limit int) ([]pr
|
||||
return nil, fmt.Errorf("failed to execute query %q: %w", ar.Expr, err)
|
||||
}
|
||||
|
||||
isPartial := isPartialResponse(res)
|
||||
ar.logDebugf(ts, nil, "query returned %d series (elapsed: %s, isPartial: %t)", curState.Samples, curState.Duration, isPartial)
|
||||
ar.logDebugf(ts, nil, "query returned %d series (elapsed: %s, isPartial: %t)", curState.Samples, curState.Duration, isPartialResponse(res))
|
||||
qFn := func(query string) ([]datasource.Metric, error) {
|
||||
res, _, err := ar.q.Query(ctx, query, ts)
|
||||
return res.Data, err
|
||||
@@ -472,11 +434,10 @@ func (ar *AlertingRule) exec(ctx context.Context, ts time.Time, limit int) ([]pr
|
||||
expandedLabels := make([]*labelSet, len(res.Data))
|
||||
expandedAnnotations := make([]map[string]string, len(res.Data))
|
||||
for i, m := range res.Data {
|
||||
ls, err := ar.expandLabelTemplates(m, qFn)
|
||||
ls, err := ar.expandLabelTemplates(m)
|
||||
if err != nil {
|
||||
// only set error in current state, but do not break alert processing
|
||||
curState.Err = err
|
||||
logger.Errorf("got templating error in rule %s: %q", ar.Name, err)
|
||||
return nil, curState.Err
|
||||
}
|
||||
at := ts
|
||||
alertID := hash(ls.processed)
|
||||
@@ -486,11 +447,10 @@ func (ar *AlertingRule) exec(ctx context.Context, ts time.Time, limit int) ([]pr
|
||||
at = a.ActiveAt
|
||||
}
|
||||
}
|
||||
as, err := ar.expandAnnotationTemplates(m, qFn, at, ls, isPartial)
|
||||
as, err := ar.expandAnnotationTemplates(m, qFn, at, ls)
|
||||
if err != nil {
|
||||
// only set error in current state, but do not break alert processing
|
||||
curState.Err = err
|
||||
logger.Errorf("got templating error in rule %s: %q", ar.Name, err)
|
||||
return nil, curState.Err
|
||||
}
|
||||
expandedLabels[i] = ls
|
||||
expandedAnnotations[i] = as
|
||||
@@ -596,29 +556,31 @@ func (ar *AlertingRule) exec(ctx context.Context, ts time.Time, limit int) ([]pr
|
||||
return append(tss, ar.toTimeSeries(ts.Unix())...), nil
|
||||
}
|
||||
|
||||
func (ar *AlertingRule) expandLabelTemplates(m datasource.Metric, qFn templates.QueryFn) (*labelSet, error) {
|
||||
func (ar *AlertingRule) expandLabelTemplates(m datasource.Metric) (*labelSet, error) {
|
||||
qFn := func(_ string) ([]datasource.Metric, error) {
|
||||
return nil, fmt.Errorf("`query` template isn't supported in rule label")
|
||||
}
|
||||
ls, err := ar.toLabels(m, qFn)
|
||||
if err != nil {
|
||||
return ls, fmt.Errorf("failed to expand label templates: %s", err)
|
||||
return nil, fmt.Errorf("failed to expand label templates: %s", err)
|
||||
}
|
||||
return ls, nil
|
||||
}
|
||||
|
||||
func (ar *AlertingRule) expandAnnotationTemplates(m datasource.Metric, qFn templates.QueryFn, activeAt time.Time, ls *labelSet, isPartial bool) (map[string]string, error) {
|
||||
func (ar *AlertingRule) expandAnnotationTemplates(m datasource.Metric, qFn templates.QueryFn, activeAt time.Time, ls *labelSet) (map[string]string, error) {
|
||||
tplData := notifier.AlertTplData{
|
||||
Value: m.Values[0],
|
||||
Type: ar.Type.String(),
|
||||
Labels: ls.origin,
|
||||
Expr: ar.Expr,
|
||||
AlertID: hash(ls.processed),
|
||||
GroupID: ar.GroupID,
|
||||
ActiveAt: activeAt,
|
||||
For: ar.For,
|
||||
IsPartial: isPartial,
|
||||
Value: m.Values[0],
|
||||
Type: ar.Type.String(),
|
||||
Labels: ls.origin,
|
||||
Expr: ar.Expr,
|
||||
AlertID: hash(ls.processed),
|
||||
GroupID: ar.GroupID,
|
||||
ActiveAt: activeAt,
|
||||
For: ar.For,
|
||||
}
|
||||
as, err := notifier.ExecTemplate(qFn, ar.Annotations, tplData)
|
||||
if err != nil {
|
||||
return as, fmt.Errorf("failed to expand annotation templates: %s", err)
|
||||
return nil, fmt.Errorf("failed to expand annotation templates: %s", err)
|
||||
}
|
||||
return as, nil
|
||||
}
|
||||
@@ -818,9 +780,7 @@ func (ar *AlertingRule) restore(ctx context.Context, q datasource.Querier, ts ti
|
||||
expr := fmt.Sprintf("default_rollup(%s{%s%s}[%ds])",
|
||||
alertForStateMetricName, nameStr, labelsFilter, int(lookback.Seconds()))
|
||||
|
||||
// query ALERTS_FOR_STATE at `ts-1s` instead `ts` to avoid retrieving data written in the current run,
|
||||
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10335
|
||||
res, _, err := q.Query(ctx, expr, ts.Add(-1*time.Second))
|
||||
res, _, err := q.Query(ctx, expr, ts)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to execute restore query %q: %w ", expr, err)
|
||||
}
|
||||
|
||||
@@ -10,7 +10,6 @@ import (
|
||||
"strings"
|
||||
"sync"
|
||||
"testing"
|
||||
"testing/synctest"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
@@ -664,7 +663,7 @@ func TestAlertingRuleExecRange(t *testing.T) {
|
||||
Name: "for-pending",
|
||||
Type: config.NewPrometheusType().String(),
|
||||
Labels: map[string]string{"alertname": "for-pending"},
|
||||
Annotations: map[string]string{},
|
||||
Annotations: map[string]string{"activeAt": "5000"},
|
||||
State: notifier.StatePending,
|
||||
ActiveAt: time.Unix(5, 0),
|
||||
Value: 1,
|
||||
@@ -684,7 +683,7 @@ func TestAlertingRuleExecRange(t *testing.T) {
|
||||
Name: "for-firing",
|
||||
Type: config.NewPrometheusType().String(),
|
||||
Labels: map[string]string{"alertname": "for-firing"},
|
||||
Annotations: map[string]string{},
|
||||
Annotations: map[string]string{"activeAt": "1000"},
|
||||
State: notifier.StateFiring,
|
||||
ActiveAt: time.Unix(1, 0),
|
||||
Start: time.Unix(5, 0),
|
||||
@@ -705,7 +704,7 @@ func TestAlertingRuleExecRange(t *testing.T) {
|
||||
Name: "for-hold-pending",
|
||||
Type: config.NewPrometheusType().String(),
|
||||
Labels: map[string]string{"alertname": "for-hold-pending"},
|
||||
Annotations: map[string]string{},
|
||||
Annotations: map[string]string{"activeAt": "5000"},
|
||||
State: notifier.StatePending,
|
||||
ActiveAt: time.Unix(5, 0),
|
||||
Value: 1,
|
||||
@@ -827,9 +826,12 @@ func TestGroup_Restore(t *testing.T) {
|
||||
fg := NewGroup(config.Group{Name: "TestRestore", Rules: rules}, fqr, time.Second, nil)
|
||||
fg.Init()
|
||||
wg := sync.WaitGroup{}
|
||||
wg.Go(func() {
|
||||
fg.Start(context.Background(), nil, fqr)
|
||||
})
|
||||
wg.Add(1)
|
||||
go func() {
|
||||
nts := func() []notifier.Notifier { return []notifier.Notifier{¬ifier.FakeNotifier{}} }
|
||||
fg.Start(context.Background(), nts, nil, fqr)
|
||||
wg.Done()
|
||||
}()
|
||||
fg.Close()
|
||||
wg.Wait()
|
||||
|
||||
@@ -1120,7 +1122,7 @@ func TestAlertingRuleLimit_Success(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestAlertingRule_Template(t *testing.T) {
|
||||
f := func(rule *AlertingRule, metrics []datasource.Metric, isResponsePartial bool, alertsExpected map[uint64]*notifier.Alert) {
|
||||
f := func(rule *AlertingRule, metrics []datasource.Metric, alertsExpected map[uint64]*notifier.Alert) {
|
||||
t.Helper()
|
||||
|
||||
fakeGroup := Group{
|
||||
@@ -1133,7 +1135,6 @@ func TestAlertingRule_Template(t *testing.T) {
|
||||
entries: make([]StateEntry, 10),
|
||||
}
|
||||
fq.Add(metrics...)
|
||||
fq.SetPartialResponse(isResponsePartial)
|
||||
|
||||
if _, err := rule.exec(context.TODO(), time.Now(), 0); err != nil {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
@@ -1164,7 +1165,7 @@ func TestAlertingRule_Template(t *testing.T) {
|
||||
}, []datasource.Metric{
|
||||
metricWithValueAndLabels(t, 1, "instance", "foo"),
|
||||
metricWithValueAndLabels(t, 1, "instance", "bar"),
|
||||
}, false, map[uint64]*notifier.Alert{
|
||||
}, map[uint64]*notifier.Alert{
|
||||
hash(map[string]string{alertNameLabel: "common", "region": "east", "instance": "foo"}): {
|
||||
Annotations: map[string]string{
|
||||
"summary": `common: Too high connection number for "foo"`,
|
||||
@@ -1193,14 +1194,14 @@ func TestAlertingRule_Template(t *testing.T) {
|
||||
"instance": "{{ $labels.instance }}",
|
||||
},
|
||||
Annotations: map[string]string{
|
||||
"summary": `{{ $labels.__name__ }}: Too high connection number for "{{ $labels.instance }}".{{ if $isPartial }} WARNING: Partial response detected - this alert may be incomplete. Please verify the results manually.{{ end }}`,
|
||||
"summary": `{{ $labels.__name__ }}: Too high connection number for "{{ $labels.instance }}"`,
|
||||
"description": `{{ $labels.alertname}}: It is {{ $value }} connections for "{{ $labels.instance }}"`,
|
||||
},
|
||||
alerts: make(map[uint64]*notifier.Alert),
|
||||
}, []datasource.Metric{
|
||||
metricWithValueAndLabels(t, 2, "__name__", "first", "instance", "foo", alertNameLabel, "override"),
|
||||
metricWithValueAndLabels(t, 10, "__name__", "second", "instance", "bar", alertNameLabel, "override"),
|
||||
}, false, map[uint64]*notifier.Alert{
|
||||
}, map[uint64]*notifier.Alert{
|
||||
hash(map[string]string{alertNameLabel: "override label", "exported_alertname": "override", "instance": "foo"}): {
|
||||
Labels: map[string]string{
|
||||
alertNameLabel: "override label",
|
||||
@@ -1208,7 +1209,7 @@ func TestAlertingRule_Template(t *testing.T) {
|
||||
"instance": "foo",
|
||||
},
|
||||
Annotations: map[string]string{
|
||||
"summary": `first: Too high connection number for "foo".`,
|
||||
"summary": `first: Too high connection number for "foo"`,
|
||||
"description": `override: It is 2 connections for "foo"`,
|
||||
},
|
||||
},
|
||||
@@ -1219,7 +1220,7 @@ func TestAlertingRule_Template(t *testing.T) {
|
||||
"instance": "bar",
|
||||
},
|
||||
Annotations: map[string]string{
|
||||
"summary": `second: Too high connection number for "bar".`,
|
||||
"summary": `second: Too high connection number for "bar"`,
|
||||
"description": `override: It is 10 connections for "bar"`,
|
||||
},
|
||||
},
|
||||
@@ -1232,7 +1233,7 @@ func TestAlertingRule_Template(t *testing.T) {
|
||||
"instance": "{{ $labels.instance }}",
|
||||
},
|
||||
Annotations: map[string]string{
|
||||
"summary": `Alert "{{ $labels.alertname }}({{ $labels.alertgroup }})" for instance {{ $labels.instance }}.{{ if $isPartial }} WARNING: Partial response detected - this alert may be incomplete. Please verify the results manually.{{ end }}`,
|
||||
"summary": `Alert "{{ $labels.alertname }}({{ $labels.alertgroup }})" for instance {{ $labels.instance }}`,
|
||||
},
|
||||
alerts: make(map[uint64]*notifier.Alert),
|
||||
}, []datasource.Metric{
|
||||
@@ -1240,7 +1241,7 @@ func TestAlertingRule_Template(t *testing.T) {
|
||||
alertNameLabel, "originAlertname",
|
||||
alertGroupNameLabel, "originGroupname",
|
||||
"instance", "foo"),
|
||||
}, true, map[uint64]*notifier.Alert{
|
||||
}, map[uint64]*notifier.Alert{
|
||||
hash(map[string]string{
|
||||
alertNameLabel: "OriginLabels",
|
||||
"exported_alertname": "originAlertname",
|
||||
@@ -1256,7 +1257,7 @@ func TestAlertingRule_Template(t *testing.T) {
|
||||
"instance": "foo",
|
||||
},
|
||||
Annotations: map[string]string{
|
||||
"summary": `Alert "originAlertname(originGroupname)" for instance foo. WARNING: Partial response detected - this alert may be incomplete. Please verify the results manually.`,
|
||||
"summary": `Alert "originAlertname(originGroupname)" for instance foo`,
|
||||
},
|
||||
},
|
||||
})
|
||||
@@ -1371,10 +1372,8 @@ func TestAlertingRule_ToLabels(t *testing.T) {
|
||||
|
||||
ar := &AlertingRule{
|
||||
Labels: map[string]string{
|
||||
"instance": "override", // this should override instance with new value
|
||||
"group": "vmalert", // this shouldn't have effect since value in metric is equal
|
||||
"invalid_label": "{{ .Values.mustRuntimeFail }}",
|
||||
"empty_label": "", // this should be dropped
|
||||
"instance": "override", // this should override instance with new value
|
||||
"group": "vmalert", // this shouldn't have effect since value in metric is equal
|
||||
},
|
||||
Expr: "sum(vmalert_alerting_rules_error) by(instance, group, alertname) > 0",
|
||||
Name: "AlertingRulesError",
|
||||
@@ -1382,11 +1381,10 @@ func TestAlertingRule_ToLabels(t *testing.T) {
|
||||
}
|
||||
|
||||
expectedOriginLabels := map[string]string{
|
||||
"instance": "0.0.0.0:8800",
|
||||
"group": "vmalert",
|
||||
"alertname": "ConfigurationReloadFailure",
|
||||
"alertgroup": "vmalert",
|
||||
"invalid_label": `error evaluating template: template: :1:298: executing "" at <.Values.mustRuntimeFail>: can't evaluate field Values in type notifier.tplData`,
|
||||
"instance": "0.0.0.0:8800",
|
||||
"group": "vmalert",
|
||||
"alertname": "ConfigurationReloadFailure",
|
||||
"alertgroup": "vmalert",
|
||||
}
|
||||
|
||||
expectedProcessedLabels := map[string]string{
|
||||
@@ -1396,12 +1394,11 @@ func TestAlertingRule_ToLabels(t *testing.T) {
|
||||
"exported_alertname": "ConfigurationReloadFailure",
|
||||
"group": "vmalert",
|
||||
"alertgroup": "vmalert",
|
||||
"invalid_label": `error evaluating template: template: :1:298: executing "" at <.Values.mustRuntimeFail>: can't evaluate field Values in type notifier.tplData`,
|
||||
}
|
||||
|
||||
ls, err := ar.toLabels(metric, nil)
|
||||
if err == nil || !strings.Contains(err.Error(), "error evaluating template") {
|
||||
t.Fatalf("unexpected error %q", err.Error())
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
|
||||
if !reflect.DeepEqual(ls.origin, expectedOriginLabels) {
|
||||
@@ -1432,142 +1429,3 @@ func TestAlertingRuleExec_Partial(t *testing.T) {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestAlertingRule_QueryTemplateInLabels(t *testing.T) {
|
||||
fq := &datasource.FakeQuerier{}
|
||||
fakeGroup := Group{
|
||||
Name: "TestQueryTemplateInLabels",
|
||||
}
|
||||
|
||||
ar := &AlertingRule{
|
||||
Name: "test_alert",
|
||||
Labels: map[string]string{
|
||||
"suppress_for_mass_alert": `{{ if (printf "ALERTS{alertname='SomeAlert', alertstate='firing', device='%s'} == 1" $labels.device | query) }}true{{ else }}false{{ end }}`,
|
||||
},
|
||||
Annotations: map[string]string{
|
||||
"summary": "Test alert with query template in labels",
|
||||
},
|
||||
alerts: make(map[uint64]*notifier.Alert),
|
||||
}
|
||||
ar.GroupID = fakeGroup.GetID()
|
||||
ar.q = fq
|
||||
ar.state = &ruleState{
|
||||
entries: make([]StateEntry, 10),
|
||||
}
|
||||
|
||||
// Add a metric that should trigger the alert
|
||||
fq.Add(metricWithValueAndLabels(t, 1, "device", "sda1"))
|
||||
|
||||
ts := time.Now()
|
||||
_, err := ar.exec(context.TODO(), ts, 0)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error with query template in labels: %s", err)
|
||||
}
|
||||
|
||||
// Verify that the alert was created and the query template was executed
|
||||
if len(ar.alerts) != 1 {
|
||||
t.Fatalf("expected 1 alert, got %d", len(ar.alerts))
|
||||
}
|
||||
|
||||
alert := ar.GetAlerts()[0]
|
||||
suppressLabel, exists := alert.Labels["suppress_for_mass_alert"]
|
||||
if !exists {
|
||||
t.Fatalf("expected 'suppress_for_mass_alert' label to exist")
|
||||
}
|
||||
// The query template should have been executed (even if it returns false due to mock data)
|
||||
if suppressLabel != "true" && suppressLabel != "false" {
|
||||
t.Fatalf("expected 'suppress_for_mass_alert' label to be 'true' or 'false', got '%s'", suppressLabel)
|
||||
}
|
||||
}
|
||||
|
||||
// TestAlertingRule_ActiveAtPreservedInAnnotations ensures that the fix for
|
||||
// https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9543 is preserved
|
||||
// while allowing query templates in labels (https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9783)
|
||||
func TestAlertingRule_ActiveAtPreservedInAnnotations(t *testing.T) {
|
||||
// wrap into synctest because of time manipulations
|
||||
synctest.Test(t, func(t *testing.T) {
|
||||
fq := &datasource.FakeQuerier{}
|
||||
|
||||
ar := &AlertingRule{
|
||||
Name: "TestActiveAtPreservation",
|
||||
Labels: map[string]string{
|
||||
"test_query_in_label": `{{ "static_value" }}`,
|
||||
},
|
||||
Annotations: map[string]string{
|
||||
"description": "Alert active since {{ $activeAt }}",
|
||||
},
|
||||
alerts: make(map[uint64]*notifier.Alert),
|
||||
q: fq,
|
||||
state: &ruleState{
|
||||
entries: make([]StateEntry, 10),
|
||||
},
|
||||
}
|
||||
|
||||
// Mock query result - return empty result to make suppress_for_mass_alert = false
|
||||
// (no need to add anything to fq for empty result)
|
||||
|
||||
// Add a metric that should trigger the alert
|
||||
fq.Add(metricWithValueAndLabels(t, 1, "instance", "server1"))
|
||||
|
||||
// First execution - creates new alert
|
||||
ts1 := time.Now()
|
||||
_, err := ar.exec(context.TODO(), ts1, 0)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error on first exec: %s", err)
|
||||
}
|
||||
|
||||
if len(ar.alerts) != 1 {
|
||||
t.Fatalf("expected 1 alert, got %d", len(ar.alerts))
|
||||
}
|
||||
|
||||
firstAlert := ar.GetAlerts()[0]
|
||||
// Verify first execution: activeAt should be ts1 and annotation should reflect it
|
||||
if !firstAlert.ActiveAt.Equal(ts1) {
|
||||
t.Fatalf("expected activeAt to be %v, got %v", ts1, firstAlert.ActiveAt)
|
||||
}
|
||||
|
||||
// Extract time from annotation (format will be like "Alert active since 2025-09-30 08:55:13.638551611 -0400 EDT m=+0.002928464")
|
||||
expectedTimeStr := ts1.Format("2006-01-02 15:04:05")
|
||||
if !strings.Contains(firstAlert.Annotations["description"], expectedTimeStr) {
|
||||
t.Fatalf("first exec annotation should contain time %s, got: %s", expectedTimeStr, firstAlert.Annotations["description"])
|
||||
}
|
||||
|
||||
// Second execution - should preserve activeAt in annotation
|
||||
|
||||
// Ensure different timestamp with different seconds
|
||||
// sleep is non-blocking thanks to synctest
|
||||
time.Sleep(2 * time.Second)
|
||||
ts2 := time.Now()
|
||||
_, err = ar.exec(context.TODO(), ts2, 0)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error on second exec: %s", err)
|
||||
}
|
||||
|
||||
// Get the alert again (should be the same alert)
|
||||
if len(ar.alerts) != 1 {
|
||||
t.Fatalf("expected 1 alert, got %d", len(ar.alerts))
|
||||
}
|
||||
secondAlert := ar.GetAlerts()[0]
|
||||
|
||||
// Critical test: activeAt should still be ts1, not ts2
|
||||
if !secondAlert.ActiveAt.Equal(ts1) {
|
||||
t.Fatalf("activeAt should be preserved as %v, but got %v", ts1, secondAlert.ActiveAt)
|
||||
}
|
||||
|
||||
// Critical test: annotation should still contain ts1 time, not ts2
|
||||
if !strings.Contains(secondAlert.Annotations["description"], expectedTimeStr) {
|
||||
t.Fatalf("second exec annotation should still contain original time %s, got: %s", expectedTimeStr, secondAlert.Annotations["description"])
|
||||
}
|
||||
|
||||
// Additional verification: annotation should NOT contain ts2 time
|
||||
ts2TimeStr := ts2.Format("2006-01-02 15:04:05")
|
||||
if strings.Contains(secondAlert.Annotations["description"], ts2TimeStr) {
|
||||
t.Fatalf("annotation should NOT contain new eval time %s, got: %s", ts2TimeStr, secondAlert.Annotations["description"])
|
||||
}
|
||||
|
||||
// Verify query template in labels still works (this would fail if query templates were broken)
|
||||
if firstAlert.Labels["test_query_in_label"] != "static_value" {
|
||||
t.Fatalf("expected test_query_in_label=static_value, got %s", firstAlert.Labels["test_query_in_label"])
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
@@ -2,6 +2,7 @@ package rule
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"flag"
|
||||
"fmt"
|
||||
@@ -24,10 +25,6 @@ import (
|
||||
)
|
||||
|
||||
var (
|
||||
ruleResultsLimit = flag.Int("rule.resultsLimit", 0, "Limits the number of alerts or recording results a single rule can produce. "+
|
||||
"Can be overridden by the limit option under group if specified. "+
|
||||
"If exceeded, the rule will be marked with an error and all its results will be discarded. "+
|
||||
"0 means no limit.")
|
||||
ruleUpdateEntriesLimit = flag.Int("rule.updateEntriesLimit", 20, "Defines the max number of rule's state updates stored in-memory. "+
|
||||
"Rule's updates are available on rule's Details page and are used for debugging purposes. The number of stored updates can be overridden per rule via update_entries_limit param.")
|
||||
resendDelay = flag.Duration("rule.resendDelay", 0, "MiniMum amount of time to wait before resending an alert to notifier.")
|
||||
@@ -39,8 +36,6 @@ var (
|
||||
disableAlertGroupLabel = flag.Bool("disableAlertgroupLabel", false, "Whether to disable adding group's Name as label to generated alerts and time series.")
|
||||
remoteReadLookBack = flag.Duration("remoteRead.lookback", time.Hour, "Lookback defines how far to look into past for alerts timeseries. "+
|
||||
"For example, if lookback=1h then range from now() to now()-1h will be scanned.")
|
||||
maxStartDelay = flag.Duration("group.maxStartDelay", 5*time.Minute, "Defines the max delay before starting the group evaluation. Group's start is artificially delayed for random duration on interval"+
|
||||
" [0..min(--group.maxStartDelay, group.interval)]. This helps smoothing out the load on the configured datasource, so evaluations aren't executed too close to each other.")
|
||||
)
|
||||
|
||||
// Group is an entity for grouping rules
|
||||
@@ -117,6 +112,7 @@ func NewGroup(cfg config.Group, qb datasource.QuerierBuilder, defaultInterval ti
|
||||
Name: cfg.Name,
|
||||
File: cfg.File,
|
||||
Interval: cfg.Interval.Duration(),
|
||||
Limit: cfg.Limit,
|
||||
Concurrency: cfg.Concurrency,
|
||||
checksum: cfg.Checksum,
|
||||
Params: cfg.Params,
|
||||
@@ -133,11 +129,6 @@ func NewGroup(cfg config.Group, qb datasource.QuerierBuilder, defaultInterval ti
|
||||
if g.Interval == 0 {
|
||||
g.Interval = defaultInterval
|
||||
}
|
||||
if cfg.Limit != nil {
|
||||
g.Limit = *cfg.Limit
|
||||
} else {
|
||||
g.Limit = *ruleResultsLimit
|
||||
}
|
||||
if g.Concurrency < 1 {
|
||||
g.Concurrency = 1
|
||||
}
|
||||
@@ -298,7 +289,7 @@ func (g *Group) InterruptEval() {
|
||||
}
|
||||
}
|
||||
|
||||
// Close stops the group and its rules, unregisters group metrics
|
||||
// Close stops the group and it's rules, unregisters group metrics
|
||||
func (g *Group) Close() {
|
||||
if g.doneCh == nil {
|
||||
return
|
||||
@@ -307,6 +298,10 @@ func (g *Group) Close() {
|
||||
g.InterruptEval()
|
||||
<-g.finishedCh
|
||||
|
||||
g.closeGroupMetrics()
|
||||
}
|
||||
|
||||
func (g *Group) closeGroupMetrics() {
|
||||
metrics.UnregisterSet(g.metrics.set, true)
|
||||
}
|
||||
|
||||
@@ -332,13 +327,13 @@ func (g *Group) Init() {
|
||||
}
|
||||
|
||||
// Start starts group's evaluation
|
||||
func (g *Group) Start(ctx context.Context, rw remotewrite.RWClient, rr datasource.QuerierBuilder) {
|
||||
func (g *Group) Start(ctx context.Context, nts func() []notifier.Notifier, rw remotewrite.RWClient, rr datasource.QuerierBuilder) {
|
||||
defer func() { close(g.finishedCh) }()
|
||||
evalTS := time.Now()
|
||||
// sleep random duration to spread group rules evaluation
|
||||
// over maxStartDelay to reduce the load on datasource.
|
||||
// over time in order to reduce load on datasource.
|
||||
if !SkipRandSleepOnGroupStart {
|
||||
sleepBeforeStart := g.delayBeforeStart(evalTS, *maxStartDelay)
|
||||
sleepBeforeStart := delayBeforeStart(evalTS, g.GetID(), g.Interval, g.EvalOffset)
|
||||
g.infof("will start in %v", sleepBeforeStart)
|
||||
|
||||
sleepTimer := time.NewTimer(sleepBeforeStart)
|
||||
@@ -370,12 +365,13 @@ func (g *Group) Start(ctx context.Context, rw remotewrite.RWClient, rr datasourc
|
||||
|
||||
e := &executor{
|
||||
Rw: rw,
|
||||
Notifiers: nts,
|
||||
notifierHeaders: g.NotifierHeaders,
|
||||
}
|
||||
|
||||
g.infof("started")
|
||||
|
||||
eval := func(ctx context.Context, ts time.Time) time.Time {
|
||||
eval := func(ctx context.Context, ts time.Time) {
|
||||
g.metrics.iterationTotal.Inc()
|
||||
|
||||
start := time.Now()
|
||||
@@ -383,7 +379,7 @@ func (g *Group) Start(ctx context.Context, rw remotewrite.RWClient, rr datasourc
|
||||
if len(g.Rules) < 1 {
|
||||
g.metrics.iterationDuration.UpdateDuration(start)
|
||||
g.LastEvaluation = start
|
||||
return ts
|
||||
return
|
||||
}
|
||||
|
||||
resolveDuration := getResolveDuration(g.Interval, *resendDelay, *maxResolveDuration)
|
||||
@@ -397,7 +393,6 @@ func (g *Group) Start(ctx context.Context, rw remotewrite.RWClient, rr datasourc
|
||||
}
|
||||
g.metrics.iterationDuration.UpdateDuration(start)
|
||||
g.LastEvaluation = start
|
||||
return ts
|
||||
}
|
||||
|
||||
evalCtx, cancel := context.WithCancel(ctx)
|
||||
@@ -406,7 +401,7 @@ func (g *Group) Start(ctx context.Context, rw remotewrite.RWClient, rr datasourc
|
||||
g.mu.Unlock()
|
||||
defer g.evalCancel()
|
||||
|
||||
realEvalTS := eval(evalCtx, evalTS)
|
||||
eval(evalCtx, evalTS)
|
||||
|
||||
t := time.NewTicker(g.Interval)
|
||||
defer t.Stop()
|
||||
@@ -414,7 +409,7 @@ func (g *Group) Start(ctx context.Context, rw remotewrite.RWClient, rr datasourc
|
||||
// restore the rules state after the first evaluation
|
||||
// so only active alerts can be restored.
|
||||
if rr != nil {
|
||||
err := g.restore(ctx, rr, realEvalTS, *remoteReadLookBack)
|
||||
err := g.restore(ctx, rr, evalTS, *remoteReadLookBack)
|
||||
if err != nil {
|
||||
logger.Errorf("error while restoring ruleState for group %q: %s", g.Name, err)
|
||||
}
|
||||
@@ -477,31 +472,32 @@ func (g *Group) UpdateWith(newGroup *Group) {
|
||||
g.updateCh <- newGroup
|
||||
}
|
||||
|
||||
// delayBeforeStart returns duration for delaying the evaluation start
|
||||
// based on given ts and Group settings. The delay can't exceed maxDelay.
|
||||
// maxDelay is ignored if g.EvalOffset != nil.
|
||||
//
|
||||
// Delaying is important to smooth out the load on the datasource when all groups start at the same time.
|
||||
// delayBeforeStart calculates delay based on Group ID, so all groups will start at different moments of time.
|
||||
func (g *Group) delayBeforeStart(ts time.Time, maxDelay time.Duration) time.Duration {
|
||||
if g.EvalOffset != nil {
|
||||
// if offset is specified, ignore the maxDelay and return a duration aligned with offset
|
||||
currentOffsetPoint := ts.Truncate(g.Interval).Add(*g.EvalOffset)
|
||||
// DeepCopy returns a deep copy of group
|
||||
func (g *Group) DeepCopy() *Group {
|
||||
g.mu.RLock()
|
||||
data, _ := json.Marshal(g)
|
||||
g.mu.RUnlock()
|
||||
newG := Group{}
|
||||
_ = json.Unmarshal(data, &newG)
|
||||
newG.Rules = g.Rules
|
||||
newG.id = g.id
|
||||
return &newG
|
||||
}
|
||||
|
||||
// if offset is specified, delayBeforeStart returns a duration to help aligning timestamp with offset;
|
||||
// otherwise, it returns a random duration between [0..interval] based on group key.
|
||||
func delayBeforeStart(ts time.Time, key uint64, interval time.Duration, offset *time.Duration) time.Duration {
|
||||
if offset != nil {
|
||||
currentOffsetPoint := ts.Truncate(interval).Add(*offset)
|
||||
if currentOffsetPoint.Before(ts) {
|
||||
// wait until the next offset point
|
||||
return currentOffsetPoint.Add(g.Interval).Sub(ts)
|
||||
return currentOffsetPoint.Add(interval).Sub(ts)
|
||||
}
|
||||
return currentOffsetPoint.Sub(ts)
|
||||
}
|
||||
|
||||
// otherwise, return a random duration between [0..min(interval, maxDelay)] based on group ID
|
||||
interval := g.Interval
|
||||
if interval > maxDelay {
|
||||
// artificially limit interval, so groups with big intervals could start sooner.
|
||||
interval = maxDelay
|
||||
}
|
||||
var randSleep time.Duration
|
||||
randSleep = time.Duration(float64(interval) * (float64(g.GetID()) / (1 << 64)))
|
||||
randSleep = time.Duration(float64(interval) * (float64(key) / (1 << 64)))
|
||||
sleepOffset := time.Duration(ts.UnixNano() % interval.Nanoseconds())
|
||||
if randSleep < sleepOffset {
|
||||
randSleep += interval
|
||||
@@ -563,13 +559,15 @@ func (g *Group) Replay(start, end time.Time, rw remotewrite.RWClient, maxDataPoi
|
||||
if !disableProgressBar {
|
||||
bar = pb.StartNew(iterations * len(g.Rules))
|
||||
}
|
||||
for i := range g.Rules {
|
||||
rule := g.Rules[i]
|
||||
for _, r := range g.Rules {
|
||||
sem <- struct{}{}
|
||||
wg.Go(func() {
|
||||
res <- replayRuleRange(rule, ri, bar, rw, replayRuleRetryAttempts, ruleEvaluationConcurrency)
|
||||
wg.Add(1)
|
||||
go func(r Rule, ri rangeIterator) {
|
||||
// pass ri as a copy, so it can be modified within the replayRuleRange
|
||||
res <- replayRuleRange(r, ri, bar, rw, replayRuleRetryAttempts, ruleEvaluationConcurrency)
|
||||
<-sem
|
||||
})
|
||||
wg.Done()
|
||||
}(r, ri)
|
||||
}
|
||||
|
||||
wg.Wait()
|
||||
@@ -599,10 +597,10 @@ func replayRuleRange(r Rule, ri rangeIterator, bar *pb.ProgressBar, rw remotewri
|
||||
res := make(chan int, int(ri.end.Sub(ri.start)/ri.step)+1)
|
||||
for ri.next() {
|
||||
sem <- struct{}{}
|
||||
start := ri.s
|
||||
end := ri.e
|
||||
wg.Go(func() {
|
||||
n, err := replayRule(r, start, end, rw, replayRuleRetryAttempts)
|
||||
wg.Add(1)
|
||||
|
||||
go func(s, e time.Time) {
|
||||
n, err := replayRule(r, s, e, rw, replayRuleRetryAttempts)
|
||||
if err != nil {
|
||||
logger.Fatalf("rule %q: %s", r, err)
|
||||
}
|
||||
@@ -611,7 +609,8 @@ func replayRuleRange(r Rule, ri rangeIterator, bar *pb.ProgressBar, rw remotewri
|
||||
}
|
||||
res <- n
|
||||
<-sem
|
||||
})
|
||||
wg.Done()
|
||||
}(ri.s, ri.e)
|
||||
}
|
||||
wg.Wait()
|
||||
close(res)
|
||||
@@ -625,9 +624,10 @@ func replayRuleRange(r Rule, ri rangeIterator, bar *pb.ProgressBar, rw remotewri
|
||||
}
|
||||
|
||||
// ExecOnce evaluates all the rules under group for once with given timestamp.
|
||||
func (g *Group) ExecOnce(ctx context.Context, rw remotewrite.RWClient, evalTS time.Time) chan error {
|
||||
func (g *Group) ExecOnce(ctx context.Context, nts func() []notifier.Notifier, rw remotewrite.RWClient, evalTS time.Time) chan error {
|
||||
e := &executor{
|
||||
Rw: rw,
|
||||
Notifiers: nts,
|
||||
notifierHeaders: g.NotifierHeaders,
|
||||
}
|
||||
if len(g.Rules) < 1 {
|
||||
@@ -702,6 +702,7 @@ func (g *Group) getEvalDelay() time.Duration {
|
||||
|
||||
// executor contains group's notify and rw configs
|
||||
type executor struct {
|
||||
Notifiers func() []notifier.Notifier
|
||||
notifierHeaders map[string]string
|
||||
|
||||
Rw remotewrite.RWClient
|
||||
@@ -722,13 +723,14 @@ func (e *executor) execConcurrently(ctx context.Context, rules []Rule, ts time.T
|
||||
sem := make(chan struct{}, concurrency)
|
||||
go func() {
|
||||
wg := sync.WaitGroup{}
|
||||
for i := range rules {
|
||||
rule := rules[i]
|
||||
for _, r := range rules {
|
||||
sem <- struct{}{}
|
||||
wg.Go(func() {
|
||||
res <- e.exec(ctx, rule, ts, resolveDuration, limit)
|
||||
wg.Add(1)
|
||||
go func(r Rule) {
|
||||
res <- e.exec(ctx, r, ts, resolveDuration, limit)
|
||||
<-sem
|
||||
})
|
||||
wg.Done()
|
||||
}(r)
|
||||
}
|
||||
wg.Wait()
|
||||
close(res)
|
||||
@@ -757,7 +759,6 @@ func (e *executor) exec(ctx context.Context, r Rule, ts time.Time, resolveDurati
|
||||
return fmt.Errorf("rule %q: failed to execute: %w", r, err)
|
||||
}
|
||||
|
||||
var errG vmalertutil.ErrGroup
|
||||
if e.Rw != nil {
|
||||
pushToRW := func(tss []prompb.TimeSeries) error {
|
||||
var lastErr error
|
||||
@@ -769,26 +770,31 @@ func (e *executor) exec(ctx context.Context, r Rule, ts time.Time, resolveDurati
|
||||
return lastErr
|
||||
}
|
||||
if err := pushToRW(tss); err != nil {
|
||||
errG.Add(err)
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
ar, ok := r.(*AlertingRule)
|
||||
if !ok {
|
||||
return errG.Err()
|
||||
return nil
|
||||
}
|
||||
|
||||
alerts := ar.alertsToSend(resolveDuration, *resendDelay)
|
||||
if len(alerts) < 1 {
|
||||
return errG.Err()
|
||||
return nil
|
||||
}
|
||||
|
||||
notifierErr := notifier.Send(ctx, alerts, e.notifierHeaders)
|
||||
for err := range notifierErr {
|
||||
if err != nil {
|
||||
errG.Add(fmt.Errorf("rule %q: notifier failure: %w", r, err))
|
||||
}
|
||||
wg := sync.WaitGroup{}
|
||||
errGr := new(vmalertutil.ErrGroup)
|
||||
for _, nt := range e.Notifiers() {
|
||||
wg.Add(1)
|
||||
go func(nt notifier.Notifier) {
|
||||
if err := nt.Send(ctx, alerts, e.notifierHeaders); err != nil {
|
||||
errGr.Add(fmt.Errorf("rule %q: failed to send alerts to addr %q: %w", r, nt.Addr(), err))
|
||||
}
|
||||
wg.Done()
|
||||
}(nt)
|
||||
}
|
||||
|
||||
return errG.Err()
|
||||
wg.Wait()
|
||||
return errGr.Err()
|
||||
}
|
||||
|
||||
@@ -262,7 +262,7 @@ func TestUpdateDuringRandSleep(t *testing.T) {
|
||||
updateCh: make(chan *Group),
|
||||
}
|
||||
g.Init()
|
||||
go g.Start(context.Background(), nil, nil)
|
||||
go g.Start(context.Background(), nil, nil, nil)
|
||||
|
||||
rule1 := AlertingRule{
|
||||
Name: "jobDown",
|
||||
@@ -346,8 +346,7 @@ func TestGroupStart(t *testing.T) {
|
||||
}
|
||||
|
||||
fs := &datasource.FakeQuerier{}
|
||||
fn, cleanup := notifier.InitFakeNotifier()
|
||||
defer cleanup()
|
||||
fn := ¬ifier.FakeNotifier{}
|
||||
|
||||
const evalInterval = time.Millisecond
|
||||
g := NewGroup(groups[0], fs, evalInterval, map[string]string{"cluster": "east-1"})
|
||||
@@ -396,7 +395,7 @@ func TestGroupStart(t *testing.T) {
|
||||
fs.Add(m2)
|
||||
g.Init()
|
||||
go func() {
|
||||
g.Start(context.Background(), nil, fs)
|
||||
g.Start(context.Background(), func() []notifier.Notifier { return []notifier.Notifier{fn} }, nil, fs)
|
||||
close(finished)
|
||||
}()
|
||||
|
||||
@@ -473,10 +472,15 @@ func TestFaultyNotifier(t *testing.T) {
|
||||
r := newTestAlertingRule("instant", 0)
|
||||
r.q = fq
|
||||
|
||||
fn, cleanup := notifier.InitFakeNotifier()
|
||||
defer cleanup()
|
||||
|
||||
e := &executor{}
|
||||
fn := ¬ifier.FakeNotifier{}
|
||||
e := &executor{
|
||||
Notifiers: func() []notifier.Notifier {
|
||||
return []notifier.Notifier{
|
||||
¬ifier.FaultyNotifier{},
|
||||
fn,
|
||||
}
|
||||
},
|
||||
}
|
||||
delay := 5 * time.Second
|
||||
ctx, cancel := context.WithTimeout(context.Background(), delay)
|
||||
defer cancel()
|
||||
@@ -549,7 +553,7 @@ func TestCloseWithEvalInterruption(t *testing.T) {
|
||||
g := NewGroup(groups[0], fq, evalInterval, nil)
|
||||
g.Init()
|
||||
|
||||
go g.Start(context.Background(), nil, nil)
|
||||
go g.Start(context.Background(), nil, nil, nil)
|
||||
|
||||
time.Sleep(evalInterval * 20)
|
||||
|
||||
@@ -567,10 +571,9 @@ func TestCloseWithEvalInterruption(t *testing.T) {
|
||||
|
||||
func TestGroupStartDelay(t *testing.T) {
|
||||
g := &Group{}
|
||||
g.id = uint64(math.MaxUint64 / 10)
|
||||
// interval of 5min and key generate a static delay of 30s
|
||||
g.Interval = time.Minute * 5
|
||||
maxDelay := time.Minute * 5
|
||||
key := uint64(math.MaxUint64 / 10)
|
||||
|
||||
f := func(atS, expS string) {
|
||||
t.Helper()
|
||||
@@ -582,7 +585,7 @@ func TestGroupStartDelay(t *testing.T) {
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
delay := g.delayBeforeStart(at, maxDelay)
|
||||
delay := delayBeforeStart(at, key, g.Interval, g.EvalOffset)
|
||||
gotStart := at.Add(delay)
|
||||
if expTS != gotStart {
|
||||
t.Fatalf("expected to get %v; got %v instead", expTS, gotStart)
|
||||
@@ -603,15 +606,6 @@ func TestGroupStartDelay(t *testing.T) {
|
||||
f("2023-01-01T00:01:00.000+00:00", "2023-01-01T00:03:00.000+00:00")
|
||||
f("2023-01-01T00:03:30.000+00:00", "2023-01-01T00:08:00.000+00:00")
|
||||
f("2023-01-01T00:08:00.000+00:00", "2023-01-01T00:08:00.000+00:00")
|
||||
|
||||
maxDelay = time.Minute * 1
|
||||
g.EvalOffset = nil
|
||||
|
||||
// test group with maxDelay, and offset disabled
|
||||
f("2023-01-01T00:00:00.000+00:00", "2023-01-01T00:00:06.000+00:00")
|
||||
f("2023-01-01T00:00:01.000+00:00", "2023-01-01T00:00:06.000+00:00")
|
||||
f("2023-01-01T00:00:06.100+00:00", "2023-01-01T00:01:06.000+00:00")
|
||||
f("2023-01-01T00:00:11.000+00:00", "2023-01-01T00:01:06.000+00:00")
|
||||
}
|
||||
|
||||
func TestGetPrometheusReqTimestamp(t *testing.T) {
|
||||
|
||||
@@ -2,7 +2,6 @@ package rule
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"strings"
|
||||
"time"
|
||||
@@ -82,37 +81,6 @@ func (rr *RecordingRule) ID() uint64 {
|
||||
return rr.RuleID
|
||||
}
|
||||
|
||||
// ToAPI returns ApiRule representation of rr
|
||||
func (rr *RecordingRule) ToAPI() ApiRule {
|
||||
state := rr.state
|
||||
lastState := state.getLast()
|
||||
r := ApiRule{
|
||||
Type: TypeRecording,
|
||||
DatasourceType: rr.Type.String(),
|
||||
Name: rr.Name,
|
||||
Query: rr.Expr,
|
||||
Labels: rr.Labels,
|
||||
LastEvaluation: lastState.Time,
|
||||
EvaluationTime: lastState.Duration.Seconds(),
|
||||
Health: "ok",
|
||||
LastSamples: lastState.Samples,
|
||||
LastSeriesFetched: lastState.SeriesFetched,
|
||||
MaxUpdates: state.size(),
|
||||
Updates: state.getAll(),
|
||||
|
||||
// encode as strings to avoid rounding
|
||||
ID: fmt.Sprintf("%d", rr.ID()),
|
||||
GroupID: fmt.Sprintf("%d", rr.GroupID),
|
||||
GroupName: rr.GroupName,
|
||||
File: rr.File,
|
||||
}
|
||||
if lastState.Err != nil {
|
||||
r.LastError = lastState.Err.Error()
|
||||
r.Health = "err"
|
||||
}
|
||||
return r
|
||||
}
|
||||
|
||||
// NewRecordingRule creates a new RecordingRule
|
||||
func NewRecordingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rule) *RecordingRule {
|
||||
debug := group.Debug
|
||||
@@ -198,7 +166,7 @@ func (rr *RecordingRule) exec(ctx context.Context, ts time.Time, limit int) ([]p
|
||||
|
||||
defer func() {
|
||||
rr.state.add(curState)
|
||||
if curState.Err != nil && !errors.Is(curState.Err, context.Canceled) {
|
||||
if curState.Err != nil {
|
||||
rr.metrics.errors.Inc()
|
||||
}
|
||||
}()
|
||||
@@ -237,8 +205,7 @@ func (rr *RecordingRule) exec(ctx context.Context, ts time.Time, limit int) ([]p
|
||||
Labels: stringToLabels(k),
|
||||
Samples: []prompb.Sample{
|
||||
{Value: decimal.StaleNaN, Timestamp: ts.UnixNano() / 1e6},
|
||||
},
|
||||
})
|
||||
}})
|
||||
}
|
||||
rr.lastEvaluation = curEvaluation
|
||||
return tss, nil
|
||||
@@ -293,11 +260,6 @@ func (rr *RecordingRule) toTimeSeries(m datasource.Metric) prompb.TimeSeries {
|
||||
}
|
||||
// add extra labels configured by user
|
||||
for k := range rr.Labels {
|
||||
// do not add label with empty value, since it has no meaning.
|
||||
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9984
|
||||
if rr.Labels[k] == "" {
|
||||
continue
|
||||
}
|
||||
existingLabel := promrelabel.GetLabelByName(m.Labels, k)
|
||||
if existingLabel != nil { // there is a conflict between extra and existing label
|
||||
if existingLabel.Value == rr.Labels[k] {
|
||||
|
||||
@@ -21,8 +21,6 @@ type Rule interface {
|
||||
// ID returns unique ID that may be used for
|
||||
// identifying this Rule among others.
|
||||
ID() uint64
|
||||
// ToAPI returns ApiRule representation of Rule
|
||||
ToAPI() ApiRule
|
||||
// exec executes the rule with given context at the given timestamp and limit.
|
||||
// returns an err if number of resulting time series exceeds the limit.
|
||||
exec(ctx context.Context, ts time.Time, limit int) ([]prompb.TimeSeries, error)
|
||||
@@ -70,6 +68,39 @@ type StateEntry struct {
|
||||
Curl string `json:"curl"`
|
||||
}
|
||||
|
||||
// GetLastEntry returns latest stateEntry of rule
|
||||
func GetLastEntry(r Rule) StateEntry {
|
||||
if rule, ok := r.(*AlertingRule); ok {
|
||||
return rule.state.getLast()
|
||||
}
|
||||
if rule, ok := r.(*RecordingRule); ok {
|
||||
return rule.state.getLast()
|
||||
}
|
||||
return StateEntry{}
|
||||
}
|
||||
|
||||
// GetRuleStateSize returns size of rule stateEntry
|
||||
func GetRuleStateSize(r Rule) int {
|
||||
if rule, ok := r.(*AlertingRule); ok {
|
||||
return rule.state.size()
|
||||
}
|
||||
if rule, ok := r.(*RecordingRule); ok {
|
||||
return rule.state.size()
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
// GetAllRuleState returns rule entire stateEntries
|
||||
func GetAllRuleState(r Rule) []StateEntry {
|
||||
if rule, ok := r.(*AlertingRule); ok {
|
||||
return rule.state.getAll()
|
||||
}
|
||||
if rule, ok := r.(*RecordingRule); ok {
|
||||
return rule.state.getAll()
|
||||
}
|
||||
return []StateEntry{}
|
||||
}
|
||||
|
||||
func (s *ruleState) size() int {
|
||||
s.RLock()
|
||||
defer s.RUnlock()
|
||||
|
||||
@@ -65,15 +65,17 @@ func TestRule_stateConcurrent(_ *testing.T) {
|
||||
r := &AlertingRule{state: &ruleState{entries: make([]StateEntry, 20)}}
|
||||
const workers = 50
|
||||
const iterations = 100
|
||||
var wg sync.WaitGroup
|
||||
for range workers {
|
||||
wg.Go(func() {
|
||||
wg := sync.WaitGroup{}
|
||||
wg.Add(workers)
|
||||
for i := 0; i < workers; i++ {
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
for i := 0; i < iterations; i++ {
|
||||
r.state.add(StateEntry{At: time.Now()})
|
||||
r.state.getAll()
|
||||
r.state.getLast()
|
||||
}
|
||||
})
|
||||
}()
|
||||
}
|
||||
wg.Wait()
|
||||
}
|
||||
|
||||
@@ -34,12 +34,11 @@ body {
|
||||
padding-top: 4.5rem;
|
||||
}
|
||||
|
||||
.vm-group {
|
||||
.group-items {
|
||||
cursor: pointer;
|
||||
padding: 5px;
|
||||
margin-top: 5px;
|
||||
position: relative;
|
||||
display: none;
|
||||
}
|
||||
|
||||
.btn svg, .dropdown-item svg {
|
||||
@@ -56,22 +55,14 @@ body {
|
||||
height: 38px;
|
||||
}
|
||||
|
||||
.vm-item:not(.vm-found) {
|
||||
display: none;
|
||||
.group-items:not(:has(.sub-item:not(.d-none))) {
|
||||
display: none !important;
|
||||
}
|
||||
|
||||
.vm-group:has(.vm-item:is(.vm-found)), .vm-group:is(.vm-found) {
|
||||
display: flex;
|
||||
}
|
||||
|
||||
.vm-group:hover {
|
||||
.group-items:hover {
|
||||
background-color: #f8f9fa!important;
|
||||
}
|
||||
|
||||
.vm-group:is(.vm-found) .vm-item {
|
||||
display: table-row;
|
||||
}
|
||||
|
||||
.table {
|
||||
table-layout: fixed;
|
||||
}
|
||||
@@ -120,9 +111,3 @@ textarea.curl-area {
|
||||
.w-60 {
|
||||
width: 60%;
|
||||
}
|
||||
|
||||
.annotations {
|
||||
white-space: pre-wrap;
|
||||
color: gray;
|
||||
word-wrap: break-word;
|
||||
}
|
||||
|
||||
@@ -65,34 +65,32 @@ function getParamURL(key) {
|
||||
return url.searchParams.get(key)
|
||||
}
|
||||
|
||||
function matchText(search, item) {
|
||||
const text = item.innerText.toLowerCase();
|
||||
return text.indexOf(search) >= 0;
|
||||
}
|
||||
|
||||
function filterRules(searchPhrase) {
|
||||
document.querySelectorAll('.vm-group').forEach((group) => {
|
||||
if (!searchPhrase) {
|
||||
group.classList.add('vm-found');
|
||||
return;
|
||||
}
|
||||
for (const item of group.querySelectorAll('.vm-group-search')) {
|
||||
if (matchText(searchPhrase, item)) {
|
||||
group.classList.add('vm-found');
|
||||
return;
|
||||
document.querySelectorAll('.sub-items').forEach((rules) => {
|
||||
let found = false;
|
||||
rules.querySelectorAll('.sub-item').forEach((rule) => {
|
||||
if (searchPhrase) {
|
||||
const ruleName = rule.innerText.toLowerCase();
|
||||
const matches = []
|
||||
const hasValue = ruleName.indexOf(searchPhrase) >= 0;
|
||||
rule.querySelectorAll('.label').forEach((label) => {
|
||||
const text = label.innerText.toLowerCase();
|
||||
if (text.indexOf(searchPhrase) >= 0) {
|
||||
matches.push(text);
|
||||
}
|
||||
});
|
||||
if (!matches.length && !hasValue) {
|
||||
rule.classList.add('d-none');
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
group.classList.remove('vm-found');
|
||||
for (const item of group.querySelectorAll('.vm-item')) {
|
||||
if (matchText(searchPhrase, item)) {
|
||||
item.classList.add('vm-found');
|
||||
continue;
|
||||
}
|
||||
if (Array.from(item.querySelectorAll('.label')).find(l => matchText(searchPhrase, l))) {
|
||||
item.classList.add('vm-found');
|
||||
continue;
|
||||
}
|
||||
item.classList.remove('vm-found');
|
||||
rule.classList.remove('d-none');
|
||||
found = true;
|
||||
});
|
||||
if (found && searchPhrase || !searchPhrase) {
|
||||
rules.classList.remove('d-none');
|
||||
} else {
|
||||
rules.classList.add('d-none');
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
@@ -485,12 +485,6 @@ func templateFuncs() textTpl.FuncMap {
|
||||
|
||||
/* Helpers */
|
||||
|
||||
// now returns the Unix timestamp in seconds at the time of the template evaluation.
|
||||
// For example: {{ (now | toTime).Sub $activeAt }} will return the duration the alert has been active.
|
||||
"now": func() float64 {
|
||||
return float64(time.Now().Unix())
|
||||
},
|
||||
|
||||
// Converts a list of objects to a map with keys arg0, arg1 etc.
|
||||
// This is intended to allow multiple arguments to be passed to templates.
|
||||
"args": func(args ...any) map[string]any {
|
||||
|
||||
@@ -45,7 +45,7 @@ func (eg *ErrGroup) Error() string {
|
||||
return ""
|
||||
}
|
||||
var b strings.Builder
|
||||
fmt.Fprintf(&b, "errors(%d): \n", len(eg.errs))
|
||||
fmt.Fprintf(&b, "errors(%d): ", len(eg.errs))
|
||||
for i, err := range eg.errs {
|
||||
b.WriteString(err.Error())
|
||||
if i != len(eg.errs)-1 {
|
||||
|
||||
@@ -30,8 +30,8 @@ func TestErrGroup(t *testing.T) {
|
||||
}
|
||||
|
||||
f(nil, "")
|
||||
f([]error{errors.New("timeout")}, "errors(1): \ntimeout")
|
||||
f([]error{errors.New("timeout"), errors.New("deadline")}, "errors(2): \ntimeout\ndeadline")
|
||||
f([]error{errors.New("timeout")}, "errors(1): timeout")
|
||||
f([]error{errors.New("timeout"), errors.New("deadline")}, "errors(2): timeout\ndeadline")
|
||||
}
|
||||
|
||||
// TestErrGroupConcurrent supposed to test concurrent
|
||||
|
||||
@@ -29,9 +29,7 @@ var (
|
||||
{"api/v1/rules", "list all loaded groups and rules"},
|
||||
{"api/v1/alerts", "list all active alerts"},
|
||||
{"api/v1/notifiers", "list all notifiers"},
|
||||
{fmt.Sprintf("api/v1/alert?%s=<int>&%s=<int>", rule.ParamGroupID, rule.ParamAlertID), "get alert status by group and alert ID"},
|
||||
{fmt.Sprintf("api/v1/rule?%s=<int>&%s=<int>", rule.ParamGroupID, rule.ParamRuleID), "get rule status by group and rule ID"},
|
||||
{fmt.Sprintf("api/v1/group?%s=<int>", rule.ParamGroupID), "get group status by group ID"},
|
||||
{fmt.Sprintf("api/v1/alert?%s=<int>&%s=<int>", paramGroupID, paramAlertID), "get alert status by group and alert ID"},
|
||||
}
|
||||
systemLinks = [][2]string{
|
||||
{"vmalert/groups", "UI"},
|
||||
@@ -47,8 +45,8 @@ var (
|
||||
{Name: "Docs", URL: "https://docs.victoriametrics.com/victoriametrics/vmalert/"},
|
||||
}
|
||||
ruleTypeMap = map[string]string{
|
||||
"alert": rule.TypeAlerting,
|
||||
"record": rule.TypeRecording,
|
||||
"alert": ruleTypeAlerting,
|
||||
"record": ruleTypeRecording,
|
||||
}
|
||||
)
|
||||
|
||||
@@ -114,7 +112,7 @@ func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
|
||||
case "/rules":
|
||||
// Grafana makes an extra request to `/rules`
|
||||
// handler in addition to `/api/v1/rules` calls in alerts UI
|
||||
var data []*rule.ApiGroup
|
||||
var data []*apiGroup
|
||||
rf, err := newRulesFilter(r)
|
||||
if err != nil {
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
@@ -180,14 +178,14 @@ func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
|
||||
w.Write(data)
|
||||
return true
|
||||
case "/vmalert/api/v1/rule", "/api/v1/rule":
|
||||
apiRule, err := rh.getRule(r)
|
||||
rule, err := rh.getRule(r)
|
||||
if err != nil {
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return true
|
||||
}
|
||||
rwu := rule.ApiRuleWithUpdates{
|
||||
ApiRule: apiRule,
|
||||
StateUpdates: apiRule.Updates,
|
||||
rwu := apiRuleWithUpdates{
|
||||
apiRule: rule,
|
||||
StateUpdates: rule.Updates,
|
||||
}
|
||||
data, err := json.Marshal(rwu)
|
||||
if err != nil {
|
||||
@@ -197,20 +195,6 @@ func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.Write(data)
|
||||
return true
|
||||
case "/vmalert/api/v1/group", "/api/v1/group":
|
||||
group, err := rh.getGroup(r)
|
||||
if err != nil {
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return true
|
||||
}
|
||||
data, err := json.Marshal(group)
|
||||
if err != nil {
|
||||
httpserver.Errorf(w, r, "failed to marshal group: %s", err)
|
||||
return true
|
||||
}
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.Write(data)
|
||||
return true
|
||||
case "/-/reload":
|
||||
if !httpserver.CheckAuthFlag(w, r, reloadAuthKey) {
|
||||
return true
|
||||
@@ -225,42 +209,30 @@ func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
|
||||
}
|
||||
}
|
||||
|
||||
func (rh *requestHandler) getGroup(r *http.Request) (*rule.ApiGroup, error) {
|
||||
groupID, err := strconv.ParseUint(r.FormValue(rule.ParamGroupID), 10, 64)
|
||||
func (rh *requestHandler) getRule(r *http.Request) (apiRule, error) {
|
||||
groupID, err := strconv.ParseUint(r.FormValue(paramGroupID), 10, 64)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to read %q param: %w", rule.ParamGroupID, err)
|
||||
return apiRule{}, fmt.Errorf("failed to read %q param: %w", paramGroupID, err)
|
||||
}
|
||||
obj, err := rh.m.groupAPI(groupID)
|
||||
ruleID, err := strconv.ParseUint(r.FormValue(paramRuleID), 10, 64)
|
||||
if err != nil {
|
||||
return nil, errResponse(err, http.StatusNotFound)
|
||||
}
|
||||
return obj, nil
|
||||
}
|
||||
|
||||
func (rh *requestHandler) getRule(r *http.Request) (rule.ApiRule, error) {
|
||||
groupID, err := strconv.ParseUint(r.FormValue(rule.ParamGroupID), 10, 64)
|
||||
if err != nil {
|
||||
return rule.ApiRule{}, fmt.Errorf("failed to read %q param: %w", rule.ParamGroupID, err)
|
||||
}
|
||||
ruleID, err := strconv.ParseUint(r.FormValue(rule.ParamRuleID), 10, 64)
|
||||
if err != nil {
|
||||
return rule.ApiRule{}, fmt.Errorf("failed to read %q param: %w", rule.ParamRuleID, err)
|
||||
return apiRule{}, fmt.Errorf("failed to read %q param: %w", paramRuleID, err)
|
||||
}
|
||||
obj, err := rh.m.ruleAPI(groupID, ruleID)
|
||||
if err != nil {
|
||||
return rule.ApiRule{}, errResponse(err, http.StatusNotFound)
|
||||
return apiRule{}, errResponse(err, http.StatusNotFound)
|
||||
}
|
||||
return obj, nil
|
||||
}
|
||||
|
||||
func (rh *requestHandler) getAlert(r *http.Request) (*rule.ApiAlert, error) {
|
||||
groupID, err := strconv.ParseUint(r.FormValue(rule.ParamGroupID), 10, 64)
|
||||
func (rh *requestHandler) getAlert(r *http.Request) (*apiAlert, error) {
|
||||
groupID, err := strconv.ParseUint(r.FormValue(paramGroupID), 10, 64)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to read %q param: %w", rule.ParamGroupID, err)
|
||||
return nil, fmt.Errorf("failed to read %q param: %w", paramGroupID, err)
|
||||
}
|
||||
alertID, err := strconv.ParseUint(r.FormValue(rule.ParamAlertID), 10, 64)
|
||||
alertID, err := strconv.ParseUint(r.FormValue(paramAlertID), 10, 64)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to read %q param: %w", rule.ParamAlertID, err)
|
||||
return nil, fmt.Errorf("failed to read %q param: %w", paramAlertID, err)
|
||||
}
|
||||
a, err := rh.m.alertAPI(groupID, alertID)
|
||||
if err != nil {
|
||||
@@ -272,7 +244,7 @@ func (rh *requestHandler) getAlert(r *http.Request) (*rule.ApiAlert, error) {
|
||||
type listGroupsResponse struct {
|
||||
Status string `json:"status"`
|
||||
Data struct {
|
||||
Groups []*rule.ApiGroup `json:"groups"`
|
||||
Groups []*apiGroup `json:"groups"`
|
||||
} `json:"data"`
|
||||
}
|
||||
|
||||
@@ -338,19 +310,19 @@ func (rf *rulesFilter) matchesGroup(group *rule.Group) bool {
|
||||
return true
|
||||
}
|
||||
|
||||
func (rh *requestHandler) groups(rf *rulesFilter) []*rule.ApiGroup {
|
||||
func (rh *requestHandler) groups(rf *rulesFilter) []*apiGroup {
|
||||
rh.m.groupsMu.RLock()
|
||||
defer rh.m.groupsMu.RUnlock()
|
||||
|
||||
groups := make([]*rule.ApiGroup, 0)
|
||||
groups := make([]*apiGroup, 0)
|
||||
for _, group := range rh.m.groups {
|
||||
if !rf.matchesGroup(group) {
|
||||
continue
|
||||
}
|
||||
g := group.ToAPI()
|
||||
g := groupToAPI(group)
|
||||
// the returned list should always be non-nil
|
||||
// https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4221
|
||||
filteredRules := make([]rule.ApiRule, 0)
|
||||
filteredRules := make([]apiRule, 0)
|
||||
for _, rule := range g.Rules {
|
||||
if rf.ruleType != "" && rf.ruleType != rule.Type {
|
||||
continue
|
||||
@@ -378,7 +350,7 @@ func (rh *requestHandler) groups(rf *rulesFilter) []*rule.ApiGroup {
|
||||
groups = append(groups, g)
|
||||
}
|
||||
// sort list of groups for deterministic output
|
||||
slices.SortFunc(groups, func(a, b *rule.ApiGroup) int {
|
||||
slices.SortFunc(groups, func(a, b *apiGroup) int {
|
||||
if a.Name != b.Name {
|
||||
return strings.Compare(a.Name, b.Name)
|
||||
}
|
||||
@@ -403,32 +375,32 @@ func (rh *requestHandler) listGroups(rf *rulesFilter) ([]byte, error) {
|
||||
type listAlertsResponse struct {
|
||||
Status string `json:"status"`
|
||||
Data struct {
|
||||
Alerts []*rule.ApiAlert `json:"alerts"`
|
||||
Alerts []*apiAlert `json:"alerts"`
|
||||
} `json:"data"`
|
||||
}
|
||||
|
||||
func (rh *requestHandler) groupAlerts() []rule.GroupAlerts {
|
||||
func (rh *requestHandler) groupAlerts() []groupAlerts {
|
||||
rh.m.groupsMu.RLock()
|
||||
defer rh.m.groupsMu.RUnlock()
|
||||
|
||||
var gAlerts []rule.GroupAlerts
|
||||
for _, group := range rh.m.groups {
|
||||
var alerts []*rule.ApiAlert
|
||||
g := group.ToAPI()
|
||||
var gAlerts []groupAlerts
|
||||
for _, g := range rh.m.groups {
|
||||
var alerts []*apiAlert
|
||||
for _, r := range g.Rules {
|
||||
if r.Type != rule.TypeAlerting {
|
||||
a, ok := r.(*rule.AlertingRule)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
alerts = append(alerts, r.Alerts...)
|
||||
alerts = append(alerts, ruleToAPIAlert(a)...)
|
||||
}
|
||||
if len(alerts) > 0 {
|
||||
gAlerts = append(gAlerts, rule.GroupAlerts{
|
||||
Group: g,
|
||||
gAlerts = append(gAlerts, groupAlerts{
|
||||
Group: groupToAPI(g),
|
||||
Alerts: alerts,
|
||||
})
|
||||
}
|
||||
}
|
||||
slices.SortFunc(gAlerts, func(a, b rule.GroupAlerts) int {
|
||||
slices.SortFunc(gAlerts, func(a, b groupAlerts) int {
|
||||
return strings.Compare(a.Group.Name, b.Group.Name)
|
||||
})
|
||||
return gAlerts
|
||||
@@ -439,22 +411,22 @@ func (rh *requestHandler) listAlerts(rf *rulesFilter) ([]byte, error) {
|
||||
defer rh.m.groupsMu.RUnlock()
|
||||
|
||||
lr := listAlertsResponse{Status: "success"}
|
||||
lr.Data.Alerts = make([]*rule.ApiAlert, 0)
|
||||
lr.Data.Alerts = make([]*apiAlert, 0)
|
||||
for _, group := range rh.m.groups {
|
||||
if !rf.matchesGroup(group) {
|
||||
continue
|
||||
}
|
||||
g := group.ToAPI()
|
||||
for _, r := range g.Rules {
|
||||
if r.Type != rule.TypeAlerting {
|
||||
for _, r := range group.Rules {
|
||||
a, ok := r.(*rule.AlertingRule)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
lr.Data.Alerts = append(lr.Data.Alerts, r.Alerts...)
|
||||
lr.Data.Alerts = append(lr.Data.Alerts, ruleToAPIAlert(a)...)
|
||||
}
|
||||
}
|
||||
|
||||
// sort list of alerts for deterministic output
|
||||
slices.SortFunc(lr.Data.Alerts, func(a, b *rule.ApiAlert) int {
|
||||
slices.SortFunc(lr.Data.Alerts, func(a, b *apiAlert) int {
|
||||
return strings.Compare(a.ID, b.ID)
|
||||
})
|
||||
|
||||
@@ -471,7 +443,7 @@ func (rh *requestHandler) listAlerts(rf *rulesFilter) ([]byte, error) {
|
||||
type listNotifiersResponse struct {
|
||||
Status string `json:"status"`
|
||||
Data struct {
|
||||
Notifiers []*notifier.ApiNotifier `json:"notifiers"`
|
||||
Notifiers []*apiNotifier `json:"notifiers"`
|
||||
} `json:"data"`
|
||||
}
|
||||
|
||||
@@ -479,20 +451,19 @@ func (rh *requestHandler) listNotifiers() ([]byte, error) {
|
||||
targets := notifier.GetTargets()
|
||||
|
||||
lr := listNotifiersResponse{Status: "success"}
|
||||
lr.Data.Notifiers = make([]*notifier.ApiNotifier, 0)
|
||||
lr.Data.Notifiers = make([]*apiNotifier, 0)
|
||||
for protoName, protoTargets := range targets {
|
||||
nr := ¬ifier.ApiNotifier{
|
||||
Kind: protoName,
|
||||
Targets: make([]*notifier.ApiTarget, 0, len(protoTargets)),
|
||||
notifier := &apiNotifier{
|
||||
Kind: string(protoName),
|
||||
Targets: make([]*apiTarget, 0, len(protoTargets)),
|
||||
}
|
||||
for _, target := range protoTargets {
|
||||
nr.Targets = append(nr.Targets, ¬ifier.ApiTarget{
|
||||
Address: target.Addr(),
|
||||
Labels: target.Labels.ToMap(),
|
||||
LastError: target.LastError(),
|
||||
notifier.Targets = append(notifier.Targets, &apiTarget{
|
||||
Address: target.Addr(),
|
||||
Labels: target.Labels.ToMap(),
|
||||
})
|
||||
}
|
||||
lr.Data.Notifiers = append(lr.Data.Notifiers, nr)
|
||||
lr.Data.Notifiers = append(lr.Data.Notifiers, notifier)
|
||||
}
|
||||
|
||||
b, err := json.Marshal(lr)
|
||||
|
||||
@@ -8,8 +8,6 @@
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/tpl"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/vmalertutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/rule"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
|
||||
) %}
|
||||
|
||||
{% func Controls(prefix, currentIcon, currentText string, icons, filters map[string]string, search bool) %}
|
||||
@@ -79,8 +77,6 @@
|
||||
{% func Welcome(r *http.Request) %}
|
||||
{%= tpl.Header(r, navItems, "vmalert", getLastConfigError()) %}
|
||||
<p>
|
||||
Version {%s buildinfo.Version %} <br>
|
||||
|
||||
API:<br>
|
||||
{% for _, p := range apiLinks %}
|
||||
{%code p, doc := p[0], p[1] %}
|
||||
@@ -97,7 +93,7 @@
|
||||
{%= tpl.Footer(r) %}
|
||||
{% endfunc %}
|
||||
|
||||
{% func ListGroups(r *http.Request, groups []*rule.ApiGroup, filter string) %}
|
||||
{% func ListGroups(r *http.Request, groups []*apiGroup, filter string) %}
|
||||
{%code
|
||||
prefix := vmalertutil.Prefix(r.URL.Path)
|
||||
filters := map[string]string{
|
||||
@@ -117,17 +113,14 @@
|
||||
{%= Controls(prefix, currentIcon, currentText, icons, filters, true) %}
|
||||
{% if len(groups) > 0 %}
|
||||
{% for _, g := range groups %}
|
||||
<div id="group-{%s g.ID %}" class="w-100 border-0 flex-column vm-group{% if g.Unhealthy > 0 %} alert-danger{% endif %}">
|
||||
<div id="group-{%s g.ID %}" class="d-flex w-100 border-0 flex-column group-items{% if g.Unhealthy > 0 %} alert-danger{% endif %}">
|
||||
<span class="d-flex justify-content-between">
|
||||
<a
|
||||
class="vm-group-search"
|
||||
href="#group-{%s g.ID %}"
|
||||
>{%s g.Name %}{% if g.Type != "prometheus" %} ({%s g.Type %}){% endif %} (every {%f.0 g.Interval %}s) #</a>
|
||||
<a href="#group-{%s g.ID %}">{%s g.Name %}{% if g.Type != "prometheus" %} ({%s g.Type %}){% endif %} (every {%f.0 g.Interval %}s) #</a>
|
||||
<span
|
||||
class="flex-grow-1 d-flex justify-content-end"
|
||||
role="button"
|
||||
data-bs-toggle="collapse"
|
||||
data-bs-target="#item-{%s g.ID %}"
|
||||
data-bs-target="#sub-{%s g.ID %}"
|
||||
>
|
||||
<span class="d-flex gap-2">
|
||||
{% if g.Unhealthy > 0 %}<span class="badge bg-danger" title="Number of rules with status Error">{%d g.Unhealthy %}</span> {% endif %}
|
||||
@@ -140,9 +133,9 @@
|
||||
class="d-flex flex-column row-gap-2 mb-2"
|
||||
role="button"
|
||||
data-bs-toggle="collapse"
|
||||
data-bs-target="#item-{%s g.ID %}"
|
||||
data-bs-target="#sub-{%s g.ID %}"
|
||||
>
|
||||
<span class="fs-6 text-start vm-group-search w-100 fw-lighter">{%s g.File %}</span>
|
||||
<span class="fs-6 text-start w-100 fw-lighter">{%s g.File %}</span>
|
||||
{% if len(g.Params) > 0 %}
|
||||
<span class="fs-6 text-start w-100 d-flex justify-content-between fw-lighter">
|
||||
<span>Extra params</span>
|
||||
@@ -164,7 +157,7 @@
|
||||
</span>
|
||||
{% endif %}
|
||||
</span>
|
||||
<div class="collapse" id="item-{%s g.ID %}">
|
||||
<div class="collapse sub-items" id="sub-{%s g.ID %}">
|
||||
<table class="table table-striped table-hover table-sm">
|
||||
<thead>
|
||||
<tr>
|
||||
@@ -175,7 +168,7 @@
|
||||
</thead>
|
||||
<tbody>
|
||||
{% for _, r := range g.Rules %}
|
||||
<tr class="vm-item{% if r.LastError != "" %} alert-danger{% endif %}">
|
||||
<tr class="sub-item{% if r.LastError != "" %} alert-danger{% endif %}">
|
||||
<td>
|
||||
<div class="row">
|
||||
<div class="col-12 mb-2">
|
||||
@@ -212,12 +205,7 @@
|
||||
</div>
|
||||
</td>
|
||||
<td class="text-center">{%d r.LastSamples %}</td>
|
||||
<td class="text-center">{% if r.LastEvaluation.IsZero() %}
|
||||
Never
|
||||
{% else %}
|
||||
{%f.3 time.Since(r.LastEvaluation).Seconds() %}s ago
|
||||
{% endif %}
|
||||
</td>
|
||||
<td class="text-center">{%f.3 time.Since(r.LastEvaluation).Seconds() %}s ago</td>
|
||||
</tr>
|
||||
{% endfor %}
|
||||
</tbody>
|
||||
@@ -234,7 +222,7 @@
|
||||
{% endfunc %}
|
||||
|
||||
|
||||
{% func ListAlerts(r *http.Request, groupAlerts []rule.GroupAlerts) %}
|
||||
{% func ListAlerts(r *http.Request, groupAlerts []groupAlerts) %}
|
||||
{%code prefix := vmalertutil.Prefix(r.URL.Path) %}
|
||||
{%= tpl.Header(r, navItems, "Alerts", getLastConfigError()) %}
|
||||
{%= Controls(prefix, "", "", nil, nil, true) %}
|
||||
@@ -243,7 +231,7 @@
|
||||
{%code
|
||||
g := ga.Group
|
||||
var keys []string
|
||||
alertsByRule := make(map[string][]*rule.ApiAlert)
|
||||
alertsByRule := make(map[string][]*apiAlert)
|
||||
for _, alert := range ga.Alerts {
|
||||
if len(alertsByRule[alert.RuleID]) < 1 {
|
||||
keys = append(keys, alert.RuleID)
|
||||
@@ -252,14 +240,14 @@
|
||||
}
|
||||
sort.Strings(keys)
|
||||
%}
|
||||
<div class="w-100 flex-column vm-group alert-danger">
|
||||
<div class="d-flex w-100 flex-column group-items alert-danger">
|
||||
<span id="group-{%s g.ID %}" class="d-flex justify-content-between">
|
||||
<a href="#group-{%s g.ID %}">{%s g.Name %}{% if g.Type != "prometheus" %} ({%s g.Type %}){% endif %}</a>
|
||||
<span
|
||||
class="flex-grow-1 d-flex justify-content-end"
|
||||
role="button"
|
||||
data-bs-toggle="collapse"
|
||||
data-bs-target="#item-{%s g.ID %}"
|
||||
data-bs-target="#sub-{%s g.ID %}"
|
||||
>
|
||||
<span class="badge bg-danger" title="Number of active alerts">{%d len(ga.Alerts) %}</span>
|
||||
</span>
|
||||
@@ -269,10 +257,10 @@
|
||||
class="fs-6 text-start w-100 fw-lighter"
|
||||
role="button"
|
||||
data-bs-toggle="collapse"
|
||||
data-bs-target="#item-{%s g.ID %}"
|
||||
data-bs-target="#sub-{%s g.ID %}"
|
||||
>{%s g.File %}</span>
|
||||
</span>
|
||||
<div class="collapse" id="item-{%s g.ID %}">
|
||||
<div class="collapse sub-items" id="sub-{%s g.ID %}">
|
||||
{% for _, ruleID := range keys %}
|
||||
{%code
|
||||
defaultAR := alertsByRule[ruleID][0]
|
||||
@@ -283,7 +271,7 @@
|
||||
sort.Strings(labelKeys)
|
||||
%}
|
||||
<br>
|
||||
<div class="vm-item">
|
||||
<div class="sub-item">
|
||||
<b>alert:</b> {%s defaultAR.Name %} ({%d len(alertsByRule[ruleID]) %})
|
||||
| <span><a target="_blank" href="{%s defaultAR.SourceLink %}">Source</a></span>
|
||||
<br>
|
||||
@@ -348,20 +336,20 @@
|
||||
typeK, ns := keys[i], targets[notifier.TargetType(keys[i])]
|
||||
count := len(ns)
|
||||
%}
|
||||
<div class="w-100 flex-column vm-group">
|
||||
<div class="d-flex w-100 flex-column group-items">
|
||||
<span class="d-flex justify-content-between" id="group-{%s typeK %}">
|
||||
<a href="#group-{%s typeK %}">{%s typeK %} ({%d count %})</a>
|
||||
<span
|
||||
class="flex-grow-1"
|
||||
role="button"
|
||||
data-bs-toggle="collapse"
|
||||
data-bs-target="#item-{%s typeK %}"
|
||||
data-bs-target="#sub-{%s typeK %}"
|
||||
></span>
|
||||
</span>
|
||||
<div id="item-{%s typeK %}" class="collapse show">
|
||||
<div id="sub-{%s typeK %}" class="collapse show sub-items">
|
||||
<table class="table table-striped table-hover table-sm">
|
||||
<thead>
|
||||
<tr class="vm-item">
|
||||
<tr class="sub-item">
|
||||
<th scope="col">Labels</th>
|
||||
<th scope="col">Address</th>
|
||||
</tr>
|
||||
@@ -390,7 +378,7 @@
|
||||
{%= tpl.Footer(r) %}
|
||||
{% endfunc %}
|
||||
|
||||
{% func Alert(r *http.Request, alert *rule.ApiAlert) %}
|
||||
{% func Alert(r *http.Request, alert *apiAlert) %}
|
||||
{%code prefix := vmalertutil.Prefix(r.URL.Path) %}
|
||||
{%= tpl.Header(r, navItems, "", getLastConfigError()) %}
|
||||
{%code
|
||||
@@ -446,7 +434,7 @@
|
||||
<div class="col">
|
||||
{% for _, k := range annotationKeys %}
|
||||
<b>{%s k %}:</b><br>
|
||||
<p class="annotations">{%s alert.Annotations[k] %}</p>
|
||||
<p>{%s alert.Annotations[k] %}</p>
|
||||
{% endfor %}
|
||||
</div>
|
||||
</div>
|
||||
@@ -476,7 +464,7 @@
|
||||
{% endfunc %}
|
||||
|
||||
|
||||
{% func RuleDetails(r *http.Request, rule rule.ApiRule) %}
|
||||
{% func RuleDetails(r *http.Request, rule apiRule) %}
|
||||
{%code prefix := vmalertutil.Prefix(r.URL.Path) %}
|
||||
{%= tpl.Header(r, navItems, "", getLastConfigError()) %}
|
||||
{%code
|
||||
@@ -560,7 +548,7 @@
|
||||
<div class="col">
|
||||
{% for _, k := range annotationKeys %}
|
||||
<b>{%s k %}:</b><br>
|
||||
<p class="annotations">{%s rule.Annotations[k] %}</p>
|
||||
<p>{%s rule.Annotations[k] %}</p>
|
||||
{% endfor %}
|
||||
</div>
|
||||
</div>
|
||||
@@ -605,11 +593,11 @@
|
||||
<table class="table table-striped table-hover table-sm">
|
||||
<thead>
|
||||
<tr>
|
||||
<th scope="col" title="The time when the rule was executed">Updated at</th>
|
||||
<th scope="col" title="The time when event was created">Updated at</th>
|
||||
<th scope="col" class="w-10 text-center" title="How many series expression returns. Each series will represent an alert.">Series returned</th>
|
||||
{% if seriesFetchedEnabled %}<th scope="col" class="w-10 text-center" title="How many series were scanned by datasource during the evaluation">Series fetched</th>{% endif %}
|
||||
<th scope="col" class="w-10 text-center" title="How many seconds request took">Duration</th>
|
||||
<th scope="col" class="text-center" title="The time used in execution query request">Execution timestamp</th>
|
||||
<th scope="col" class="text-center" title="Time used for rule execution">Executed at</th>
|
||||
<th scope="col" class="text-center" title="cURL command with request example">cURL</th>
|
||||
</tr>
|
||||
</thead>
|
||||
@@ -661,7 +649,7 @@
|
||||
<span class="badge bg-warning text-dark" title="This firing state is kept because of `keep_firing_for`">stabilizing</span>
|
||||
{% endfunc %}
|
||||
|
||||
{% func seriesFetchedWarn(prefix string, r rule.ApiRule) %}
|
||||
{% func seriesFetchedWarn(prefix string, r apiRule) %}
|
||||
{% if isNoMatch(r) %}
|
||||
<svg
|
||||
data-bs-toggle="tooltip"
|
||||
@@ -675,7 +663,7 @@
|
||||
{% endfunc %}
|
||||
|
||||
{%code
|
||||
func isNoMatch (r rule.ApiRule) bool {
|
||||
func isNoMatch (r apiRule) bool {
|
||||
return r.LastSamples == 0 && r.LastSeriesFetched != nil && *r.LastSeriesFetched == 0
|
||||
}
|
||||
%}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -23,12 +23,8 @@ func TestHandler(t *testing.T) {
|
||||
Timestamps: []int64{0},
|
||||
})
|
||||
m := &manager{groups: map[uint64]*rule.Group{}}
|
||||
_, cleanup := notifier.InitFakeNotifier()
|
||||
defer cleanup()
|
||||
|
||||
var ar *rule.AlertingRule
|
||||
var rr *rule.RecordingRule
|
||||
var groupIDs []uint64
|
||||
for _, dsType := range []string{"prometheus", "", "graphite"} {
|
||||
g := rule.NewGroup(config.Group{
|
||||
Name: "group",
|
||||
@@ -48,10 +44,8 @@ func TestHandler(t *testing.T) {
|
||||
}, fq, 1*time.Minute, nil)
|
||||
ar = g.Rules[0].(*rule.AlertingRule)
|
||||
rr = g.Rules[1].(*rule.RecordingRule)
|
||||
g.ExecOnce(context.Background(), nil, time.Time{})
|
||||
id := g.CreateID()
|
||||
m.groups[id] = g
|
||||
groupIDs = append(groupIDs, id)
|
||||
g.ExecOnce(context.Background(), func() []notifier.Notifier { return nil }, nil, time.Time{})
|
||||
m.groups[g.CreateID()] = g
|
||||
}
|
||||
rh := &requestHandler{m: m}
|
||||
|
||||
@@ -88,22 +82,22 @@ func TestHandler(t *testing.T) {
|
||||
})
|
||||
|
||||
t.Run("/vmalert/rule", func(t *testing.T) {
|
||||
a := ar.ToAPI()
|
||||
a := ruleToAPI(ar)
|
||||
getResp(t, ts.URL+"/vmalert/"+a.WebLink(), nil, 200)
|
||||
r := rr.ToAPI()
|
||||
r := ruleToAPI(rr)
|
||||
getResp(t, ts.URL+"/vmalert/"+r.WebLink(), nil, 200)
|
||||
})
|
||||
t.Run("/vmalert/alert", func(t *testing.T) {
|
||||
alerts := ar.AlertsToAPI()
|
||||
alerts := ruleToAPIAlert(ar)
|
||||
for _, a := range alerts {
|
||||
getResp(t, ts.URL+"/vmalert/"+a.WebLink(), nil, 200)
|
||||
}
|
||||
})
|
||||
t.Run("/vmalert/rule?badParam", func(t *testing.T) {
|
||||
params := fmt.Sprintf("?%s=0&%s=1", rule.ParamGroupID, rule.ParamRuleID)
|
||||
params := fmt.Sprintf("?%s=0&%s=1", paramGroupID, paramRuleID)
|
||||
getResp(t, ts.URL+"/vmalert/rule"+params, nil, 404)
|
||||
|
||||
params = fmt.Sprintf("?%s=1&%s=0", rule.ParamGroupID, rule.ParamRuleID)
|
||||
params = fmt.Sprintf("?%s=1&%s=0", paramGroupID, paramRuleID)
|
||||
getResp(t, ts.URL+"/vmalert/rule"+params, nil, 404)
|
||||
})
|
||||
|
||||
@@ -130,14 +124,14 @@ func TestHandler(t *testing.T) {
|
||||
}
|
||||
})
|
||||
t.Run("/api/v1/alert?alertID&groupID", func(t *testing.T) {
|
||||
expAlert := rule.NewAlertAPI(ar, ar.GetAlerts()[0])
|
||||
alert := &rule.ApiAlert{}
|
||||
expAlert := newAlertAPI(ar, ar.GetAlerts()[0])
|
||||
alert := &apiAlert{}
|
||||
getResp(t, ts.URL+"/"+expAlert.APILink(), alert, 200)
|
||||
if !reflect.DeepEqual(alert, expAlert) {
|
||||
t.Fatalf("expected %v is equal to %v", alert, expAlert)
|
||||
}
|
||||
|
||||
alert = &rule.ApiAlert{}
|
||||
alert = &apiAlert{}
|
||||
getResp(t, ts.URL+"/vmalert/"+expAlert.APILink(), alert, 200)
|
||||
if !reflect.DeepEqual(alert, expAlert) {
|
||||
t.Fatalf("expected %v is equal to %v", alert, expAlert)
|
||||
@@ -145,16 +139,16 @@ func TestHandler(t *testing.T) {
|
||||
})
|
||||
|
||||
t.Run("/api/v1/alert?badParams", func(t *testing.T) {
|
||||
params := fmt.Sprintf("?%s=0&%s=1", rule.ParamGroupID, rule.ParamAlertID)
|
||||
params := fmt.Sprintf("?%s=0&%s=1", paramGroupID, paramAlertID)
|
||||
getResp(t, ts.URL+"/api/v1/alert"+params, nil, 404)
|
||||
getResp(t, ts.URL+"/vmalert/api/v1/alert"+params, nil, 404)
|
||||
|
||||
params = fmt.Sprintf("?%s=1&%s=0", rule.ParamGroupID, rule.ParamAlertID)
|
||||
params = fmt.Sprintf("?%s=1&%s=0", paramGroupID, paramAlertID)
|
||||
getResp(t, ts.URL+"/api/v1/alert"+params, nil, 404)
|
||||
getResp(t, ts.URL+"/vmalert/api/v1/alert"+params, nil, 404)
|
||||
|
||||
// bad request, alertID is missing
|
||||
params = fmt.Sprintf("?%s=1", rule.ParamGroupID)
|
||||
params = fmt.Sprintf("?%s=1", paramGroupID)
|
||||
getResp(t, ts.URL+"/api/v1/alert"+params, nil, 400)
|
||||
getResp(t, ts.URL+"/vmalert/api/v1/alert"+params, nil, 400)
|
||||
})
|
||||
@@ -173,42 +167,27 @@ func TestHandler(t *testing.T) {
|
||||
}
|
||||
})
|
||||
t.Run("/api/v1/rule?ruleID&groupID", func(t *testing.T) {
|
||||
expRule := ar.ToAPI()
|
||||
gotRule := rule.ApiRule{}
|
||||
expRule := ruleToAPI(ar)
|
||||
gotRule := apiRule{}
|
||||
getResp(t, ts.URL+"/"+expRule.APILink(), &gotRule, 200)
|
||||
|
||||
if expRule.ID != gotRule.ID {
|
||||
t.Fatalf("expected to get Rule %q; got %q instead", expRule.ID, gotRule.ID)
|
||||
}
|
||||
|
||||
gotRule = rule.ApiRule{}
|
||||
gotRule = apiRule{}
|
||||
getResp(t, ts.URL+"/vmalert/"+expRule.APILink(), &gotRule, 200)
|
||||
|
||||
if expRule.ID != gotRule.ID {
|
||||
t.Fatalf("expected to get Rule %q; got %q instead", expRule.ID, gotRule.ID)
|
||||
}
|
||||
|
||||
gotRuleWithUpdates := rule.ApiRuleWithUpdates{}
|
||||
gotRuleWithUpdates := apiRuleWithUpdates{}
|
||||
getResp(t, ts.URL+"/"+expRule.APILink(), &gotRuleWithUpdates, 200)
|
||||
if len(gotRuleWithUpdates.StateUpdates) < 1 {
|
||||
t.Fatalf("expected %+v to have state updates field not empty", gotRuleWithUpdates.StateUpdates)
|
||||
}
|
||||
})
|
||||
t.Run("/api/v1/group?groupID", func(t *testing.T) {
|
||||
id := groupIDs[0]
|
||||
g := m.groups[id]
|
||||
expGroup := g.ToAPI()
|
||||
gotGroup := rule.ApiGroup{}
|
||||
getResp(t, ts.URL+"/"+expGroup.APILink(), &gotGroup, 200)
|
||||
if expGroup.ID != gotGroup.ID {
|
||||
t.Fatalf("expected to get Group %q; got %q instead", expGroup.ID, gotGroup.ID)
|
||||
}
|
||||
gotGroup = rule.ApiGroup{}
|
||||
getResp(t, ts.URL+"/vmalert/"+expGroup.APILink(), &gotGroup, 200)
|
||||
if expGroup.ID != gotGroup.ID {
|
||||
t.Fatalf("expected to get Group %q; got %q instead", expGroup.ID, gotGroup.ID)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("/api/v1/rules&filters", func(t *testing.T) {
|
||||
check := func(url string, statusCode, expGroups, expRules int) {
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
package rule
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
@@ -8,28 +8,79 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/rule"
|
||||
)
|
||||
|
||||
const (
|
||||
// ParamGroupID is group id key in url parameter
|
||||
ParamGroupID = "group_id"
|
||||
paramGroupID = "group_id"
|
||||
// ParamAlertID is alert id key in url parameter
|
||||
ParamAlertID = "alert_id"
|
||||
paramAlertID = "alert_id"
|
||||
// ParamRuleID is rule id key in url parameter
|
||||
ParamRuleID = "rule_id"
|
||||
|
||||
// TypeRecording is a RecordingRule type
|
||||
TypeRecording = "recording"
|
||||
// TypeAlerting is an AlertingRule type
|
||||
TypeAlerting = "alerting"
|
||||
paramRuleID = "rule_id"
|
||||
)
|
||||
|
||||
// ApiGroup represents a Group for web view
|
||||
type ApiGroup struct {
|
||||
type apiNotifier struct {
|
||||
Kind string `json:"kind"`
|
||||
Targets []*apiTarget `json:"targets"`
|
||||
}
|
||||
|
||||
type apiTarget struct {
|
||||
Address string `json:"address"`
|
||||
Labels map[string]string `json:"labels"`
|
||||
}
|
||||
|
||||
// apiAlert represents a notifier.AlertingRule state
|
||||
// for WEB view
|
||||
// https://github.com/prometheus/compliance/blob/main/alert_generator/specification.md#get-apiv1rules
|
||||
type apiAlert struct {
|
||||
State string `json:"state"`
|
||||
Name string `json:"name"`
|
||||
Value string `json:"value"`
|
||||
Labels map[string]string `json:"labels,omitempty"`
|
||||
Annotations map[string]string `json:"annotations"`
|
||||
ActiveAt time.Time `json:"activeAt"`
|
||||
|
||||
// Additional fields
|
||||
|
||||
// ID is an unique Alert's ID within a group
|
||||
ID string `json:"id"`
|
||||
// RuleID is an unique Rule's ID within a group
|
||||
RuleID string `json:"rule_id"`
|
||||
// GroupID is an unique Group's ID
|
||||
GroupID string `json:"group_id"`
|
||||
// Expression contains the PromQL/MetricsQL expression
|
||||
// for Rule's evaluation
|
||||
Expression string `json:"expression"`
|
||||
// SourceLink contains a link to a system which should show
|
||||
// why Alert was generated
|
||||
SourceLink string `json:"source"`
|
||||
// Restored shows whether Alert's state was restored on restart
|
||||
Restored bool `json:"restored"`
|
||||
// Stabilizing shows when firing state is kept because of
|
||||
// `keep_firing_for` instead of real alert
|
||||
Stabilizing bool `json:"stabilizing"`
|
||||
}
|
||||
|
||||
// WebLink returns a link to the alert which can be used in UI.
|
||||
func (aa *apiAlert) WebLink() string {
|
||||
return fmt.Sprintf("alert?%s=%s&%s=%s",
|
||||
paramGroupID, aa.GroupID, paramAlertID, aa.ID)
|
||||
}
|
||||
|
||||
// APILink returns a link to the alert's JSON representation.
|
||||
func (aa *apiAlert) APILink() string {
|
||||
return fmt.Sprintf("api/v1/alert?%s=%s&%s=%s",
|
||||
paramGroupID, aa.GroupID, paramAlertID, aa.ID)
|
||||
}
|
||||
|
||||
// apiGroup represents Group for web view
|
||||
// https://github.com/prometheus/compliance/blob/main/alert_generator/specification.md#get-apiv1rules
|
||||
type apiGroup struct {
|
||||
// Name is the group name as present in the config
|
||||
Name string `json:"name"`
|
||||
// Rules contains both recording and alerting rules
|
||||
Rules []ApiRule `json:"rules"`
|
||||
Rules []apiRule `json:"rules"`
|
||||
// Interval is the Group's evaluation interval in float seconds as present in the file.
|
||||
Interval float64 `json:"interval"`
|
||||
// LastEvaluation is the timestamp of the last time the Group was executed
|
||||
@@ -65,20 +116,15 @@ type ApiGroup struct {
|
||||
NoMatch int
|
||||
}
|
||||
|
||||
// APILink returns a link to the group's JSON representation.
|
||||
func (ag *ApiGroup) APILink() string {
|
||||
return fmt.Sprintf("api/v1/group?%s=%s", ParamGroupID, ag.ID)
|
||||
// groupAlerts represents a group of alerts for WEB view
|
||||
type groupAlerts struct {
|
||||
Group *apiGroup
|
||||
Alerts []*apiAlert
|
||||
}
|
||||
|
||||
// GroupAlerts represents a Group with its Alerts for web view
|
||||
type GroupAlerts struct {
|
||||
Group *ApiGroup
|
||||
Alerts []*ApiAlert
|
||||
}
|
||||
|
||||
// ApiRule represents a Rule for web view
|
||||
// apiRule represents a Rule for web view
|
||||
// see https://github.com/prometheus/compliance/blob/main/alert_generator/specification.md#get-apiv1rules
|
||||
type ApiRule struct {
|
||||
type apiRule struct {
|
||||
// State must be one of these under following scenarios
|
||||
// "pending": at least 1 alert in the rule in pending state and no other alert in firing ruleState.
|
||||
// "firing": at least 1 alert in the rule in firing state.
|
||||
@@ -100,7 +146,7 @@ type ApiRule struct {
|
||||
// LastEvaluation is the timestamp of the last time the rule was executed
|
||||
LastEvaluation time.Time `json:"lastEvaluation"`
|
||||
// Alerts is the list of all the alerts in this rule that are currently pending or firing
|
||||
Alerts []*ApiAlert `json:"alerts,omitempty"`
|
||||
Alerts []*apiAlert `json:"alerts,omitempty"`
|
||||
// Health is the health of rule evaluation.
|
||||
// It MUST be one of "ok", "err", "unknown"
|
||||
Health string `json:"health"`
|
||||
@@ -131,87 +177,143 @@ type ApiRule struct {
|
||||
// MaxUpdates is the max number of recorded ruleStateEntry objects
|
||||
MaxUpdates int `json:"max_updates_entries"`
|
||||
// Updates contains the ordered list of recorded ruleStateEntry objects
|
||||
Updates []StateEntry `json:"-"`
|
||||
Updates []rule.StateEntry `json:"-"`
|
||||
}
|
||||
|
||||
// ApiAlert represents a notifier.AlertingRule state
|
||||
// for WEB view
|
||||
// https://github.com/prometheus/compliance/blob/main/alert_generator/specification.md#get-apiv1rules
|
||||
type ApiAlert struct {
|
||||
State string `json:"state"`
|
||||
Name string `json:"name"`
|
||||
Value string `json:"value"`
|
||||
Labels map[string]string `json:"labels,omitempty"`
|
||||
Annotations map[string]string `json:"annotations"`
|
||||
ActiveAt time.Time `json:"activeAt"`
|
||||
|
||||
// Additional fields
|
||||
|
||||
// ID is an unique Alert's ID within a group
|
||||
ID string `json:"id"`
|
||||
// RuleID is an unique Rule's ID within a group
|
||||
RuleID string `json:"rule_id"`
|
||||
// GroupID is an unique Group's ID
|
||||
GroupID string `json:"group_id"`
|
||||
// Expression contains the PromQL/MetricsQL expression
|
||||
// for Rule's evaluation
|
||||
Expression string `json:"expression"`
|
||||
// SourceLink contains a link to a system which should show
|
||||
// why Alert was generated
|
||||
SourceLink string `json:"source"`
|
||||
// Restored shows whether Alert's state was restored on restart
|
||||
Restored bool `json:"restored"`
|
||||
// Stabilizing shows when firing state is kept because of
|
||||
// `keep_firing_for` instead of real alert
|
||||
Stabilizing bool `json:"stabilizing"`
|
||||
}
|
||||
|
||||
// WebLink returns a link to the alert which can be used in UI.
|
||||
func (aa *ApiAlert) WebLink() string {
|
||||
return fmt.Sprintf("alert?%s=%s&%s=%s",
|
||||
ParamGroupID, aa.GroupID, ParamAlertID, aa.ID)
|
||||
}
|
||||
|
||||
// APILink returns a link to the alert's JSON representation.
|
||||
func (aa *ApiAlert) APILink() string {
|
||||
return fmt.Sprintf("api/v1/alert?%s=%s&%s=%s",
|
||||
ParamGroupID, aa.GroupID, ParamAlertID, aa.ID)
|
||||
}
|
||||
|
||||
// ApiRuleWithUpdates represents ApiRule but with extra fields for marshalling
|
||||
type ApiRuleWithUpdates struct {
|
||||
ApiRule
|
||||
// apiRuleWithUpdates represents apiRule but with extra fields for marshalling
|
||||
type apiRuleWithUpdates struct {
|
||||
apiRule
|
||||
// Updates contains the ordered list of recorded ruleStateEntry objects
|
||||
StateUpdates []StateEntry `json:"updates,omitempty"`
|
||||
StateUpdates []rule.StateEntry `json:"updates,omitempty"`
|
||||
}
|
||||
|
||||
// APILink returns a link to the rule's JSON representation.
|
||||
func (ar ApiRule) APILink() string {
|
||||
func (ar apiRule) APILink() string {
|
||||
return fmt.Sprintf("api/v1/rule?%s=%s&%s=%s",
|
||||
ParamGroupID, ar.GroupID, ParamRuleID, ar.ID)
|
||||
paramGroupID, ar.GroupID, paramRuleID, ar.ID)
|
||||
}
|
||||
|
||||
// WebLink returns a link to the alert which can be used in UI.
|
||||
func (ar ApiRule) WebLink() string {
|
||||
func (ar apiRule) WebLink() string {
|
||||
return fmt.Sprintf("rule?%s=%s&%s=%s",
|
||||
ParamGroupID, ar.GroupID, ParamRuleID, ar.ID)
|
||||
paramGroupID, ar.GroupID, paramRuleID, ar.ID)
|
||||
}
|
||||
|
||||
// AlertsToAPI returns list of ApiAlert objects from existing alerts
|
||||
func (ar *AlertingRule) AlertsToAPI() []*ApiAlert {
|
||||
var alerts []*ApiAlert
|
||||
func ruleToAPI(r any) apiRule {
|
||||
if ar, ok := r.(*rule.AlertingRule); ok {
|
||||
return alertingToAPI(ar)
|
||||
}
|
||||
if rr, ok := r.(*rule.RecordingRule); ok {
|
||||
return recordingToAPI(rr)
|
||||
}
|
||||
return apiRule{}
|
||||
}
|
||||
|
||||
const (
|
||||
ruleTypeRecording = "recording"
|
||||
ruleTypeAlerting = "alerting"
|
||||
)
|
||||
|
||||
func recordingToAPI(rr *rule.RecordingRule) apiRule {
|
||||
lastState := rule.GetLastEntry(rr)
|
||||
r := apiRule{
|
||||
Type: ruleTypeRecording,
|
||||
DatasourceType: rr.Type.String(),
|
||||
Name: rr.Name,
|
||||
Query: rr.Expr,
|
||||
Labels: rr.Labels,
|
||||
LastEvaluation: lastState.Time,
|
||||
EvaluationTime: lastState.Duration.Seconds(),
|
||||
Health: "ok",
|
||||
LastSamples: lastState.Samples,
|
||||
LastSeriesFetched: lastState.SeriesFetched,
|
||||
MaxUpdates: rule.GetRuleStateSize(rr),
|
||||
Updates: rule.GetAllRuleState(rr),
|
||||
|
||||
// encode as strings to avoid rounding
|
||||
ID: fmt.Sprintf("%d", rr.ID()),
|
||||
GroupID: fmt.Sprintf("%d", rr.GroupID),
|
||||
GroupName: rr.GroupName,
|
||||
File: rr.File,
|
||||
}
|
||||
if lastState.Err != nil {
|
||||
r.LastError = lastState.Err.Error()
|
||||
r.Health = "err"
|
||||
}
|
||||
return r
|
||||
}
|
||||
|
||||
// alertingToAPI returns Rule representation in form of apiRule
|
||||
func alertingToAPI(ar *rule.AlertingRule) apiRule {
|
||||
lastState := rule.GetLastEntry(ar)
|
||||
r := apiRule{
|
||||
Type: ruleTypeAlerting,
|
||||
DatasourceType: ar.Type.String(),
|
||||
Name: ar.Name,
|
||||
Query: ar.Expr,
|
||||
Duration: ar.For.Seconds(),
|
||||
KeepFiringFor: ar.KeepFiringFor.Seconds(),
|
||||
Labels: ar.Labels,
|
||||
Annotations: ar.Annotations,
|
||||
LastEvaluation: lastState.Time,
|
||||
EvaluationTime: lastState.Duration.Seconds(),
|
||||
Health: "ok",
|
||||
State: "inactive",
|
||||
Alerts: ruleToAPIAlert(ar),
|
||||
LastSamples: lastState.Samples,
|
||||
LastSeriesFetched: lastState.SeriesFetched,
|
||||
MaxUpdates: rule.GetRuleStateSize(ar),
|
||||
Updates: rule.GetAllRuleState(ar),
|
||||
Debug: ar.Debug,
|
||||
|
||||
// encode as strings to avoid rounding in JSON
|
||||
ID: fmt.Sprintf("%d", ar.ID()),
|
||||
GroupID: fmt.Sprintf("%d", ar.GroupID),
|
||||
GroupName: ar.GroupName,
|
||||
File: ar.File,
|
||||
}
|
||||
if lastState.Err != nil {
|
||||
r.LastError = lastState.Err.Error()
|
||||
r.Health = "err"
|
||||
}
|
||||
// satisfy apiRule.State logic
|
||||
if len(r.Alerts) > 0 {
|
||||
r.State = notifier.StatePending.String()
|
||||
stateFiring := notifier.StateFiring.String()
|
||||
for _, a := range r.Alerts {
|
||||
if a.State == stateFiring {
|
||||
r.State = stateFiring
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
return r
|
||||
}
|
||||
|
||||
// ruleToAPIAlert generates list of apiAlert objects from existing alerts
|
||||
func ruleToAPIAlert(ar *rule.AlertingRule) []*apiAlert {
|
||||
var alerts []*apiAlert
|
||||
for _, a := range ar.GetAlerts() {
|
||||
if a.State == notifier.StateInactive {
|
||||
continue
|
||||
}
|
||||
alerts = append(alerts, NewAlertAPI(ar, a))
|
||||
alerts = append(alerts, newAlertAPI(ar, a))
|
||||
}
|
||||
return alerts
|
||||
}
|
||||
|
||||
// alertToAPI generates apiAlert object from alert by its id(hash)
|
||||
func alertToAPI(ar *rule.AlertingRule, id uint64) *apiAlert {
|
||||
a := ar.GetAlert(id)
|
||||
if a == nil {
|
||||
return nil
|
||||
}
|
||||
return newAlertAPI(ar, a)
|
||||
}
|
||||
|
||||
// NewAlertAPI creates apiAlert for notifier.Alert
|
||||
func NewAlertAPI(ar *AlertingRule, a *notifier.Alert) *ApiAlert {
|
||||
aa := &ApiAlert{
|
||||
func newAlertAPI(ar *rule.AlertingRule, a *notifier.Alert) *apiAlert {
|
||||
aa := &apiAlert{
|
||||
// encode as strings to avoid rounding
|
||||
ID: fmt.Sprintf("%d", a.ID),
|
||||
GroupID: fmt.Sprintf("%d", a.GroupID),
|
||||
@@ -226,8 +328,8 @@ func NewAlertAPI(ar *AlertingRule, a *notifier.Alert) *ApiAlert {
|
||||
Restored: a.Restored,
|
||||
Value: strconv.FormatFloat(a.Value, 'f', -1, 32),
|
||||
}
|
||||
if notifier.AlertURLGeneratorFn != nil {
|
||||
aa.SourceLink = notifier.AlertURLGeneratorFn(*a)
|
||||
if alertURLGeneratorFn != nil {
|
||||
aa.SourceLink = alertURLGeneratorFn(*a)
|
||||
}
|
||||
if a.State == notifier.StateFiring && !a.KeepFiringSince.IsZero() {
|
||||
aa.Stabilizing = true
|
||||
@@ -235,11 +337,9 @@ func NewAlertAPI(ar *AlertingRule, a *notifier.Alert) *ApiAlert {
|
||||
return aa
|
||||
}
|
||||
|
||||
// ToAPI returns ApiGroup representation of g
|
||||
func (g *Group) ToAPI() *ApiGroup {
|
||||
g.mu.RLock()
|
||||
defer g.mu.RUnlock()
|
||||
ag := ApiGroup{
|
||||
func groupToAPI(g *rule.Group) *apiGroup {
|
||||
g = g.DeepCopy()
|
||||
ag := apiGroup{
|
||||
// encode as string to avoid rounding
|
||||
ID: strconv.FormatUint(g.GetID(), 10),
|
||||
Name: g.Name,
|
||||
@@ -259,9 +359,9 @@ func (g *Group) ToAPI() *ApiGroup {
|
||||
if g.EvalDelay != nil {
|
||||
ag.EvalDelay = g.EvalDelay.Seconds()
|
||||
}
|
||||
ag.Rules = make([]ApiRule, 0)
|
||||
ag.Rules = make([]apiRule, 0)
|
||||
for _, r := range g.Rules {
|
||||
ag.Rules = append(ag.Rules, r.ToAPI())
|
||||
ag.Rules = append(ag.Rules, ruleToAPI(r))
|
||||
}
|
||||
return &ag
|
||||
}
|
||||
@@ -1,4 +1,4 @@
|
||||
package rule
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
@@ -8,6 +8,7 @@ import (
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/rule"
|
||||
)
|
||||
|
||||
func TestRecordingToApi(t *testing.T) {
|
||||
@@ -16,7 +17,7 @@ func TestRecordingToApi(t *testing.T) {
|
||||
Values: []float64{1}, Timestamps: []int64{0},
|
||||
})
|
||||
entriesLimit := 44
|
||||
g := NewGroup(config.Group{
|
||||
g := rule.NewGroup(config.Group{
|
||||
Name: "group",
|
||||
File: "rules.yaml",
|
||||
Concurrency: 1,
|
||||
@@ -30,24 +31,24 @@ func TestRecordingToApi(t *testing.T) {
|
||||
},
|
||||
},
|
||||
}, fq, 1*time.Minute, nil)
|
||||
rr := g.Rules[0].(*RecordingRule)
|
||||
rr := g.Rules[0].(*rule.RecordingRule)
|
||||
|
||||
expectedRes := ApiRule{
|
||||
expectedRes := apiRule{
|
||||
Name: "record_name",
|
||||
Query: "up",
|
||||
Labels: map[string]string{"label": "value"},
|
||||
Health: "ok",
|
||||
Type: TypeRecording,
|
||||
Type: ruleTypeRecording,
|
||||
DatasourceType: "prometheus",
|
||||
ID: "1248",
|
||||
GroupID: fmt.Sprintf("%d", g.CreateID()),
|
||||
GroupName: "group",
|
||||
File: "rules.yaml",
|
||||
MaxUpdates: 44,
|
||||
Updates: make([]StateEntry, 0),
|
||||
Updates: make([]rule.StateEntry, 0),
|
||||
}
|
||||
|
||||
res := rr.ToAPI()
|
||||
res := recordingToAPI(rr)
|
||||
|
||||
if !reflect.DeepEqual(res, expectedRes) {
|
||||
t.Fatalf("expected to have: \n%v;\ngot: \n%v", expectedRes, res)
|
||||
@@ -27,9 +27,6 @@ vmauth-linux-ppc64le-prod:
|
||||
vmauth-linux-386-prod:
|
||||
APP_NAME=vmauth $(MAKE) app-via-docker-linux-386
|
||||
|
||||
vmauth-linux-s390x-prod:
|
||||
APP_NAME=vmauth $(MAKE) app-via-docker-linux-s390x
|
||||
|
||||
vmauth-darwin-amd64-prod:
|
||||
APP_NAME=vmauth $(MAKE) app-via-docker-darwin-amd64
|
||||
|
||||
|
||||
@@ -4,7 +4,6 @@ import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/base64"
|
||||
"errors"
|
||||
"flag"
|
||||
"fmt"
|
||||
"math"
|
||||
@@ -42,9 +41,6 @@ var (
|
||||
"See https://docs.victoriametrics.com/victoriametrics/vmauth/#load-balancing for details")
|
||||
defaultLoadBalancingPolicy = flag.String("loadBalancingPolicy", "least_loaded", "The default load balancing policy to use for backend urls specified inside url_prefix section. "+
|
||||
"Supported policies: least_loaded, first_available. See https://docs.victoriametrics.com/victoriametrics/vmauth/#load-balancing")
|
||||
defaultMergeQueryArgs = flagutil.NewArrayString("mergeQueryArgs", "An optional list of client query arg names, which must be merged with args at backend urls. "+
|
||||
"The rest of client query args are replaced by the corresponding query args from backend urls for security reasons; "+
|
||||
"see https://docs.victoriametrics.com/victoriametrics/vmauth/#query-args-handling")
|
||||
discoverBackendIPsGlobal = flag.Bool("discoverBackendIPs", false, "Whether to discover backend IPs via periodic DNS queries to hostnames specified in url_prefix. "+
|
||||
"This may be useful when url_prefix points to a hostname with dynamically scaled instances behind it. See https://docs.victoriametrics.com/victoriametrics/vmauth/#discovering-backend-ips")
|
||||
discoverBackendIPsInterval = flag.Duration("discoverBackendIPsInterval", 10*time.Second, "The interval for re-discovering backend IPs if -discoverBackendIPs command-line flag is set. "+
|
||||
@@ -79,7 +75,6 @@ type UserInfo struct {
|
||||
DefaultURL *URLPrefix `yaml:"default_url,omitempty"`
|
||||
RetryStatusCodes []int `yaml:"retry_status_codes,omitempty"`
|
||||
LoadBalancingPolicy string `yaml:"load_balancing_policy,omitempty"`
|
||||
MergeQueryArgs []string `yaml:"merge_query_args,omitempty"`
|
||||
DropSrcPathPrefixParts *int `yaml:"drop_src_path_prefix_parts,omitempty"`
|
||||
TLSCAFile string `yaml:"tls_ca_file,omitempty"`
|
||||
TLSCertFile string `yaml:"tls_cert_file,omitempty"`
|
||||
@@ -95,8 +90,6 @@ type UserInfo struct {
|
||||
rt http.RoundTripper
|
||||
|
||||
requests *metrics.Counter
|
||||
requestErrors *metrics.Counter
|
||||
backendRequests *metrics.Counter
|
||||
backendErrors *metrics.Counter
|
||||
requestsDuration *metrics.Summary
|
||||
}
|
||||
@@ -108,29 +101,13 @@ type HeadersConf struct {
|
||||
KeepOriginalHost *bool `yaml:"keep_original_host,omitempty"`
|
||||
}
|
||||
|
||||
func (ui *UserInfo) beginConcurrencyLimit(ctx context.Context) error {
|
||||
func (ui *UserInfo) beginConcurrencyLimit() error {
|
||||
select {
|
||||
case ui.concurrencyLimitCh <- struct{}{}:
|
||||
return nil
|
||||
default:
|
||||
// The number of concurrently executed requests for the given user equals the limt.
|
||||
// Wait until some of the currently executed requests are finished, so the current request could be executed.
|
||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10078
|
||||
select {
|
||||
case ui.concurrencyLimitCh <- struct{}{}:
|
||||
return nil
|
||||
case <-ctx.Done():
|
||||
err := ctx.Err()
|
||||
if errors.Is(err, context.DeadlineExceeded) {
|
||||
// The current request couldn't be executed until the request timeout.
|
||||
ui.concurrencyLimitReached.Inc()
|
||||
return fmt.Errorf("cannot start executing the request during -maxQueueDuration=%s because %d concurrent requests from the user %s are executed",
|
||||
*maxQueueDuration, ui.getMaxConcurrentRequests(), ui.name())
|
||||
}
|
||||
|
||||
return fmt.Errorf("cannot start executing the request because %d concurrent requests from the user %s are executed: %w",
|
||||
ui.getMaxConcurrentRequests(), ui.name(), err)
|
||||
}
|
||||
ui.concurrencyLimitReached.Inc()
|
||||
return fmt.Errorf("cannot handle more than %d concurrent requests from user %s", ui.getMaxConcurrentRequests(), ui.name())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -146,28 +123,6 @@ func (ui *UserInfo) getMaxConcurrentRequests() int {
|
||||
return mcr
|
||||
}
|
||||
|
||||
func (ui *UserInfo) stopHealthChecks() {
|
||||
if ui == nil {
|
||||
return
|
||||
}
|
||||
|
||||
if ui.URLPrefix != nil {
|
||||
bus := ui.URLPrefix.bus.Load()
|
||||
bus.stopHealthChecks()
|
||||
}
|
||||
if ui.DefaultURL != nil {
|
||||
bus := ui.DefaultURL.bus.Load()
|
||||
bus.stopHealthChecks()
|
||||
}
|
||||
for i := range ui.URLMaps {
|
||||
um := &ui.URLMaps[i]
|
||||
if um.URLPrefix != nil {
|
||||
bus := um.URLPrefix.bus.Load()
|
||||
bus.stopHealthChecks()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Header is `Name: Value` http header, which must be added to the proxied request.
|
||||
type Header struct {
|
||||
Name string
|
||||
@@ -227,11 +182,6 @@ type URLMap struct {
|
||||
// LoadBalancingPolicy is load balancing policy among UrlPrefix backends.
|
||||
LoadBalancingPolicy string `yaml:"load_balancing_policy,omitempty"`
|
||||
|
||||
// MergeQueryArgs is a list of client query args, which must be merged with the existing backend query args.
|
||||
//
|
||||
// The rest of client query args are replaced with the corresponding backend query args for security reasons.
|
||||
MergeQueryArgs []string `yaml:"merge_query_args,omitempty"`
|
||||
|
||||
// DropSrcPathPrefixParts is the number of `/`-delimited request path prefix parts to drop before proxying the request to backend.
|
||||
DropSrcPathPrefixParts *int `yaml:"drop_src_path_prefix_parts,omitempty"`
|
||||
}
|
||||
@@ -278,7 +228,7 @@ func (qa *QueryArg) MarshalYAML() (any, error) {
|
||||
return qa.sOriginal, nil
|
||||
}
|
||||
|
||||
// URLPrefix represents the `url_prefix` from auth config.
|
||||
// URLPrefix represents passed `url_prefix`
|
||||
type URLPrefix struct {
|
||||
// requests are re-tried on other backend urls for these http response status codes
|
||||
retryStatusCodes []int
|
||||
@@ -286,11 +236,6 @@ type URLPrefix struct {
|
||||
// load balancing policy used
|
||||
loadBalancingPolicy string
|
||||
|
||||
// the list of client query args, which must be merged with backend query args.
|
||||
//
|
||||
// By default backend query args replace all the client query args for security reasons.
|
||||
mergeQueryArgs []string
|
||||
|
||||
// how many request path prefix parts to drop before routing the request to backendURL
|
||||
dropSrcPathPrefixParts int
|
||||
|
||||
@@ -303,7 +248,7 @@ type URLPrefix struct {
|
||||
// the list of backend urls
|
||||
//
|
||||
// the list can be dynamically updated if `discover_backend_ips` option is set.
|
||||
bus atomic.Pointer[backendURLs]
|
||||
bus atomic.Pointer[[]*backendURL]
|
||||
|
||||
// if this option is set, then backend ips for busOriginal are periodically re-discovered and put to bus.
|
||||
discoverBackendIPs bool
|
||||
@@ -327,91 +272,21 @@ func (up *URLPrefix) setLoadBalancingPolicy(loadBalancingPolicy string) error {
|
||||
}
|
||||
}
|
||||
|
||||
type backendURLs struct {
|
||||
healthChecksContext context.Context
|
||||
healthChecksCancel func()
|
||||
healthChecksWG sync.WaitGroup
|
||||
|
||||
bus []*backendURL
|
||||
}
|
||||
|
||||
func newBackendURLs() *backendURLs {
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
return &backendURLs{
|
||||
healthChecksContext: ctx,
|
||||
healthChecksCancel: cancel,
|
||||
}
|
||||
}
|
||||
|
||||
func (bus *backendURLs) add(u *url.URL) {
|
||||
bus.bus = append(bus.bus, &backendURL{
|
||||
url: u,
|
||||
healthCheckContext: bus.healthChecksContext,
|
||||
healthCheckWG: &bus.healthChecksWG,
|
||||
})
|
||||
}
|
||||
|
||||
func (bus *backendURLs) stopHealthChecks() {
|
||||
bus.healthChecksCancel()
|
||||
bus.healthChecksWG.Wait()
|
||||
}
|
||||
|
||||
type backendURL struct {
|
||||
broken atomic.Bool
|
||||
|
||||
healthCheckContext context.Context
|
||||
healthCheckWG *sync.WaitGroup
|
||||
|
||||
brokenDeadline atomic.Uint64
|
||||
concurrentRequests atomic.Int32
|
||||
|
||||
url *url.URL
|
||||
}
|
||||
|
||||
func (bu *backendURL) isBroken() bool {
|
||||
return bu.broken.Load()
|
||||
ct := fasttime.UnixTimestamp()
|
||||
return ct < bu.brokenDeadline.Load()
|
||||
}
|
||||
|
||||
func (bu *backendURL) setBroken() {
|
||||
if bu.broken.CompareAndSwap(false, true) {
|
||||
bu.healthCheckWG.Go(func() {
|
||||
bu.runHealthCheck()
|
||||
bu.broken.Store(false)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func (bu *backendURL) runHealthCheck() {
|
||||
port := bu.url.Port()
|
||||
if port == "" {
|
||||
port = "80"
|
||||
}
|
||||
addr := net.JoinHostPort(bu.url.Hostname(), port)
|
||||
|
||||
t := time.NewTicker(*failTimeout)
|
||||
defer t.Stop()
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-t.C:
|
||||
// Verify network connectivity via TCP dial before marking backend healthy.
|
||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9997
|
||||
ctx, cancel := context.WithTimeout(bu.healthCheckContext, time.Second)
|
||||
c, err := netutil.Dialer.DialContext(ctx, "tcp", addr)
|
||||
cancel()
|
||||
if err != nil {
|
||||
if errors.Is(bu.healthCheckContext.Err(), context.Canceled) {
|
||||
return
|
||||
}
|
||||
logger.Warnf("ignoring the backend at %s for %s because of dial error: %s", addr, *failTimeout, err)
|
||||
continue
|
||||
}
|
||||
|
||||
_ = c.Close()
|
||||
return
|
||||
case <-bu.healthCheckContext.Done():
|
||||
return
|
||||
}
|
||||
}
|
||||
deadline := fasttime.UnixTimestamp() + uint64((*failTimeout).Seconds())
|
||||
bu.brokenDeadline.Store(deadline)
|
||||
}
|
||||
|
||||
func (bu *backendURL) get() {
|
||||
@@ -423,8 +298,8 @@ func (bu *backendURL) put() {
|
||||
}
|
||||
|
||||
func (up *URLPrefix) getBackendsCount() int {
|
||||
bus := up.bus.Load()
|
||||
return len(bus.bus)
|
||||
pbus := up.bus.Load()
|
||||
return len(*pbus)
|
||||
}
|
||||
|
||||
// getBackendURL returns the backendURL depending on the load balance policy.
|
||||
@@ -435,15 +310,16 @@ func (up *URLPrefix) getBackendsCount() int {
|
||||
func (up *URLPrefix) getBackendURL() *backendURL {
|
||||
up.discoverBackendAddrsIfNeeded()
|
||||
|
||||
bus := up.bus.Load()
|
||||
if len(bus.bus) == 0 {
|
||||
pbus := up.bus.Load()
|
||||
bus := *pbus
|
||||
if len(bus) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
if up.loadBalancingPolicy == "first_available" {
|
||||
return getFirstAvailableBackendURL(bus.bus)
|
||||
return getFirstAvailableBackendURL(bus)
|
||||
}
|
||||
return getLeastLoadedBackendURL(bus.bus, &up.n)
|
||||
return getLeastLoadedBackendURL(bus, &up.n)
|
||||
}
|
||||
|
||||
func (up *URLPrefix) discoverBackendAddrsIfNeeded() {
|
||||
@@ -517,24 +393,25 @@ func (up *URLPrefix) discoverBackendAddrsIfNeeded() {
|
||||
cancel()
|
||||
|
||||
// generate new backendURLs for the resolved IPs
|
||||
busNew := newBackendURLs()
|
||||
var busNew []*backendURL
|
||||
for _, bu := range up.busOriginal {
|
||||
host := bu.Hostname()
|
||||
for _, addr := range hostToAddrs[host] {
|
||||
buCopy := *bu
|
||||
buCopy.Host = addr
|
||||
busNew.add(&buCopy)
|
||||
busNew = append(busNew, &backendURL{
|
||||
url: &buCopy,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
bus := up.bus.Load()
|
||||
if areEqualBackendURLs(bus.bus, busNew.bus) {
|
||||
pbus := up.bus.Load()
|
||||
if areEqualBackendURLs(*pbus, busNew) {
|
||||
return
|
||||
}
|
||||
|
||||
// Store new backend urls
|
||||
up.bus.Store(busNew)
|
||||
bus.stopHealthChecks()
|
||||
up.bus.Store(&busNew)
|
||||
}
|
||||
|
||||
func areEqualBackendURLs(a, b []*backendURL) bool {
|
||||
@@ -565,23 +442,20 @@ func getFirstAvailableBackendURL(bus []*backendURL) *backendURL {
|
||||
for i := 1; i < len(bus); i++ {
|
||||
if !bus[i].isBroken() {
|
||||
bu = bus[i]
|
||||
bu.get()
|
||||
return bu
|
||||
break
|
||||
}
|
||||
}
|
||||
return nil
|
||||
bu.get()
|
||||
return bu
|
||||
}
|
||||
|
||||
// getLeastLoadedBackendURL returns a non-broken backendURL with the lowest number of concurrent requests.
|
||||
// getLeastLoadedBackendURL returns the backendURL with the minimum number of concurrent requests.
|
||||
//
|
||||
// backendURL.put() must be called on the returned backendURL after the request is complete.
|
||||
func getLeastLoadedBackendURL(bus []*backendURL, atomicCounter *atomic.Uint32) *backendURL {
|
||||
if len(bus) == 1 {
|
||||
// Fast path - return the only backend url.
|
||||
bu := bus[0]
|
||||
if bu.isBroken() {
|
||||
return nil
|
||||
}
|
||||
bu.get()
|
||||
return bu
|
||||
}
|
||||
@@ -594,37 +468,27 @@ func getLeastLoadedBackendURL(bus []*backendURL, atomicCounter *atomic.Uint32) *
|
||||
if bu.isBroken() {
|
||||
continue
|
||||
}
|
||||
|
||||
// The Load() in front of CompareAndSwap() avoids CAS overhead for items with values bigger than 0.
|
||||
if bu.concurrentRequests.Load() == 0 && bu.concurrentRequests.CompareAndSwap(0, 1) {
|
||||
atomicCounter.CompareAndSwap(n+1, idx+1)
|
||||
// There is no need in the call bu.get(), because we already incremented bu.concrrentRequests above.
|
||||
if bu.concurrentRequests.Load() == 0 {
|
||||
// Fast path - return the backend with zero concurrently executed requests.
|
||||
// Do not use CompareAndSwap() instead of Load(), since it is much slower on systems with many CPU cores.
|
||||
bu.concurrentRequests.Add(1)
|
||||
return bu
|
||||
}
|
||||
}
|
||||
|
||||
// Slow path - return the backend with the minimum number of concurrently executed requests.
|
||||
buMinIdx := n % uint32(len(bus))
|
||||
minRequests := bus[buMinIdx].concurrentRequests.Load()
|
||||
for i := uint32(1); i < uint32(len(bus)); i++ {
|
||||
idx := (n + i) % uint32(len(bus))
|
||||
bu := bus[idx]
|
||||
buMin := bus[n%uint32(len(bus))]
|
||||
minRequests := buMin.concurrentRequests.Load()
|
||||
for _, bu := range bus {
|
||||
if bu.isBroken() {
|
||||
continue
|
||||
}
|
||||
|
||||
reqs := bu.concurrentRequests.Load()
|
||||
if reqs < minRequests || bus[buMinIdx].isBroken() {
|
||||
buMinIdx = idx
|
||||
minRequests = reqs
|
||||
if n := bu.concurrentRequests.Load(); n < minRequests || buMin.isBroken() {
|
||||
buMin = bu
|
||||
minRequests = n
|
||||
}
|
||||
}
|
||||
buMin := bus[buMinIdx]
|
||||
if buMin.isBroken() {
|
||||
return nil
|
||||
}
|
||||
buMin.get()
|
||||
atomicCounter.CompareAndSwap(n+1, buMinIdx+1)
|
||||
return buMin
|
||||
}
|
||||
|
||||
@@ -741,9 +605,11 @@ func initAuthConfig() {
|
||||
configTimestamp.Set(fasttime.UnixTimestamp())
|
||||
|
||||
stopCh = make(chan struct{})
|
||||
authConfigWG.Go(func() {
|
||||
authConfigWG.Add(1)
|
||||
go func() {
|
||||
defer authConfigWG.Done()
|
||||
authConfigReloader(sighupCh)
|
||||
})
|
||||
}()
|
||||
}
|
||||
|
||||
func stopAuthConfig() {
|
||||
@@ -815,7 +681,7 @@ func reloadAuthConfig() (bool, error) {
|
||||
|
||||
ok, err := reloadAuthConfigData(data)
|
||||
if err != nil {
|
||||
return false, fmt.Errorf("failed to parse -auth.config=%q: %w", *authConfigPath, err)
|
||||
return false, fmt.Errorf("failed to pars -auth.config=%q: %w", *authConfigPath, err)
|
||||
}
|
||||
if !ok {
|
||||
return false, nil
|
||||
@@ -845,11 +711,6 @@ func reloadAuthConfigData(data []byte) (bool, error) {
|
||||
|
||||
acPrev := authConfig.Load()
|
||||
if acPrev != nil {
|
||||
acPrev.UnauthorizedUser.stopHealthChecks()
|
||||
for i := range acPrev.Users {
|
||||
acPrev.Users[i].stopHealthChecks()
|
||||
}
|
||||
|
||||
metrics.UnregisterSet(acPrev.ms, true)
|
||||
}
|
||||
metrics.RegisterSet(ac.ms)
|
||||
@@ -896,8 +757,6 @@ func parseAuthConfig(data []byte) (*AuthConfig, error) {
|
||||
return nil, fmt.Errorf("cannot parse metric_labels for unauthorized_user: %w", err)
|
||||
}
|
||||
ui.requests = ac.ms.NewCounter(`vmauth_unauthorized_user_requests_total` + metricLabels)
|
||||
ui.requestErrors = ac.ms.NewCounter(`vmauth_unauthorized_user_request_errors_total` + metricLabels)
|
||||
ui.backendRequests = ac.ms.NewCounter(`vmauth_unauthorized_user_request_backend_requests_total` + metricLabels)
|
||||
ui.backendErrors = ac.ms.NewCounter(`vmauth_unauthorized_user_request_backend_errors_total` + metricLabels)
|
||||
ui.requestsDuration = ac.ms.NewSummary(`vmauth_unauthorized_user_request_duration_seconds` + metricLabels)
|
||||
ui.concurrencyLimitCh = make(chan struct{}, ui.getMaxConcurrentRequests())
|
||||
@@ -946,8 +805,6 @@ func parseAuthConfigUsers(ac *AuthConfig) (map[string]*UserInfo, error) {
|
||||
return nil, fmt.Errorf("cannot parse metric_labels: %w", err)
|
||||
}
|
||||
ui.requests = ac.ms.GetOrCreateCounter(`vmauth_user_requests_total` + metricLabels)
|
||||
ui.requestErrors = ac.ms.GetOrCreateCounter(`vmauth_user_request_errors_total` + metricLabels)
|
||||
ui.backendRequests = ac.ms.GetOrCreateCounter(`vmauth_user_request_backend_requests_total` + metricLabels)
|
||||
ui.backendErrors = ac.ms.GetOrCreateCounter(`vmauth_user_request_backend_errors_total` + metricLabels)
|
||||
ui.requestsDuration = ac.ms.GetOrCreateSummary(`vmauth_user_request_duration_seconds` + metricLabels)
|
||||
mcr := ui.getMaxConcurrentRequests()
|
||||
@@ -999,7 +856,6 @@ func (ui *UserInfo) getMetricLabels() (string, error) {
|
||||
func (ui *UserInfo) initURLs() error {
|
||||
retryStatusCodes := defaultRetryStatusCodes.Values()
|
||||
loadBalancingPolicy := *defaultLoadBalancingPolicy
|
||||
mergeQueryArgs := *defaultMergeQueryArgs
|
||||
dropSrcPathPrefixParts := 0
|
||||
discoverBackendIPs := *discoverBackendIPsGlobal
|
||||
if ui.RetryStatusCodes != nil {
|
||||
@@ -1008,9 +864,6 @@ func (ui *UserInfo) initURLs() error {
|
||||
if ui.LoadBalancingPolicy != "" {
|
||||
loadBalancingPolicy = ui.LoadBalancingPolicy
|
||||
}
|
||||
if len(ui.MergeQueryArgs) != 0 {
|
||||
mergeQueryArgs = ui.MergeQueryArgs
|
||||
}
|
||||
if ui.DropSrcPathPrefixParts != nil {
|
||||
dropSrcPathPrefixParts = *ui.DropSrcPathPrefixParts
|
||||
}
|
||||
@@ -1018,18 +871,16 @@ func (ui *UserInfo) initURLs() error {
|
||||
discoverBackendIPs = *ui.DiscoverBackendIPs
|
||||
}
|
||||
|
||||
up := ui.URLPrefix
|
||||
if up != nil {
|
||||
if err := up.sanitizeAndInitialize(); err != nil {
|
||||
if ui.URLPrefix != nil {
|
||||
if err := ui.URLPrefix.sanitizeAndInitialize(); err != nil {
|
||||
return err
|
||||
}
|
||||
up.retryStatusCodes = retryStatusCodes
|
||||
up.dropSrcPathPrefixParts = dropSrcPathPrefixParts
|
||||
up.discoverBackendIPs = discoverBackendIPs
|
||||
if err := up.setLoadBalancingPolicy(loadBalancingPolicy); err != nil {
|
||||
ui.URLPrefix.retryStatusCodes = retryStatusCodes
|
||||
ui.URLPrefix.dropSrcPathPrefixParts = dropSrcPathPrefixParts
|
||||
ui.URLPrefix.discoverBackendIPs = discoverBackendIPs
|
||||
if err := ui.URLPrefix.setLoadBalancingPolicy(loadBalancingPolicy); err != nil {
|
||||
return err
|
||||
}
|
||||
up.mergeQueryArgs = mergeQueryArgs
|
||||
}
|
||||
if ui.DefaultURL != nil {
|
||||
if err := ui.DefaultURL.sanitizeAndInitialize(); err != nil {
|
||||
@@ -1048,7 +899,6 @@ func (ui *UserInfo) initURLs() error {
|
||||
}
|
||||
rscs := retryStatusCodes
|
||||
lbp := loadBalancingPolicy
|
||||
mqa := mergeQueryArgs
|
||||
dsp := dropSrcPathPrefixParts
|
||||
dbd := discoverBackendIPs
|
||||
if e.RetryStatusCodes != nil {
|
||||
@@ -1057,9 +907,6 @@ func (ui *UserInfo) initURLs() error {
|
||||
if e.LoadBalancingPolicy != "" {
|
||||
lbp = e.LoadBalancingPolicy
|
||||
}
|
||||
if len(e.MergeQueryArgs) != 0 {
|
||||
mqa = e.MergeQueryArgs
|
||||
}
|
||||
if e.DropSrcPathPrefixParts != nil {
|
||||
dsp = *e.DropSrcPathPrefixParts
|
||||
}
|
||||
@@ -1070,7 +917,6 @@ func (ui *UserInfo) initURLs() error {
|
||||
if err := e.URLPrefix.setLoadBalancingPolicy(lbp); err != nil {
|
||||
return err
|
||||
}
|
||||
e.URLPrefix.mergeQueryArgs = mqa
|
||||
e.URLPrefix.dropSrcPathPrefixParts = dsp
|
||||
e.URLPrefix.discoverBackendIPs = dbd
|
||||
}
|
||||
@@ -1182,11 +1028,13 @@ func (up *URLPrefix) sanitizeAndInitialize() error {
|
||||
}
|
||||
|
||||
// Initialize up.bus
|
||||
bus := newBackendURLs()
|
||||
for _, bu := range up.busOriginal {
|
||||
bus.add(bu)
|
||||
bus := make([]*backendURL, len(up.busOriginal))
|
||||
for i, bu := range up.busOriginal {
|
||||
bus[i] = &backendURL{
|
||||
url: bu,
|
||||
}
|
||||
}
|
||||
up.bus.Store(bus)
|
||||
up.bus.Store(&bus)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -280,7 +280,7 @@ users:
|
||||
}
|
||||
|
||||
func TestParseAuthConfigSuccess(t *testing.T) {
|
||||
f := func(s string, expectedAuthConfig map[string]*UserInfo, expectedUnauthorizedUserConfig *UserInfo) {
|
||||
f := func(s string, expectedAuthConfig map[string]*UserInfo) {
|
||||
t.Helper()
|
||||
ac, err := parseAuthConfig([]byte(s))
|
||||
if err != nil {
|
||||
@@ -294,19 +294,15 @@ func TestParseAuthConfigSuccess(t *testing.T) {
|
||||
if err := areEqualConfigs(m, expectedAuthConfig); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if err := areEqualConfigs(ac.UnauthorizedUser, expectedUnauthorizedUserConfig); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
insecureSkipVerifyTrue := true
|
||||
|
||||
// Empty config
|
||||
f(``, map[string]*UserInfo{}, nil)
|
||||
f(``, map[string]*UserInfo{})
|
||||
|
||||
// Empty users
|
||||
f(`users: []`, map[string]*UserInfo{}, nil)
|
||||
f(`users: []`, map[string]*UserInfo{})
|
||||
|
||||
// Single user
|
||||
f(`
|
||||
@@ -324,7 +320,7 @@ users:
|
||||
MaxConcurrentRequests: 5,
|
||||
TLSInsecureSkipVerify: &insecureSkipVerifyTrue,
|
||||
},
|
||||
}, nil)
|
||||
})
|
||||
|
||||
// Single user with auth_token
|
||||
f(`
|
||||
@@ -348,7 +344,7 @@ users:
|
||||
TLSCertFile: "foo/baz",
|
||||
TLSKeyFile: "foo/foo",
|
||||
},
|
||||
}, nil)
|
||||
})
|
||||
|
||||
// Multiple url_prefix entries
|
||||
insecureSkipVerifyFalse := false
|
||||
@@ -363,7 +359,6 @@ users:
|
||||
tls_insecure_skip_verify: false
|
||||
retry_status_codes: [500, 501]
|
||||
load_balancing_policy: first_available
|
||||
merge_query_args: [foo, bar]
|
||||
drop_src_path_prefix_parts: 1
|
||||
discover_backend_ips: true
|
||||
`, map[string]*UserInfo{
|
||||
@@ -377,11 +372,10 @@ users:
|
||||
TLSInsecureSkipVerify: &insecureSkipVerifyFalse,
|
||||
RetryStatusCodes: []int{500, 501},
|
||||
LoadBalancingPolicy: "first_available",
|
||||
MergeQueryArgs: []string{"foo", "bar"},
|
||||
DropSrcPathPrefixParts: intp(1),
|
||||
DiscoverBackendIPs: &discoverBackendIPsTrue,
|
||||
},
|
||||
}, nil)
|
||||
})
|
||||
|
||||
// Multiple users
|
||||
f(`
|
||||
@@ -399,7 +393,7 @@ users:
|
||||
Username: "bar",
|
||||
URLPrefix: mustParseURL("https://bar/x/"),
|
||||
},
|
||||
}, nil)
|
||||
})
|
||||
|
||||
// non-empty URLMap
|
||||
sharedUserInfo := &UserInfo{
|
||||
@@ -449,7 +443,7 @@ users:
|
||||
`, map[string]*UserInfo{
|
||||
getHTTPAuthBearerToken("foo"): sharedUserInfo,
|
||||
getHTTPAuthBasicToken("foo", ""): sharedUserInfo,
|
||||
}, nil)
|
||||
})
|
||||
|
||||
// Multiple users with the same name - this should work, since these users have different passwords
|
||||
f(`
|
||||
@@ -471,7 +465,7 @@ users:
|
||||
Password: "bar",
|
||||
URLPrefix: mustParseURL("https://bar/x"),
|
||||
},
|
||||
}, nil)
|
||||
})
|
||||
|
||||
// with default url
|
||||
keepOriginalHost := true
|
||||
@@ -487,8 +481,6 @@ users:
|
||||
- "foo: bar"
|
||||
- "xxx: y"
|
||||
keep_original_host: true
|
||||
load_balancing_policy: first_available
|
||||
merge_query_args: [foo, bar]
|
||||
default_url:
|
||||
- http://default1/select/0/prometheus
|
||||
- http://default2/select/0/prometheus
|
||||
@@ -513,8 +505,6 @@ users:
|
||||
},
|
||||
KeepOriginalHost: &keepOriginalHost,
|
||||
},
|
||||
LoadBalancingPolicy: "first_available",
|
||||
MergeQueryArgs: []string{"foo", "bar"},
|
||||
},
|
||||
},
|
||||
DefaultURL: mustParseURLs([]string{
|
||||
@@ -542,8 +532,6 @@ users:
|
||||
},
|
||||
KeepOriginalHost: &keepOriginalHost,
|
||||
},
|
||||
LoadBalancingPolicy: "first_available",
|
||||
MergeQueryArgs: []string{"foo", "bar"},
|
||||
},
|
||||
},
|
||||
DefaultURL: mustParseURLs([]string{
|
||||
@@ -551,7 +539,7 @@ users:
|
||||
"http://default2/select/0/prometheus",
|
||||
}),
|
||||
},
|
||||
}, nil)
|
||||
})
|
||||
|
||||
// With metric_labels
|
||||
f(`
|
||||
@@ -603,23 +591,6 @@ users:
|
||||
},
|
||||
},
|
||||
},
|
||||
}, nil)
|
||||
|
||||
// unauthorized_user
|
||||
f(`
|
||||
unauthorized_user:
|
||||
merge_query_args: [extra_filters]
|
||||
url_map:
|
||||
- src_paths: ["/select/.+"]
|
||||
url_prefix: 'http://victoria-logs:9428/?extra_filters={env="prod"}'
|
||||
`, nil, &UserInfo{
|
||||
MergeQueryArgs: []string{"extra_filters"},
|
||||
URLMaps: []URLMap{
|
||||
{
|
||||
SrcPaths: getRegexs([]string{"/select/.+"}),
|
||||
URLPrefix: mustParseURL(`http://victoria-logs:9428/?extra_filters={env="prod"}`),
|
||||
},
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
@@ -752,12 +723,10 @@ func TestGetLeastLoadedBackendURL(t *testing.T) {
|
||||
})
|
||||
up.loadBalancingPolicy = "least_loaded"
|
||||
|
||||
pbus := up.bus.Load()
|
||||
bus := pbus.bus
|
||||
|
||||
fn := func(ns ...int) {
|
||||
t.Helper()
|
||||
|
||||
pbus := up.bus.Load()
|
||||
bus := *pbus
|
||||
for i, b := range bus {
|
||||
got := int(b.concurrentRequests.Load())
|
||||
exp := ns[i]
|
||||
@@ -769,52 +738,45 @@ func TestGetLeastLoadedBackendURL(t *testing.T) {
|
||||
|
||||
up.getBackendURL()
|
||||
fn(1, 0, 0)
|
||||
|
||||
up.getBackendURL()
|
||||
fn(1, 1, 0)
|
||||
|
||||
up.getBackendURL()
|
||||
fn(1, 1, 1)
|
||||
|
||||
bus[1].put()
|
||||
bus[2].put()
|
||||
fn(1, 0, 0)
|
||||
up.getBackendURL()
|
||||
up.getBackendURL()
|
||||
fn(2, 2, 1)
|
||||
|
||||
bus := up.bus.Load()
|
||||
pbus := *bus
|
||||
pbus[0].concurrentRequests.Add(2)
|
||||
pbus[2].concurrentRequests.Add(5)
|
||||
fn(4, 2, 6)
|
||||
|
||||
up.getBackendURL()
|
||||
fn(1, 1, 0)
|
||||
fn(4, 3, 6)
|
||||
|
||||
bus[1].put()
|
||||
up.getBackendURL()
|
||||
fn(1, 0, 1)
|
||||
fn(4, 4, 6)
|
||||
|
||||
up.getBackendURL()
|
||||
fn(4, 5, 6)
|
||||
|
||||
up.getBackendURL()
|
||||
fn(5, 5, 6)
|
||||
|
||||
up.getBackendURL()
|
||||
fn(6, 5, 6)
|
||||
|
||||
up.getBackendURL()
|
||||
fn(6, 6, 6)
|
||||
|
||||
up.getBackendURL()
|
||||
fn(6, 6, 7)
|
||||
|
||||
up.getBackendURL()
|
||||
up.getBackendURL()
|
||||
fn(1, 1, 2)
|
||||
|
||||
bus[0].concurrentRequests.Add(2)
|
||||
bus[2].concurrentRequests.Add(2)
|
||||
fn(3, 1, 4)
|
||||
|
||||
up.getBackendURL()
|
||||
fn(3, 2, 4)
|
||||
|
||||
up.getBackendURL()
|
||||
fn(3, 3, 4)
|
||||
|
||||
up.getBackendURL()
|
||||
fn(4, 3, 4)
|
||||
|
||||
up.getBackendURL()
|
||||
fn(4, 4, 4)
|
||||
|
||||
bus[0].put()
|
||||
bus[2].put()
|
||||
|
||||
up.getBackendURL()
|
||||
fn(3, 4, 4)
|
||||
|
||||
up.getBackendURL()
|
||||
fn(4, 4, 4)
|
||||
fn(7, 7, 7)
|
||||
}
|
||||
|
||||
func TestBrokenBackend(t *testing.T) {
|
||||
@@ -825,7 +787,7 @@ func TestBrokenBackend(t *testing.T) {
|
||||
})
|
||||
up.loadBalancingPolicy = "least_loaded"
|
||||
pbus := up.bus.Load()
|
||||
bus := pbus.bus
|
||||
bus := *pbus
|
||||
|
||||
// explicitly mark one of the backends as broken
|
||||
bus[1].setBroken()
|
||||
@@ -848,7 +810,7 @@ func TestDiscoverBackendIPsWithIPV6(t *testing.T) {
|
||||
|
||||
up.discoverBackendAddrsIfNeeded()
|
||||
pbus := up.bus.Load()
|
||||
bus := pbus.bus
|
||||
bus := *pbus
|
||||
|
||||
if len(bus) != 1 {
|
||||
t.Fatalf("expected url list to be of size 1; got %d instead", len(bus))
|
||||
@@ -922,7 +884,7 @@ func removeMetrics(m map[string]*UserInfo) {
|
||||
}
|
||||
}
|
||||
|
||||
func areEqualConfigs(a, b any) error {
|
||||
func areEqualConfigs(a, b map[string]*UserInfo) error {
|
||||
aData, err := yaml.Marshal(a)
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot marshal a: %w", err)
|
||||
@@ -942,14 +904,16 @@ func mustParseURL(u string) *URLPrefix {
|
||||
}
|
||||
|
||||
func mustParseURLs(us []string) *URLPrefix {
|
||||
bus := newBackendURLs()
|
||||
bus := make([]*backendURL, len(us))
|
||||
urls := make([]*url.URL, len(us))
|
||||
for i, u := range us {
|
||||
pu, err := url.Parse(u)
|
||||
if err != nil {
|
||||
panic(fmt.Errorf("BUG: cannot parse %q: %w", u, err))
|
||||
}
|
||||
bus.add(pu)
|
||||
bus[i] = &backendURL{
|
||||
url: pu,
|
||||
}
|
||||
urls[i] = pu
|
||||
}
|
||||
up := &URLPrefix{}
|
||||
@@ -958,7 +922,7 @@ func mustParseURLs(us []string) *URLPrefix {
|
||||
} else {
|
||||
up.vOriginal = us
|
||||
}
|
||||
up.bus.Store(bus)
|
||||
up.bus.Store(&bus)
|
||||
up.busOriginal = urls
|
||||
return up
|
||||
}
|
||||
|
||||
@@ -24,7 +24,6 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httputil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/ioutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
|
||||
@@ -41,38 +40,22 @@ var (
|
||||
useProxyProtocol = flagutil.NewArrayBool("httpListenAddr.useProxyProtocol", "Whether to use proxy protocol for connections accepted at the corresponding -httpListenAddr . "+
|
||||
"See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt . "+
|
||||
"With enabled proxy protocol http server cannot serve regular /metrics endpoint. Use -pushmetrics.url for metrics pushing")
|
||||
maxIdleConnsPerBackend = flag.Int("maxIdleConnsPerBackend", 100, "The maximum number of idle connections vmauth can open per each backend host")
|
||||
idleConnTimeout = flag.Duration("idleConnTimeout", 50*time.Second, "The timeout for HTTP keep-alive connections to backend services. "+
|
||||
maxIdleConnsPerBackend = flag.Int("maxIdleConnsPerBackend", 100, "The maximum number of idle connections vmauth can open per each backend host. "+
|
||||
"See also -maxConcurrentRequests")
|
||||
idleConnTimeout = flag.Duration("idleConnTimeout", 50*time.Second, "The timeout for HTTP keep-alive connections to backend services. "+
|
||||
"It is recommended setting this value to values smaller than -http.idleConnTimeout set at backend services")
|
||||
responseTimeout = flag.Duration("responseTimeout", 5*time.Minute, "The timeout for receiving a response from backend")
|
||||
|
||||
requestBufferSize = flagutil.NewBytes("requestBufferSize", 32*1024, "The size of the buffer for reading the request body before proxying the request to backends. "+
|
||||
"This allows reducing the comsumption of backend resources when processing requests from clients connected via slow networks. "+
|
||||
"Set to 0 to disable request buffering. See https://docs.victoriametrics.com/victoriametrics/vmauth/#request-body-buffering")
|
||||
maxRequestBodySizeToRetry = flagutil.NewBytes("maxRequestBodySizeToRetry", 16*1024, "The maximum request body size to buffer in memory for potential retries at other backends. "+
|
||||
"Request bodies larger than this size cannot be retried if the backend fails. Zero or negative value disables request body buffering and retries. "+
|
||||
"See also -requestBufferSize")
|
||||
|
||||
maxConcurrentRequests = flag.Int("maxConcurrentRequests", 1000, "The maximum number of concurrent requests vmauth can process simultaneously. "+
|
||||
"Requests exceeding this limit are queued for up to -maxQueueDuration and then rejected with '429 Too Many Requests' http status code if the limit is still reached. "+
|
||||
"This protects vmauth itself from overloading and out-of-memory (OOM) failures. See also -maxConcurrentPerUserRequests "+
|
||||
"and https://docs.victoriametrics.com/victoriametrics/vmauth/#concurrency-limiting")
|
||||
maxConcurrentPerUserRequests = flag.Int("maxConcurrentPerUserRequests", 100, "The maximum number of concurrent requests vmauth can process per each configured user. "+
|
||||
"Requests exceeding this limit are queued for up to -maxQueueDuration and then rejected with '429 Too Many Requests' http status code if the limit is still reached. "+
|
||||
"This provides fairness and isolation between users, preventing a single user from consuming all the available resources. "+
|
||||
"It works in conjunction with -maxConcurrentRequests, which sets the global limit across all users. "+
|
||||
"This default can be overridden for individual users via max_concurrent_requests option in per-user config. "+
|
||||
"See https://docs.victoriametrics.com/victoriametrics/vmauth/#concurrency-limiting")
|
||||
maxQueueDuration = flag.Duration("maxQueueDuration", 10*time.Second, "The maximum duration to wait before rejecting incoming requests if concurrency limit "+
|
||||
"specified via -maxConcurrentRequests or -maxConcurrentPerUserRequests command-line flags is reached. "+
|
||||
"Requests are rejected with '429 Too Many Requests' http status code if the limit is still reached after the -maxQueueDuration duration. "+
|
||||
"This allows graceful handling of short spikes in concurrent requests. See https://docs.victoriametrics.com/victoriametrics/vmauth/#concurrency-limiting")
|
||||
|
||||
responseTimeout = flag.Duration("responseTimeout", 5*time.Minute, "The timeout for receiving a response from backend")
|
||||
maxConcurrentRequests = flag.Int("maxConcurrentRequests", 1000, "The maximum number of concurrent requests vmauth can process. Other requests are rejected with "+
|
||||
"'429 Too Many Requests' http status code. See also -maxConcurrentPerUserRequests and -maxIdleConnsPerBackend command-line options")
|
||||
maxConcurrentPerUserRequests = flag.Int("maxConcurrentPerUserRequests", 300, "The maximum number of concurrent requests vmauth can process per each configured user. "+
|
||||
"Other requests are rejected with '429 Too Many Requests' http status code. See also -maxConcurrentRequests command-line option and max_concurrent_requests option "+
|
||||
"in per-user config")
|
||||
reloadAuthKey = flagutil.NewPassword("reloadAuthKey", "Auth key for /-/reload http endpoint. It must be passed via authKey query arg. It overrides -httpAuth.*")
|
||||
logInvalidAuthTokens = flag.Bool("logInvalidAuthTokens", false, "Whether to log requests with invalid auth tokens. "+
|
||||
`Such requests are always counted at vmauth_http_request_errors_total{reason="invalid_auth_token"} metric, which is exposed at /metrics page`)
|
||||
failTimeout = flag.Duration("failTimeout", 3*time.Second, "Sets a delay period for load balancing to skip a malfunctioning backend")
|
||||
|
||||
failTimeout = flag.Duration("failTimeout", 3*time.Second, "Sets a delay period for load balancing to skip a malfunctioning backend")
|
||||
maxRequestBodySizeToRetry = flagutil.NewBytes("maxRequestBodySizeToRetry", 16*1024, "The maximum request body size, which can be cached and re-tried at other backends. "+
|
||||
"Bigger values may require more memory. Zero or negative value disables caching of request body. This may be useful when proxying data ingestion requests")
|
||||
backendTLSInsecureSkipVerify = flag.Bool("backend.tlsInsecureSkipVerify", false, "Whether to skip TLS verification when connecting to backends over HTTPS. "+
|
||||
"See https://docs.victoriametrics.com/victoriametrics/vmauth/#backend-tls-setup")
|
||||
backendTLSCAFile = flag.String("backend.TLSCAFile", "", "Optional path to TLS root CA file, which is used for TLS verification when connecting to backends over HTTPS. "+
|
||||
@@ -168,6 +151,7 @@ func requestHandlerWithInternalRoutes(w http.ResponseWriter, r *http.Request) bo
|
||||
}
|
||||
|
||||
func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
|
||||
ats := getAuthTokensFromRequest(r)
|
||||
if len(ats) == 0 {
|
||||
// Process requests for unauthorized users
|
||||
@@ -224,124 +208,26 @@ func processUserRequest(w http.ResponseWriter, r *http.Request, ui *UserInfo) {
|
||||
|
||||
ui.requests.Inc()
|
||||
|
||||
ctx, cancel := context.WithTimeout(r.Context(), *maxQueueDuration)
|
||||
defer cancel()
|
||||
|
||||
// Acquire global concurrency limit.
|
||||
if err := beginConcurrencyLimit(ctx); err != nil {
|
||||
handleConcurrencyLimitError(w, r, err)
|
||||
return
|
||||
}
|
||||
defer endConcurrencyLimit()
|
||||
|
||||
// Set read deadline for reading the initial chunk for the request body.
|
||||
rc := http.NewResponseController(w)
|
||||
deadline, ok := ctx.Deadline()
|
||||
if !ok {
|
||||
logger.Panicf("BUG: expecting valid deadline for the context")
|
||||
}
|
||||
if err := rc.SetReadDeadline(deadline); err != nil {
|
||||
logger.Panicf("BUG: cannot set read deadline: %s", err)
|
||||
}
|
||||
|
||||
// Read the initial chunk for the request body.
|
||||
userName := ui.name()
|
||||
if userName == "" {
|
||||
userName = "unauthorized"
|
||||
}
|
||||
bb, err := bufferRequestBody(ctx, r.Body, userName)
|
||||
if err != nil {
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return
|
||||
}
|
||||
r.Body = bb
|
||||
|
||||
// Disable the read deadline for the rest of the request body.
|
||||
if err := rc.SetReadDeadline(time.Time{}); err != nil {
|
||||
logger.Panicf("BUG: cannot reset read deadline: %s", err)
|
||||
}
|
||||
|
||||
// Acquire concurrency limit for the given user.
|
||||
if err := ui.beginConcurrencyLimit(ctx); err != nil {
|
||||
handleConcurrencyLimitError(w, r, err)
|
||||
return
|
||||
}
|
||||
defer ui.endConcurrencyLimit()
|
||||
|
||||
// Process the request.
|
||||
processRequest(w, r, ui)
|
||||
}
|
||||
|
||||
func beginConcurrencyLimit(ctx context.Context) error {
|
||||
// Limit the concurrency of requests to backends
|
||||
concurrencyLimitOnce.Do(concurrencyLimitInit)
|
||||
select {
|
||||
case concurrencyLimitCh <- struct{}{}:
|
||||
return nil
|
||||
if err := ui.beginConcurrencyLimit(); err != nil {
|
||||
handleConcurrencyLimitError(w, r, err)
|
||||
<-concurrencyLimitCh
|
||||
return
|
||||
}
|
||||
default:
|
||||
// The -maxConcurrentRequests are executed. Wait until some of the requests are finished,
|
||||
// so the current request could be executed.
|
||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10078
|
||||
select {
|
||||
case concurrencyLimitCh <- struct{}{}:
|
||||
return nil
|
||||
case <-ctx.Done():
|
||||
err := ctx.Err()
|
||||
if errors.Is(err, context.DeadlineExceeded) {
|
||||
// The current request couldn't be executed until the request timeout.
|
||||
concurrentRequestsLimitReached.Inc()
|
||||
return fmt.Errorf("cannot start executing the request during -maxQueueDuration=%s because -maxConcurrentRequests=%d concurrent requests are executed",
|
||||
*maxQueueDuration, cap(concurrencyLimitCh))
|
||||
}
|
||||
return fmt.Errorf("cannot start executing the request because -maxConcurrentRequests=%d concurrent requests are executed: %w", cap(concurrencyLimitCh), err)
|
||||
}
|
||||
concurrentRequestsLimitReached.Inc()
|
||||
err := fmt.Errorf("cannot serve more than -maxConcurrentRequests=%d concurrent requests", cap(concurrencyLimitCh))
|
||||
handleConcurrencyLimitError(w, r, err)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
func endConcurrencyLimit() {
|
||||
processRequest(w, r, ui)
|
||||
ui.endConcurrencyLimit()
|
||||
<-concurrencyLimitCh
|
||||
}
|
||||
|
||||
func bufferRequestBody(ctx context.Context, r io.ReadCloser, userName string) (io.ReadCloser, error) {
|
||||
if r == nil {
|
||||
// This is a GET request with nil reader.
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
maxBufSize := max(requestBufferSize.IntN(), maxRequestBodySizeToRetry.IntN())
|
||||
if maxBufSize <= 0 {
|
||||
return r, nil
|
||||
}
|
||||
|
||||
lr := ioutil.GetLimitedReader(r, int64(maxBufSize))
|
||||
defer ioutil.PutLimitedReader(lr)
|
||||
|
||||
start := time.Now()
|
||||
buf, err := io.ReadAll(lr)
|
||||
bufferRequestBodyDuration.UpdateDuration(start)
|
||||
|
||||
if err != nil {
|
||||
if errors.Is(ctx.Err(), context.DeadlineExceeded) {
|
||||
rejectSlowClientRequests.Inc()
|
||||
|
||||
d := time.Since(start)
|
||||
|
||||
return nil, &httpserver.ErrorWithStatusCode{
|
||||
Err: fmt.Errorf("reject request from the user %s because the request body couldn't be read in -maxQueueDuration=%s; read %d bytes in %s",
|
||||
userName, *maxQueueDuration, len(buf), d.Truncate(time.Second)),
|
||||
StatusCode: http.StatusBadRequest,
|
||||
}
|
||||
}
|
||||
|
||||
return nil, &httpserver.ErrorWithStatusCode{
|
||||
Err: fmt.Errorf("cannot read request body: %w", err),
|
||||
StatusCode: http.StatusBadRequest,
|
||||
}
|
||||
}
|
||||
|
||||
bb := newBufferedBody(r, buf, maxBufSize)
|
||||
return bb, nil
|
||||
}
|
||||
|
||||
func processRequest(w http.ResponseWriter, r *http.Request, ui *UserInfo) {
|
||||
u := normalizeURL(r.URL)
|
||||
up, hc := ui.getURLPrefixAndHeaders(u, r.Host, r.Header)
|
||||
@@ -367,6 +253,9 @@ func processRequest(w http.ResponseWriter, r *http.Request, ui *UserInfo) {
|
||||
isDefault = true
|
||||
}
|
||||
|
||||
rtb := newReadTrackingBody(r.Body, maxRequestBodySizeToRetry.IntN())
|
||||
r.Body = rtb
|
||||
|
||||
maxAttempts := up.getBackendsCount()
|
||||
for i := 0; i < maxAttempts; i++ {
|
||||
bu := up.getBackendURL()
|
||||
@@ -374,19 +263,18 @@ func processRequest(w http.ResponseWriter, r *http.Request, ui *UserInfo) {
|
||||
break
|
||||
}
|
||||
targetURL := bu.url
|
||||
// Don't change path and add request_path query param for default route.
|
||||
if isDefault {
|
||||
// Don't change path and add request_path query param for default route.
|
||||
query := targetURL.Query()
|
||||
query.Set("request_path", u.String())
|
||||
targetURL.RawQuery = query.Encode()
|
||||
} else {
|
||||
// Update path for regular routes.
|
||||
targetURL = mergeURLs(targetURL, u, up.dropSrcPathPrefixParts, up.mergeQueryArgs)
|
||||
} else { // Update path for regular routes.
|
||||
targetURL = mergeURLs(targetURL, u, up.dropSrcPathPrefixParts)
|
||||
}
|
||||
|
||||
wasLocalRetry := false
|
||||
again:
|
||||
ok, needLocalRetry := tryProcessingRequest(w, r, targetURL, hc, up.retryStatusCodes, ui, bu)
|
||||
ok, needLocalRetry := tryProcessingRequest(w, r, targetURL, hc, up.retryStatusCodes, ui)
|
||||
if needLocalRetry && !wasLocalRetry {
|
||||
wasLocalRetry = true
|
||||
goto again
|
||||
@@ -396,20 +284,17 @@ func processRequest(w http.ResponseWriter, r *http.Request, ui *UserInfo) {
|
||||
if ok {
|
||||
return
|
||||
}
|
||||
|
||||
bu.setBroken()
|
||||
ui.backendErrors.Inc()
|
||||
}
|
||||
err := &httpserver.ErrorWithStatusCode{
|
||||
Err: fmt.Errorf("all the %d backends for the user %q are unavailable", up.getBackendsCount(), ui.name()),
|
||||
StatusCode: http.StatusBadGateway,
|
||||
}
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
ui.requestErrors.Inc()
|
||||
ui.backendErrors.Inc()
|
||||
}
|
||||
|
||||
func tryProcessingRequest(w http.ResponseWriter, r *http.Request, targetURL *url.URL, hc HeadersConf, retryStatusCodes []int, ui *UserInfo, bu *backendURL) (bool, bool) {
|
||||
ui.backendRequests.Inc()
|
||||
func tryProcessingRequest(w http.ResponseWriter, r *http.Request, targetURL *url.URL, hc HeadersConf, retryStatusCodes []int, ui *UserInfo) (bool, bool) {
|
||||
req := sanitizeRequestHeaders(r)
|
||||
|
||||
req.URL = targetURL
|
||||
@@ -423,19 +308,21 @@ func tryProcessingRequest(w http.ResponseWriter, r *http.Request, targetURL *url
|
||||
}
|
||||
}
|
||||
|
||||
bb, bbOK := req.Body.(*bufferedBody)
|
||||
canRetry := !bbOK || bb.canRetry()
|
||||
|
||||
rtb, rtbOK := req.Body.(*readTrackingBody)
|
||||
res, err := ui.rt.RoundTrip(req)
|
||||
|
||||
if errors.Is(r.Context().Err(), context.Canceled) {
|
||||
// Do not retry canceled requests.
|
||||
clientCanceledRequests.Inc()
|
||||
return true, false
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
if !canRetry {
|
||||
if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) {
|
||||
// Do not retry canceled or timed out requests
|
||||
remoteAddr := httpserver.GetQuotedRemoteAddr(r)
|
||||
requestURI := httpserver.GetRequestURI(r)
|
||||
logger.Warnf("remoteAddr: %s; requestURI: %s; error when proxying response body from %s: %s", remoteAddr, requestURI, targetURL, err)
|
||||
if errors.Is(err, context.DeadlineExceeded) {
|
||||
// Timed out request must be counted as errors, since this usually means that the backend is slow.
|
||||
ui.backendErrors.Inc()
|
||||
}
|
||||
return false, false
|
||||
}
|
||||
if !rtbOK || !rtb.canRetry() {
|
||||
// Request body cannot be re-sent to another backend. Return the error to the client then.
|
||||
err = &httpserver.ErrorWithStatusCode{
|
||||
Err: fmt.Errorf("cannot proxy the request to %s: %w", targetURL, err),
|
||||
@@ -443,51 +330,41 @@ func tryProcessingRequest(w http.ResponseWriter, r *http.Request, targetURL *url
|
||||
}
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
ui.backendErrors.Inc()
|
||||
ui.requestErrors.Inc()
|
||||
bu.setBroken()
|
||||
return true, false
|
||||
}
|
||||
if netutil.IsTrivialNetworkError(err) {
|
||||
// Retry request at the same backend on trivial network errors, such as proxy idle timeout misconfiguration or socket close by OS
|
||||
if bbOK {
|
||||
bb.resetReader()
|
||||
}
|
||||
return false, true
|
||||
}
|
||||
|
||||
// Retry the request at another backend
|
||||
// Retry the request if its body wasn't read yet. This usually means that the backend isn't reachable.
|
||||
remoteAddr := httpserver.GetQuotedRemoteAddr(r)
|
||||
requestURI := httpserver.GetRequestURI(r)
|
||||
logger.Warnf("remoteAddr: %s; requestURI: %s; request to %s failed: %s, retrying the request at another backend", remoteAddr, requestURI, targetURL, err)
|
||||
if bbOK {
|
||||
bb.resetReader()
|
||||
}
|
||||
// NOTE: do not use httpserver.GetRequestURI
|
||||
// it explicitly reads request body, which may fail retries.
|
||||
logger.Warnf("remoteAddr: %s; requestURI: %s; retrying the request to %s because of response error: %s", remoteAddr, req.URL, targetURL, err)
|
||||
return false, false
|
||||
}
|
||||
if slices.Contains(retryStatusCodes, res.StatusCode) {
|
||||
if !canRetry {
|
||||
_ = res.Body.Close()
|
||||
if !rtbOK || !rtb.canRetry() {
|
||||
// If we get an error from the retry_status_codes list, but cannot execute retry,
|
||||
// we consider such a request an error as well.
|
||||
err := &httpserver.ErrorWithStatusCode{
|
||||
Err: fmt.Errorf("got response status code=%d from %s, but cannot retry the request at another backend, because the request body has been already consumed",
|
||||
Err: fmt.Errorf("got response status code=%d from %s, but cannot retry the request on another backend, because the request has been already consumed",
|
||||
res.StatusCode, targetURL),
|
||||
StatusCode: http.StatusServiceUnavailable,
|
||||
}
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
ui.backendErrors.Inc()
|
||||
ui.requestErrors.Inc()
|
||||
return true, false
|
||||
}
|
||||
|
||||
// Retry requests at other backends if it matches retryStatusCodes.
|
||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4893
|
||||
remoteAddr := httpserver.GetQuotedRemoteAddr(r)
|
||||
requestURI := httpserver.GetRequestURI(r)
|
||||
logger.Warnf("remoteAddr: %s; requestURI: %s; request to %s failed, retrying the request at another backend because response status code=%d belongs to retry_status_codes=%d",
|
||||
remoteAddr, requestURI, targetURL, res.StatusCode, retryStatusCodes)
|
||||
if bbOK {
|
||||
bb.resetReader()
|
||||
}
|
||||
// NOTE: do not use httpserver.GetRequestURI
|
||||
// it explicitly reads request body, which may fail retries.
|
||||
logger.Warnf("remoteAddr: %s; requestURI: %s; retrying the request to %s because response status code=%d belongs to retry_status_codes=%d",
|
||||
remoteAddr, req.URL, targetURL, res.StatusCode, retryStatusCodes)
|
||||
return false, false
|
||||
}
|
||||
removeHopHeaders(res.Header)
|
||||
@@ -495,61 +372,20 @@ func tryProcessingRequest(w http.ResponseWriter, r *http.Request, targetURL *url
|
||||
updateHeadersByConfig(w.Header(), hc.ResponseHeaders)
|
||||
w.WriteHeader(res.StatusCode)
|
||||
|
||||
err = copyStreamToClient(w, res.Body)
|
||||
copyBuf := copyBufPool.Get()
|
||||
copyBuf.B = bytesutil.ResizeNoCopyNoOverallocate(copyBuf.B, 16*1024)
|
||||
_, err = io.CopyBuffer(w, res.Body, copyBuf.B)
|
||||
copyBufPool.Put(copyBuf)
|
||||
_ = res.Body.Close()
|
||||
|
||||
if errors.Is(r.Context().Err(), context.Canceled) {
|
||||
// Do not retry canceled requests.
|
||||
clientCanceledRequests.Inc()
|
||||
return true, false
|
||||
}
|
||||
|
||||
if err != nil && !netutil.IsTrivialNetworkError(err) {
|
||||
remoteAddr := httpserver.GetQuotedRemoteAddr(r)
|
||||
requestURI := httpserver.GetRequestURI(r)
|
||||
logger.Warnf("remoteAddr: %s; requestURI: %s; error when proxying response body from %s: %s", remoteAddr, requestURI, targetURL, err)
|
||||
ui.requestErrors.Inc()
|
||||
return true, false
|
||||
}
|
||||
return true, false
|
||||
}
|
||||
|
||||
func copyStreamToClient(client io.Writer, backend io.Reader) error {
|
||||
copyBuf := copyBufPool.Get()
|
||||
copyBuf.B = bytesutil.ResizeNoCopyNoOverallocate(copyBuf.B, 16*1024)
|
||||
defer copyBufPool.Put(copyBuf)
|
||||
buf := copyBuf.B
|
||||
|
||||
flusher, ok := client.(http.Flusher)
|
||||
if !ok {
|
||||
logger.Panicf("BUG: client must implement net/http.Flusher interface; got %T", client)
|
||||
}
|
||||
|
||||
for {
|
||||
n, backendErr := backend.Read(buf)
|
||||
if n > 0 {
|
||||
data := buf[:n]
|
||||
n, clientErr := client.Write(data)
|
||||
if clientErr != nil {
|
||||
return fmt.Errorf("cannot write data to client: %w", clientErr)
|
||||
}
|
||||
if n != len(data) {
|
||||
logger.Panicf("BUG: unexpected number of bytes written returned by client.Write; got %d; want %d", n, len(data))
|
||||
}
|
||||
// Flush the read data from the backend to the client as fast as possible
|
||||
// in order to reduce delays for data propagation.
|
||||
// See https://github.com/VictoriaMetrics/VictoriaLogs/issues/667
|
||||
flusher.Flush()
|
||||
}
|
||||
if backendErr != nil {
|
||||
if backendErr == io.EOF {
|
||||
return nil
|
||||
}
|
||||
return fmt.Errorf("cannot read data from backend: %w", backendErr)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var copyBufPool bytesutil.ByteBufferPool
|
||||
|
||||
func copyHeader(dst, src http.Header) {
|
||||
@@ -636,10 +472,6 @@ var (
|
||||
configReloadRequests = metrics.NewCounter(`vmauth_http_requests_total{path="/-/reload"}`)
|
||||
invalidAuthTokenRequests = metrics.NewCounter(`vmauth_http_request_errors_total{reason="invalid_auth_token"}`)
|
||||
missingRouteRequests = metrics.NewCounter(`vmauth_http_request_errors_total{reason="missing_route"}`)
|
||||
clientCanceledRequests = metrics.NewCounter(`vmauth_http_request_errors_total{reason="client_canceled"}`)
|
||||
rejectSlowClientRequests = metrics.NewCounter(`vmauth_http_request_errors_total{reason="reject_slow_client"}`)
|
||||
|
||||
bufferRequestBodyDuration = metrics.NewSummary(`vmauth_buffer_request_body_duration_seconds`)
|
||||
)
|
||||
|
||||
func newRoundTripper(caFileOpt, certFileOpt, keyFileOpt, serverNameOpt string, insecureSkipVerifyP *bool) (http.RoundTripper, error) {
|
||||
@@ -723,13 +555,6 @@ func handleMissingAuthorizationError(w http.ResponseWriter) {
|
||||
}
|
||||
|
||||
func handleConcurrencyLimitError(w http.ResponseWriter, r *http.Request, err error) {
|
||||
if errors.Is(r.Context().Err(), context.Canceled) {
|
||||
// Do not return any response for the request canceled by the client,
|
||||
// since the connection to the client is already closed.
|
||||
clientCanceledRequests.Inc()
|
||||
return
|
||||
}
|
||||
|
||||
w.Header().Add("Retry-After", "10")
|
||||
err = &httpserver.ErrorWithStatusCode{
|
||||
Err: err,
|
||||
@@ -738,76 +563,120 @@ func handleConcurrencyLimitError(w http.ResponseWriter, r *http.Request, err err
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
}
|
||||
|
||||
// bufferedBody serves two purposes:
|
||||
// 1. Enables request retries when the body size does not exceed maxBodySize
|
||||
// by fully buffering the body in memory.
|
||||
// 2. Prevents slow clients from reducing effective server capacity by
|
||||
// buffering the request body before acquiring a per-user concurrency slot.
|
||||
//
|
||||
// See bufferRequestBody for details on how bufferedBody is used.
|
||||
type bufferedBody struct {
|
||||
// r contains reader for reading the data after buf is read.
|
||||
// readTrackingBody must be obtained via getReadTrackingBody()
|
||||
type readTrackingBody struct {
|
||||
// maxBodySize is the maximum body size to cache in buf.
|
||||
//
|
||||
// r is nil if buf contains all the data.
|
||||
// Bigger bodies cannot be retried.
|
||||
maxBodySize int
|
||||
|
||||
// r contains reader for initial data reading
|
||||
r io.ReadCloser
|
||||
|
||||
// buf contains the initial buffer read from r.
|
||||
// buf is a buffer for data read from r. Buf size is limited by maxBodySize.
|
||||
// If more than maxBodySize is read from r, then cannotRetry is set to true.
|
||||
buf []byte
|
||||
|
||||
// bufOffset is the offset at buf for already read bytes.
|
||||
bufOffset int
|
||||
// readBuf points to the cached data at buf, which must be read in the next call to Read().
|
||||
readBuf []byte
|
||||
|
||||
// cannotRetry is set to true after Close() call on non-nil r.
|
||||
// cannotRetry is set to true when more than maxBodySize bytes are read from r.
|
||||
// In this case the read data cannot fit buf, so it cannot be re-read from buf.
|
||||
cannotRetry bool
|
||||
|
||||
// bufComplete is set to true when buf contains complete request body read from r.
|
||||
bufComplete bool
|
||||
}
|
||||
|
||||
func newBufferedBody(r io.ReadCloser, buf []byte, maxBufSize int) *bufferedBody {
|
||||
// Do not use sync.Pool here, since http.RoundTrip may still use request body after return.
|
||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/8051
|
||||
|
||||
if len(buf) < maxBufSize {
|
||||
// Read the full request body into buf.
|
||||
r = nil
|
||||
func newReadTrackingBody(r io.ReadCloser, maxBodySize int) *readTrackingBody {
|
||||
// do not use sync.Pool there
|
||||
// since http.RoundTrip may still use request body after return
|
||||
// See this issue for details https://github.com/VictoriaMetrics/VictoriaMetrics/issues/8051
|
||||
rtb := &readTrackingBody{}
|
||||
if maxBodySize < 0 {
|
||||
maxBodySize = 0
|
||||
}
|
||||
rtb.maxBodySize = maxBodySize
|
||||
|
||||
return &bufferedBody{
|
||||
r: r,
|
||||
buf: buf,
|
||||
if r == nil {
|
||||
// This is GET request without request body
|
||||
r = (*zeroReader)(nil)
|
||||
}
|
||||
rtb.r = r
|
||||
return rtb
|
||||
}
|
||||
|
||||
// Read implements io.Reader interface.
|
||||
func (bb *bufferedBody) Read(p []byte) (int, error) {
|
||||
if bb.cannotRetry {
|
||||
return 0, fmt.Errorf("cannot read already closed body")
|
||||
}
|
||||
if bb.bufOffset < len(bb.buf) {
|
||||
n := copy(p, bb.buf[bb.bufOffset:])
|
||||
bb.bufOffset += n
|
||||
return n, nil
|
||||
}
|
||||
if bb.r == nil {
|
||||
return 0, io.EOF
|
||||
}
|
||||
return bb.r.Read(p)
|
||||
}
|
||||
type zeroReader struct{}
|
||||
|
||||
func (bb *bufferedBody) canRetry() bool {
|
||||
return bb.r == nil
|
||||
func (r *zeroReader) Read(_ []byte) (int, error) {
|
||||
return 0, io.EOF
|
||||
}
|
||||
|
||||
// Close implements io.Closer interface.
|
||||
func (bb *bufferedBody) Close() error {
|
||||
bb.resetReader()
|
||||
if bb.r != nil {
|
||||
bb.cannotRetry = true
|
||||
return bb.r.Close()
|
||||
}
|
||||
func (r *zeroReader) Close() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (bb *bufferedBody) resetReader() {
|
||||
bb.bufOffset = 0
|
||||
// Read implements io.Reader interface.
|
||||
func (rtb *readTrackingBody) Read(p []byte) (int, error) {
|
||||
if len(rtb.readBuf) > 0 {
|
||||
n := copy(p, rtb.readBuf)
|
||||
rtb.readBuf = rtb.readBuf[n:]
|
||||
return n, nil
|
||||
}
|
||||
|
||||
if rtb.r == nil {
|
||||
if rtb.bufComplete {
|
||||
return 0, io.EOF
|
||||
}
|
||||
return 0, fmt.Errorf("cannot read client request body after closing client reader")
|
||||
}
|
||||
|
||||
n, err := rtb.r.Read(p)
|
||||
if rtb.cannotRetry {
|
||||
return n, err
|
||||
}
|
||||
|
||||
if len(rtb.buf)+n > rtb.maxBodySize {
|
||||
rtb.cannotRetry = true
|
||||
return n, err
|
||||
}
|
||||
rtb.buf = append(rtb.buf, p[:n]...)
|
||||
if err == io.EOF {
|
||||
rtb.bufComplete = true
|
||||
}
|
||||
return n, err
|
||||
}
|
||||
|
||||
func (rtb *readTrackingBody) canRetry() bool {
|
||||
if rtb.cannotRetry {
|
||||
return false
|
||||
}
|
||||
if rtb.bufComplete {
|
||||
return true
|
||||
}
|
||||
return rtb.r != nil
|
||||
}
|
||||
|
||||
// Close implements io.Closer interface.
|
||||
func (rtb *readTrackingBody) Close() error {
|
||||
if !rtb.cannotRetry {
|
||||
rtb.readBuf = rtb.buf
|
||||
} else {
|
||||
rtb.readBuf = nil
|
||||
}
|
||||
|
||||
// Close rtb.r only if the request body is completely read or if it is too big.
|
||||
// http.Roundtrip performs body.Close call even without any Read calls,
|
||||
// so this hack allows us to reuse request body.
|
||||
if rtb.bufComplete || rtb.cannotRetry {
|
||||
if rtb.r == nil {
|
||||
return nil
|
||||
}
|
||||
err := rtb.r.Close()
|
||||
rtb.r = nil
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func debugInfo(u *url.URL, r *http.Request) string {
|
||||
|
||||
@@ -2,7 +2,6 @@ package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
"net"
|
||||
@@ -11,7 +10,6 @@ import (
|
||||
"strings"
|
||||
"sync/atomic"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil"
|
||||
)
|
||||
@@ -516,11 +514,6 @@ func (w *fakeResponseWriter) getResponse() string {
|
||||
return w.bb.String()
|
||||
}
|
||||
|
||||
// Flush implements net/http.Flusher
|
||||
func (w *fakeResponseWriter) Flush() {
|
||||
// Nothing to do.
|
||||
}
|
||||
|
||||
func (w *fakeResponseWriter) Header() http.Header {
|
||||
if w.h == nil {
|
||||
w.h = http.Header{}
|
||||
@@ -548,300 +541,28 @@ func (w *fakeResponseWriter) WriteHeader(statusCode int) {
|
||||
}
|
||||
}
|
||||
|
||||
// This is needed for net/http.ResponseController
|
||||
func (w *fakeResponseWriter) SetReadDeadline(deadline time.Time) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func TestBufferRequestBody_Success(t *testing.T) {
|
||||
defaultRequestBufferSize := requestBufferSize.String()
|
||||
defer func() {
|
||||
if err := requestBufferSize.Set(defaultRequestBufferSize); err != nil {
|
||||
t.Fatalf("cannot reset requestBufferSize: %s", err)
|
||||
}
|
||||
}()
|
||||
|
||||
defaultMaxRequestBodySizeToRetry := maxRequestBodySizeToRetry.String()
|
||||
defer func() {
|
||||
if err := maxRequestBodySizeToRetry.Set(defaultMaxRequestBodySizeToRetry); err != nil {
|
||||
t.Fatalf("cannot reset maxRequestBodySizeToRetry: %s", err)
|
||||
}
|
||||
}()
|
||||
|
||||
f := func(body *bytes.Buffer, requestBufferSizeFlag, maxRequestBodySizeToRetryFlag string) {
|
||||
t.Helper()
|
||||
|
||||
expectedResponse := "statusCode=200"
|
||||
if body.Len() > 0 {
|
||||
expectedResponse += "\n" + body.String()
|
||||
}
|
||||
|
||||
if err := requestBufferSize.Set(requestBufferSizeFlag); err != nil {
|
||||
t.Fatalf("cannot set requestBufferSize: %s", err)
|
||||
}
|
||||
if err := maxRequestBodySizeToRetry.Set(maxRequestBodySizeToRetryFlag); err != nil {
|
||||
t.Fatalf("cannot set maxRequestBodySizeToRetry: %s", err)
|
||||
}
|
||||
|
||||
var backendCalled bool
|
||||
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
backendCalled = true
|
||||
|
||||
b, err := io.ReadAll(r.Body)
|
||||
if err != nil {
|
||||
http.Error(w, fmt.Sprintf("cannot read body: %s", err), http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
if _, err := w.Write(b); err != nil {
|
||||
http.Error(w, fmt.Sprintf("cannot write body: %s", err), http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
}))
|
||||
defer ts.Close()
|
||||
|
||||
// regular url_prefix
|
||||
cfgStr := strings.ReplaceAll(`
|
||||
unauthorized_user:
|
||||
url_prefix: {BACKEND}/foo`, "{BACKEND}", ts.URL)
|
||||
|
||||
cfgOrigP := authConfigData.Load()
|
||||
if _, err := reloadAuthConfigData([]byte(cfgStr)); err != nil {
|
||||
t.Fatalf("cannot load config data: %s", err)
|
||||
}
|
||||
defer func() {
|
||||
cfgOrig := []byte("unauthorized_user:\n url_prefix: http://foo/bar")
|
||||
if cfgOrigP != nil {
|
||||
cfgOrig = *cfgOrigP
|
||||
}
|
||||
_, err := reloadAuthConfigData(cfgOrig)
|
||||
if err != nil {
|
||||
t.Fatalf("cannot load the original config: %s", err)
|
||||
}
|
||||
}()
|
||||
|
||||
r, err := http.NewRequest(http.MethodPost, `http://some-host.com`, body)
|
||||
if err != nil {
|
||||
t.Fatalf("cannot initialize http request: %s", err)
|
||||
}
|
||||
|
||||
w := &fakeResponseWriter{}
|
||||
if !requestHandlerWithInternalRoutes(w, r) {
|
||||
t.Fatalf("unexpected false is returned from requestHandler")
|
||||
}
|
||||
|
||||
response := w.getResponse()
|
||||
response = strings.ReplaceAll(response, "\r\n", "\n")
|
||||
response = strings.TrimSpace(response)
|
||||
|
||||
if response != expectedResponse {
|
||||
t.Fatalf("unexpected response\ngot\n%s\nwant\n%s", response, expectedResponse)
|
||||
}
|
||||
if !backendCalled {
|
||||
t.Fatalf("backend is not called")
|
||||
}
|
||||
}
|
||||
|
||||
// no body, no buffering, no retry
|
||||
f(bytes.NewBuffer(nil), "0", "0")
|
||||
|
||||
// no body, buffering on, no retry
|
||||
f(bytes.NewBuffer(nil), "100", "0")
|
||||
|
||||
// no body, no buffering, retry on
|
||||
f(bytes.NewBuffer(nil), "0", "100")
|
||||
|
||||
// no body, buffering on, retry on
|
||||
f(bytes.NewBuffer(nil), "100", "100")
|
||||
|
||||
// body smaller than buffer, retry max on
|
||||
f(bytes.NewBufferString(strings.Repeat("abcdf", 100)), "101", "101")
|
||||
|
||||
// body smaller than buffer
|
||||
f(bytes.NewBufferString(strings.Repeat("abcdf", 100)), "501", "0")
|
||||
|
||||
// body same size as buffer
|
||||
f(bytes.NewBufferString(strings.Repeat("abcdf", 100)), "500", "0")
|
||||
|
||||
// body bigger than a buffer
|
||||
f(bytes.NewBufferString(strings.Repeat("abcdf", 100)), "499", "0")
|
||||
|
||||
// body bigger than tmpBuf 8KiB used in buffering
|
||||
f(bytes.NewBufferString(strings.Repeat("a", 32*1024)), "16384", "")
|
||||
|
||||
f(bytes.NewBufferString(strings.Repeat("a", 32*1024)), "16385", "")
|
||||
|
||||
f(bytes.NewBufferString(strings.Repeat("a", 32*1024)), "16383", "")
|
||||
}
|
||||
|
||||
func TestBufferRequestBody_Failure(t *testing.T) {
|
||||
defaultRequestBufferSize := requestBufferSize.String()
|
||||
defer func() {
|
||||
if err := requestBufferSize.Set(defaultRequestBufferSize); err != nil {
|
||||
t.Fatalf("cannot reset requestBufferSize: %s", err)
|
||||
}
|
||||
}()
|
||||
|
||||
defaultMaxRequestBodySizeToRetry := maxRequestBodySizeToRetry.String()
|
||||
defer func() {
|
||||
if err := maxRequestBodySizeToRetry.Set(defaultMaxRequestBodySizeToRetry); err != nil {
|
||||
t.Fatalf("cannot reset maxRequestBodySizeToRetry: %s", err)
|
||||
}
|
||||
}()
|
||||
|
||||
defaultMaxQueueDuration := *maxQueueDuration
|
||||
defer func() {
|
||||
*maxQueueDuration = defaultMaxQueueDuration
|
||||
}()
|
||||
|
||||
f := func(body *mockBody, expectedResponse string) {
|
||||
t.Helper()
|
||||
|
||||
if err := maxRequestBodySizeToRetry.Set("0"); err != nil {
|
||||
t.Fatalf("cannot set maxRequestBodySizeToRetry: %s", err)
|
||||
}
|
||||
if err := requestBufferSize.Set("2048"); err != nil {
|
||||
t.Fatalf("cannot set requestBufferSize: %s", err)
|
||||
}
|
||||
*maxQueueDuration = 100 * time.Millisecond
|
||||
|
||||
var backendCalled bool
|
||||
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
backendCalled = true
|
||||
|
||||
b, err := io.ReadAll(r.Body)
|
||||
if err != nil {
|
||||
http.Error(w, fmt.Sprintf("cannot read body: %s", err), http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
if _, err := w.Write(b); err != nil {
|
||||
http.Error(w, fmt.Sprintf("cannot write body: %s", err), http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
}))
|
||||
defer ts.Close()
|
||||
|
||||
// regular url_prefix
|
||||
cfgStr := strings.ReplaceAll(`
|
||||
unauthorized_user:
|
||||
url_prefix: {BACKEND}/foo`, "{BACKEND}", ts.URL)
|
||||
|
||||
cfgOrigP := authConfigData.Load()
|
||||
if _, err := reloadAuthConfigData([]byte(cfgStr)); err != nil {
|
||||
t.Fatalf("cannot load config data: %s", err)
|
||||
}
|
||||
defer func() {
|
||||
cfgOrig := []byte("unauthorized_user:\n url_prefix: http://foo/bar")
|
||||
if cfgOrigP != nil {
|
||||
cfgOrig = *cfgOrigP
|
||||
}
|
||||
_, err := reloadAuthConfigData(cfgOrig)
|
||||
if err != nil {
|
||||
t.Fatalf("cannot load the original config: %s", err)
|
||||
}
|
||||
}()
|
||||
|
||||
r, err := http.NewRequest(http.MethodPost, `http://some-host.com`, body)
|
||||
if err != nil {
|
||||
t.Fatalf("cannot initialize http request: %s", err)
|
||||
}
|
||||
|
||||
w := &fakeResponseWriter{}
|
||||
if !requestHandlerWithInternalRoutes(w, r) {
|
||||
t.Fatalf("unexpected false is returned from requestHandler")
|
||||
}
|
||||
|
||||
response := w.getResponse()
|
||||
response = strings.ReplaceAll(response, "\r\n", "\n")
|
||||
response = strings.TrimSpace(response)
|
||||
|
||||
if response != expectedResponse {
|
||||
t.Fatalf("unexpected response\ngot\n%s\nwant\n%s", response, expectedResponse)
|
||||
}
|
||||
if backendCalled {
|
||||
t.Fatalf("backend is called")
|
||||
}
|
||||
}
|
||||
|
||||
// an error at the beginning of reading
|
||||
f(&mockBody{err: fmt.Errorf("an error")}, `statusCode=400
|
||||
cannot read request body: an error`)
|
||||
|
||||
// an error after reading 1024 bytes, buffer size is 2048 bytes
|
||||
f(&mockBody{head: make([]byte, 1024), err: fmt.Errorf("an error")}, `statusCode=400
|
||||
cannot read request body: an error`)
|
||||
}
|
||||
|
||||
type mockBody struct {
|
||||
head []byte
|
||||
err error
|
||||
tail []byte
|
||||
}
|
||||
|
||||
func (r *mockBody) Read(p []byte) (n int, err error) {
|
||||
if len(r.head) > 0 {
|
||||
n = copy(p, r.head)
|
||||
r.head = r.head[n:]
|
||||
return n, nil
|
||||
}
|
||||
|
||||
if r.err != nil {
|
||||
return 0, r.err
|
||||
}
|
||||
|
||||
if len(r.tail) > 0 {
|
||||
n = copy(p, r.tail)
|
||||
r.tail = r.tail[n:]
|
||||
return n, nil
|
||||
}
|
||||
|
||||
return 0, io.EOF
|
||||
}
|
||||
|
||||
func TestBufferedBody_RetrySuccess(t *testing.T) {
|
||||
func TestReadTrackingBody_RetrySuccess(t *testing.T) {
|
||||
f := func(s string, maxBodySize int) {
|
||||
t.Helper()
|
||||
|
||||
defaultRequestBufferSize := requestBufferSize.String()
|
||||
defer func() {
|
||||
if err := requestBufferSize.Set(defaultRequestBufferSize); err != nil {
|
||||
t.Fatalf("cannot reset requestBufferSize: %s", err)
|
||||
}
|
||||
}()
|
||||
if err := requestBufferSize.Set(fmt.Sprintf("%d", maxBodySize)); err != nil {
|
||||
t.Fatalf("cannot set requestBufferSize: %s", err)
|
||||
}
|
||||
rtb := newReadTrackingBody(io.NopCloser(bytes.NewBufferString(s)), maxBodySize)
|
||||
|
||||
defaultMaxRequestBodySizeToRetry := maxRequestBodySizeToRetry.String()
|
||||
defer func() {
|
||||
if err := maxRequestBodySizeToRetry.Set(defaultMaxRequestBodySizeToRetry); err != nil {
|
||||
t.Fatalf("cannot reset maxRequestBodySizeToRetry: %s", err)
|
||||
}
|
||||
}()
|
||||
if err := maxRequestBodySizeToRetry.Set("0"); err != nil {
|
||||
t.Fatalf("cannot set maxRequestBodySizeToRetry: %s", err)
|
||||
}
|
||||
|
||||
ctx := context.Background()
|
||||
rb, err := bufferRequestBody(ctx, io.NopCloser(bytes.NewBufferString(s)), "foo")
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
bb, ok := rb.(*bufferedBody)
|
||||
canRetry := !ok || bb.canRetry()
|
||||
|
||||
if !canRetry {
|
||||
if !rtb.canRetry() {
|
||||
t.Fatalf("canRetry() must return true before reading anything")
|
||||
}
|
||||
for i := 0; i < 5; i++ {
|
||||
data, err := io.ReadAll(rb)
|
||||
data, err := io.ReadAll(rtb)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error when reading all the data at iteration %d: %s", i, err)
|
||||
}
|
||||
if string(data) != s {
|
||||
t.Fatalf("unexpected data read at iteration %d\ngot\n%s\nwant\n%s", i, data, s)
|
||||
}
|
||||
if err := rb.Close(); err != nil {
|
||||
t.Fatalf("unexpected error when closing bufferedBody at iteration %d: %s", i, err)
|
||||
if err := rtb.Close(); err != nil {
|
||||
t.Fatalf("unexpected error when closing readTrackingBody at iteration %d: %s", i, err)
|
||||
}
|
||||
if !rtb.canRetry() {
|
||||
t.Fatalf("canRetry() must return true at iteration %d", i)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -851,48 +572,19 @@ func TestBufferedBody_RetrySuccess(t *testing.T) {
|
||||
f("", 100)
|
||||
f("foo", 100)
|
||||
f("foobar", 100)
|
||||
f(newTestString(1000), 1001)
|
||||
f(newTestString(1000), 1000)
|
||||
}
|
||||
|
||||
func TestBufferedBody_RetrySuccessPartialRead(t *testing.T) {
|
||||
func TestReadTrackingBody_RetrySuccessPartialRead(t *testing.T) {
|
||||
f := func(s string, maxBodySize int) {
|
||||
t.Helper()
|
||||
|
||||
// Check the case with partial read
|
||||
defaultRequestBufferSize := requestBufferSize.String()
|
||||
defer func() {
|
||||
if err := requestBufferSize.Set(defaultRequestBufferSize); err != nil {
|
||||
t.Fatalf("cannot reset requestBufferSize: %s", err)
|
||||
}
|
||||
}()
|
||||
if err := requestBufferSize.Set(fmt.Sprintf("%d", maxBodySize)); err != nil {
|
||||
t.Fatalf("cannot set requestBufferSize: %s", err)
|
||||
}
|
||||
rtb := newReadTrackingBody(io.NopCloser(bytes.NewBufferString(s)), maxBodySize)
|
||||
|
||||
defaultMaxRequestBodySizeToRetry := maxRequestBodySizeToRetry.String()
|
||||
defer func() {
|
||||
if err := maxRequestBodySizeToRetry.Set(defaultMaxRequestBodySizeToRetry); err != nil {
|
||||
t.Fatalf("cannot reset maxRequestBodySizeToRetry: %s", err)
|
||||
}
|
||||
}()
|
||||
if err := maxRequestBodySizeToRetry.Set("0"); err != nil {
|
||||
t.Fatalf("cannot set maxRequestBodySizeToRetry: %s", err)
|
||||
}
|
||||
|
||||
ctx := context.Background()
|
||||
rb, err := bufferRequestBody(ctx, io.NopCloser(bytes.NewBufferString(s)), "foo")
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
bb, ok := rb.(*bufferedBody)
|
||||
canRetry := !ok || bb.canRetry()
|
||||
|
||||
if !canRetry {
|
||||
t.Fatalf("canRetry must return true")
|
||||
}
|
||||
for i := 0; i < len(s); i++ {
|
||||
buf := make([]byte, i)
|
||||
n, err := io.ReadFull(rb, buf)
|
||||
n, err := io.ReadFull(rtb, buf)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error when reading %d bytes: %s", i, err)
|
||||
}
|
||||
@@ -902,20 +594,26 @@ func TestBufferedBody_RetrySuccessPartialRead(t *testing.T) {
|
||||
if string(buf) != s[:i] {
|
||||
t.Fatalf("unexpected data read with the length %d\ngot\n%s\nwant\n%s", i, buf, s[:i])
|
||||
}
|
||||
if err := rb.Close(); err != nil {
|
||||
if err := rtb.Close(); err != nil {
|
||||
t.Fatalf("unexpected error when closing reader after reading %d bytes", i)
|
||||
}
|
||||
if !rtb.canRetry() {
|
||||
t.Fatalf("canRetry() must return true after closing the reader after reading %d bytes", i)
|
||||
}
|
||||
}
|
||||
|
||||
data, err := io.ReadAll(rb)
|
||||
data, err := io.ReadAll(rtb)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error when reading all the data: %s", err)
|
||||
}
|
||||
if string(data) != s {
|
||||
t.Fatalf("unexpected data read\ngot\n%s\nwant\n%s", data, s)
|
||||
}
|
||||
if err := rb.Close(); err != nil {
|
||||
t.Fatalf("unexpected error when closing bufferedBody: %s", err)
|
||||
if err := rtb.Close(); err != nil {
|
||||
t.Fatalf("unexpected error when closing readTrackingBody: %s", err)
|
||||
}
|
||||
if !rtb.canRetry() {
|
||||
t.Fatalf("canRetry() must return true after closing the reader after reading all the input")
|
||||
}
|
||||
}
|
||||
|
||||
@@ -924,53 +622,30 @@ func TestBufferedBody_RetrySuccessPartialRead(t *testing.T) {
|
||||
f("", 100)
|
||||
f("foo", 100)
|
||||
f("foobar", 100)
|
||||
f(newTestString(1000), 1001)
|
||||
f(newTestString(1000), 1000)
|
||||
}
|
||||
|
||||
func TestBufferedBody_RetryFailureTooBigBody(t *testing.T) {
|
||||
func TestReadTrackingBody_RetryFailureTooBigBody(t *testing.T) {
|
||||
f := func(s string, maxBodySize int) {
|
||||
t.Helper()
|
||||
|
||||
defaultRequestBufferSize := requestBufferSize.String()
|
||||
defer func() {
|
||||
if err := requestBufferSize.Set(defaultRequestBufferSize); err != nil {
|
||||
t.Fatalf("cannot reset requestBufferSize: %s", err)
|
||||
}
|
||||
}()
|
||||
if err := requestBufferSize.Set("0"); err != nil {
|
||||
t.Fatalf("cannot set requestBufferSize: %s", err)
|
||||
}
|
||||
rtb := newReadTrackingBody(io.NopCloser(bytes.NewBufferString(s)), maxBodySize)
|
||||
|
||||
defaultMaxRequestBodySizeToRetry := maxRequestBodySizeToRetry.String()
|
||||
defer func() {
|
||||
if err := maxRequestBodySizeToRetry.Set(defaultMaxRequestBodySizeToRetry); err != nil {
|
||||
t.Fatalf("cannot reset maxRequestBodySizeToRetry: %s", err)
|
||||
}
|
||||
}()
|
||||
if err := maxRequestBodySizeToRetry.Set(fmt.Sprintf("%d", maxBodySize)); err != nil {
|
||||
t.Fatalf("cannot set maxRequestBodySizeToRetry: %s", err)
|
||||
}
|
||||
|
||||
ctx := context.Background()
|
||||
rb, err := bufferRequestBody(ctx, io.NopCloser(bytes.NewBufferString(s)), "foo")
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
bb, ok := rb.(*bufferedBody)
|
||||
canRetry := !ok || bb.canRetry()
|
||||
|
||||
if canRetry {
|
||||
t.Fatalf("canRetry() must return false because of too big request body")
|
||||
if !rtb.canRetry() {
|
||||
t.Fatalf("canRetry() must return true before reading anything")
|
||||
}
|
||||
buf := make([]byte, 1)
|
||||
n, err := io.ReadFull(rb, buf)
|
||||
n, err := io.ReadFull(rtb, buf)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error when reading a single byte: %s", err)
|
||||
}
|
||||
if n != 1 {
|
||||
t.Fatalf("unexpected number of bytes read; got %d; want 1", n)
|
||||
}
|
||||
data, err := io.ReadAll(rb)
|
||||
if !rtb.canRetry() {
|
||||
t.Fatalf("canRetry() must return true after reading one byte")
|
||||
}
|
||||
data, err := io.ReadAll(rtb)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error when reading all the data: %s", err)
|
||||
}
|
||||
@@ -978,11 +653,14 @@ func TestBufferedBody_RetryFailureTooBigBody(t *testing.T) {
|
||||
if dataRead != s {
|
||||
t.Fatalf("unexpected data read\ngot\n%s\nwant\n%s", dataRead, s)
|
||||
}
|
||||
if err := rb.Close(); err != nil {
|
||||
t.Fatalf("unexpected error when closing bufferedBody: %s", err)
|
||||
if err := rtb.Close(); err != nil {
|
||||
t.Fatalf("unexpected error when closing readTrackingBody: %s", err)
|
||||
}
|
||||
if rtb.canRetry() {
|
||||
t.Fatalf("canRetry() must return false after closing the reader")
|
||||
}
|
||||
|
||||
data, err = io.ReadAll(rb)
|
||||
data, err = io.ReadAll(rtb)
|
||||
if err == nil {
|
||||
t.Fatalf("expecting non-nil error")
|
||||
}
|
||||
@@ -996,48 +674,35 @@ func TestBufferedBody_RetryFailureTooBigBody(t *testing.T) {
|
||||
f(newTestString(2*maxBodySize), maxBodySize)
|
||||
}
|
||||
|
||||
func TestBufferedBody_RetryFailureZeroOrNegativeMaxBodySize(t *testing.T) {
|
||||
func TestReadTrackingBody_RetryFailureZeroOrNegativeMaxBodySize(t *testing.T) {
|
||||
f := func(s string, maxBodySize int) {
|
||||
t.Helper()
|
||||
|
||||
defaultRequestBufferSize := requestBufferSize.String()
|
||||
defer func() {
|
||||
if err := requestBufferSize.Set(defaultRequestBufferSize); err != nil {
|
||||
t.Fatalf("cannot reset requestBufferSize: %s", err)
|
||||
}
|
||||
}()
|
||||
if err := requestBufferSize.Set(fmt.Sprintf("%d", maxBodySize)); err != nil {
|
||||
t.Fatalf("cannot set requestBufferSize: %s", err)
|
||||
}
|
||||
rtb := newReadTrackingBody(io.NopCloser(bytes.NewBufferString(s)), maxBodySize)
|
||||
|
||||
ctx := context.Background()
|
||||
rb, err := bufferRequestBody(ctx, io.NopCloser(bytes.NewBufferString(s)), "foo")
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
bb, ok := rb.(*bufferedBody)
|
||||
canRetry := !ok || bb.canRetry()
|
||||
|
||||
if !canRetry {
|
||||
if !rtb.canRetry() {
|
||||
t.Fatalf("canRetry() must return true before reading anything")
|
||||
}
|
||||
data, err := io.ReadAll(rb)
|
||||
data, err := io.ReadAll(rtb)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error when reading all the data: %s", err)
|
||||
}
|
||||
if string(data) != s {
|
||||
t.Fatalf("unexpected data read\ngot\n%s\nwant\n%s", data, s)
|
||||
}
|
||||
if err := rb.Close(); err != nil {
|
||||
t.Fatalf("unexpected error when closing bufferedBody: %s", err)
|
||||
if err := rtb.Close(); err != nil {
|
||||
t.Fatalf("unexpected error when closing readTrackingBody: %s", err)
|
||||
}
|
||||
|
||||
data, err = io.ReadAll(rb)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error in io.ReadAll: %s", err)
|
||||
if rtb.canRetry() {
|
||||
t.Fatalf("canRetry() must return false after closing the reader")
|
||||
}
|
||||
if string(data) != s {
|
||||
t.Fatalf("unexpected data read\ngot\n%s\nwant\n%s", data, s)
|
||||
data, err = io.ReadAll(rtb)
|
||||
if err == nil {
|
||||
t.Fatalf("expecting non-nil error")
|
||||
}
|
||||
if len(data) != 0 {
|
||||
t.Fatalf("unexpected non-empty data read: %q", data)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -8,42 +8,29 @@ import (
|
||||
"strings"
|
||||
)
|
||||
|
||||
func mergeURLs(uiURL, requestURI *url.URL, dropSrcPathPrefixParts int, mergeQueryArgs []string) *url.URL {
|
||||
func mergeURLs(uiURL, requestURI *url.URL, dropSrcPathPrefixParts int) *url.URL {
|
||||
targetURL := *uiURL
|
||||
|
||||
srcPath := dropPrefixParts(requestURI.Path, dropSrcPathPrefixParts)
|
||||
if strings.HasPrefix(srcPath, "/") {
|
||||
targetURL.Path = strings.TrimSuffix(targetURL.Path, "/")
|
||||
}
|
||||
targetURL.Path += srcPath
|
||||
requestParams := requestURI.Query()
|
||||
|
||||
// fast path
|
||||
if len(requestParams) == 0 {
|
||||
return &targetURL
|
||||
}
|
||||
|
||||
// Merge client query args with backend query args
|
||||
targetParams := targetURL.Query()
|
||||
uiParams := url.Values{}
|
||||
|
||||
// Copy all the target query args
|
||||
for k, v := range targetParams {
|
||||
for i := range v {
|
||||
uiParams.Add(k, v[i])
|
||||
}
|
||||
}
|
||||
|
||||
// Copy the client query args if they do not clash with target args.
|
||||
// merge query parameters from requests.
|
||||
uiParams := targetURL.Query()
|
||||
for k, v := range requestParams {
|
||||
if targetParams.Has(k) && !slices.Contains(mergeQueryArgs, k) {
|
||||
// Skip clashed client query params for security reasons
|
||||
// skip clashed query params from original request
|
||||
if exist := uiParams.Get(k); len(exist) > 0 {
|
||||
continue
|
||||
}
|
||||
for i := range v {
|
||||
uiParams.Add(k, v[i])
|
||||
}
|
||||
}
|
||||
|
||||
targetURL.RawQuery = uiParams.Encode()
|
||||
return &targetURL
|
||||
}
|
||||
|
||||
@@ -101,7 +101,7 @@ func TestCreateTargetURLSuccess(t *testing.T) {
|
||||
return
|
||||
}
|
||||
bu := up.getBackendURL()
|
||||
target := mergeURLs(bu.url, u, up.dropSrcPathPrefixParts, up.mergeQueryArgs)
|
||||
target := mergeURLs(bu.url, u, up.dropSrcPathPrefixParts)
|
||||
bu.put()
|
||||
|
||||
gotTarget := target.String()
|
||||
@@ -352,7 +352,7 @@ func TestUserInfoGetBackendURL_SRV(t *testing.T) {
|
||||
return
|
||||
}
|
||||
bu := up.getBackendURL()
|
||||
target := mergeURLs(bu.url, u, up.dropSrcPathPrefixParts, up.mergeQueryArgs)
|
||||
target := mergeURLs(bu.url, u, up.dropSrcPathPrefixParts)
|
||||
bu.put()
|
||||
|
||||
gotTarget := target.String()
|
||||
@@ -528,43 +528,3 @@ func (r *fakeResolver) LookupIPAddr(_ context.Context, host string) ([]net.IPAdd
|
||||
func (r *fakeResolver) LookupMX(_ context.Context, _ string) ([]*net.MX, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func TestMergeURLs(t *testing.T) {
|
||||
f := func(clientURL, backendURL string, dropSrcPathPrefixParts int, mergeQueryArgs []string, resultURLExpected string) {
|
||||
t.Helper()
|
||||
|
||||
cu, err := url.Parse(clientURL)
|
||||
if err != nil {
|
||||
t.Fatalf("cannot parse client url %q: %s", clientURL, err)
|
||||
}
|
||||
cu = normalizeURL(cu)
|
||||
|
||||
bu, err := url.Parse(backendURL)
|
||||
if err != nil {
|
||||
t.Fatalf("cannot parse backend url %q: %s", backendURL, err)
|
||||
}
|
||||
|
||||
ru := mergeURLs(bu, cu, dropSrcPathPrefixParts, mergeQueryArgs)
|
||||
resultURL := ru.String()
|
||||
if resultURL != resultURLExpected {
|
||||
t.Fatalf("unexpected resultURL\ngot\n%s\nwant\n%s", resultURL, resultURLExpected)
|
||||
}
|
||||
}
|
||||
|
||||
f("http://foo:1234", "https://backend/foo/bar?baz=abc&de", 0, nil, "https://backend/foo/bar?baz=abc&de")
|
||||
f("http://foo:1234", "https://backend/foo/bar/?baz=abc&de", 0, nil, "https://backend/foo/bar/?baz=abc&de")
|
||||
f("https://foo:1234/", "https://backend/foo/bar?baz=abc&de", 0, nil, "https://backend/foo/bar?baz=abc&de")
|
||||
f("https://foo:1234/", "http://backend:8888/foo/bar/?baz=abc&de", 0, nil, "http://backend:8888/foo/bar/?baz=abc&de")
|
||||
|
||||
// merge paths
|
||||
f("http://foo:1234/x/y?z=xxx", "https://backend/foo/bar?baz=abc&de", 0, nil, "https://backend/foo/bar/x/y?baz=abc&de=&z=xxx")
|
||||
|
||||
// "hacky" url
|
||||
f("http://foo:1234/../../x/../y?z=xxx", "https://backend/foo/bar?baz=abc&de", 0, nil, "https://backend/foo/bar/y?baz=abc&de=&z=xxx")
|
||||
|
||||
// make sure that the client args are overridden by server args by default
|
||||
f("http://foo:1234/x/y?password=hack&qqq=www", "https://backend/foo/bar?password=abc", 0, nil, "https://backend/foo/bar/x/y?password=abc&qqq=www")
|
||||
|
||||
// allow overriding the selected query args
|
||||
f("http://foo:1234/x/y?baz=xxx&qqq=www", "https://backend/foo/bar?baz=abc", 0, []string{"baz"}, "https://backend/foo/bar/x/y?baz=abc&baz=xxx&qqq=www")
|
||||
}
|
||||
|
||||
@@ -31,9 +31,6 @@ vmbackup-linux-ppc64le-prod:
|
||||
vmbackup-linux-386-prod:
|
||||
APP_NAME=vmbackup EXTRA_GO_BUILD_TAGS=$(VMBACKUP_GO_BUILD_TAGS) $(MAKE) app-via-docker-linux-386
|
||||
|
||||
vmbackup-linux-s390x-prod:
|
||||
APP_NAME=vmbackup EXTRA_GO_BUILD_TAGS=$(VMBACKUP_GO_BUILD_TAGS) $(MAKE) app-via-docker-linux-s390x
|
||||
|
||||
vmbackup-darwin-amd64-prod:
|
||||
APP_NAME=vmbackup EXTRA_GO_BUILD_TAGS=$(VMBACKUP_GO_BUILD_TAGS) $(MAKE) app-via-docker-darwin-amd64
|
||||
|
||||
|
||||
@@ -115,7 +115,7 @@ func main() {
|
||||
if err != nil {
|
||||
logger.Fatalf("cannot create backup: %s", err)
|
||||
}
|
||||
pushmetrics.StopAndPush()
|
||||
pushmetrics.Stop()
|
||||
|
||||
startTime := time.Now()
|
||||
logger.Infof("gracefully shutting down http server for metrics at %q", listenAddrs)
|
||||
@@ -212,7 +212,7 @@ func newSrcFS() (*fslocal.FS, error) {
|
||||
}
|
||||
|
||||
func newDstFS(ctx context.Context) (common.RemoteFS, error) {
|
||||
fs, err := actions.NewRemoteFS(ctx, *dst, nil)
|
||||
fs, err := actions.NewRemoteFS(ctx, *dst)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse `-dst`=%q: %w", *dst, err)
|
||||
}
|
||||
@@ -255,7 +255,7 @@ func newOriginFS(ctx context.Context) (common.OriginFS, error) {
|
||||
if len(*origin) == 0 {
|
||||
return &fsnil.FS{}, nil
|
||||
}
|
||||
fs, err := actions.NewRemoteFS(ctx, *origin, nil)
|
||||
fs, err := actions.NewRemoteFS(ctx, *origin)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse `-origin`=%q: %w", *origin, err)
|
||||
}
|
||||
@@ -266,7 +266,7 @@ func newRemoteOriginFS(ctx context.Context) (common.RemoteFS, error) {
|
||||
if len(*origin) == 0 {
|
||||
return nil, fmt.Errorf("-origin cannot be empty when -snapshotName and -snapshot.createURL aren't set")
|
||||
}
|
||||
fs, err := actions.NewRemoteFS(ctx, *origin, nil)
|
||||
fs, err := actions.NewRemoteFS(ctx, *origin)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse `-origin`=%q: %w", *origin, err)
|
||||
}
|
||||
|
||||
@@ -27,9 +27,6 @@ vmctl-linux-ppc64le-prod:
|
||||
vmctl-linux-386-prod:
|
||||
APP_NAME=vmctl $(MAKE) app-via-docker-linux-386
|
||||
|
||||
vmctl-linux-s390x-prod:
|
||||
APP_NAME=vmctl $(MAKE) app-via-docker-linux-s390x
|
||||
|
||||
vmctl-darwin-amd64-prod:
|
||||
APP_NAME=vmctl $(MAKE) app-via-docker-darwin-amd64
|
||||
|
||||
|
||||
@@ -7,8 +7,6 @@ import (
|
||||
"math"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
)
|
||||
|
||||
@@ -57,7 +55,6 @@ func (b *Backoff) Retry(ctx context.Context, cb retryableFunc) (uint64, error) {
|
||||
return attempt, err // fail fast if not recoverable
|
||||
}
|
||||
attempt++
|
||||
retriesTotal.Inc()
|
||||
backoff := float64(b.minDuration) * math.Pow(b.factor, float64(i))
|
||||
dur := time.Duration(backoff)
|
||||
logger.Errorf("got error: %s on attempt: %d; will retry in %v", err, attempt, dur)
|
||||
@@ -77,7 +74,3 @@ func (b *Backoff) Retry(ctx context.Context, cb retryableFunc) (uint64, error) {
|
||||
}
|
||||
return attempt, fmt.Errorf("execution failed after %d retry attempts", b.retries)
|
||||
}
|
||||
|
||||
var (
|
||||
retriesTotal = metrics.NewCounter(`vmctl_backoff_retries_total`)
|
||||
)
|
||||
|
||||
@@ -14,12 +14,6 @@ const (
|
||||
globalSilent = "s"
|
||||
globalVerbose = "verbose"
|
||||
globalDisableProgressBar = "disable-progress-bar"
|
||||
|
||||
globalPushMetricsURL = "pushmetrics.url"
|
||||
globalPushMetricsInterval = "pushmetrics.interval"
|
||||
globalPushExtraLabels = "pushmetrics.extraLabel"
|
||||
globalPushHeaders = "pushmetrics.header"
|
||||
globalPushDisableCompression = "pushmetrics.disableCompression"
|
||||
)
|
||||
|
||||
var (
|
||||
@@ -39,29 +33,6 @@ var (
|
||||
Value: false,
|
||||
Usage: "Whether to disable progress bar during the import.",
|
||||
},
|
||||
&cli.StringSliceFlag{
|
||||
Name: globalPushMetricsURL,
|
||||
Usage: "Optional URL to push metrics. See https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#push-metrics",
|
||||
},
|
||||
&cli.DurationFlag{
|
||||
Name: globalPushMetricsInterval,
|
||||
Value: 10 * time.Second,
|
||||
Usage: "Interval for pushing metrics to every -pushmetrics.url",
|
||||
},
|
||||
&cli.StringSliceFlag{
|
||||
Name: globalPushExtraLabels,
|
||||
Usage: "Extra labels to add to pushed metrics. In case of collision, label value defined by flag will have priority. " +
|
||||
"Flag can be set multiple times, to add few additional labels. " +
|
||||
"For example, -pushmetrics.extraLabel='instance=\"foo\"' adds instance=\"foo\" label to all the metrics pushed to every -pushmetrics.url",
|
||||
},
|
||||
&cli.StringSliceFlag{
|
||||
Name: globalPushHeaders,
|
||||
Usage: "Optional HTTP headers to add to pushed metrics. Flag can be set multiple times, to add few additional headers.",
|
||||
},
|
||||
&cli.BoolFlag{
|
||||
Name: globalPushDisableCompression,
|
||||
Usage: "Whether to disable compression when pushing metrics.",
|
||||
},
|
||||
}
|
||||
)
|
||||
|
||||
@@ -152,32 +123,32 @@ var (
|
||||
Name: vmExtraLabel,
|
||||
Value: nil,
|
||||
Usage: "Extra labels, that will be added to imported timeseries. In case of collision, label value defined by flag" +
|
||||
" will have priority. Flag can be set multiple times, to add few additional labels.",
|
||||
"will have priority. Flag can be set multiple times, to add few additional labels.",
|
||||
},
|
||||
&cli.Int64Flag{
|
||||
Name: vmRateLimit,
|
||||
Usage: "Optional data transfer rate limit in bytes per second.\n" +
|
||||
"By default, the rate limit is disabled. It can be useful for limiting load on configured via '--vm-addr' destination.",
|
||||
"By default, the rate limit is disabled. It can be useful for limiting load on configured via '--vmAddr' destination.",
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: vmCertFile,
|
||||
Usage: "Optional path to client-side TLS certificate file to use when connecting to '--vm-addr'",
|
||||
Usage: "Optional path to client-side TLS certificate file to use when connecting to '--vmAddr'",
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: vmKeyFile,
|
||||
Usage: "Optional path to client-side TLS key to use when connecting to '--vm-addr'",
|
||||
Usage: "Optional path to client-side TLS key to use when connecting to '--vmAddr'",
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: vmCAFile,
|
||||
Usage: "Optional path to TLS CA file to use for verifying connections to '--vm-addr'. By default, system CA is used",
|
||||
Usage: "Optional path to TLS CA file to use for verifying connections to '--vmAddr'. By default, system CA is used",
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: vmServerName,
|
||||
Usage: "Optional TLS server name to use for connections to '--vm-addr'. By default, the server name from '--vm-addr' is used",
|
||||
Usage: "Optional TLS server name to use for connections to '--vmAddr'. By default, the server name from '--vmAddr' is used",
|
||||
},
|
||||
&cli.BoolFlag{
|
||||
Name: vmInsecureSkipVerify,
|
||||
Usage: "Whether to skip tls verification when connecting to '--vm-addr'",
|
||||
Usage: "Whether to skip tls verification when connecting to '--vmAddr'",
|
||||
Value: false,
|
||||
},
|
||||
&cli.IntFlag{
|
||||
@@ -497,7 +468,7 @@ var (
|
||||
Name: vmNativeFilterMatch,
|
||||
Usage: "Time series selector to match series for export. For example, select {instance!=\"localhost\"} will " +
|
||||
"match all series with \"instance\" label different to \"localhost\".\n" +
|
||||
" See more details here https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-export-data-in-native-format",
|
||||
" See more details here https://github.com/VictoriaMetrics/VictoriaMetrics#how-to-export-data-in-native-format",
|
||||
Value: `{__name__!=""}`,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
@@ -627,7 +598,7 @@ var (
|
||||
Name: vmExtraLabel,
|
||||
Value: nil,
|
||||
Usage: "Extra labels, that will be added to imported timeseries. In case of collision, label value defined by flag" +
|
||||
" will have priority. Flag can be set multiple times, to add few additional labels.",
|
||||
"will have priority. Flag can be set multiple times, to add few additional labels.",
|
||||
},
|
||||
&cli.Int64Flag{
|
||||
Name: vmRateLimit,
|
||||
@@ -654,8 +625,8 @@ var (
|
||||
&cli.BoolFlag{
|
||||
Name: vmNativeDisableBinaryProtocol,
|
||||
Usage: "Whether to use https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-export-data-in-json-line-format " +
|
||||
"instead of https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-export-data-in-native-format API. " +
|
||||
"Binary export/import API protocol implies less network and resource usage, as it transfers compressed binary data blocks. " +
|
||||
"instead of https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-export-data-in-native-format API." +
|
||||
"Binary export/import API protocol implies less network and resource usage, as it transfers compressed binary data blocks." +
|
||||
"Non-binary export/import API is less efficient, but supports deduplication if it is configured on vm-native-src-addr side.",
|
||||
Value: false,
|
||||
},
|
||||
@@ -718,15 +689,15 @@ var (
|
||||
Usage: "The time filter in RFC3339 format to select timeseries with timestamp equal or lower than provided value. E.g. '2020-01-01T20:07:00Z'",
|
||||
Layout: time.RFC3339,
|
||||
},
|
||||
&cli.StringSliceFlag{
|
||||
Name: remoteReadFilterLabel,
|
||||
Usage: "Prometheus label name to filter timeseries by. E.g. '__name__' will filter timeseries by name.",
|
||||
DefaultText: "__name__",
|
||||
&cli.StringFlag{
|
||||
Name: remoteReadFilterLabel,
|
||||
Usage: "Prometheus label name to filter timeseries by. E.g. '__name__' will filter timeseries by name.",
|
||||
Value: "__name__",
|
||||
},
|
||||
&cli.StringSliceFlag{
|
||||
Name: remoteReadFilterLabelValue,
|
||||
Usage: fmt.Sprintf("Prometheus regular expression to filter label from %q flag.", remoteReadFilterLabelValue),
|
||||
DefaultText: ".*",
|
||||
&cli.StringFlag{
|
||||
Name: remoteReadFilterLabelValue,
|
||||
Usage: fmt.Sprintf("Prometheus regular expression to filter label from %q flag.", remoteReadFilterLabelValue),
|
||||
Value: ".*",
|
||||
},
|
||||
&cli.BoolFlag{
|
||||
Name: remoteRead,
|
||||
|
||||
@@ -1,14 +1,11 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"sync"
|
||||
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/barpool"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/influx"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/vm"
|
||||
@@ -40,7 +37,7 @@ func newInfluxProcessor(ic *influx.Client, im *vm.Importer, cc int, separator st
|
||||
}
|
||||
}
|
||||
|
||||
func (ip *influxProcessor) run(ctx context.Context) error {
|
||||
func (ip *influxProcessor) run() error {
|
||||
series, err := ip.ic.Explore()
|
||||
if err != nil {
|
||||
return fmt.Errorf("explore query failed: %s", err)
|
||||
@@ -50,11 +47,10 @@ func (ip *influxProcessor) run(ctx context.Context) error {
|
||||
}
|
||||
|
||||
question := fmt.Sprintf("Found %d timeseries to import. Continue?", len(series))
|
||||
if !prompt(ctx, question) {
|
||||
if !prompt(question) {
|
||||
return nil
|
||||
}
|
||||
|
||||
influxSeriesTotal.Add(len(series))
|
||||
bar := barpool.AddWithTemplate(fmt.Sprintf(barTpl, "Processing series"), len(series))
|
||||
if err := barpool.Start(); err != nil {
|
||||
return err
|
||||
@@ -66,18 +62,18 @@ func (ip *influxProcessor) run(ctx context.Context) error {
|
||||
ip.im.ResetStats()
|
||||
|
||||
var wg sync.WaitGroup
|
||||
for range ip.cc {
|
||||
wg.Go(func() {
|
||||
wg.Add(ip.cc)
|
||||
for i := 0; i < ip.cc; i++ {
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
for s := range seriesCh {
|
||||
if err := ip.do(s); err != nil {
|
||||
influxErrorsTotal.Inc()
|
||||
errCh <- fmt.Errorf("request failed for %q.%q: %s", s.Measurement, s.Field, err)
|
||||
return
|
||||
}
|
||||
influxSeriesProcessed.Inc()
|
||||
bar.Increment()
|
||||
}
|
||||
})
|
||||
}()
|
||||
}
|
||||
|
||||
// any error breaks the import
|
||||
@@ -86,7 +82,6 @@ func (ip *influxProcessor) run(ctx context.Context) error {
|
||||
case infErr := <-errCh:
|
||||
return fmt.Errorf("influx error: %s", infErr)
|
||||
case vmErr := <-ip.im.Errors():
|
||||
influxErrorsTotal.Inc()
|
||||
return fmt.Errorf("import process failed: %s", wrapErr(vmErr, ip.isVerbose))
|
||||
case seriesCh <- s:
|
||||
}
|
||||
@@ -99,7 +94,6 @@ func (ip *influxProcessor) run(ctx context.Context) error {
|
||||
// drain import errors channel
|
||||
for vmErr := range ip.im.Errors() {
|
||||
if vmErr.Err != nil {
|
||||
influxErrorsTotal.Inc()
|
||||
return fmt.Errorf("import process failed: %s", wrapErr(vmErr, ip.isVerbose))
|
||||
}
|
||||
}
|
||||
@@ -174,9 +168,3 @@ func (ip *influxProcessor) do(s *influx.Series) error {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var (
|
||||
influxSeriesTotal = metrics.NewCounter(`vmctl_influx_migration_series_total`)
|
||||
influxSeriesProcessed = metrics.NewCounter(`vmctl_influx_migration_series_processed`)
|
||||
influxErrorsTotal = metrics.NewCounter(`vmctl_influx_migration_errors_total`)
|
||||
)
|
||||
|
||||
@@ -4,8 +4,6 @@ import (
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timerpool"
|
||||
)
|
||||
|
||||
@@ -47,16 +45,9 @@ func (l *Limiter) Register(dataLen int) {
|
||||
t := timerpool.Get(d)
|
||||
<-t.C
|
||||
timerpool.Put(t)
|
||||
limiterThrottleEventsTotal.Inc()
|
||||
}
|
||||
l.budget += limit
|
||||
l.deadline = time.Now().Add(time.Second)
|
||||
}
|
||||
l.budget -= int64(dataLen)
|
||||
limiterBytesProcessed.Add(dataLen)
|
||||
}
|
||||
|
||||
var (
|
||||
limiterBytesProcessed = metrics.NewCounter(`vmctl_limiter_bytes_processed_total`)
|
||||
limiterThrottleEventsTotal = metrics.NewCounter(`vmctl_limiter_throttle_events_total`)
|
||||
)
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user