Compare commits

..

1 Commits

Author SHA1 Message Date
Max Kotliar
e9261be945 lib/promutil: Weak pointer based labels compressor 2025-08-20 20:02:11 +03:00
3042 changed files with 203218 additions and 246596 deletions

View File

@@ -5,7 +5,7 @@ body:
- type: textarea
id: describe-the-component
attributes:
label: Is your question related to a specific component?
label: Is your question request related to a specific component?
placeholder: |
VictoriaMetrics, vmagent, vmalert, vmui, etc...
validations:

View File

@@ -1,48 +0,0 @@
#!/usr/bin/env sh
set -e
CHANGELOG_FILE="docs/victoriametrics/changelog/CHANGELOG.md"
GITHUB_BASE_REF=${GITHUB_BASE_REF:-"master"}
GIT_REMOTE=${GIT_REMOTE:-"origin"}
git diff "${GIT_REMOTE}/${GITHUB_BASE_REF}"...HEAD -- $CHANGELOG_FILE > diff.txt
if ! grep -q "^+" diff.txt; then
echo "No additions in CHANGELOG.md"
exit 0
fi
ADDED_LINES=$(grep "^+\S" diff.txt | sed 's/^+//')
START_TIP=$(grep -n "^## tip" "$CHANGELOG_FILE" | head -1 | cut -d: -f1)
if [ -z "$START_TIP" ]; then
echo "ERROR: ${CHANGELOG_FILE} does not contain a ## tip section"
exit 1
fi
END_TIP=$(awk "NR>$START_TIP && /^## / {print NR; exit}" "${CHANGELOG_FILE}")
if [ -z "$END_TIP" ]; then
END_TIP=$(wc -l < "$CHANGELOG_FILE")
fi
BAD=0
while IFS= read -r line; do
# Grep exact line inside the file and get line numbers
MATCHES=$(grep -n -F "$line" "$CHANGELOG_FILE" | cut -d: -f1)
for m in $MATCHES; do
if [ "$m" -lt "$START_TIP" ] || [ "$m" -gt "$END_TIP" ]; then
echo "'$line' on line ${m} is outside ## tip section (lines ${START_TIP}-${END_TIP})"
BAD=1
fi
done
done << EOF
$ADDED_LINES
EOF
if [ "$BAD" -ne 0 ]; then
echo "CHANGELOG modifications must be placed inside the ## tip section."
exit 1
fi
echo "CHANGELOG modifications are valid."

View File

@@ -47,8 +47,6 @@ jobs:
arch: arm
- os: linux
arch: ppc64le
- os: linux
arch: s390x
- os: darwin
arch: amd64
- os: darwin
@@ -61,18 +59,17 @@ jobs:
arch: amd64
steps:
- name: Code checkout
uses: actions/checkout@v6
uses: actions/checkout@v5
- name: Setup Go
id: go
uses: actions/setup-go@v6
uses: actions/setup-go@v5
with:
cache-dependency-path: |
go.sum
Makefile
app/**/Makefile
go-version-file: 'go.mod'
- run: go version
go-version: stable
- name: Build victoria-metrics for ${{ matrix.os }}-${{ matrix.arch }}
run: make victoria-metrics-${{ matrix.os }}-${{ matrix.arch }}

View File

@@ -1,19 +0,0 @@
name: 'changelog-linter'
on:
pull_request:
paths:
- "docs/victoriametrics/changelog/CHANGELOG.md"
jobs:
tip-lint:
runs-on: 'ubuntu-latest'
steps:
- uses: 'actions/checkout@v6'
with:
# needed for proper diff
fetch-depth: 0
- name: 'Validate that changelog changes are under ## tip'
run: |
GITHUB_BASE_REF=${{ github.base_ref }} ./.github/scripts/lint-changelog-tip.sh

View File

@@ -1,37 +0,0 @@
name: check-commit-signed
on:
pull_request:
jobs:
check-commit-signed:
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v6
with:
fetch-depth: 0 # we need full history for commit verification
- name: Check commit signatures
run: |
if [ "${{ github.event_name }}" != "pull_request" ]; then
echo "Not a PR event, skipping signature check"
exit 0
fi
RANGE="${{ github.event.pull_request.base.sha }}..${{ github.event.pull_request.head.sha }}"
echo "Checking commits in PR range: $RANGE"
if [ -z "$(git rev-list $RANGE)" ]; then
echo "No new commits in this PR, skipping signature check"
exit 0
fi
unsigned=$(git log --pretty="%H %G?" $RANGE | grep -vE " (G|E)$" || true)
if [ -n "$unsigned" ]; then
echo "Found unsigned commits:"
echo "$unsigned"
exit 1
fi
echo "All commits in PR are signed (G or E)"

View File

@@ -19,13 +19,11 @@ jobs:
- name: Setup Go
id: go
uses: actions/setup-go@v6
uses: actions/setup-go@v5
with:
go-version-file: 'go.mod'
go-version: stable
cache: false
- run: go version
- name: Cache Go artifacts
uses: actions/cache@v4
with:
@@ -34,7 +32,7 @@ jobs:
~/go/pkg/mod
~/go/bin
key: go-artifacts-${{ runner.os }}-check-licenses-${{ steps.go.outputs.go-version }}-${{ hashFiles('go.sum', 'Makefile', 'app/**/Makefile') }}
restore-keys: go-artifacts-${{ runner.os }}-check-licenses-${{ steps.go.outputs.go-version }}-
restore-keys: go-artifacts-${{ runner.os }}-check-licenses-
- name: Check License
run: make check-licenses

View File

@@ -29,15 +29,14 @@ jobs:
steps:
- name: Checkout repository
uses: actions/checkout@v6
uses: actions/checkout@v5
- name: Set up Go
id: go
uses: actions/setup-go@v6
uses: actions/setup-go@v5
with:
cache: false
go-version-file: 'go.mod'
- run: go version
go-version: stable
- name: Cache Go artifacts
uses: actions/cache@v4
@@ -47,17 +46,17 @@ jobs:
~/go/bin
~/go/pkg/mod
key: go-artifacts-${{ runner.os }}-codeql-analyze-${{ steps.go.outputs.go-version }}-${{ hashFiles('go.sum', 'Makefile', 'app/**/Makefile') }}
restore-keys: go-artifacts-${{ runner.os }}-codeql-analyze-${{ steps.go.outputs.go-version }}-
restore-keys: go-artifacts-${{ runner.os }}-codeql-analyze-
- name: Initialize CodeQL
uses: github/codeql-action/init@v4
uses: github/codeql-action/init@v3
with:
languages: go
- name: Autobuild
uses: github/codeql-action/autobuild@v4
uses: github/codeql-action/autobuild@v3
- name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@v4
uses: github/codeql-action/analyze@v3
with:
category: 'language:go'

View File

@@ -16,12 +16,12 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Code checkout
uses: actions/checkout@v6
uses: actions/checkout@v5
with:
path: __vm
- name: Checkout private code
uses: actions/checkout@v6
uses: actions/checkout@v5
with:
repository: VictoriaMetrics/vmdocs
token: ${{ secrets.VM_BOT_GH_TOKEN }}

View File

@@ -32,19 +32,18 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Code checkout
uses: actions/checkout@v6
uses: actions/checkout@v5
- name: Setup Go
id: go
uses: actions/setup-go@v6
uses: actions/setup-go@v5
with:
cache-dependency-path: |
go.sum
Makefile
app/**/Makefile
go-version-file: 'go.mod'
go-version: stable
- run: go version
- name: Cache golangci-lint
uses: actions/cache@v4
@@ -52,7 +51,7 @@ jobs:
path: |
~/.cache/golangci-lint
~/go/bin
key: golangci-lint-${{ runner.os }}-${{ steps.go.outputs.go-version }}-${{ hashFiles('.golangci.yml') }}
key: golangci-lint-${{ runner.os }}-${{ hashFiles('.golangci.yml') }}
- name: Run check-all
run: |
@@ -72,18 +71,17 @@ jobs:
steps:
- name: Code checkout
uses: actions/checkout@v6
uses: actions/checkout@v5
- name: Setup Go
id: go
uses: actions/setup-go@v6
uses: actions/setup-go@v5
with:
cache-dependency-path: |
go.sum
Makefile
app/**/Makefile
go-version-file: 'go.mod'
- run: go version
go-version: stable
- name: Run tests
run: GOGC=10 make ${{ matrix.scenario}}
@@ -93,24 +91,23 @@ jobs:
with:
files: ./coverage.txt
apptest:
name: apptest
integration:
name: integration
runs-on: ubuntu-latest
steps:
- name: Code checkout
uses: actions/checkout@v6
uses: actions/checkout@v5
- name: Setup Go
id: go
uses: actions/setup-go@v6
uses: actions/setup-go@v5
with:
cache-dependency-path: |
go.sum
Makefile
app/**/Makefile
go-version-file: 'go.mod'
- run: go version
go-version: stable
- name: Run app tests
run: make apptest
- name: Run integration tests
run: make integration-test

View File

@@ -32,41 +32,35 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Code checkout
uses: actions/checkout@v6
uses: actions/checkout@v5
- name: Cache node_modules
id: cache
uses: actions/cache@v5
- name: Setup Node
uses: actions/setup-node@v4
with:
path: app/vmui/packages/vmui/node_modules
key: vmui-deps-${{ runner.os }}-${{ hashFiles('app/vmui/packages/vmui/package-lock.json', 'app/vmui/Dockerfile-build') }}
restore-keys: |
vmui-deps-${{ runner.os }}-
node-version: '24.x'
- name: Install dependencies
if: steps.cache.outputs.cache-hit != 'true'
run: make vmui-install
- name: Cache node-modules
uses: actions/cache@v4
with:
path: |
app/vmui/packages/vmui/node_modules
key: vmui-artifacts-${{ runner.os }}-${{ hashFiles('package-lock.json') }}
restore-keys: vmui-artifacts-${{ runner.os }}-
- name: Run lint
id: lint
run: make vmui-lint
continue-on-error: true
env:
VMUI_SKIP_INSTALL: true
- name: Run tests
id: test
run: make vmui-test
continue-on-error: true
env:
VMUI_SKIP_INSTALL: true
- name: Run typecheck
id: typecheck
run: make vmui-typecheck
continue-on-error: true
env:
VMUI_SKIP_INSTALL: true
- name: Annotate Code Linting Results
uses: ataylorme/eslint-annotate-action@v3

View File

@@ -175,7 +175,7 @@
END OF TERMS AND CONDITIONS
Copyright 2019-2026 VictoriaMetrics, Inc.
Copyright 2019-2025 VictoriaMetrics, Inc.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.

View File

@@ -17,7 +17,7 @@ EXTRA_GO_BUILD_TAGS ?=
GO_BUILDINFO = -X '$(PKG_PREFIX)/lib/buildinfo.Version=$(APP_NAME)-$(DATEINFO_TAG)-$(BUILDINFO_TAG)'
TAR_OWNERSHIP ?= --owner=1000 --group=1000
GOLANGCI_LINT_VERSION := 2.9.0
GOLANGCI_LINT_VERSION := 2.4.0
.PHONY: $(MAKECMDGOALS)
@@ -125,15 +125,6 @@ vmutils-linux-ppc64le: \
vmrestore-linux-ppc64le \
vmctl-linux-ppc64le
vmutils-linux-s390x: \
vmagent-linux-s390x \
vmalert-linux-s390x \
vmalert-tool-linux-s390x \
vmauth-linux-s390x \
vmbackup-linux-s390x \
vmrestore-linux-s390x \
vmctl-linux-s390x
vmutils-darwin-amd64: \
vmagent-darwin-amd64 \
vmalert-darwin-amd64 \
@@ -266,7 +257,6 @@ release-victoria-metrics: \
release-victoria-metrics-linux-amd64 \
release-victoria-metrics-linux-arm \
release-victoria-metrics-linux-arm64 \
release-victoria-metrics-linux-s390x \
release-victoria-metrics-darwin-amd64 \
release-victoria-metrics-darwin-arm64 \
release-victoria-metrics-freebsd-amd64 \
@@ -285,9 +275,6 @@ release-victoria-metrics-linux-arm:
release-victoria-metrics-linux-arm64:
GOOS=linux GOARCH=arm64 $(MAKE) release-victoria-metrics-goos-goarch
release-victoria-metrics-linux-s390x:
GOOS=linux GOARCH=s390x $(MAKE) release-victoria-metrics-goos-goarch
release-victoria-metrics-darwin-amd64:
GOOS=darwin GOARCH=amd64 $(MAKE) release-victoria-metrics-goos-goarch
@@ -327,7 +314,6 @@ release-vmutils: \
release-vmutils-linux-amd64 \
release-vmutils-linux-arm64 \
release-vmutils-linux-arm \
release-vmutils-linux-s390x \
release-vmutils-darwin-amd64 \
release-vmutils-darwin-arm64 \
release-vmutils-freebsd-amd64 \
@@ -346,9 +332,6 @@ release-vmutils-linux-arm64:
release-vmutils-linux-arm:
GOOS=linux GOARCH=arm $(MAKE) release-vmutils-goos-goarch
release-vmutils-linux-s390x:
GOOS=linux GOARCH=s390x $(MAKE) release-vmutils-goos-goarch
release-vmutils-darwin-amd64:
GOOS=darwin GOARCH=amd64 $(MAKE) release-vmutils-goos-goarch
@@ -435,7 +418,7 @@ release-vmutils-windows-goarch: \
vmctl-windows-$(GOARCH)-prod.exe
pprof-cpu:
go tool pprof -trim_path=github.com/VictoriaMetrics/VictoriaMetrics $(PPROF_FILE)
go tool pprof -trim_path=github.com/VictoriaMetrics/VictoriaMetrics@ $(PPROF_FILE)
fmt:
gofmt -l -w -s ./lib
@@ -443,7 +426,7 @@ fmt:
gofmt -l -w -s ./apptest
vet:
go vet -tags 'synctest' ./lib/...
GOEXPERIMENT=synctest go vet ./lib/...
go vet ./app/...
go vet ./apptest/...
@@ -452,52 +435,39 @@ check-all: fmt vet golangci-lint govulncheck
clean-checkers: remove-golangci-lint remove-govulncheck
test:
go test -tags 'synctest' ./lib/... ./app/...
GOEXPERIMENT=synctest go test ./lib/... ./app/...
test-race:
go test -tags 'synctest' -race ./lib/... ./app/...
GOEXPERIMENT=synctest go test -race ./lib/... ./app/...
test-pure:
CGO_ENABLED=0 go test -tags 'synctest' ./lib/... ./app/...
GOEXPERIMENT=synctest CGO_ENABLED=0 go test ./lib/... ./app/...
test-full:
go test -tags 'synctest' -coverprofile=coverage.txt -covermode=atomic ./lib/... ./app/...
GOEXPERIMENT=synctest go test -coverprofile=coverage.txt -covermode=atomic ./lib/... ./app/...
test-full-386:
GOARCH=386 go test -tags 'synctest' -coverprofile=coverage.txt -covermode=atomic ./lib/... ./app/...
GOEXPERIMENT=synctest GOARCH=386 go test -coverprofile=coverage.txt -covermode=atomic ./lib/... ./app/...
integration-test:
$(MAKE) apptest
apptest:
$(MAKE) victoria-metrics vmagent vmalert vmauth vmctl vmbackup vmrestore
go test ./apptest/... -skip="^Test(Cluster|Legacy).*"
apptest-legacy: victoria-metrics vmbackup vmrestore
OS=$$(uname | tr '[:upper:]' '[:lower:]'); \
ARCH=$$(uname -m | tr '[:upper:]' '[:lower:]' | sed 's/x86_64/amd64/'); \
VERSION=v1.132.0; \
VMSINGLE=victoria-metrics-$${OS}-$${ARCH}-$${VERSION}.tar.gz; \
VMCLUSTER=victoria-metrics-$${OS}-$${ARCH}-$${VERSION}-cluster.tar.gz; \
URL=https://github.com/VictoriaMetrics/VictoriaMetrics/releases/download/$${VERSION}; \
DIR=/tmp/$${VERSION}; \
test -d $${DIR} || (mkdir $${DIR} && \
curl --output-dir /tmp -LO $${URL}/$${VMSINGLE} && tar xzf /tmp/$${VMSINGLE} -C $${DIR} && \
curl --output-dir /tmp -LO $${URL}/$${VMCLUSTER} && tar xzf /tmp/$${VMCLUSTER} -C $${DIR} \
); \
VM_LEGACY_VMSINGLE_PATH=$${DIR}/victoria-metrics-prod \
VM_LEGACY_VMSTORAGE_PATH=$${DIR}/vmstorage-prod \
go test ./apptest/tests -run="^TestLegacySingle.*"
go test ./apptest/... -skip="^TestCluster.*"
benchmark:
go test -run=NO_TESTS -bench=. ./lib/...
go test -run=NO_TESTS -bench=. ./app/...
GOEXPERIMENT=synctest go test -bench=. ./lib/...
go test -bench=. ./app/...
benchmark-pure:
CGO_ENABLED=0 go test -run=NO_TESTS -bench=. ./lib/...
CGO_ENABLED=0 go test -run=NO_TESTS -bench=. ./app/...
GOEXPERIMENT=synctest CGO_ENABLED=0 go test -bench=. ./lib/...
CGO_ENABLED=0 go test -bench=. ./app/...
vendor-update:
go get -u ./lib/...
go get -u ./app/...
go mod tidy -compat=1.26
go mod tidy -compat=1.24
go mod vendor
app-local:
@@ -513,15 +483,14 @@ app-local-windows-goarch:
CGO_ENABLED=0 GOOS=windows GOARCH=$(GOARCH) go build $(RACE) -ldflags "$(GO_BUILDINFO)" -tags "$(EXTRA_GO_BUILD_TAGS)" -o bin/$(APP_NAME)-windows-$(GOARCH)$(RACE).exe $(PKG_PREFIX)/app/$(APP_NAME)
quicktemplate-gen: install-qtc
qtc -dir=lib
qtc -dir=app
qtc
install-qtc:
which qtc || go install github.com/valyala/quicktemplate/qtc@latest
golangci-lint: install-golangci-lint
golangci-lint run --build-tags 'synctest'
GOEXPERIMENT=synctest golangci-lint run
install-golangci-lint:
which golangci-lint && (golangci-lint --version | grep -q $(GOLANGCI_LINT_VERSION)) || curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(shell go env GOPATH)/bin v$(GOLANGCI_LINT_VERSION)

View File

@@ -3,7 +3,7 @@
[![Latest Release](https://img.shields.io/github/v/release/VictoriaMetrics/VictoriaMetrics?sort=semver&label=&filter=!*-victorialogs&logo=github&labelColor=gray&color=gray&link=https%3A%2F%2Fgithub.com%2FVictoriaMetrics%2FVictoriaMetrics%2Freleases%2Flatest)](https://github.com/VictoriaMetrics/VictoriaMetrics/releases)
![Docker Pulls](https://img.shields.io/docker/pulls/victoriametrics/victoria-metrics?label=&logo=docker&logoColor=white&labelColor=2496ED&color=2496ED&link=https%3A%2F%2Fhub.docker.com%2Fr%2Fvictoriametrics%2Fvictoria-metrics)
[![Go Report](https://goreportcard.com/badge/github.com/VictoriaMetrics/VictoriaMetrics?link=https%3A%2F%2Fgoreportcard.com%2Freport%2Fgithub.com%2FVictoriaMetrics%2FVictoriaMetrics)](https://goreportcard.com/report/github.com/VictoriaMetrics/VictoriaMetrics)
[![Build Status](https://github.com/VictoriaMetrics/VictoriaMetrics/actions/workflows/build.yml/badge.svg?branch=master&link=https%3A%2F%2Fgithub.com%2FVictoriaMetrics%2FVictoriaMetrics%2Factions)](https://github.com/VictoriaMetrics/VictoriaMetrics/actions/workflows/build.yml)
[![Build Status](https://github.com/VictoriaMetrics/VictoriaMetrics/actions/workflows/main.yml/badge.svg?branch=master&link=https%3A%2F%2Fgithub.com%2FVictoriaMetrics%2FVictoriaMetrics%2Factions)](https://github.com/VictoriaMetrics/VictoriaMetrics/actions/workflows/main.yml)
[![codecov](https://codecov.io/gh/VictoriaMetrics/VictoriaMetrics/branch/master/graph/badge.svg?link=https%3A%2F%2Fcodecov.io%2Fgh%2FVictoriaMetrics%2FVictoriaMetrics)](https://app.codecov.io/gh/VictoriaMetrics/VictoriaMetrics)
[![License](https://img.shields.io/github/license/VictoriaMetrics/VictoriaMetrics?labelColor=green&label=&link=https%3A%2F%2Fgithub.com%2FVictoriaMetrics%2FVictoriaMetrics%2Fblob%2Fmaster%2FLICENSE)](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/LICENSE)
![Slack](https://img.shields.io/badge/Join-4A154B?logo=slack&link=https%3A%2F%2Fslack.victoriametrics.com)
@@ -16,21 +16,16 @@
<img src="docs/victoriametrics/logo.webp" width="300" alt="VictoriaMetrics logo">
</picture>
VictoriaMetrics is a fast, cost-effective, and scalable solution for monitoring and managing time series data. It delivers high performance and reliability, making it an ideal choice for businesses of all sizes.
VictoriaMetrics is a fast, cost-saving, and scalable solution for monitoring and managing time series data. It delivers high performance and reliability, making it an ideal choice for businesses of all sizes.
Here are some resources and information about VictoriaMetrics:
- **Case studies**: [Grammarly, Roblox, Wix, Spotify,...](https://docs.victoriametrics.com/victoriametrics/casestudies/).
- **Available**: [Binary releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest), Docker images on [Docker Hub](https://hub.docker.com/r/victoriametrics/victoria-metrics/) and [Quay](https://quay.io/repository/victoriametrics/victoria-metrics), [Source code](https://github.com/VictoriaMetrics/VictoriaMetrics).
- **Deployment types**: [Single-node version](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) and [Cluster version](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/) under [Apache License 2.0](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/LICENSE).
- **Getting started:** Read [key concepts](https://docs.victoriametrics.com/victoriametrics/keyconcepts/) and follow the
[quick start guide](https://docs.victoriametrics.com/victoriametrics/quick-start/).
- **Community**: [Slack](https://slack.victoriametrics.com/) (join via [Slack Inviter](https://slack.victoriametrics.com/)), [X (Twitter)](https://x.com/VictoriaMetrics), [YouTube](https://www.youtube.com/@VictoriaMetrics). See full list [here](https://docs.victoriametrics.com/victoriametrics/#community-and-contributions).
- **Changelog**: Project evolves fast - check the [CHANGELOG](https://docs.victoriametrics.com/victoriametrics/changelog/), and [How to upgrade](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-upgrade-victoriametrics).
- **Enterprise support:** [Contact us](mailto:info@victoriametrics.com) for commercial support with additional [enterprise features](https://docs.victoriametrics.com/victoriametrics/enterprise/).
- **Enterprise releases:** Enterprise and [long-term support releases (LTS)](https://docs.victoriametrics.com/victoriametrics/lts-releases/) are publicly available and can be evaluated for free
using a [free trial license](https://victoriametrics.com/products/enterprise/trial/).
- **Security:** we achieved [security certifications](https://victoriametrics.com/security/) for Database Software Development and Software-Based Monitoring Services.
- Documentation: [docs.victoriametrics.com](https://docs.victoriametrics.com)
- Case studies: [Grammarly, Roblox, Wix,...](https://docs.victoriametrics.com/victoriametrics/casestudies/).
- Available: [Binary releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest), docker images [Docker Hub](https://hub.docker.com/r/victoriametrics/victoria-metrics/) and [Quay](https://quay.io/repository/victoriametrics/victoria-metrics), [Source code](https://github.com/VictoriaMetrics/VictoriaMetrics)
- Deployment types: [Single-node version](https://docs.victoriametrics.com/), [Cluster version](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/), and [Enterprise version](https://docs.victoriametrics.com/victoriametrics/enterprise/)
- Changelog: [CHANGELOG](https://docs.victoriametrics.com/victoriametrics/changelog/), and [How to upgrade](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-upgrade-victoriametrics)
- Community: [Slack](https://slack.victoriametrics.com/), [X (Twitter)](https://x.com/VictoriaMetrics), [LinkedIn](https://www.linkedin.com/company/victoriametrics/), [YouTube](https://www.youtube.com/@VictoriaMetrics)
Yes, we open-source both the single-node VictoriaMetrics and the cluster version.

View File

@@ -4,11 +4,12 @@
The following versions of VictoriaMetrics receive regular security fixes:
| Version | Supported |
|--------------------------------------------------------------------------------|--------------------|
| [Latest release](https://docs.victoriametrics.com/victoriametrics/changelog/) | :white_check_mark: |
| [LTS releases](https://docs.victoriametrics.com/victoriametrics/lts-releases/) | :white_check_mark: |
| other releases | :x: |
| Version | Supported |
|---------|--------------------|
| [latest release](https://docs.victoriametrics.com/victoriametrics/changelog/) | :white_check_mark: |
| v1.102.x [LTS line](https://docs.victoriametrics.com/victoriametrics/lts-releases/) | :white_check_mark: |
| v1.110.x [LTS line](https://docs.victoriametrics.com/victoriametrics/lts-releases/) | :white_check_mark: |
| other releases | :x: |
See [this page](https://victoriametrics.com/security/) for more details.

View File

@@ -27,9 +27,6 @@ victoria-metrics-linux-ppc64le-prod:
victoria-metrics-linux-386-prod:
APP_NAME=victoria-metrics $(MAKE) app-via-docker-linux-386
victoria-metrics-linux-s390x-prod:
APP_NAME=victoria-metrics $(MAKE) app-via-docker-linux-s390x
victoria-metrics-darwin-amd64-prod:
APP_NAME=victoria-metrics $(MAKE) app-via-docker-darwin-amd64

View File

@@ -134,7 +134,6 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
}
w.Header().Add("Content-Type", "text/html; charset=utf-8")
fmt.Fprintf(w, "<h2>Single-node VictoriaMetrics</h2></br>")
fmt.Fprintf(w, "Version %s<br>", buildinfo.Version)
fmt.Fprintf(w, "See docs at <a href='https://docs.victoriametrics.com/'>https://docs.victoriametrics.com/</a></br>")
fmt.Fprintf(w, "Useful endpoints:</br>")
httpserver.WriteAPIHelp(w, [][2]string{
@@ -170,7 +169,7 @@ func usage() {
const s = `
victoria-metrics is a time series database and monitoring solution.
See the docs at https://docs.victoriametrics.com/victoriametrics/
See the docs at https://docs.victoriametrics.com/
`
flagutil.Usage(s)
}

View File

@@ -10,11 +10,9 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/decimal"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prommetadata"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/prometheus"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage/metricsmetadata"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timeserieslimits"
)
@@ -29,9 +27,11 @@ var selfScraperWG sync.WaitGroup
func startSelfScraper() {
selfScraperStopCh = make(chan struct{})
selfScraperWG.Go(func() {
selfScraperWG.Add(1)
go func() {
defer selfScraperWG.Done()
selfScraper(*selfScrapeInterval)
})
}()
}
func stopSelfScraper() {
@@ -48,7 +48,6 @@ func selfScraper(scrapeInterval time.Duration) {
var bb bytesutil.ByteBuffer
var rows prometheus.Rows
var metadataRows prometheus.MetadataRows
var mrs []storage.MetricRow
var labels []prompb.Label
t := time.NewTicker(scrapeInterval)
@@ -58,12 +57,8 @@ func selfScraper(scrapeInterval time.Duration) {
appmetrics.WritePrometheusMetrics(&bb)
s := bytesutil.ToUnsafeString(bb.B)
rows.Reset()
// Parse metrics and optionally metadata when enabled
if prommetadata.IsEnabled() {
rows, metadataRows = prometheus.UnmarshalWithMetadata(rows, metadataRows, s, nil)
} else {
rows.UnmarshalWithErrLogger(s, nil)
}
// VictoriaMetrics components don't expose metadata yet, only need to parse samples
rows.UnmarshalWithErrLogger(s, nil)
mrs = mrs[:0]
for i := range rows.Rows {
r := &rows.Rows[i]
@@ -96,19 +91,6 @@ func selfScraper(scrapeInterval time.Duration) {
if err := vmstorage.AddRows(mrs); err != nil {
logger.Errorf("cannot store self-scraped metrics: %s", err)
}
if len(metadataRows.Rows) > 0 {
mms := make([]metricsmetadata.Row, 0, len(metadataRows.Rows))
for _, mm := range metadataRows.Rows {
mms = append(mms, metricsmetadata.Row{
MetricFamilyName: bytesutil.ToUnsafeBytes(mm.Metric),
Help: bytesutil.ToUnsafeBytes(mm.Help),
Type: mm.Type,
})
}
if err := vmstorage.AddMetadataRows(mms); err != nil {
logger.Errorf("cannot store self-scraped metrics metadata: %s", err)
}
}
}
for {
select {

View File

@@ -27,9 +27,6 @@ vmagent-linux-ppc64le-prod:
vmagent-linux-386-prod:
APP_NAME=vmagent $(MAKE) app-via-docker-linux-386
vmagent-linux-s390x-prod:
APP_NAME=vmagent $(MAKE) app-via-docker-linux-s390x
vmagent-darwin-amd64-prod:
APP_NAME=vmagent $(MAKE) app-via-docker-darwin-amd64

View File

@@ -27,7 +27,6 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/promremotewrite"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/vmimport"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/zabbixconnector"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
@@ -75,7 +74,7 @@ var (
"See also -opentsdbHTTPListenAddr.useProxyProtocol")
opentsdbHTTPUseProxyProtocol = flag.Bool("opentsdbHTTPListenAddr.useProxyProtocol", false, "Whether to use proxy protocol for connections accepted "+
"at -opentsdbHTTPListenAddr . See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt")
configAuthKey = flagutil.NewPassword("configAuthKey", "Authorization key for accessing /config and /remotewrite-.*-config pages. It must be passed via authKey query arg. It overrides -httpAuth.*")
configAuthKey = flagutil.NewPassword("configAuthKey", "Authorization key for accessing /config page. It must be passed via authKey query arg. It overrides -httpAuth.*")
reloadAuthKey = flagutil.NewPassword("reloadAuthKey", "Auth key for /-/reload http endpoint. It must be passed via authKey query arg. It overrides -httpAuth.*")
dryRun = flag.Bool("dryRun", false, "Whether to check config files without running vmagent. The following files are checked: "+
"-promscrape.config, -remoteWrite.relabelConfig, -remoteWrite.urlRelabelConfig, -remoteWrite.streamAggr.config . "+
@@ -245,7 +244,6 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
}
w.Header().Add("Content-Type", "text/html; charset=utf-8")
fmt.Fprintf(w, "<h2>vmagent</h2>")
fmt.Fprintf(w, "Version %s<br>", buildinfo.Version)
fmt.Fprintf(w, "See docs at <a href='https://docs.victoriametrics.com/victoriametrics/vmagent/'>https://docs.victoriametrics.com/victoriametrics/vmagent/</a></br>")
fmt.Fprintf(w, "Useful endpoints:</br>")
httpserver.WriteAPIHelp(w, [][2]string{
@@ -254,8 +252,6 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
{"metric-relabel-debug", "debug metric relabeling"},
{"api/v1/targets", "advanced information about discovered targets in JSON format"},
{"config", "-promscrape.config contents"},
{"remotewrite-relabel-config", "-remoteWrite.relabelConfig contents"},
{"remotewrite-url-relabel-config", "-remoteWrite.urlRelabelConfig contents"},
{"metrics", "available service metrics"},
{"flags", "command-line flags"},
{"-/reload", "reload configuration"},
@@ -352,17 +348,6 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
}
firehose.WriteSuccessResponse(w, r)
return true
case "/zabbixconnector/api/v1/history":
zabbixconnectorHistoryRequests.Inc()
if err := zabbixconnector.InsertHandlerForHTTP(nil, r); err != nil {
zabbixconnectorHistoryErrors.Inc()
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(http.StatusBadRequest)
fmt.Fprintf(w, `{"error":%q}`, err.Error())
return true
}
w.WriteHeader(http.StatusOK)
return true
case "/newrelic":
newrelicCheckRequest.Inc()
w.Header().Set("Content-Type", "application/json")
@@ -492,42 +477,6 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
promscrape.WriteConfigData(&bb)
fmt.Fprintf(w, `{"status":"success","data":{"yaml":%s}}`, stringsutil.JSONString(string(bb.B)))
return true
case "/remotewrite-relabel-config":
if !httpserver.CheckAuthFlag(w, r, configAuthKey) {
return true
}
remoteWriteRelabelConfigRequests.Inc()
w.Header().Set("Content-Type", "text/plain; charset=utf-8")
remotewrite.WriteRelabelConfigData(w)
return true
case "/api/v1/status/remotewrite-relabel-config":
if !httpserver.CheckAuthFlag(w, r, configAuthKey) {
return true
}
remoteWriteStatusRelabelConfigRequests.Inc()
w.Header().Set("Content-Type", "application/json")
var bb bytesutil.ByteBuffer
remotewrite.WriteRelabelConfigData(&bb)
fmt.Fprintf(w, `{"status":"success","data":{"yaml":%s}}`, stringsutil.JSONString(string(bb.B)))
return true
case "/remotewrite-url-relabel-config":
if !httpserver.CheckAuthFlag(w, r, configAuthKey) {
return true
}
remoteWriteURLRelabelConfigRequests.Inc()
w.Header().Set("Content-Type", "text/plain; charset=utf-8")
remotewrite.WriteURLRelabelConfigData(w)
return true
case "/api/v1/status/remotewrite-url-relabel-config":
if !httpserver.CheckAuthFlag(w, r, configAuthKey) {
return true
}
remoteWriteStatusURLRelabelConfigRequests.Inc()
w.Header().Set("Content-Type", "application/json")
var bb bytesutil.ByteBuffer
remotewrite.WriteURLRelabelConfigData(&bb)
fmt.Fprintf(w, `{"status":"success","data":{"yaml":%s}}`, stringsutil.JSONString(string(bb.B)))
return true
case "/prometheus/-/reload", "/-/reload":
if !httpserver.CheckAuthFlag(w, r, reloadAuthKey) {
return true
@@ -657,17 +606,6 @@ func processMultitenantRequest(w http.ResponseWriter, r *http.Request, path stri
}
firehose.WriteSuccessResponse(w, r)
return true
case "zabbixconnector/api/v1/history":
zabbixconnectorHistoryRequests.Inc()
if err := zabbixconnector.InsertHandlerForHTTP(at, r); err != nil {
zabbixconnectorHistoryErrors.Inc()
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(http.StatusBadRequest)
fmt.Fprintf(w, `{"error":%q}`, err.Error())
return true
}
w.WriteHeader(http.StatusOK)
return true
case "newrelic":
newrelicCheckRequest.Inc()
w.Header().Set("Content-Type", "application/json")
@@ -789,9 +727,6 @@ var (
opentelemetryPushRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/opentelemetry/v1/metrics", protocol="opentelemetry"}`)
opentelemetryPushErrors = metrics.NewCounter(`vmagent_http_request_errors_total{path="/opentelemetry/v1/metrics", protocol="opentelemetry"}`)
zabbixconnectorHistoryRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/zabbixconnector/api/v1/history", protocol="zabbixconnector"}`)
zabbixconnectorHistoryErrors = metrics.NewCounter(`vmagent_http_request_errors_total{path="/zabbixconnector/api/v1/history", protocol="zabbixconnector"}`)
newrelicWriteRequests = metrics.NewCounter(`vm_http_requests_total{path="/newrelic/infra/v2/metrics/events/bulk", protocol="newrelic"}`)
newrelicWriteErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/newrelic/infra/v2/metrics/events/bulk", protocol="newrelic"}`)
@@ -812,12 +747,6 @@ var (
promscrapeConfigRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/config"}`)
promscrapeStatusConfigRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/api/v1/status/config"}`)
remoteWriteRelabelConfigRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/remotewrite-relabel-config"}`)
remoteWriteStatusRelabelConfigRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/api/v1/status/remotewrite-relabel-config"}`)
remoteWriteURLRelabelConfigRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/remotewrite-url-relabel-config"}`)
remoteWriteStatusURLRelabelConfigRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/api/v1/status/remotewrite-url-relabel-config"}`)
promscrapeConfigReloadRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/-/reload"}`)
)

View File

@@ -78,7 +78,7 @@ func insertRows(at *auth.Token, rows []newrelic.Row, extraLabels []prompb.Label)
if !remotewrite.TryPush(at, &ctx.WriteRequest) {
return remotewrite.ErrQueueFullHTTPRetry
}
rowsInserted.Add(samplesCount)
rowsInserted.Add(len(rows))
if at != nil {
rowsTenantInserted.Get(at).Add(samplesCount)
}

View File

@@ -2,14 +2,13 @@ package opentelemetry
import (
"fmt"
"io"
"net/http"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prommetadata"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/opentelemetry/firehose"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/opentelemetry/stream"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/protoparserutil"
@@ -25,13 +24,6 @@ var (
rowsPerInsert = metrics.NewHistogram(`vmagent_rows_per_insert{type="opentelemetry"}`)
)
// InsertHandlerForReader processes metrics from given reader.
func InsertHandlerForReader(at *auth.Token, r io.Reader, encoding string) error {
return stream.ParseStream(r, encoding, nil, func(tss []prompb.TimeSeries, mms []prompb.MetricMetadata) error {
return insertRows(at, tss, mms, nil)
})
}
// InsertHandler processes opentelemetry metrics.
func InsertHandler(at *auth.Token, req *http.Request) error {
extraLabels, err := protoparserutil.GetExtraLabels(req)
@@ -76,7 +68,7 @@ func insertRows(at *auth.Token, tss []prompb.TimeSeries, mms []prompb.MetricMeta
ctx.WriteRequest.Timeseries = tssDst
var metadataTotal int
if prommetadata.IsEnabled() {
if promscrape.IsMetadataEnabled() {
var accountID, projectID uint32
if at != nil {
accountID = at.AccountID

View File

@@ -7,8 +7,8 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prommetadata"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/prometheus"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/prometheus/stream"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/protoparserutil"
@@ -36,7 +36,7 @@ func InsertHandler(at *auth.Token, req *http.Request) error {
return err
}
encoding := req.Header.Get("Content-Encoding")
return stream.Parse(req.Body, defaultTimestamp, encoding, true, prommetadata.IsEnabled(), func(rows []prometheus.Row, mms []prometheus.Metadata) error {
return stream.Parse(req.Body, defaultTimestamp, encoding, true, promscrape.IsMetadataEnabled(), func(rows []prometheus.Row, mms []prometheus.Metadata) error {
return insertRows(at, rows, mms, extraLabels)
}, func(s string) {
httpserver.LogError(req, s)

View File

@@ -6,8 +6,8 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prommetadata"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/promremotewrite/stream"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/protoparserutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
@@ -71,7 +71,7 @@ func insertRows(at *auth.Token, timeseries []prompb.TimeSeries, mms []prompb.Met
ctx.WriteRequest.Timeseries = tssDst
var metadataTotal int
if prommetadata.IsEnabled() {
if promscrape.IsMetadataEnabled() {
var accountID, projectID uint32
if at != nil {
accountID = at.AccountID

View File

@@ -15,6 +15,7 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/awsapi"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding/zstd"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httputil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
@@ -202,10 +203,14 @@ func (c *client) init(argIdx, concurrency int, sanitizedURL string) {
c.retriesCount = metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_retries_count_total{url=%q}`, c.sanitizedURL))
c.sendDuration = metrics.GetOrCreateFloatCounter(fmt.Sprintf(`vmagent_remotewrite_send_duration_seconds_total{url=%q}`, c.sanitizedURL))
metrics.GetOrCreateGauge(fmt.Sprintf(`vmagent_remotewrite_queues{url=%q}`, c.sanitizedURL), func() float64 {
return float64(concurrency)
return float64(*queues)
})
for range concurrency {
c.wg.Go(c.runWorker)
for i := 0; i < concurrency; i++ {
c.wg.Add(1)
go func() {
defer c.wg.Done()
c.runWorker()
}()
}
logger.Infof("initialized client for -remoteWrite.url=%q", c.sanitizedURL)
}
@@ -458,6 +463,12 @@ again:
// - Real-world implementations of v1 use both 400 and 415 status codes.
// See more in research: https://github.com/VictoriaMetrics/VictoriaMetrics/pull/8462#issuecomment-2786918054
case 415, 400:
if c.canDowngradeVMProto.Swap(false) {
logger.Infof("received unsupported media type or bad request from remote storage at %q. Downgrading protocol from VictoriaMetrics to Prometheus remote write for all future requests. "+
"See https://docs.victoriametrics.com/victoriametrics/vmagent/#victoriametrics-remote-write-protocol", c.sanitizedURL)
c.useVMProto.Store(false)
}
if encoding.IsZstd(block) {
logger.Infof("received unsupported media type or bad request from remote storage at %q. Re-packing the block to Prometheus remote write and retrying."+
"See https://docs.victoriametrics.com/victoriametrics/vmagent/#victoriametrics-remote-write-protocol", c.sanitizedURL)
@@ -549,9 +560,9 @@ func getRetryDuration(retryAfterDuration, retryDuration, maxRetryDuration time.D
// For more details, see: https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9417
func repackBlockFromZstdToSnappy(zstdBlock []byte) ([]byte, error) {
plainBlock := make([]byte, 0, len(zstdBlock)*2)
plainBlock, err := encoding.DecompressZSTD(plainBlock, zstdBlock)
plainBlock, err := zstd.Decompress(plainBlock, zstdBlock)
if err != nil {
return nil, err
return nil, fmt.Errorf("zstd: decompress: %s", err)
}
return snappy.Encode(nil, plainBlock), nil

View File

@@ -93,7 +93,10 @@ func TestParseRetryAfterHeader(t *testing.T) {
// helper calculate the max possible time duration calculated by timeutil.AddJitterToDuration.
func helper(d time.Duration) time.Duration {
dv := min(d/10, 10*time.Second)
dv := d / 10
if dv > 10*time.Second {
dv = 10 * time.Second
}
return d + dv
}

View File

@@ -48,7 +48,11 @@ func newPendingSeries(fq *persistentqueue.FastQueue, isVMRemoteWrite *atomic.Boo
ps.wr.significantFigures = significantFigures
ps.wr.roundDigits = roundDigits
ps.stopCh = make(chan struct{})
ps.periodicFlusherWG.Go(ps.periodicFlusher)
ps.periodicFlusherWG.Add(1)
go func() {
defer ps.periodicFlusherWG.Done()
ps.periodicFlusher()
}()
return &ps
}

View File

@@ -3,24 +3,22 @@ package remotewrite
import (
"flag"
"fmt"
"io"
"strconv"
"strings"
"sync"
"sync/atomic"
"github.com/VictoriaMetrics/metrics"
"gopkg.in/yaml.v2"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
"github.com/VictoriaMetrics/metrics"
)
var (
unparsedLabelsGlobal = flagutil.NewArrayString("remoteWrite.label", "Optional label in the form 'name=value' to add to all the metrics before sending them to all -remoteWrite.url.")
unparsedLabelsGlobal = flagutil.NewArrayString("remoteWrite.label", "Optional label in the form 'name=value' to add to all the metrics before sending them to -remoteWrite.url. "+
"Pass multiple -remoteWrite.label flags in order to add multiple labels to metrics before sending them to remote storage")
relabelConfigPathGlobal = flag.String("remoteWrite.relabelConfig", "", "Optional path to file with relabeling configs, which are applied "+
"to all the metrics before sending them to -remoteWrite.url. See also -remoteWrite.urlRelabelConfig. "+
"The path can point either to local file or to http url. "+
@@ -34,12 +32,9 @@ var (
"See https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels")
)
var labelsGlobal []prompb.Label
var (
labelsGlobal []prompb.Label
remoteWriteRelabelConfigData atomic.Pointer[[]byte]
remoteWriteURLRelabelConfigData atomic.Pointer[[]interface{}]
relabelConfigReloads *metrics.Counter
relabelConfigReloadErrors *metrics.Counter
relabelConfigSuccess *metrics.Gauge
@@ -72,42 +67,6 @@ func initRelabelConfigs() {
}
}
// WriteRelabelConfigData writes -remoteWrite.relabelConfig contents to w
func WriteRelabelConfigData(w io.Writer) {
p := remoteWriteRelabelConfigData.Load()
if p == nil {
// Nothing to write to w
return
}
_, _ = w.Write(*p)
}
// WriteURLRelabelConfigData writes -remoteWrite.urlRelabelConfig contents to w
func WriteURLRelabelConfigData(w io.Writer) {
p := remoteWriteURLRelabelConfigData.Load()
if p == nil {
// Nothing to write to w
return
}
type urlRelabelCfg struct {
Url string `yaml:"url"`
RelabelConfig interface{} `yaml:"relabel_config"`
}
var cs []urlRelabelCfg
for i, url := range *remoteWriteURLs {
cfgData := (*p)[i]
if !*showRemoteWriteURL {
url = fmt.Sprintf("%d:secret-url", i+1)
}
cs = append(cs, urlRelabelCfg{
Url: url,
RelabelConfig: cfgData,
})
}
d, _ := yaml.Marshal(cs)
_, _ = w.Write(d)
}
func reloadRelabelConfigs() {
rcs := allRelabelConfigs.Load()
if !rcs.isSet() {
@@ -131,43 +90,28 @@ func reloadRelabelConfigs() {
func loadRelabelConfigs() (*relabelConfigs, error) {
var rcs relabelConfigs
if *relabelConfigPathGlobal != "" {
global, rawCfg, err := promrelabel.LoadRelabelConfigs(*relabelConfigPathGlobal)
global, err := promrelabel.LoadRelabelConfigs(*relabelConfigPathGlobal)
if err != nil {
return nil, fmt.Errorf("cannot load -remoteWrite.relabelConfig=%q: %w", *relabelConfigPathGlobal, err)
}
remoteWriteRelabelConfigData.Store(&rawCfg)
rcs.global = global
}
if len(*relabelConfigPaths) > len(*remoteWriteURLs) {
return nil, fmt.Errorf("too many -remoteWrite.urlRelabelConfig args: %d; it mustn't exceed the number of -remoteWrite.url args: %d",
len(*relabelConfigPaths), (len(*remoteWriteURLs)))
}
var urlRelabelCfgs []interface{}
rcs.perURL = make([]*promrelabel.ParsedConfigs, len(*remoteWriteURLs))
for i, path := range *relabelConfigPaths {
if len(path) == 0 {
urlRelabelCfgs = append(urlRelabelCfgs, nil)
// Skip empty relabel config.
continue
}
prc, rawCfg, err := promrelabel.LoadRelabelConfigs(path)
prc, err := promrelabel.LoadRelabelConfigs(path)
if err != nil {
return nil, fmt.Errorf("cannot load relabel configs from -remoteWrite.urlRelabelConfig=%q: %w", path, err)
}
rcs.perURL[i] = prc
var parsedCfg interface{}
_ = yaml.Unmarshal(rawCfg, &parsedCfg)
urlRelabelCfgs = append(urlRelabelCfgs, parsedCfg)
}
if len(*remoteWriteURLs) > len(*relabelConfigPaths) {
// fill the urlRelabelCfgs with empty relabel configs if not set
for i := len(*relabelConfigPaths); i < len(*remoteWriteURLs); i++ {
urlRelabelCfgs = append(urlRelabelCfgs, nil)
}
}
remoteWriteURLRelabelConfigData.Store(&urlRelabelCfgs)
return &rcs, nil
}
@@ -176,9 +120,19 @@ type relabelConfigs struct {
perURL []*promrelabel.ParsedConfigs
}
// isSet indicates whether (global or per-URL) command-line flags is set
func (rcs *relabelConfigs) isSet() bool {
return *relabelConfigPathGlobal != "" || len(*relabelConfigPaths) > 0
if rcs == nil {
return false
}
if rcs.global.Len() > 0 {
return true
}
for _, pc := range rcs.perURL {
if pc.Len() > 0 {
return true
}
}
return false
}
// initLabelsGlobal must be called after parsing command-line flags.

View File

@@ -27,7 +27,6 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/ratelimiter"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/slicesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/streamaggr"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timeserieslimits"
"github.com/VictoriaMetrics/metrics"
@@ -59,7 +58,7 @@ var (
"See also -remoteWrite.maxDiskUsagePerURL and -remoteWrite.disableOnDiskQueue")
keepDanglingQueues = flag.Bool("remoteWrite.keepDanglingQueues", false, "Keep persistent queues contents at -remoteWrite.tmpDataPath in case there are no matching -remoteWrite.url. "+
"Useful when -remoteWrite.url is changed temporarily and persistent queue files will be needed later on.")
queues = flagutil.NewArrayInt("remoteWrite.queues", cgroup.AvailableCPUs()*2, "The number of concurrent queues to each -remoteWrite.url. Set more queues if default number of queues "+
queues = flag.Int("remoteWrite.queues", cgroup.AvailableCPUs()*2, "The number of concurrent queues to each -remoteWrite.url. Set more queues if default number of queues "+
"isn't enough for sending high volume of collected data to remote storage. "+
"Default value depends on the number of available CPU cores. It should work fine in most cases since it minimizes resource usage")
showRemoteWriteURL = flag.Bool("remoteWrite.showURL", false, "Whether to show -remoteWrite.url in the exported metrics. "+
@@ -176,6 +175,13 @@ func Init() {
})
}
if *queues > maxQueues {
*queues = maxQueues
}
if *queues <= 0 {
*queues = 1
}
if len(*shardByURLLabels) > 0 && len(*shardByURLIgnoreLabels) > 0 {
logger.Fatalf("-remoteWrite.shardByURL.labels and -remoteWrite.shardByURL.ignoreLabels cannot be set simultaneously; " +
"see https://docs.victoriametrics.com/victoriametrics/vmagent/#sharding-among-remote-storages")
@@ -208,7 +214,9 @@ func Init() {
dropDanglingQueues()
// Start config reloader.
configReloaderWG.Go(func() {
configReloaderWG.Add(1)
go func() {
defer configReloaderWG.Done()
for {
select {
case <-configReloaderStopCh:
@@ -218,7 +226,7 @@ func Init() {
reloadRelabelConfigs()
reloadStreamAggrConfigs()
}
})
}()
}
func dropDanglingQueues() {
@@ -258,6 +266,17 @@ func initRemoteWriteCtxs(urls []string) {
if len(urls) == 0 {
logger.Panicf("BUG: urls must be non-empty")
}
maxInmemoryBlocks := memory.Allowed() / len(urls) / *maxRowsPerBlock / 100
if maxInmemoryBlocks / *queues > 100 {
// There is no much sense in keeping higher number of blocks in memory,
// since this means that the producer outperforms consumer and the queue
// will continue growing. It is better storing the queue to file.
maxInmemoryBlocks = 100 * *queues
}
if maxInmemoryBlocks < 2 {
maxInmemoryBlocks = 2
}
rwctxs := make([]*remoteWriteCtx, len(urls))
rwctxIdx := make([]int, len(urls))
if retryMaxTime.String() != "" {
@@ -272,7 +291,7 @@ func initRemoteWriteCtxs(urls []string) {
if *showRemoteWriteURL {
sanitizedURL = fmt.Sprintf("%d:%s", i+1, remoteWriteURL)
}
rwctxs[i] = newRemoteWriteCtx(i, remoteWriteURL, sanitizedURL)
rwctxs[i] = newRemoteWriteCtx(i, remoteWriteURL, maxInmemoryBlocks, sanitizedURL)
rwctxIdx[i] = i
}
@@ -466,9 +485,6 @@ func tryPush(at *auth.Token, wr *prompb.WriteRequest, forceDropSamplesOnFailure
matchIdxs.B = sas.Push(tssBlock, matchIdxs.B)
if !*streamAggrGlobalKeepInput {
tssBlock = dropAggregatedSeries(tssBlock, matchIdxs.B, *streamAggrGlobalDropInput)
} else if *streamAggrGlobalDropInput {
// if both keep_input and drop_input are true, we keep only the aggregated series
tssBlock = dropUnaggregatedSeries(tssBlock, matchIdxs.B)
}
matchIdxsPool.Put(matchIdxs)
}
@@ -538,9 +554,11 @@ func tryPushMetadataToRemoteStorages(rwctxs []*remoteWriteCtx, mms []prompb.Metr
// Push metadata to remote storage systems in parallel to reduce
// the time needed for sending the data to multiple remote storage systems.
var wg sync.WaitGroup
wg.Add(len(rwctxs))
var anyPushFailed atomic.Bool
for _, rwctx := range rwctxs {
wg.Go(func() {
go func(rwctx *remoteWriteCtx) {
defer wg.Done()
if !rwctx.tryPushMetadataInternal(mms) {
rwctx.pushFailures.Inc()
if forceDropSamplesOnFailure {
@@ -549,7 +567,7 @@ func tryPushMetadataToRemoteStorages(rwctxs []*remoteWriteCtx, mms []prompb.Metr
}
anyPushFailed.Store(true)
}
})
}(rwctx)
}
wg.Wait()
return !anyPushFailed.Load()
@@ -581,13 +599,15 @@ func tryPushTimeSeriesToRemoteStorages(rwctxs []*remoteWriteCtx, tssBlock []prom
// Push tssBlock to remote storage systems in parallel to reduce
// the time needed for sending the data to multiple remote storage systems.
var wg sync.WaitGroup
wg.Add(len(rwctxs))
var anyPushFailed atomic.Bool
for _, rwctx := range rwctxs {
wg.Go(func() {
go func(rwctx *remoteWriteCtx) {
defer wg.Done()
if !rwctx.TryPushTimeSeries(tssBlock, forceDropSamplesOnFailure) {
anyPushFailed.Store(true)
}
})
}(rwctx)
}
wg.Wait()
return !anyPushFailed.Load()
@@ -609,11 +629,13 @@ func tryShardingTimeSeriesAmongRemoteStorages(rwctxs []*remoteWriteCtx, tssBlock
if len(shard) == 0 {
continue
}
wg.Go(func() {
if !rwctx.TryPushTimeSeries(shard, forceDropSamplesOnFailure) {
wg.Add(1)
go func(rwctx *remoteWriteCtx, tss []prompb.TimeSeries) {
defer wg.Done()
if !rwctx.TryPushTimeSeries(tss, forceDropSamplesOnFailure) {
anyPushFailed.Store(true)
}
})
}(rwctx, shard)
}
wg.Wait()
return !anyPushFailed.Load()
@@ -822,7 +844,7 @@ type remoteWriteCtx struct {
rowsDroppedOnPushFailure *metrics.Counter
}
func newRemoteWriteCtx(argIdx int, remoteWriteURL *url.URL, sanitizedURL string) *remoteWriteCtx {
func newRemoteWriteCtx(argIdx int, remoteWriteURL *url.URL, maxInmemoryBlocks int, sanitizedURL string) *remoteWriteCtx {
// strip query params, otherwise changing params resets pq
pqURL := *remoteWriteURL
pqURL.RawQuery = ""
@@ -837,23 +859,6 @@ func newRemoteWriteCtx(argIdx int, remoteWriteURL *url.URL, sanitizedURL string)
}
isPQDisabled := disableOnDiskQueue.GetOptionalArg(argIdx)
queuesSize := queues.GetOptionalArg(argIdx)
if queuesSize > maxQueues {
queuesSize = maxQueues
} else if queuesSize <= 0 {
queuesSize = 1
}
maxInmemoryBlocks := memory.Allowed() / len(*remoteWriteURLs) / *maxRowsPerBlock / 100
if maxInmemoryBlocks/queuesSize > 100 {
// There is no much sense in keeping higher number of blocks in memory,
// since this means that the producer outperforms consumer and the queue
// will continue growing. It is better storing the queue to file.
maxInmemoryBlocks = 100 * queuesSize
}
if maxInmemoryBlocks < 2 {
maxInmemoryBlocks = 2
}
fq := persistentqueue.MustOpenFastQueue(queuePath, sanitizedURL, maxInmemoryBlocks, maxPendingBytes, isPQDisabled)
_ = metrics.GetOrCreateGauge(fmt.Sprintf(`vmagent_remotewrite_pending_data_bytes{path=%q, url=%q}`, queuePath, sanitizedURL), func() float64 {
return float64(fq.GetPendingBytes())
@@ -871,16 +876,16 @@ func newRemoteWriteCtx(argIdx int, remoteWriteURL *url.URL, sanitizedURL string)
var c *client
switch remoteWriteURL.Scheme {
case "http", "https":
c = newHTTPClient(argIdx, remoteWriteURL.String(), sanitizedURL, fq, queuesSize)
c = newHTTPClient(argIdx, remoteWriteURL.String(), sanitizedURL, fq, *queues)
default:
logger.Fatalf("unsupported scheme: %s for remoteWriteURL: %s, want `http`, `https`", remoteWriteURL.Scheme, sanitizedURL)
}
c.init(argIdx, queuesSize, sanitizedURL)
c.init(argIdx, *queues, sanitizedURL)
// Initialize pss
sf := significantFigures.GetOptionalArg(argIdx)
rd := roundDigits.GetOptionalArg(argIdx)
pssLen := queuesSize
pssLen := *queues
if n := cgroup.AvailableCPUs(); pssLen > n {
// There is no sense in running more than availableCPUs concurrent pendingSeries,
// since every pendingSeries can saturate up to a single CPU.
@@ -983,17 +988,7 @@ func (rwctx *remoteWriteCtx) TryPushTimeSeries(tss []prompb.TimeSeries, forceDro
tss = append(*v, tss...)
}
tss = dropAggregatedSeries(tss, matchIdxs.B, rwctx.streamAggrDropInput)
} else if rwctx.streamAggrDropInput {
// if both keep_input and drop_input are true, we keep only the aggregated series
if rctx == nil {
rctx = getRelabelCtx()
// Make a copy of tss before dropping aggregated series
v = tssPool.Get().(*[]prompb.TimeSeries)
tss = append(*v, tss...)
}
tss = dropUnaggregatedSeries(tss, matchIdxs.B)
}
matchIdxsPool.Put(matchIdxs)
}
if rwctx.deduplicator != nil {
@@ -1016,10 +1011,9 @@ func (rwctx *remoteWriteCtx) TryPushTimeSeries(tss []prompb.TimeSeries, forceDro
return false
}
var matchIdxsPool slicesutil.BufferPool[uint32]
var matchIdxsPool bytesutil.ByteBufferPool
// dropAggregatedSeries drops matched series, also the unmatched if dropInput is true.
func dropAggregatedSeries(src []prompb.TimeSeries, matchIdxs []uint32, dropInput bool) []prompb.TimeSeries {
func dropAggregatedSeries(src []prompb.TimeSeries, matchIdxs []byte, dropInput bool) []prompb.TimeSeries {
dst := src[:0]
if !dropInput {
for i, match := range matchIdxs {
@@ -1034,20 +1028,6 @@ func dropAggregatedSeries(src []prompb.TimeSeries, matchIdxs []uint32, dropInput
return dst
}
// dropUnaggregatedSeries drops unmatched series.
func dropUnaggregatedSeries(src []prompb.TimeSeries, matchIdxs []uint32) []prompb.TimeSeries {
dst := src[:0]
for i, match := range matchIdxs {
if match == 0 {
continue
}
dst = append(dst, src[i])
}
tail := src[len(dst):]
clear(tail)
return dst
}
func (rwctx *remoteWriteCtx) pushInternalTrackDropped(tss []prompb.TimeSeries) {
if rwctx.tryPushTimeSeriesInternal(tss) {
return
@@ -1080,7 +1060,7 @@ func (rwctx *remoteWriteCtx) tryPushTimeSeriesInternal(tss []prompb.TimeSeries)
}()
if len(labelsGlobal) > 0 {
// Make a copy of tss before adding extra labels to prevent
// Make a copy of tss before adding extra labels in order to prevent
// from affecting time series for other remoteWrite.url configs.
rctx = getRelabelCtx()
v = tssPool.Get().(*[]prompb.TimeSeries)

View File

@@ -10,8 +10,6 @@ import (
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/consistenthash"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/persistentqueue"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/prometheus"
@@ -59,8 +57,8 @@ func TestGetLabelsHash_Distribution(t *testing.T) {
f(10)
}
func TestRemoteWriteContext_TryPushTimeSeries(t *testing.T) {
f := func(streamAggrConfig, relabelConfig string, enableWindows bool, dedupInterval time.Duration, keepInput, dropInput bool, input string, expectedRowsPushedAfterRelabel, expectedPushedSample int) {
func TestRemoteWriteContext_TryPush_ImmutableTimeseries(t *testing.T) {
f := func(streamAggrConfig, relabelConfig string, enableWindows bool, dedupInterval time.Duration, keepInput, dropInput bool, input string) {
t.Helper()
perURLRelabel, err := promrelabel.ParseRelabelConfigsData([]byte(relabelConfig))
if err != nil {
@@ -73,16 +71,10 @@ func TestRemoteWriteContext_TryPushTimeSeries(t *testing.T) {
}
allRelabelConfigs.Store(rcs)
path := "fast-queue-write-test"
fs.MustRemoveDir(path)
fq := persistentqueue.MustOpenFastQueue(path, "test", 100, 0, false)
defer fs.MustRemoveDir(path)
defer fq.MustClose()
pss := make([]*pendingSeries, 1)
isVMProto := &atomic.Bool{}
isVMProto.Store(true)
pss[0] = newPendingSeries(fq, isVMProto, 0, 100)
pss[0] = newPendingSeries(nil, isVMProto, 0, 100)
rwctx := &remoteWriteCtx{
idx: 0,
streamAggrKeepInput: keepInput,
@@ -91,8 +83,6 @@ func TestRemoteWriteContext_TryPushTimeSeries(t *testing.T) {
rowsPushedAfterRelabel: metrics.GetOrCreateCounter(`foo`),
rowsDroppedByRelabel: metrics.GetOrCreateCounter(`bar`),
}
defer metrics.UnregisterAllMetrics()
if dedupInterval > 0 {
rwctx.deduplicator = streamaggr.NewDeduplicator(nil, enableWindows, dedupInterval, nil, "dedup-global")
}
@@ -114,27 +104,23 @@ func TestRemoteWriteContext_TryPushTimeSeries(t *testing.T) {
inputTss := prometheus.MustParsePromMetrics(input, offsetMsecs)
expectedTss := make([]prompb.TimeSeries, len(inputTss))
// check inputTss is not modified after TryPushTimeSeries
// copy inputTss to make sure it is not mutated during TryPush call
copy(expectedTss, inputTss)
if !rwctx.TryPushTimeSeries(inputTss, false) {
t.Fatalf("cannot push samples to rwctx")
}
if int(rwctx.rowsPushedAfterRelabel.Get()) != expectedRowsPushedAfterRelabel {
t.Fatalf("unexpected number of rows after relabel; got %d; want %d", rwctx.rowsPushedAfterRelabel.Get(), expectedRowsPushedAfterRelabel)
}
if len(pss[0].wr.tss) != expectedPushedSample {
t.Fatalf("unexpected number of pushed samples; got %d; want %d", len(pss[0].wr.tss), expectedPushedSample)
}
if !reflect.DeepEqual(expectedTss, inputTss) {
t.Fatalf("unexpected samples;\ngot\n%v\nwant\n%v", inputTss, expectedTss)
}
}
// relabeling
f(``, `
f(`
- interval: 1m
outputs: [sum_samples]
- interval: 2m
outputs: [count_series]
`, `
- action: keep
source_labels: [env]
regex: "dev"
@@ -143,66 +129,53 @@ metric{env="dev"} 10
metric{env="bar"} 20
metric{env="dev"} 15
metric{env="bar"} 25
`, 2, 2)
// relabeling + aggregation
f(`
- match: '{env="dev"}'
interval: 1m
outputs: [sum_samples]
`, `
- action: keep
source_labels: [env]
regex: ".*"
`, false, 0, false, false, `
metric{env="dev"} 10
metric{env="bar"} 20
metric{env="dev"} 15
metric{env="bar"} 25
`, 4, 2)
// aggregation + keepInput
f(`
- match: '{env="dev"}'
interval: 1m
outputs: [sum_samples]
`, ``, false, 0, true, false, `
metric{env="dev"} 10
metric{env="bar"} 20
metric{env="dev"} 15
metric{env="bar"} 25
`, 4, 4)
// aggregation + dropInput
f(`
- match: '{env="dev"}'
interval: 1m
outputs: [sum_samples]
`, ``, false, 0, false, true, `
metric{env="dev"} 10
metric{env="bar"} 20
metric{env="dev"} 15
metric{env="bar"} 25
`, 4, 0)
// aggregation + keepInput + dropInput
f(`
- match: '{env="dev"}'
interval: 1m
outputs: [sum_samples]
`, ``, false, 0, true, true, `
metric{env="dev"} 10
metric{env="bar"} 20
metric{env="bar"} 25
`, 3, 1)
// aggregation + deduplication
`)
f(``, ``, true, time.Hour, false, false, `
metric{env="dev"} 10
metric{env="foo"} 20
metric{env="dev"} 15
metric{env="foo"} 25
`, 4, 0)
`)
f(``, `
- action: keep
source_labels: [env]
regex: "dev"
`, true, time.Hour, false, false, `
metric{env="dev"} 10
metric{env="bar"} 20
metric{env="dev"} 15
metric{env="bar"} 25
`)
f(``, `
- action: keep
source_labels: [env]
regex: "dev"
`, true, time.Hour, true, false, `
metric{env="test"} 10
metric{env="dev"} 20
metric{env="foo"} 15
metric{env="dev"} 25
`)
f(``, `
- action: keep
source_labels: [env]
regex: "dev"
`, true, time.Hour, false, true, `
metric{env="foo"} 10
metric{env="dev"} 20
metric{env="foo"} 15
metric{env="dev"} 25
`)
f(``, `
- action: keep
source_labels: [env]
regex: "dev"
`, true, time.Hour, true, true, `
metric{env="dev"} 10
metric{env="test"} 20
metric{env="dev"} 15
metric{env="bar"} 25
`)
}
func TestShardAmountRemoteWriteCtx(t *testing.T) {

View File

@@ -18,12 +18,12 @@ var (
streamAggrGlobalConfig = flag.String("streamAggr.config", "", "Optional path to file with stream aggregation config. "+
"See https://docs.victoriametrics.com/victoriametrics/stream-aggregation/ . "+
"See also -streamAggr.keepInput, -streamAggr.dropInput and -streamAggr.dedupInterval")
streamAggrGlobalKeepInput = flag.Bool("streamAggr.keepInput", false, "Whether to keep input samples that match any rule in "+
"-streamAggr.config. By default, matched raw samples are aggregated and dropped, while unmatched samples "+
"are written to the remote storage. See also -streamAggr.dropInput and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/")
streamAggrGlobalDropInput = flag.Bool("streamAggr.dropInput", false, "Whether to drop input samples that not matching any rule in "+
"-streamAggr.config. By default, only matched raw samples are dropped, while unmatched samples "+
"are written to the remote storage. See also -streamAggr.keepInput and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/")
streamAggrGlobalKeepInput = flag.Bool("streamAggr.keepInput", false, "Whether to keep all the input samples after the aggregation "+
"with -streamAggr.config. By default, only aggregates samples are dropped, while the remaining samples "+
"are written to remote storages write. See also -streamAggr.dropInput and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/")
streamAggrGlobalDropInput = flag.Bool("streamAggr.dropInput", false, "Whether to drop all the input samples after the aggregation "+
"with -remoteWrite.streamAggr.config. By default, only aggregates samples are dropped, while the remaining samples "+
"are written to remote storages write. See also -streamAggr.keepInput and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/")
streamAggrGlobalDedupInterval = flag.Duration("streamAggr.dedupInterval", 0, "Input samples are de-duplicated with this interval on "+
"aggregator before optional aggregation with -streamAggr.config . "+
"See also -dedup.minScrapeInterval and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/#deduplication")
@@ -43,11 +43,11 @@ var (
streamAggrConfig = flagutil.NewArrayString("remoteWrite.streamAggr.config", "Optional path to file with stream aggregation config for the corresponding -remoteWrite.url. "+
"See https://docs.victoriametrics.com/victoriametrics/stream-aggregation/ . "+
"See also -remoteWrite.streamAggr.keepInput, -remoteWrite.streamAggr.dropInput and -remoteWrite.streamAggr.dedupInterval")
streamAggrDropInput = flagutil.NewArrayBool("remoteWrite.streamAggr.dropInput", "Whether to drop input samples that not matching any rule in "+
"the corresponding -remoteWrite.streamAggr.config. By default, only matched raw samples are dropped, while unmatched samples "+
streamAggrDropInput = flagutil.NewArrayBool("remoteWrite.streamAggr.dropInput", "Whether to drop all the input samples after the aggregation "+
"with -remoteWrite.streamAggr.config at the corresponding -remoteWrite.url. By default, only aggregates samples are dropped, while the remaining samples "+
"are written to the corresponding -remoteWrite.url . See also -remoteWrite.streamAggr.keepInput and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/")
streamAggrKeepInput = flagutil.NewArrayBool("remoteWrite.streamAggr.keepInput", "Whether to keep input samples that match any rule in "+
"the corresponding -remoteWrite.streamAggr.config. By default, matched raw samples are aggregated and dropped, while unmatched samples "+
streamAggrKeepInput = flagutil.NewArrayBool("remoteWrite.streamAggr.keepInput", "Whether to keep all the input samples after the aggregation "+
"with -remoteWrite.streamAggr.config at the corresponding -remoteWrite.url. By default, only aggregates samples are dropped, while the remaining samples "+
"are written to the corresponding -remoteWrite.url . See also -remoteWrite.streamAggr.dropInput and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/")
streamAggrDedupInterval = flagutil.NewArrayDuration("remoteWrite.streamAggr.dedupInterval", 0, "Input samples are de-duplicated with this interval before optional aggregation "+
"with -remoteWrite.streamAggr.config at the corresponding -remoteWrite.url. See also -dedup.minScrapeInterval and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/#deduplication")

View File

@@ -1,80 +0,0 @@
package zabbixconnector
import (
"net/http"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/protoparserutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/zabbixconnector"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/zabbixconnector/stream"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
"github.com/VictoriaMetrics/metrics"
)
var (
rowsInserted = metrics.NewCounter(`vmagent_rows_inserted_total{type="zabbixconnector"}`)
rowsTenantInserted = tenantmetrics.NewCounterMap(`vmagent_tenant_inserted_rows_total{type="zabbixconnector"}`)
rowsPerInsert = metrics.NewHistogram(`vmagent_rows_per_insert{type="zabbixconnector"}`)
)
// InsertHandlerForHTTP processes remote write for ZabbixConnector POST /zabbixconnector/v1/history request.
func InsertHandlerForHTTP(at *auth.Token, req *http.Request) error {
extraLabels, err := protoparserutil.GetExtraLabels(req)
if err != nil {
return err
}
encoding := req.Header.Get("Content-Encoding")
return stream.Parse(req.Body, encoding, func(rows []zabbixconnector.Row) error {
return insertRows(at, rows, extraLabels)
})
}
func insertRows(at *auth.Token, rows []zabbixconnector.Row, extraLabels []prompb.Label) error {
ctx := common.GetPushCtx()
defer common.PutPushCtx(ctx)
rowsTotal := len(rows)
tssDst := ctx.WriteRequest.Timeseries[:0]
labels := ctx.Labels[:0]
samples := ctx.Samples[:0]
for i := range rows {
r := &rows[i]
labelsLen := len(labels)
for j := range r.Tags {
tag := &r.Tags[j]
labels = append(labels, prompb.Label{
Name: bytesutil.ToUnsafeString(tag.Key),
Value: bytesutil.ToUnsafeString(tag.Value),
})
}
labels = append(labels, extraLabels...)
samplesLen := len(samples)
samples = append(samples, prompb.Sample{
Value: r.Value,
Timestamp: r.Timestamp,
})
tssDst = append(tssDst, prompb.TimeSeries{
Labels: labels[labelsLen:],
Samples: samples[samplesLen:],
})
}
ctx.WriteRequest.Timeseries = tssDst
ctx.Labels = labels
ctx.Samples = samples
if !remotewrite.TryPush(at, &ctx.WriteRequest) {
return remotewrite.ErrQueueFullHTTPRetry
}
rowsInserted.Add(rowsTotal)
if at != nil {
rowsTenantInserted.Get(at).Add(rowsTotal)
}
rowsPerInsert.Update(float64(rowsTotal))
return nil
}

View File

@@ -27,9 +27,6 @@ vmalert-tool-linux-ppc64le-prod:
vmalert-tool-linux-386-prod:
APP_NAME=vmalert-tool $(MAKE) app-via-docker-linux-386
vmalert-tool-linux-s390x-prod:
APP_NAME=vmalert-tool $(MAKE) app-via-docker-linux-s390x
vmalert-tool-darwin-amd64-prod:
APP_NAME=vmalert-tool $(MAKE) app-via-docker-darwin-amd64

View File

@@ -34,7 +34,6 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutil"
"github.com/VictoriaMetrics/metrics"
)
@@ -85,8 +84,7 @@ func UnitTest(files []string, disableGroupLabel bool, externalLabels []string, e
defer server.Close()
} else {
httpListenAddr = httpListenPort
ln, err := net.Listen(netutil.GetTCPNetwork(), fmt.Sprintf(":%s", httpListenPort))
ln, err := net.Listen("tcp", fmt.Sprintf(":%s", httpListenPort))
if err != nil {
logger.Fatalf("cannot listen on port %s: %v", httpListenPort, err)
}
@@ -132,7 +130,7 @@ func UnitTest(files []string, disableGroupLabel bool, externalLabels []string, e
}
labels[s[:n]] = s[n+1:]
}
err = notifier.Init(labels, externalURL)
_, err = notifier.Init(nil, labels, externalURL)
if err != nil {
logger.Fatalf("failed to init notifier: %v", err)
}
@@ -379,7 +377,7 @@ func (tg *testGroup) test(evalInterval time.Duration, groupOrderMap map[string]i
if len(g.Rules) == 0 {
continue
}
errs := g.ExecOnce(context.Background(), rw, ts)
errs := g.ExecOnce(context.Background(), func() []notifier.Notifier { return nil }, rw, ts)
for err := range errs {
if err != nil {
checkErrs = append(checkErrs, fmt.Errorf("\nfailed to exec group: %q, time: %s, err: %w", g.Name,

View File

@@ -27,9 +27,6 @@ vmalert-linux-ppc64le-prod:
vmalert-linux-386-prod:
APP_NAME=vmalert $(MAKE) app-via-docker-linux-386
vmalert-linux-s390x-prod:
APP_NAME=vmalert $(MAKE) app-via-docker-linux-s390x
vmalert-darwin-amd64-prod:
APP_NAME=vmalert $(MAKE) app-via-docker-darwin-amd64

View File

@@ -31,7 +31,7 @@ type Group struct {
// EvalDelay will adjust the `time` parameter of rule evaluation requests to compensate intentional query delay from datasource.
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5155
EvalDelay *promutil.Duration `yaml:"eval_delay,omitempty"`
Limit *int `yaml:"limit,omitempty"`
Limit int `yaml:"limit,omitempty"`
Rules []Rule `yaml:"rules"`
Concurrency int `yaml:"concurrency"`
// Labels is a set of label value pairs, that will be added to every rule.
@@ -91,8 +91,8 @@ func (g *Group) Validate(validateTplFn ValidateTplFn, validateExpressions bool)
if g.EvalOffset != nil && g.EvalDelay != nil {
return fmt.Errorf("eval_offset cannot be used with eval_delay")
}
if g.Limit != nil && *g.Limit < 0 {
return fmt.Errorf("invalid limit %d, shouldn't be less than 0", *g.Limit)
if g.Limit < 0 {
return fmt.Errorf("invalid limit %d, shouldn't be less than 0", g.Limit)
}
if g.Concurrency < 0 {
return fmt.Errorf("invalid concurrency %d, shouldn't be less than 0", g.Concurrency)

View File

@@ -116,7 +116,7 @@ func TestParse_Failure(t *testing.T) {
f([]string{"testdata/rules/rules_interval_bad.rules"}, "eval_offset should be smaller than interval")
f([]string{"testdata/rules/rules0-bad.rules"}, "unexpected token")
f([]string{"testdata/dir/rules0-bad.rules"}, "invalid annotations")
f([]string{"testdata/dir/rules0-bad.rules"}, "error parsing annotation")
f([]string{"testdata/dir/rules1-bad.rules"}, "duplicate in file")
f([]string{"testdata/dir/rules2-bad.rules"}, "function \"unknown\" not defined")
f([]string{"testdata/dir/rules3-bad.rules"}, "either `record` or `alert` must be set")
@@ -181,10 +181,9 @@ func TestGroupValidate_Failure(t *testing.T) {
EvalOffset: promutil.NewDuration(2 * time.Minute),
}, false, "eval_offset should be smaller than interval")
limit := -1
f(&Group{
Name: "wrong limit",
Limit: &limit,
Limit: -1,
}, false, "invalid limit")
f(&Group{
@@ -343,6 +342,7 @@ func TestGroupValidate_Failure(t *testing.T) {
},
},
}, true, "bad prometheus expr")
}
func TestGroupValidate_Success(t *testing.T) {

View File

@@ -76,14 +76,11 @@ func (t *Type) ValidateExpr(expr string) error {
if err != nil {
return fmt.Errorf("bad LogsQL expr: %q, err: %w", expr, err)
}
labels, err := q.GetStatsLabels()
if err != nil {
return fmt.Errorf("cannot obtain labels from LogsQL expr: %q, err: %w", expr, err)
}
for i := range labels {
fields, _ := q.GetStatsByFields()
for i := range fields {
// VictoriaLogs inserts `_time` field as a label in result when query with `stats by (_time:step)`,
// making the result meaningless and may lead to cardinality issues.
if labels[i] == "_time" {
if fields[i] == "_time" {
return fmt.Errorf("bad LogsQL expr: %q, err: cannot contain time buckets stats pipe `stats by (_time:step)`", expr)
}
}

View File

@@ -173,26 +173,22 @@ func (c *Client) Query(ctx context.Context, query string, ts time.Time) (Result,
return Result{}, nil, fmt.Errorf("second attempt: %w", err)
}
}
defer func() { _ = resp.Body.Close() }()
// Process the received response.
var parseFn func(resp *http.Response) (Result, error)
var parseFn func(req *http.Request, resp *http.Response) (Result, error)
switch c.dataSourceType {
case datasourcePrometheus:
parseFn = parsePrometheusInstantResponse
parseFn = parsePrometheusResponse
case datasourceGraphite:
parseFn = parseGraphiteResponse
case datasourceVLogs:
parseFn = parseVLogsInstantResponse
parseFn = parseVLogsResponse
default:
logger.Panicf("BUG: unsupported datasource type %q to parse query response", c.dataSourceType)
}
result, err := parseFn(resp)
if err != nil {
return Result{}, nil, fmt.Errorf("error parsing response from %q: %w", req.URL.Redacted(), err)
}
return result, req, nil
result, err := parseFn(req, resp)
_ = resp.Body.Close()
return result, req, err
}
// QueryRange executes the given query on the given time range.
@@ -233,23 +229,19 @@ func (c *Client) QueryRange(ctx context.Context, query string, start, end time.T
return res, fmt.Errorf("second attempt: %w", err)
}
}
defer func() { _ = resp.Body.Close() }()
// Process the received response.
var parseFn func(resp *http.Response) (Result, error)
var parseFn func(req *http.Request, resp *http.Response) (Result, error)
switch c.dataSourceType {
case datasourcePrometheus:
parseFn = parsePrometheusRangeResponse
parseFn = parsePrometheusResponse
case datasourceVLogs:
parseFn = parseVLogsRangeResponse
parseFn = parseVLogsResponse
default:
logger.Panicf("BUG: unsupported datasource type %q to parse query range response", c.dataSourceType)
}
res, err = parseFn(resp)
if err != nil {
return Result{}, fmt.Errorf("error parsing response from %q: %w", req.URL.Redacted(), err)
}
res, err = parseFn(req, resp)
_ = resp.Body.Close()
return res, err
}

View File

@@ -33,10 +33,10 @@ func (r graphiteResponse) metrics() []Metric {
return ms
}
func parseGraphiteResponse(resp *http.Response) (Result, error) {
func parseGraphiteResponse(req *http.Request, resp *http.Response) (Result, error) {
r := &graphiteResponse{}
if err := json.NewDecoder(resp.Body).Decode(r); err != nil {
return Result{}, fmt.Errorf("error parsing graphite metrics: %w", err)
return Result{}, fmt.Errorf("error parsing graphite metrics for %s: %w", req.URL.Redacted(), err)
}
return Result{Data: r.metrics()}, nil
}

View File

@@ -172,26 +172,17 @@ const (
rtVector, rtMatrix, rScalar = "vector", "matrix", "scalar"
)
func parsePromResponse(resp *http.Response) (*promResponse, error) {
func parsePrometheusResponse(req *http.Request, resp *http.Response) (res Result, err error) {
r := &promResponse{}
if err := json.NewDecoder(resp.Body).Decode(r); err != nil {
return nil, fmt.Errorf("failed to decode response: %w", err)
if err = json.NewDecoder(resp.Body).Decode(r); err != nil {
return res, fmt.Errorf("error parsing response from %s: %w", req.URL.Redacted(), err)
}
if r.Status == statusError {
return nil, fmt.Errorf("response error %q: %s", r.ErrorType, r.Error)
return res, fmt.Errorf("response error, query: %s, errorType: %s, error: %s", req.URL.Redacted(), r.ErrorType, r.Error)
}
if r.Status != statusSuccess {
return nil, fmt.Errorf("unknown response status %q", r.Status)
return res, fmt.Errorf("unknown status: %s, Expected success or error", r.Status)
}
return r, nil
}
func parsePrometheusInstantResponse(resp *http.Response) (res Result, err error) {
r, err := parsePromResponse(resp)
if err != nil {
return res, fmt.Errorf("failed to parse response: %w", err)
}
var parseFn func() ([]Metric, error)
switch r.Data.ResultType {
case rtVector:
@@ -200,6 +191,12 @@ func parsePrometheusInstantResponse(resp *http.Response) (res Result, err error)
return res, fmt.Errorf("unmarshal err %w; \n %#v", err, string(r.Data.Result))
}
parseFn = pi.metrics
case rtMatrix:
var pr promRange
if err := json.Unmarshal(r.Data.Result, &pr.Result); err != nil {
return res, err
}
parseFn = pr.metrics
case rScalar:
var ps promScalar
if err := json.Unmarshal(r.Data.Result, &ps); err != nil {
@@ -209,6 +206,7 @@ func parsePrometheusInstantResponse(resp *http.Response) (res Result, err error)
default:
return res, fmt.Errorf("unknown result type %q", r.Data.ResultType)
}
ms, err := parseFn()
if err != nil {
return res, err
@@ -224,34 +222,6 @@ func parsePrometheusInstantResponse(resp *http.Response) (res Result, err error)
return res, nil
}
func parsePrometheusRangeResponse(resp *http.Response) (res Result, err error) {
r, err := parsePromResponse(resp)
if err != nil {
return res, fmt.Errorf("failed to parse response: %w", err)
}
if r.Data.ResultType != rtMatrix {
return res, fmt.Errorf("unexpected result type %q; expected result type %q", r.Data.ResultType, rtMatrix)
}
var pr promRange
if err := json.Unmarshal(r.Data.Result, &pr.Result); err != nil {
return res, err
}
ms, err := pr.metrics()
if err != nil {
return res, err
}
res = Result{Data: ms, IsPartial: r.IsPartial}
if r.Stats.SeriesFetched != nil {
intV, err := strconv.Atoi(*r.Stats.SeriesFetched)
if err != nil {
return res, fmt.Errorf("failed to convert stats.seriesFetched to int: %w", err)
}
res.SeriesFetched = &intV
}
return res, nil
}
func (c *Client) setPrometheusInstantReqParams(r *http.Request, query string, timestamp time.Time) {
if c.appendTypePrefix {
r.URL.Path += "/prometheus"

View File

@@ -65,23 +65,21 @@ func TestVMInstantQuery(t *testing.T) {
case 3:
w.Write([]byte(`{"status":"unknown"}`))
case 4:
w.Write([]byte(`{"status":"success","data":{"resultType":"vector"}}`))
w.Write([]byte(`{"status":"success","data":{"resultType":"matrix"}}`))
case 5:
w.Write([]byte(`{"status":"success","data":{"resultType":"matrix","result":[{"metric":{"__name__":"vm_rows"},"values":[[1583786142,"13763"]]}]}}`))
case 6:
w.Write([]byte(`{"status":"success","data":{"resultType":"vector","result":[{"metric":{"__name__":"vm_rows","foo":"bar"},"value":[1583786142,"13763"]},{"metric":{"__name__":"vm_requests","foo":"baz"},"value":[1583786140,"2000"]}]}}`))
case 7:
case 6:
w.Write([]byte(`{"status":"success","data":{"resultType":"scalar","result":[1583786142, "1"]}}`))
case 8:
case 7:
w.Write([]byte(`{"status":"success","data":{"resultType":"scalar","result":[1583786142, "1"]},"stats":{"seriesFetched": "42"}}`))
case 9:
case 8:
w.Write([]byte(`{"status":"success", "isPartial":true, "data":{"resultType":"scalar","result":[1583786142, "1"]}}`))
}
})
mux.HandleFunc("/render", func(w http.ResponseWriter, _ *http.Request) {
c++
switch c {
case 10:
case 9:
w.Write([]byte(`[{"target":"constantLine(10)","tags":{"name":"constantLine(10)"},"datapoints":[[10,1611758343],[10,1611758373],[10,1611758403]]}]`))
}
})
@@ -104,9 +102,9 @@ func TestVMInstantQuery(t *testing.T) {
t.Fatalf("failed to parse 'time' query param %q: %s", timeParam, err)
}
switch c {
case 11:
case 10:
w.Write([]byte("[]"))
case 12:
case 11:
w.Write([]byte(`{"status":"success","data":{"resultType":"vector","result":[{"metric":{"__name__":"total","foo":"bar"},"value":[1583786142,"13763"]},{"metric":{"__name__":"total","foo":"baz"},"value":[1583786140,"2000"]}]}}`))
}
})
@@ -125,7 +123,6 @@ func TestVMInstantQuery(t *testing.T) {
ts := time.Now()
expErr := func(query, err string) {
t.Helper()
_, _, gotErr := pq.Query(ctx, query, ts)
if gotErr == nil {
t.Fatalf("expected %q got nil", err)
@@ -138,11 +135,10 @@ func TestVMInstantQuery(t *testing.T) {
expErr(vmQuery, "500") // 0
expErr(vmQuery, "error parsing response") // 1
expErr(vmQuery, "response error") // 2
expErr(vmQuery, "unknown response status") // 3
expErr(vmQuery, "unknown status") // 3
expErr(vmQuery, "unexpected end of JSON input") // 4
expErr(vmQuery, "unknown result type") // 5
res, _, err := pq.Query(ctx, vmQuery, ts) // 6 - vector
res, _, err := pq.Query(ctx, vmQuery, ts) // 5 - vector
if err != nil {
t.Fatalf("unexpected %s", err)
}
@@ -163,7 +159,7 @@ func TestVMInstantQuery(t *testing.T) {
}
metricsEqual(t, res.Data, expected)
res, req, err := pq.Query(ctx, vmQuery, ts) // 7 - scalar
res, req, err := pq.Query(ctx, vmQuery, ts) // 6 - scalar
if err != nil {
t.Fatalf("unexpected %s", err)
}
@@ -188,7 +184,7 @@ func TestVMInstantQuery(t *testing.T) {
res.SeriesFetched)
}
res, _, err = pq.Query(ctx, vmQuery, ts) // 8 - scalar with stats
res, _, err = pq.Query(ctx, vmQuery, ts) // 7 - scalar with stats
if err != nil {
t.Fatalf("unexpected %s", err)
}
@@ -209,7 +205,7 @@ func TestVMInstantQuery(t *testing.T) {
*res.SeriesFetched)
}
res, _, err = pq.Query(ctx, vmQuery, ts) // 9
res, _, err = pq.Query(ctx, vmQuery, ts) // 8
if err != nil {
t.Fatalf("unexpected %s", err)
}
@@ -220,7 +216,7 @@ func TestVMInstantQuery(t *testing.T) {
// test graphite
gq := s.BuildWithParams(QuerierParams{DataSourceType: string(datasourceGraphite)})
res, _, err = gq.Query(ctx, queryRender, ts) // 10 - graphite
res, _, err = gq.Query(ctx, queryRender, ts) // 9 - graphite
if err != nil {
t.Fatalf("unexpected %s", err)
}
@@ -240,9 +236,9 @@ func TestVMInstantQuery(t *testing.T) {
vlogs := datasourceVLogs
pq = s.BuildWithParams(QuerierParams{DataSourceType: string(vlogs), EvaluationInterval: 15 * time.Second})
expErr(vlogsQuery, "error parsing response") // 11
expErr(vlogsQuery, "error parsing response") // 10
res, _, err = pq.Query(ctx, vlogsQuery, ts) // 12
res, _, err = pq.Query(ctx, vlogsQuery, ts) // 11
if err != nil {
t.Fatalf("unexpected %s", err)
}
@@ -394,8 +390,6 @@ func TestVMRangeQuery(t *testing.T) {
switch c {
case 0:
w.Write([]byte(`{"status":"success","data":{"resultType":"matrix","result":[{"metric":{"__name__":"vm_rows"},"values":[[1583786142,"13763"]]}]}}`))
case 1:
w.Write([]byte(`{"status":"success","data":{"resultType":"vector","result":[1583786142, "1"]}}`))
}
})
mux.HandleFunc("/select/logsql/stats_query_range", func(w http.ResponseWriter, r *http.Request) {
@@ -428,7 +422,7 @@ func TestVMRangeQuery(t *testing.T) {
t.Fatalf("expected 'step' query param to be 60s; got %q instead", step)
}
switch c {
case 2:
case 1:
w.Write([]byte(`{"status":"success","data":{"resultType":"matrix","result":[{"metric":{"__name__":"total"},"values":[[1583786142,"10"]]}]}}`))
}
})
@@ -452,13 +446,13 @@ func TestVMRangeQuery(t *testing.T) {
start, end := time.Now().Add(-time.Minute), time.Now()
res, err := pq.QueryRange(ctx, vmQuery, start, end) // case 0
res, err := pq.QueryRange(ctx, vmQuery, start, end)
if err != nil {
t.Fatalf("unexpected %s", err)
}
m := res.Data
if len(m) != 1 {
t.Fatalf("expected 1 metric got %d in %+v", len(m), m)
t.Fatalf("expected 1 metric got %d in %+v", len(m), m)
}
expected := Metric{
Labels: []prompb.Label{{Value: "vm_rows", Name: "__name__"}},
@@ -469,9 +463,6 @@ func TestVMRangeQuery(t *testing.T) {
t.Fatalf("unexpected metric %+v want %+v", m[0], expected)
}
_, err = pq.QueryRange(ctx, vmQuery, start, end) // case 1
expectError(t, err, "unexpected result type")
// test unsupported graphite
gq := s.BuildWithParams(QuerierParams{DataSourceType: string(datasourceGraphite)})

View File

@@ -40,28 +40,8 @@ func (c *Client) setVLogsRangeReqParams(r *http.Request, query string, start, en
c.setReqParams(r, query)
}
func parseVLogsInstantResponse(resp *http.Response) (res Result, err error) {
res, err = parsePrometheusInstantResponse(resp)
if err != nil {
return Result{}, err
}
for i := range res.Data {
m := &res.Data[i]
for j := range m.Labels {
// reserve the stats func result name with a new label `stats_result` instead of dropping it,
// since there could be multiple stats results in a single query, for instance:
// _time:5m | stats quantile(0.5, request_duration_seconds) p50, quantile(0.9, request_duration_seconds) p90
if m.Labels[j].Name == "__name__" {
m.Labels[j].Name = "stats_result"
break
}
}
}
return
}
func parseVLogsRangeResponse(resp *http.Response) (res Result, err error) {
res, err = parsePrometheusRangeResponse(resp)
func parseVLogsResponse(req *http.Request, resp *http.Response) (res Result, err error) {
res, err = parsePrometheusResponse(req, resp)
if err != nil {
return Result{}, err
}

View File

@@ -132,7 +132,10 @@ func (ls Labels) String() string {
// a=[]Label{{Name: "a", Value: "2"}},b=[]Label{{Name: "a", Value: "1"}}, return 1
// a=[]Label{{Name: "a", Value: "1"}},b=[]Label{{Name: "a", Value: "1"}}, return 0
func LabelCompare(a, b Labels) int {
l := min(len(b), len(a))
l := len(a)
if len(b) < l {
l = len(b)
}
for i := 0; i < l; i++ {
if a[i].Name != b[i].Name {

View File

@@ -7,6 +7,7 @@ import (
"net/url"
"os"
"sort"
"strconv"
"strings"
"sync"
"time"
@@ -76,12 +77,15 @@ absolute path to all .tpl files in root.
`Link to VMUI: -external.alert.source='vmui/#/?g0.expr={{.Expr|queryEscape}}'. `+
`If empty 'vmalert/alert?group_id={{.GroupID}}&alert_id={{.AlertID}}' is used.`)
externalLabels = flagutil.NewArrayString("external.label", "Optional label in the form 'Name=value' to add to all generated recording rules and alerts. "+
"In case of conflicts, original labels are kept with prefix 'exported_'.")
"In case of conflicts, original labels are kept with prefix `exported_`.")
dryRun = flag.Bool("dryRun", false, "Whether to check only config files without running vmalert. The rules file are validated. The -rule flag must be specified.")
)
var extURL *url.URL
var (
alertURLGeneratorFn notifier.AlertURLGenerator
extURL *url.URL
)
func main() {
// Write flags and help message to stdout, since it is easier to grep or pipe.
@@ -117,7 +121,7 @@ func main() {
return
}
err = notifier.InitAlertURLGeneratorFn(extURL, *externalAlertSource, *validateTemplates)
alertURLGeneratorFn, err = getAlertURLGenerator(extURL, *externalAlertSource, *validateTemplates)
if err != nil {
logger.Fatalf("failed to init `external.alert.source`: %s", err)
}
@@ -159,7 +163,7 @@ func main() {
ctx, cancel := context.WithCancel(context.Background())
manager, err := newManager(ctx)
if err != nil {
logger.Fatalf("failed to create manager: %s", err)
logger.Fatalf("failed to init: %s", err)
}
logger.Infof("reading rules configuration file from %q", strings.Join(*rulePath, ";"))
groupsCfg, err := config.Parse(*rulePath, validateTplFn, *validateExpressions)
@@ -224,13 +228,14 @@ func newManager(ctx context.Context) (*manager, error) {
labels[s[:n]] = s[n+1:]
}
err = notifier.Init(labels, *externalURL)
nts, err := notifier.Init(alertURLGeneratorFn, labels, *externalURL)
if err != nil {
return nil, fmt.Errorf("failed to init notifier: %w", err)
}
manager := &manager{
groups: make(map[uint64]*rule.Group),
querierBuilder: q,
notifiers: nts,
labels: labels,
}
rw, err := remotewrite.Init(ctx)
@@ -287,6 +292,35 @@ func getHostnameAsExternalURL(addr string, isSecure bool) (*url.URL, error) {
return url.Parse(fmt.Sprintf("%s%s%s", schema, hname, port))
}
func getAlertURLGenerator(externalURL *url.URL, externalAlertSource string, validateTemplate bool) (notifier.AlertURLGenerator, error) {
if externalAlertSource == "" {
return func(a notifier.Alert) string {
gID, aID := strconv.FormatUint(a.GroupID, 10), strconv.FormatUint(a.ID, 10)
return fmt.Sprintf("%s/vmalert/alert?%s=%s&%s=%s", externalURL, paramGroupID, gID, paramAlertID, aID)
}, nil
}
if validateTemplate {
if err := notifier.ValidateTemplates(map[string]string{
"tpl": externalAlertSource,
}); err != nil {
return nil, fmt.Errorf("error validating source template %s: %w", externalAlertSource, err)
}
}
m := map[string]string{
"tpl": externalAlertSource,
}
return func(alert notifier.Alert) string {
qFn := func(_ string) ([]datasource.Metric, error) {
return nil, fmt.Errorf("`query` template isn't supported for alert source template")
}
templated, err := alert.ExecTemplate(qFn, alert.Labels, m)
if err != nil {
logger.Errorf("cannot template alert source: %s", err)
}
return fmt.Sprintf("%s/%s", externalURL, templated["tpl"])
}, nil
}
func usage() {
const s = `
vmalert processes alerts and recording rules.

View File

@@ -49,6 +49,30 @@ func TestGetExternalURL(t *testing.T) {
}
}
func TestGetAlertURLGenerator(t *testing.T) {
testAlert := notifier.Alert{GroupID: 42, ID: 2, Value: 4, Labels: map[string]string{"tenant": "baz"}}
u, _ := url.Parse("https://victoriametrics.com/path")
fn, err := getAlertURLGenerator(u, "", false)
if err != nil {
t.Fatalf("unexpected error %s", err)
}
exp := fmt.Sprintf("https://victoriametrics.com/path/vmalert/alert?%s=42&%s=2", paramGroupID, paramAlertID)
if exp != fn(testAlert) {
t.Fatalf("unexpected url want %s, got %s", exp, fn(testAlert))
}
_, err = getAlertURLGenerator(nil, "foo?{{invalid}}", true)
if err == nil {
t.Fatalf("expected template validation error got nil")
}
fn, err = getAlertURLGenerator(u, "foo?query={{$value}}&ds={{ $labels.tenant }}", true)
if err != nil {
t.Fatalf("unexpected error %s", err)
}
if exp := "https://victoriametrics.com/path/foo?query=4&ds=baz"; exp != fn(testAlert) {
t.Fatalf("unexpected url want %s, got %s", exp, fn(testAlert))
}
}
func TestConfigReload(t *testing.T) {
originalRulePath := *rulePath
originalExternalURL := extURL
@@ -96,10 +120,9 @@ groups:
querierBuilder: &datasource.FakeQuerier{},
groups: make(map[uint64]*rule.Group),
labels: map[string]string{},
notifiers: func() []notifier.Notifier { return []notifier.Notifier{&notifier.FakeNotifier{}} },
rw: &remotewrite.Client{},
}
_, cleanup := notifier.InitFakeNotifier()
defer cleanup()
syncCh := make(chan struct{})
sighupCh := procutil.NewSighupChan()

View File

@@ -3,7 +3,6 @@ package main
import (
"context"
"fmt"
"strconv"
"sync"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config"
@@ -17,6 +16,7 @@ import (
// manager controls group states
type manager struct {
querierBuilder datasource.QuerierBuilder
notifiers func() []notifier.Notifier
rw remotewrite.RWClient
// remote read builder.
@@ -29,8 +29,25 @@ type manager struct {
groups map[uint64]*rule.Group
}
// groupAPI generates apiGroup object from group by its ID(hash)
func (m *manager) groupAPI(gID uint64) (*rule.ApiGroup, error) {
// ruleAPI generates apiRule object from alert by its ID(hash)
func (m *manager) ruleAPI(gID, rID uint64) (apiRule, error) {
m.groupsMu.RLock()
defer m.groupsMu.RUnlock()
g, ok := m.groups[gID]
if !ok {
return apiRule{}, fmt.Errorf("can't find group with id %d", gID)
}
for _, rule := range g.Rules {
if rule.ID() == rID {
return ruleToAPI(rule), nil
}
}
return apiRule{}, fmt.Errorf("can't find rule with id %d in group %q", rID, g.Name)
}
// alertAPI generates apiAlert object from alert by its ID(hash)
func (m *manager) alertAPI(gID, aID uint64) (*apiAlert, error) {
m.groupsMu.RLock()
defer m.groupsMu.RUnlock()
@@ -38,47 +55,13 @@ func (m *manager) groupAPI(gID uint64) (*rule.ApiGroup, error) {
if !ok {
return nil, fmt.Errorf("can't find group with id %d", gID)
}
return g.ToAPI(), nil
}
// ruleAPI generates apiRule object from alert by its ID(hash)
func (m *manager) ruleAPI(gID, rID uint64) (rule.ApiRule, error) {
m.groupsMu.RLock()
defer m.groupsMu.RUnlock()
group, ok := m.groups[gID]
if !ok {
return rule.ApiRule{}, fmt.Errorf("can't find group with id %d", gID)
}
g := group.ToAPI()
ruleID := strconv.FormatUint(rID, 10)
for _, r := range g.Rules {
if r.ID == ruleID {
return r, nil
}
}
return rule.ApiRule{}, fmt.Errorf("can't find rule with id %d in group %q", rID, g.Name)
}
// alertAPI generates apiAlert object from alert by its ID(hash)
func (m *manager) alertAPI(gID, aID uint64) (*rule.ApiAlert, error) {
m.groupsMu.RLock()
defer m.groupsMu.RUnlock()
group, ok := m.groups[gID]
if !ok {
return nil, fmt.Errorf("can't find group with id %d", gID)
}
g := group.ToAPI()
for _, r := range g.Rules {
if r.Type != rule.TypeAlerting {
ar, ok := r.(*rule.AlertingRule)
if !ok {
continue
}
alertID := strconv.FormatUint(aID, 10)
for _, a := range r.Alerts {
if a.ID == alertID {
return a, nil
}
if apiAlert := alertToAPI(ar, aID); apiAlert != nil {
return apiAlert, nil
}
}
return nil, fmt.Errorf("can't find alert with id %d in group %q", aID, g.Name)
@@ -99,16 +82,17 @@ func (m *manager) close() {
}
func (m *manager) startGroup(ctx context.Context, g *rule.Group, restore bool) error {
m.wg.Add(1)
id := g.GetID()
g.Init()
m.wg.Go(func() {
go func() {
defer m.wg.Done()
if restore {
g.Start(ctx, m.rw, m.rr)
g.Start(ctx, m.notifiers, m.rw, m.rr)
} else {
g.Start(ctx, m.rw, nil)
g.Start(ctx, m.notifiers, m.rw, nil)
}
})
}()
m.groups[id] = g
return nil
}
@@ -135,7 +119,7 @@ func (m *manager) update(ctx context.Context, groupsCfg []config.Group, restore
if rrPresent && m.rw == nil {
return fmt.Errorf("config contains recording rules but `-remoteWrite.url` isn't set")
}
if arPresent && notifier.GetTargets() == nil {
if arPresent && m.notifiers == nil {
return fmt.Errorf("config contains alerting rules but neither `-notifier.url` nor `-notifier.config` nor `-notifier.blackhole` aren't set")
}
@@ -172,15 +156,15 @@ func (m *manager) update(ctx context.Context, groupsCfg []config.Group, restore
if len(toUpdate) > 0 {
var wg sync.WaitGroup
for _, item := range toUpdate {
oldG := item.old
newG := item.new
wg.Go(func() {
// cancel evaluation so the Update will be applied as fast as possible.
// it is important to call InterruptEval before the update, because cancel fn
// can be re-assigned during the update.
oldG.InterruptEval()
oldG.UpdateWith(newG)
})
wg.Add(1)
// cancel evaluation so the Update will be applied as fast as possible.
// it is important to call InterruptEval before the update, because cancel fn
// can be re-assigned during the update.
item.old.InterruptEval()
go func(oldGroup *rule.Group, newGroup *rule.Group) {
oldGroup.UpdateWith(newGroup)
wg.Done()
}(item.old, item.new)
}
wg.Wait()
}

View File

@@ -40,11 +40,10 @@ func TestManagerEmptyRulesDir(t *testing.T) {
// execution of configuration update.
// Should be executed with -race flag
func TestManagerUpdateConcurrent(t *testing.T) {
_, cleanup := notifier.InitFakeNotifier()
defer cleanup()
m := &manager{
groups: make(map[uint64]*rule.Group),
querierBuilder: &datasource.FakeQuerier{},
notifiers: func() []notifier.Notifier { return []notifier.Notifier{&notifier.FakeNotifier{}} },
}
paths := []string{
"config/testdata/dir/rules0-good.rules",
@@ -65,9 +64,11 @@ func TestManagerUpdateConcurrent(t *testing.T) {
const workers = 500
const iterations = 10
var wg sync.WaitGroup
for n := range workers {
wg.Go(func() {
wg := sync.WaitGroup{}
wg.Add(workers)
for i := 0; i < workers; i++ {
go func(n int) {
defer wg.Done()
r := rand.New(rand.NewSource(int64(n)))
for i := 0; i < iterations; i++ {
rnd := r.Intn(len(paths))
@@ -77,7 +78,7 @@ func TestManagerUpdateConcurrent(t *testing.T) {
}
_ = m.update(context.Background(), cfg, false)
}
})
}(i)
}
wg.Wait()
}
@@ -126,9 +127,8 @@ func TestManagerUpdate_Success(t *testing.T) {
m := &manager{
groups: make(map[uint64]*rule.Group),
querierBuilder: &datasource.FakeQuerier{},
notifiers: func() []notifier.Notifier { return []notifier.Notifier{&notifier.FakeNotifier{}} },
}
_, cleanup := notifier.InitFakeNotifier()
defer cleanup()
cfgInit := loadCfg(t, []string{initPath}, true, true)
if err := m.update(ctx, cfgInit, false); err != nil {
@@ -259,7 +259,7 @@ func compareGroups(t *testing.T, a, b *rule.Group) {
for i, r := range a.Rules {
got, want := r, b.Rules[i]
if a.CreateID() != b.CreateID() {
t.Fatalf("expected to have rule %d; got %d", want.ID(), got.ID())
t.Fatalf("expected to have rule %q; got %q", want.ID(), got.ID())
}
if err := rule.CompareRules(t, want, got); err != nil {
t.Fatalf("comparison error: %s", err)
@@ -277,8 +277,7 @@ func TestManagerUpdate_Failure(t *testing.T) {
rw: rw,
}
if notifiers != nil {
_, cleanup := notifier.InitFakeNotifier()
defer cleanup()
m.notifiers = func() []notifier.Notifier { return notifiers }
}
err := m.update(context.Background(), []config.Group{cfg}, false)
if err == nil {

View File

@@ -80,15 +80,14 @@ func (as AlertState) String() string {
// AlertTplData is used to execute templating
type AlertTplData struct {
Type string
Labels map[string]string
Value float64
Expr string
AlertID uint64
GroupID uint64
ActiveAt time.Time
For time.Duration
IsPartial bool
Type string
Labels map[string]string
Value float64
Expr string
AlertID uint64
GroupID uint64
ActiveAt time.Time
For time.Duration
}
var tplHeaders = []string{
@@ -102,7 +101,6 @@ var tplHeaders = []string{
"{{ $groupID := .GroupID }}",
"{{ $activeAt := .ActiveAt }}",
"{{ $for := .For }}",
"{{ $isPartial := .IsPartial }}",
}
// ExecTemplate executes the Alert template for given
@@ -168,8 +166,8 @@ func templateAnnotations(annotations map[string]string, data AlertTplData, tmpl
ctmpl, _ := tmpl.Clone()
ctmpl = ctmpl.Option("missingkey=zero")
if err := templateAnnotation(&buf, builder.String(), tData, ctmpl, execute); err != nil {
r[key] = err.Error()
eg.Add(fmt.Errorf("(key: %q, value: %q): %w", key, text, err))
r[key] = text
eg.Add(fmt.Errorf("key %q, template %q: %w", key, text, err))
continue
}
r[key] = buf.String()
@@ -186,13 +184,13 @@ type tplData struct {
func templateAnnotation(dst io.Writer, text string, data tplData, tpl *textTpl.Template, execute bool) error {
tpl, err := tpl.Parse(text)
if err != nil {
return fmt.Errorf("error parsing template: %w", err)
return fmt.Errorf("error parsing annotation template: %w", err)
}
if !execute {
return nil
}
if err = tpl.Execute(dst, data); err != nil {
return fmt.Errorf("error evaluating template: %w", err)
return fmt.Errorf("error evaluating annotation template: %w", err)
}
return nil
}

View File

@@ -20,7 +20,7 @@ func TestAlertExecTemplate(t *testing.T) {
)
extLabels["cluster"] = extCluster
extLabels["dc"] = extDC
err := Init(extLabels, extURL)
_, err := Init(nil, extLabels, extURL)
checkErr(t, err)
f := func(alert *Alert, annotations map[string]string, tplExpected map[string]string) {

View File

@@ -3,7 +3,6 @@ package notifier
import (
"bytes"
"context"
"errors"
"fmt"
"io"
"net/http"
@@ -14,6 +13,7 @@ import (
"github.com/VictoriaMetrics/metrics"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/vmalertutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httputil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
@@ -22,11 +22,10 @@ import (
// AlertManager represents integration provider with Prometheus alert manager
// https://github.com/prometheus/alertmanager
type AlertManager struct {
addr *url.URL
argFunc AlertURLGenerator
client *http.Client
timeout time.Duration
lastError string
addr *url.URL
argFunc AlertURLGenerator
client *http.Client
timeout time.Duration
authCfg *promauth.Config
// stores already parsed RelabelConfigs object
@@ -72,42 +71,24 @@ func (am AlertManager) Addr() string {
return am.addr.Redacted()
}
func (am *AlertManager) LastError() string {
return am.lastError
}
// Send an alert or resolve message
func (am *AlertManager) Send(ctx context.Context, alerts []Alert, alertLabels [][]prompb.Label, headers map[string]string) error {
if len(alerts) != len(alertLabels) {
return fmt.Errorf("mismatched number of alerts and label sets after global alert relabeling")
}
func (am *AlertManager) Send(ctx context.Context, alerts []Alert, headers map[string]string) error {
am.metrics.alertsSent.Add(len(alerts))
startTime := time.Now()
err := am.send(ctx, alerts, alertLabels, headers)
err := am.send(ctx, alerts, headers)
am.metrics.alertsSendDuration.UpdateDuration(startTime)
if err != nil {
// the context can be cancelled on graceful shutdown
// or on group update. So no need to handle the error as usual.
if errors.Is(err, context.Canceled) {
return nil
}
am.metrics.alertsSendErrors.Add(len(alerts))
am.lastError = err.Error()
} else {
am.lastError = ""
}
return err
}
func (am *AlertManager) send(ctx context.Context, alerts []Alert, alertLabels [][]prompb.Label, headers map[string]string) error {
func (am *AlertManager) send(ctx context.Context, alerts []Alert, headers map[string]string) error {
b := &bytes.Buffer{}
alertsToSend := make([]Alert, 0, len(alerts))
lblss := make([][]prompb.Label, 0, len(alerts))
for i, a := range alerts {
lbls := alertLabels[i]
if am.relabelConfigs != nil {
lbls = am.relabelConfigs.Apply(lbls, 0)
}
for _, a := range alerts {
lbls := a.applyRelabelingIfNeeded(am.relabelConfigs)
if len(lbls) == 0 {
continue
}
@@ -171,6 +152,11 @@ const alertManagerPath = "/api/v2/alerts"
func NewAlertManager(alertManagerURL string, fn AlertURLGenerator, authCfg promauth.HTTPClientConfig,
relabelCfg *promrelabel.ParsedConfigs, timeout time.Duration,
) (*AlertManager, error) {
if err := httputil.CheckURL(alertManagerURL); err != nil {
return nil, fmt.Errorf("invalid alertmanager URL: %w", err)
}
tls := &promauth.TLSConfig{}
if authCfg.TLSConfig != nil {
tls = authCfg.TLSConfig

View File

@@ -11,7 +11,6 @@ import (
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
)
@@ -146,11 +145,11 @@ func TestAlertManager_Send(t *testing.T) {
t.Fatalf("unexpected error: %s", err)
}
if err := am.Send(context.Background(), []Alert{{Labels: map[string]string{"a": "b"}}}, [][]prompb.Label{{{Name: "a", Value: "b"}}}, nil); err == nil {
if err := am.Send(context.Background(), []Alert{{Labels: map[string]string{"a": "b"}}}, nil); err == nil {
t.Fatalf("expected connection error got nil")
}
if err := am.Send(context.Background(), []Alert{{Labels: map[string]string{"a": "b"}}}, [][]prompb.Label{{{Name: "a", Value: "b"}}}, nil); err == nil {
if err := am.Send(context.Background(), []Alert{{Labels: map[string]string{"a": "b"}}}, nil); err == nil {
t.Fatalf("expected wrong http code error got nil")
}
@@ -161,7 +160,7 @@ func TestAlertManager_Send(t *testing.T) {
End: time.Now().UTC(),
Labels: map[string]string{"alertname": "alert0"},
Annotations: map[string]string{"a": "b", "c": "d"},
}}, [][]prompb.Label{{{Name: "alertname", Value: "alert0"}}}, map[string]string{headerKey: "bar"}); err != nil {
}}, map[string]string{headerKey: "bar"}); err != nil {
t.Fatalf("unexpected error %s", err)
}
@@ -175,7 +174,7 @@ func TestAlertManager_Send(t *testing.T) {
Name: "alert2",
Labels: map[string]string{"rule": "test", "tenant": "1"},
},
}, [][]prompb.Label{{{Name: "rule", Value: "test"}, {Name: "tenant", Value: "0"}}, {{Name: "rule", Value: "test"}, {Name: "tenant", Value: "1"}}}, map[string]string{headerKey: "bar"}); err != nil {
}, map[string]string{headerKey: "bar"}); err != nil {
t.Fatalf("unexpected error %s", err)
}
@@ -188,7 +187,7 @@ func TestAlertManager_Send(t *testing.T) {
Name: "alert2",
Labels: map[string]string{},
},
}, [][]prompb.Label{{{Name: "rule", Value: "test"}}, {{}}}, map[string]string{}); err != nil {
}, map[string]string{}); err != nil {
t.Fatalf("unexpected error %s", err)
}

View File

@@ -27,9 +27,15 @@ type Config struct {
// PathPrefix is added to URL path before adding alertManagerPath value
PathPrefix string `yaml:"path_prefix,omitempty"`
ConsulSDConfigs []ConsulSDConfigs `yaml:"consul_sd_configs,omitempty"`
DNSSDConfigs []DNSSDConfigs `yaml:"dns_sd_configs,omitempty"`
StaticConfigs []StaticConfig `yaml:"static_configs,omitempty"`
// ConsulSDConfigs contains list of settings for service discovery via Consul
// see https://prometheus.io/docs/prometheus/latest/configuration/configuration/#consul_sd_config
ConsulSDConfigs []consul.SDConfig `yaml:"consul_sd_configs,omitempty"`
// DNSSDConfigs contains list of settings for service discovery via DNS.
// See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#dns_sd_config
DNSSDConfigs []dns.SDConfig `yaml:"dns_sd_configs,omitempty"`
// StaticConfigs contains list of static targets
StaticConfigs []StaticConfig `yaml:"static_configs,omitempty"`
// HTTPClientConfig contains HTTP configuration for Notifier clients
HTTPClientConfig promauth.HTTPClientConfig `yaml:",inline"`
@@ -56,29 +62,14 @@ type Config struct {
parsedAlertRelabelConfigs *promrelabel.ParsedConfigs
}
// staticConfig contains list of static targets in the following form:
// StaticConfig contains list of static targets in the following form:
//
// targets:
// [ - '<host>' ]
type StaticConfig struct {
Targets []string `yaml:"targets"`
// HTTPClientConfig contains HTTP configuration for the Targets
HTTPClientConfig promauth.HTTPClientConfig `yaml:",inline"`
AlertRelabelConfigs []promrelabel.RelabelConfig `yaml:"alert_relabel_configs,omitempty"`
}
// ConsulSDConfigs contains list of settings for service discovery via Consul,
// see https://prometheus.io/docs/prometheus/latest/configuration/configuration/#consul_sd_config
type ConsulSDConfigs struct {
consul.SDConfig `yaml:",inline"`
AlertRelabelConfigs []promrelabel.RelabelConfig `yaml:"alert_relabel_configs,omitempty"`
}
// DNSSDConfigs contains list of settings for service discovery via DNS,
// See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#dns_sd_config
type DNSSDConfigs struct {
dns.SDConfig `yaml:",inline"`
AlertRelabelConfigs []promrelabel.RelabelConfig `yaml:"alert_relabel_configs,omitempty"`
HTTPClientConfig promauth.HTTPClientConfig `yaml:",inline"`
}
// UnmarshalYAML implements the yaml.Unmarshaler interface.
@@ -104,31 +95,6 @@ func (cfg *Config) UnmarshalYAML(unmarshal func(any) error) error {
}
cfg.parsedAlertRelabelConfigs = arCfg
for _, s := range cfg.StaticConfigs {
if len(s.AlertRelabelConfigs) > 0 {
_, err := promrelabel.ParseRelabelConfigs(s.AlertRelabelConfigs)
if err != nil {
return fmt.Errorf("failed to parse alert_relabel_configs in static_config: %w", err)
}
}
}
for _, s := range cfg.ConsulSDConfigs {
if len(s.AlertRelabelConfigs) > 0 {
_, err := promrelabel.ParseRelabelConfigs(s.AlertRelabelConfigs)
if err != nil {
return fmt.Errorf("failed to parse alert_relabel_configs in consul_sd_config: %w", err)
}
}
}
for _, s := range cfg.DNSSDConfigs {
if len(s.AlertRelabelConfigs) > 0 {
_, err := promrelabel.ParseRelabelConfigs(s.AlertRelabelConfigs)
if err != nil {
return fmt.Errorf("failed to parse alert_relabel_configs in dns_sd_config: %w", err)
}
}
}
b, err := yaml.Marshal(cfg)
if err != nil {
return fmt.Errorf("failed to marshal configuration for checksum: %w", err)

View File

@@ -35,6 +35,4 @@ func TestParseConfig_Failure(t *testing.T) {
f("testdata/unknownFields.bad.yaml", "unknown field")
f("non-existing-file", "error reading")
f("testdata/consul.bad.yaml", "failed to parse alert_relabel_configs in consul_sd_config")
f("testdata/dns.bad.yaml", "failed to parse alert relabeling config")
}

View File

@@ -8,7 +8,6 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discovery/consul"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discovery/dns"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutil"
@@ -29,7 +28,11 @@ type configWatcher struct {
targets map[TargetType][]Target
}
func newWatcher(cfg *Config, gen AlertURLGenerator) (*configWatcher, error) {
func newWatcher(path string, gen AlertURLGenerator) (*configWatcher, error) {
cfg, err := parseConfig(path)
if err != nil {
return nil, err
}
cw := &configWatcher{
cfg: cfg,
wg: sync.WaitGroup{},
@@ -85,15 +88,18 @@ func (cw *configWatcher) reload(path string) error {
return cw.start()
}
func (cw *configWatcher) add(typeK TargetType, interval time.Duration, targetsFn getTargets) error {
targetMetadata, errors := getTargetMetadata(targetsFn, cw.cfg)
func (cw *configWatcher) add(typeK TargetType, interval time.Duration, labelsFn getLabels) error {
targetMetadata, errors := getTargetMetadata(labelsFn, cw.cfg)
for _, err := range errors {
return fmt.Errorf("failed to init notifier for %q: %w", typeK, err)
}
cw.updateTargets(typeK, targetMetadata, cw.cfg, cw.genFn)
cw.wg.Go(func() {
cw.wg.Add(1)
go func() {
defer cw.wg.Done()
ticker := time.NewTicker(interval)
defer ticker.Stop()
@@ -103,77 +109,62 @@ func (cw *configWatcher) add(typeK TargetType, interval time.Duration, targetsFn
return
case <-ticker.C:
}
targetMetadata, errors := getTargetMetadata(targetsFn, cw.cfg)
targetMetadata, errors := getTargetMetadata(labelsFn, cw.cfg)
for _, err := range errors {
logger.Errorf("failed to init notifier for %q: %w", typeK, err)
}
cw.updateTargets(typeK, targetMetadata, cw.cfg, cw.genFn)
}
})
}()
return nil
}
type targetMetadata struct {
*promutil.Labels
alertRelabelConfigs *promrelabel.ParsedConfigs
}
func getTargetMetadata(targetsFn getTargets, cfg *Config) (map[string]targetMetadata, []error) {
metaLabelsList, alertRelabelCfgs, err := targetsFn()
func getTargetMetadata(labelsFn getLabels, cfg *Config) (map[string]*promutil.Labels, []error) {
metaLabels, err := labelsFn()
if err != nil {
return nil, []error{fmt.Errorf("failed to get labels: %w", err)}
}
targetMts := make(map[string]targetMetadata, len(metaLabelsList))
targetMetadata := make(map[string]*promutil.Labels, len(metaLabels))
var errors []error
duplicates := make(map[string]struct{})
for i := range metaLabelsList {
metaLabels := metaLabelsList[i]
alertRelabelCfg := alertRelabelCfgs[i]
for _, labels := range metaLabels {
target := labels.Get("__address__")
u, processedLabels, err := parseLabels(target, labels, cfg)
if err != nil {
errors = append(errors, err)
continue
}
if len(u) == 0 {
continue
}
// check for duplicated targets
// targets with same address but different alert_relabel_configs are still considered duplicates since it's mostly due to misconfiguration and could cause duplicated notifications.
if _, ok := duplicates[u]; ok {
if !*suppressDuplicateTargetErrors {
logger.Errorf("skipping duplicate target with identical address %q; "+
"make sure service discovery and relabeling is set up properly; "+
"original labels: %s; resulting labels: %s",
u, labels, processedLabels)
}
continue
}
duplicates[u] = struct{}{}
targetMts[u] = targetMetadata{
Labels: processedLabels,
alertRelabelConfigs: alertRelabelCfg,
}
for _, labels := range metaLabels {
target := labels.Get("__address__")
u, processedLabels, err := parseLabels(target, labels, cfg)
if err != nil {
errors = append(errors, err)
continue
}
if len(u) == 0 {
continue
}
if _, ok := duplicates[u]; ok { // check for duplicates
if !*suppressDuplicateTargetErrors {
logger.Errorf("skipping duplicate target with identical address %q; "+
"make sure service discovery and relabeling is set up properly; "+
"original labels: %s; resulting labels: %s",
u, labels, processedLabels)
}
continue
}
duplicates[u] = struct{}{}
targetMetadata[u] = processedLabels
}
return targetMts, errors
return targetMetadata, errors
}
type getTargets func() ([][]*promutil.Labels, []*promrelabel.ParsedConfigs, error)
type getLabels func() ([]*promutil.Labels, error)
func (cw *configWatcher) start() error {
if len(cw.cfg.StaticConfigs) > 0 {
var targets []Target
for i, cfg := range cw.cfg.StaticConfigs {
alertRelabelConfig, _ := promrelabel.ParseRelabelConfigs(cw.cfg.StaticConfigs[i].AlertRelabelConfigs)
for _, cfg := range cw.cfg.StaticConfigs {
httpCfg := mergeHTTPClientConfigs(cw.cfg.HTTPClientConfig, cfg.HTTPClientConfig)
for _, target := range cfg.Targets {
address, labels, err := parseLabels(target, nil, cw.cfg)
if err != nil {
return fmt.Errorf("failed to parse labels for target %q: %w", target, err)
}
notifier, err := NewAlertManager(address, cw.genFn, httpCfg, alertRelabelConfig, cw.cfg.Timeout.Duration())
notifier, err := NewAlertManager(address, cw.genFn, httpCfg, cw.cfg.parsedAlertRelabelConfigs, cw.cfg.Timeout.Duration())
if err != nil {
return fmt.Errorf("failed to init alertmanager for addr %q: %w", address, err)
}
@@ -187,20 +178,17 @@ func (cw *configWatcher) start() error {
}
if len(cw.cfg.ConsulSDConfigs) > 0 {
err := cw.add(TargetConsul, *consul.SDCheckInterval, func() ([][]*promutil.Labels, []*promrelabel.ParsedConfigs, error) {
var labels [][]*promutil.Labels
var alertRelabelConfigs []*promrelabel.ParsedConfigs
err := cw.add(TargetConsul, *consul.SDCheckInterval, func() ([]*promutil.Labels, error) {
var labels []*promutil.Labels
for i := range cw.cfg.ConsulSDConfigs {
alertRelabelConfig, _ := promrelabel.ParseRelabelConfigs(cw.cfg.ConsulSDConfigs[i].AlertRelabelConfigs)
sdc := &cw.cfg.ConsulSDConfigs[i]
targetLabels, err := sdc.GetLabels(cw.cfg.baseDir)
if err != nil {
return nil, nil, fmt.Errorf("got labels err: %w", err)
return nil, fmt.Errorf("got labels err: %w", err)
}
labels = append(labels, targetLabels)
alertRelabelConfigs = append(alertRelabelConfigs, alertRelabelConfig)
labels = append(labels, targetLabels...)
}
return labels, alertRelabelConfigs, nil
return labels, nil
})
if err != nil {
return fmt.Errorf("failed to start consulSD discovery: %w", err)
@@ -208,21 +196,17 @@ func (cw *configWatcher) start() error {
}
if len(cw.cfg.DNSSDConfigs) > 0 {
err := cw.add(TargetDNS, *dns.SDCheckInterval, func() ([][]*promutil.Labels, []*promrelabel.ParsedConfigs, error) {
var labels [][]*promutil.Labels
var alertRelabelConfigs []*promrelabel.ParsedConfigs
err := cw.add(TargetDNS, *dns.SDCheckInterval, func() ([]*promutil.Labels, error) {
var labels []*promutil.Labels
for i := range cw.cfg.DNSSDConfigs {
alertRelabelConfig, _ := promrelabel.ParseRelabelConfigs(cw.cfg.DNSSDConfigs[i].AlertRelabelConfigs)
sdc := &cw.cfg.DNSSDConfigs[i]
targetLabels, err := sdc.GetLabels(cw.cfg.baseDir)
if err != nil {
return nil, nil, fmt.Errorf("got labels err: %w", err)
return nil, fmt.Errorf("got labels err: %w", err)
}
labels = append(labels, targetLabels)
alertRelabelConfigs = append(alertRelabelConfigs, alertRelabelConfig)
labels = append(labels, targetLabels...)
}
return labels, alertRelabelConfigs, nil
return labels, nil
})
if err != nil {
return fmt.Errorf("failed to start DNSSD discovery: %w", err)
@@ -256,30 +240,30 @@ func (cw *configWatcher) setTargets(key TargetType, targets []Target) {
cw.targetsMu.Unlock()
}
func (cw *configWatcher) updateTargets(key TargetType, targetMts map[string]targetMetadata, cfg *Config, genFn AlertURLGenerator) {
func (cw *configWatcher) updateTargets(key TargetType, targetMetadata map[string]*promutil.Labels, cfg *Config, genFn AlertURLGenerator) {
cw.targetsMu.Lock()
defer cw.targetsMu.Unlock()
oldTargets := cw.targets[key]
var updatedTargets []Target
for _, ot := range oldTargets {
if _, ok := targetMts[ot.Addr()]; !ok {
if _, ok := targetMetadata[ot.Addr()]; !ok {
// if target not exists in currentTargets, close it
ot.Close()
} else {
updatedTargets = append(updatedTargets, ot)
delete(targetMts, ot.Addr())
delete(targetMetadata, ot.Addr())
}
}
// create new resources for the new targets
for addr, metadata := range targetMts {
am, err := NewAlertManager(addr, genFn, cfg.HTTPClientConfig, metadata.alertRelabelConfigs, cfg.Timeout.Duration())
for addr, labels := range targetMetadata {
am, err := NewAlertManager(addr, genFn, cfg.HTTPClientConfig, cfg.parsedAlertRelabelConfigs, cfg.Timeout.Duration())
if err != nil {
logger.Errorf("failed to init %s notifier with addr %q: %w", key, addr, err)
continue
}
updatedTargets = append(updatedTargets, Target{
Notifier: am,
Labels: metadata.Labels,
Labels: labels,
})
}

View File

@@ -7,7 +7,6 @@ import (
"net/http/httptest"
"os"
"sync"
"sync/atomic"
"testing"
"time"
@@ -29,11 +28,7 @@ static_configs:
- localhost:9093
- localhost:9094
`)
cfg, err := parseConfig(f.Name())
if err != nil {
t.Fatalf("failed to parse config: %s", err)
}
cw, err := newWatcher(cfg, nil)
cw, err := newWatcher(f.Name(), nil)
if err != nil {
t.Fatalf("failed to start config watcher: %s", err)
}
@@ -88,64 +83,33 @@ consul_sd_configs:
- server: %s
services:
- alertmanager
- server: %s
services:
- alertmanager
alert_relabel_configs:
- target_label: "foo"
replacement: "tar"
`, consulSDServer.URL, consulSDServer.URL))
`, consulSDServer.URL))
cfg, err := parseConfig(consulSDFile.Name())
if err != nil {
t.Fatalf("failed to parse config: %s", err)
}
cw, err := newWatcher(cfg, nil)
cw, err := newWatcher(consulSDFile.Name(), nil)
if err != nil {
t.Fatalf("failed to start config watcher: %s", err)
}
defer cw.mustStop()
if len(cw.notifiers()) != 3 {
t.Fatalf("expected to get 3 notifiers; got %d", len(cw.notifiers()))
if len(cw.notifiers()) != 2 {
t.Fatalf("expected to get 2 notifiers; got %d", len(cw.notifiers()))
}
expAddr1 := fmt.Sprintf("https://%s/proxy/api/v2/alerts", fakeConsulService1)
expAddr2 := fmt.Sprintf("https://%s/proxy/api/v2/alerts", fakeConsulService2)
expAddr3 := fmt.Sprintf("https://%s/proxy/api/v2/alerts", fakeConsulService3)
n1, n2, n3 := cw.notifiers()[0], cw.notifiers()[1], cw.notifiers()[2]
n1, n2 := cw.notifiers()[0], cw.notifiers()[1]
if n1.Addr() != expAddr1 {
t.Fatalf("exp address %q; got %q", expAddr1, n1.Addr())
}
if n2.Addr() != expAddr2 {
t.Fatalf("exp address %q; got %q", expAddr2, n2.Addr())
}
if n3.Addr() != expAddr3 {
t.Fatalf("exp address %q; got %q", expAddr3, n3.Addr())
}
if n1.(*AlertManager).relabelConfigs.String() != "" {
t.Fatalf("unexpected relabel configs: %q", n1.(*AlertManager).relabelConfigs.String())
}
if n2.(*AlertManager).relabelConfigs.String() != "" {
t.Fatalf("unexpected relabel configs: %q", n2.(*AlertManager).relabelConfigs.String())
}
if n3.(*AlertManager).relabelConfigs.String() != "- target_label: foo\n replacement: tar\n" {
t.Fatalf("unexpected relabel configs: %q", n3.(*AlertManager).relabelConfigs.String())
}
f := func() bool { return len(cw.notifiers()) == 1 }
if !waitFor(f, time.Second) {
t.Fatalf("expected to get 1 notifiers; got %d", len(cw.notifiers()))
}
n3 = cw.notifiers()[0]
if n3.Addr() != expAddr3 {
t.Fatalf("exp address %q; got %q", expAddr3, n3.Addr())
}
if n3.(*AlertManager).relabelConfigs.String() != "- target_label: foo\n replacement: tar\n" {
t.Fatalf("unexpected relabel configs: %q", n3.(*AlertManager).relabelConfigs.String())
}
}
// TestConfigWatcherReloadConcurrent supposed to test concurrent
@@ -200,11 +164,7 @@ consul_sd_configs:
"unknownFields.bad.yaml",
}
cfg, err := parseConfig(paths[0])
if err != nil {
t.Fatalf("failed to parse config: %s", err)
}
cw, err := newWatcher(cfg, nil)
cw, err := newWatcher(paths[0], nil)
if err != nil {
t.Fatalf("failed to start config watcher: %s", err)
}
@@ -212,16 +172,18 @@ consul_sd_configs:
const workers = 500
const iterations = 10
var wg sync.WaitGroup
for n := range workers {
wg.Go(func() {
wg := sync.WaitGroup{}
wg.Add(workers)
for i := 0; i < workers; i++ {
go func(n int) {
defer wg.Done()
r := rand.New(rand.NewSource(int64(n)))
for i := 0; i < iterations; i++ {
rnd := r.Intn(len(paths))
_ = cw.reload(paths[rnd]) // update can fail and this is expected
_ = cw.notifiers()
}
})
}(i)
}
wg.Wait()
}
@@ -240,11 +202,10 @@ func checkErr(t *testing.T, err error) {
const (
fakeConsulService1 = "127.0.0.1:9093"
fakeConsulService2 = "127.0.0.1:9095"
fakeConsulService3 = "127.0.0.1:9097"
)
func newFakeConsulServer() *httptest.Server {
var requestCount atomic.Int32
requestCount := 0
mux := http.NewServeMux()
mux.HandleFunc("/v1/agent/self", func(rw http.ResponseWriter, _ *http.Request) {
rw.Write([]byte(`{"Config": {"Datacenter": "dc1"}}`))
@@ -259,7 +220,7 @@ func newFakeConsulServer() *httptest.Server {
}`))
})
mux.HandleFunc("/v1/health/service/alertmanager", func(rw http.ResponseWriter, _ *http.Request) {
if requestCount.Load() == 0 {
if requestCount == 0 {
rw.Header().Set("X-Consul-Index", "1")
rw.Write([]byte(`
[
@@ -399,7 +360,7 @@ func newFakeConsulServer() *httptest.Server {
}
]`))
}
requestCount.Add(1)
requestCount++
})
return httptest.NewServer(mux)

View File

@@ -5,8 +5,6 @@ import (
"fmt"
"sync"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
)
// FakeNotifier is a mock notifier
@@ -17,32 +15,14 @@ type FakeNotifier struct {
counter int
}
// InitFakeNotifier initializes global notifier to FakeNotifier,
// and returns a cleanup function to restore the original getActiveNotifiers.
func InitFakeNotifier() (*FakeNotifier, func()) {
originalGetActiveNotifiers := getActiveNotifiers
fn := &FakeNotifier{}
getActiveNotifiers = func() []Notifier {
return []Notifier{fn}
}
return fn, func() {
getActiveNotifiers = originalGetActiveNotifiers
}
}
// Close does nothing
func (*FakeNotifier) Close() {}
// LastError returns last error message
func (*FakeNotifier) LastError() string {
return ""
}
// Addr returns ""
func (*FakeNotifier) Addr() string { return "" }
// Send sets alerts and increases counter
func (fn *FakeNotifier) Send(_ context.Context, alerts []Alert, _ [][]prompb.Label, _ map[string]string) error {
func (fn *FakeNotifier) Send(_ context.Context, alerts []Alert, _ map[string]string) error {
fn.Lock()
defer fn.Unlock()
fn.counter += len(alerts)

View File

@@ -1,22 +1,14 @@
package notifier
import (
"context"
"flag"
"fmt"
"net/url"
"strconv"
"strings"
"sync"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httputil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutil"
)
@@ -65,61 +57,11 @@ var (
sendTimeout = flagutil.NewArrayDuration("notifier.sendTimeout", 10*time.Second, "Timeout when sending alerts to the corresponding -notifier.url")
)
// AlertURLGeneratorFn returns a URL to the passed alert object.
// Call InitAlertURLGeneratorFn before using this function.
var AlertURLGeneratorFn AlertURLGenerator
// InitAlertURLGeneratorFn populates AlertURLGeneratorFn
func InitAlertURLGeneratorFn(externalURL *url.URL, externalAlertSource string, validateTemplate bool) error {
if externalAlertSource == "" {
AlertURLGeneratorFn = func(a Alert) string {
gID, aID := strconv.FormatUint(a.GroupID, 10), strconv.FormatUint(a.ID, 10)
return fmt.Sprintf("%s/vmalert/alert?%s=%s&%s=%s", externalURL, "group_id", gID, "alert_id", aID)
}
return nil
}
if validateTemplate {
if err := ValidateTemplates(map[string]string{
"tpl": externalAlertSource,
}); err != nil {
return fmt.Errorf("error validating source template %s: %w", externalAlertSource, err)
}
}
m := map[string]string{
"tpl": externalAlertSource,
}
AlertURLGeneratorFn = func(alert Alert) string {
qFn := func(_ string) ([]datasource.Metric, error) {
return nil, fmt.Errorf("`query` template isn't supported for alert source template")
}
templated, err := alert.ExecTemplate(qFn, alert.Labels, m)
if err != nil {
logger.Errorf("cannot template alert source: %s", err)
}
return fmt.Sprintf("%s/%s", externalURL, templated["tpl"])
}
return nil
}
var (
// getActiveNotifiers returns the current list of Notifier objects.
getActiveNotifiers func() []Notifier
// globalRelabelCfg stores the parsed alert relabeling config from the config file if there is
globalRelabelCfg *promrelabel.ParsedConfigs
// cw holds a configWatcher for configPath configuration file
// configWatcher provides a list of Notifier objects discovered
// from static config or via service discovery.
// cw is not nil only if configPath is provided.
cw *configWatcher
// externalLabels is a global variable for holding external labels configured via flags
// It is supposed to be inited via Init function only.
externalLabels map[string]string
// externalURL is a global variable for holding external URL value configured via flag
// It is supposed to be inited via Init function only.
externalURL string
)
// cw holds a configWatcher for configPath configuration file
// configWatcher provides a list of Notifier objects discovered
// from static config or via service discovery.
// cw is not nil only if configPath is provided.
var cw *configWatcher
// Reload checks the changes in configPath configuration file
// and applies changes if any.
@@ -130,62 +72,66 @@ func Reload() error {
return cw.reload(*configPath)
}
var staticNotifiersFn func() []Notifier
var (
// externalLabels is a global variable for holding external labels configured via flags
// It is supposed to be inited via Init function only.
externalLabels map[string]string
// externalURL is a global variable for holding external URL value configured via flag
// It is supposed to be inited via Init function only.
externalURL string
)
// Init returns a function for retrieving actual list of Notifier objects.
// Init works in two mods:
// - configuration via flags (for backward compatibility). Is always static
// and don't support live reloads.
// - configuration via file. Supports live reloads and service discovery.
//
// Init returns an error if both mods are used.
func Init(extLabels map[string]string, extURL string) error {
func Init(gen AlertURLGenerator, extLabels map[string]string, extURL string) (func() []Notifier, error) {
externalURL = extURL
externalLabels = extLabels
_, err := url.Parse(externalURL)
if err != nil {
return fmt.Errorf("failed to parse external URL: %w", err)
return nil, fmt.Errorf("failed to parse external URL: %w", err)
}
if *blackHole {
if len(*addrs) > 0 || *configPath != "" {
return fmt.Errorf("only one of -notifier.blackhole, -notifier.url and -notifier.config flags must be specified")
return nil, fmt.Errorf("only one of -notifier.blackhole, -notifier.url and -notifier.config flags must be specified")
}
notifier := newBlackHoleNotifier()
getActiveNotifiers = func() []Notifier {
staticNotifiersFn = func() []Notifier {
return []Notifier{notifier}
}
return nil
return staticNotifiersFn, nil
}
if *configPath == "" && len(*addrs) == 0 {
return nil
return nil, nil
}
if *configPath != "" && len(*addrs) > 0 {
return fmt.Errorf("only one of -notifier.config or -notifier.url flags must be specified")
return nil, fmt.Errorf("only one of -notifier.config or -notifier.url flags must be specified")
}
if len(*addrs) > 0 {
notifiers, err := notifiersFromFlags(AlertURLGeneratorFn)
notifiers, err := notifiersFromFlags(gen)
if err != nil {
return fmt.Errorf("failed to create notifier from flag values: %w", err)
return nil, fmt.Errorf("failed to create notifier from flag values: %w", err)
}
getActiveNotifiers = func() []Notifier {
staticNotifiersFn = func() []Notifier {
return notifiers
}
return nil
return staticNotifiersFn, nil
}
cfg, err := parseConfig(*configPath)
cw, err = newWatcher(*configPath, gen)
if err != nil {
return err
return nil, fmt.Errorf("failed to init config watcher: %w", err)
}
if cfg.AlertRelabelConfigs != nil {
globalRelabelCfg = cfg.parsedAlertRelabelConfigs
}
cw, err = newWatcher(cfg, AlertURLGeneratorFn)
if err != nil {
return fmt.Errorf("failed to init config watcher: %w", err)
}
getActiveNotifiers = cw.notifiers
return nil
return cw.notifiers, nil
}
// InitSecretFlags must be called after flag.Parse and before any logging
@@ -229,9 +175,6 @@ func notifiersFromFlags(gen AlertURLGenerator) ([]Notifier, error) {
Headers: []string{headers.GetOptionalArg(i)},
}
if err := httputil.CheckURL(addr); err != nil {
return nil, fmt.Errorf("invalid notifier.url %q: %w", addr, err)
}
addr = strings.TrimSuffix(addr, "/")
am, err := NewAlertManager(addr+alertManagerPath, gen, authCfg, nil, sendTimeout.GetOptionalArg(i))
if err != nil {
@@ -263,58 +206,23 @@ const (
// GetTargets returns list of static or discovered targets
// via notifier configuration.
//
// Must be called after Init.
func GetTargets() map[TargetType][]Target {
if getActiveNotifiers == nil {
return nil
var targets = make(map[TargetType][]Target)
if staticNotifiersFn != nil {
for _, ns := range staticNotifiersFn() {
targets[TargetStatic] = append(targets[TargetStatic], Target{
Notifier: ns,
})
}
}
targets := make(map[TargetType][]Target)
// use cached targets from configWatcher instead of getActiveNotifiers for the extra target labels
if cw != nil {
cw.targetsMu.RLock()
for key, ns := range cw.targets {
targets[key] = append(targets[key], ns...)
}
cw.targetsMu.RUnlock()
return targets
}
// static notifiers don't have labels
for _, ns := range getActiveNotifiers() {
targets[TargetStatic] = append(targets[TargetStatic], Target{
Notifier: ns,
})
}
return targets
}
// Send sends alerts to all active notifiers
func Send(ctx context.Context, alerts []Alert, notifierHeaders map[string]string) chan error {
alertsToSend := make([]Alert, 0, len(alerts))
lblss := make([][]prompb.Label, 0, len(alerts))
// apply global relabel config first without modifying original alerts in alerts
for _, a := range alerts {
lbls := a.applyRelabelingIfNeeded(globalRelabelCfg)
if len(lbls) == 0 {
continue
}
alertsToSend = append(alertsToSend, a)
lblss = append(lblss, lbls)
}
wg := sync.WaitGroup{}
activeNotifiers := getActiveNotifiers()
errCh := make(chan error, len(activeNotifiers))
defer close(errCh)
for i := range activeNotifiers {
nt := activeNotifiers[i]
wg.Go(func() {
if err := nt.Send(ctx, alertsToSend, lblss, notifierHeaders); err != nil {
errCh <- fmt.Errorf("failed to send alerts to addr %q: %w", nt.Addr(), err)
}
})
}
wg.Wait()
return errCh
}

View File

@@ -1,17 +1,9 @@
package notifier
import (
"context"
"encoding/json"
"fmt"
"net/http"
"net/http/httptest"
"net/url"
"os"
"testing"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
)
func TestInit(t *testing.T) {
@@ -20,13 +12,14 @@ func TestInit(t *testing.T) {
*addrs = flagutil.ArrayString{"127.0.0.1", "127.0.0.2"}
err := Init(nil, "")
fn, err := Init(nil, nil, "")
if err != nil {
t.Fatalf("%s", err)
}
if len(getActiveNotifiers()) != 2 {
t.Fatalf("expected to get 2 notifiers; got %d", len(getActiveNotifiers()))
nfs := fn()
if len(nfs) != 2 {
t.Fatalf("expected to get 2 notifiers; got %d", len(nfs))
}
targets := GetTargets()
@@ -55,22 +48,19 @@ func TestInitNegative(t *testing.T) {
*blackHole = oldBlackHole
}()
f := func(path string, addr []string, bh bool) {
f := func(path, addr string, bh bool) {
*configPath = path
*addrs = flagutil.ArrayString(addr)
*addrs = flagutil.ArrayString{addr}
*blackHole = bh
if err := Init(nil, ""); err == nil {
if _, err := Init(nil, nil, ""); err == nil {
t.Fatalf("expected to get error; got nil instead")
}
}
// *configPath, *addrs and *blackhole are mutually exclusive
f("/dummy/path", []string{"127.0.0.1"}, false)
f("/dummy/path", []string{}, true)
f("", []string{"127.0.0.1"}, true)
// addr cannot be ""
f("", []string{""}, false)
f("", []string{"127.0.0.1", ""}, false)
f("/dummy/path", "127.0.0.1", false)
f("/dummy/path", "", true)
f("", "127.0.0.1", true)
}
func TestBlackHole(t *testing.T) {
@@ -79,13 +69,14 @@ func TestBlackHole(t *testing.T) {
*blackHole = true
err := Init(nil, "")
fn, err := Init(nil, nil, "")
if err != nil {
t.Fatalf("%s", err)
}
if len(getActiveNotifiers()) != 1 {
t.Fatalf("expected to get 1 notifier; got %d", len(getActiveNotifiers()))
nfs := fn()
if len(nfs) != 1 {
t.Fatalf("expected to get 1 notifier; got %d", len(nfs))
}
targets := GetTargets()
@@ -100,114 +91,3 @@ func TestBlackHole(t *testing.T) {
t.Fatalf("expected to get \"blackhole\"; got %q instead", nf1.Addr())
}
}
func TestGetAlertURLGenerator(t *testing.T) {
oldAlertURLGeneratorFn := AlertURLGeneratorFn
defer func() { AlertURLGeneratorFn = oldAlertURLGeneratorFn }()
testAlert := Alert{GroupID: 42, ID: 2, Value: 4, Labels: map[string]string{"tenant": "baz"}}
u, _ := url.Parse("https://victoriametrics.com/path")
err := InitAlertURLGeneratorFn(u, "", false)
if err != nil {
t.Fatalf("unexpected error %s", err)
}
exp := fmt.Sprintf("https://victoriametrics.com/path/vmalert/alert?%s=42&%s=2", "group_id", "alert_id")
if exp != AlertURLGeneratorFn(testAlert) {
t.Fatalf("unexpected url want %s, got %s", exp, AlertURLGeneratorFn(testAlert))
}
err = InitAlertURLGeneratorFn(nil, "foo?{{invalid}}", true)
if err == nil {
t.Fatalf("expected template validation error got nil")
}
err = InitAlertURLGeneratorFn(u, "foo?query={{$value}}&ds={{ $labels.tenant }}", true)
if err != nil {
t.Fatalf("unexpected error %s", err)
}
if exp := "https://victoriametrics.com/path/foo?query=4&ds=baz"; exp != AlertURLGeneratorFn(testAlert) {
t.Fatalf("unexpected url want %s, got %s", exp, AlertURLGeneratorFn(testAlert))
}
}
func TestSendAlerts(t *testing.T) {
oldAlertURLGeneratorFn := AlertURLGeneratorFn
defer func() { AlertURLGeneratorFn = oldAlertURLGeneratorFn }()
AlertURLGeneratorFn = func(alert Alert) string {
return ""
}
mux := http.NewServeMux()
mux.HandleFunc("/", func(_ http.ResponseWriter, _ *http.Request) {
t.Fatalf("should not be called")
})
mux.HandleFunc(alertManagerPath, func(w http.ResponseWriter, r *http.Request) {
var a []struct {
Labels map[string]string `json:"labels"`
}
if err := json.NewDecoder(r.Body).Decode(&a); err != nil {
t.Fatalf("can not unmarshal data into alert %s", err)
}
if len(a) != 2 {
t.Fatalf("expected 2 alert in array got %d", len(a))
}
if len(a[0].Labels) != 4 {
t.Fatalf("expected 4 labels got %d", len(a[0].Labels))
}
if a[0].Labels["env"] != "prod" {
t.Fatalf("expected env label to be prod during relabeling, got %s", a[0].Labels["env"])
}
if a[0].Labels["c"] != "baz" {
t.Fatalf("expected c label to be baz during relabeling, got %s", a[0].Labels["c"])
}
if len(a[1].Labels) != 1 {
t.Fatalf("expected 1 labels got %d", len(a[1].Labels))
}
})
srv := httptest.NewServer(mux)
defer srv.Close()
f, err := os.CreateTemp("", "")
if err != nil {
t.Fatal(err)
}
defer fs.MustRemovePath(f.Name())
rawConfig := `
static_configs:
- targets:
- %s
alert_relabel_configs:
- source_labels: [b]
target_label: "c"
alert_relabel_configs:
- source_labels: [a]
target_label: "b"
- target_label: "env"
replacement: "prod"
`
config := fmt.Sprintf(rawConfig, srv.URL+alertManagerPath)
writeToFile(f.Name(), config)
oldConfigPath := configPath
defer func() { configPath = oldConfigPath }()
*configPath = f.Name()
err = Init(nil, "")
if err != nil {
t.Fatalf("unexpected error when parse notifier config: %s", err)
}
firingAlerts := []Alert{
{
Name: "alert1",
Labels: map[string]string{"a": "baz"},
},
{
Name: "alert2",
Labels: map[string]string{},
},
}
errG := Send(context.Background(), firingAlerts, nil)
for err := range errG {
if err != nil {
t.Errorf("unexpected error when sending alerts: %s", err)
}
}
}

View File

@@ -1,21 +1,15 @@
package notifier
import (
"context"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
)
import "context"
// Notifier is a common interface for alert manager provider
type Notifier interface {
// Send sends the given list of alerts.
// Returns an error if fails to send the alerts.
// Must unblock if the given ctx is cancelled.
Send(ctx context.Context, alerts []Alert, alertLabels [][]prompb.Label, notifierHeaders map[string]string) error
Send(ctx context.Context, alerts []Alert, notifierHeaders map[string]string) error
// Addr returns address where alerts are sent.
Addr() string
// LastError returns error, that occured during last attempt to send data
LastError() string
// Close is a destructor for the Notifier
Close()
}

View File

@@ -1,10 +1,6 @@
package notifier
import (
"context"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
)
import "context"
// blackHoleNotifier is a Notifier stub, used when no notifications need
// to be sent.
@@ -14,7 +10,7 @@ type blackHoleNotifier struct {
}
// Send will send no notifications, but increase the metric.
func (bh *blackHoleNotifier) Send(_ context.Context, alerts []Alert, _ [][]prompb.Label, _ map[string]string) error { //nolint:revive
func (bh *blackHoleNotifier) Send(_ context.Context, alerts []Alert, _ map[string]string) error { //nolint:revive
bh.metrics.alertsSent.Add(len(alerts))
return nil
}
@@ -29,11 +25,6 @@ func (bh *blackHoleNotifier) Close() {
bh.metrics.close()
}
// LastError return last notifier's error
func (bh *blackHoleNotifier) LastError() string {
return ""
}
// newBlackHoleNotifier creates a new blackHoleNotifier
func newBlackHoleNotifier() *blackHoleNotifier {
address := "blackhole"

View File

@@ -5,7 +5,6 @@ import (
"testing"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
metricset "github.com/VictoriaMetrics/metrics"
)
@@ -17,7 +16,7 @@ func TestBlackHoleNotifier_Send(t *testing.T) {
Start: time.Now().UTC(),
End: time.Now().UTC(),
Annotations: map[string]string{"a": "b", "c": "d", "e": "f"},
}}, [][]prompb.Label{{}}, nil); err != nil {
}}, nil); err != nil {
t.Fatalf("unexpected error %s", err)
}
@@ -35,7 +34,7 @@ func TestBlackHoleNotifier_Close(t *testing.T) {
Start: time.Now().UTC(),
End: time.Now().UTC(),
Annotations: map[string]string{"a": "b", "c": "d", "e": "f"},
}}, [][]prompb.Label{{}}, nil); err != nil {
}}, nil); err != nil {
t.Fatalf("unexpected error %s", err)
}

View File

@@ -1,19 +0,0 @@
consul_sd_configs:
- server: localhost:8500
scheme: http
services:
- alertmanager
alert_relabel_configs:
- action: keep
source_labels: [env]
regex: "prod"
- server: localhost:8500
services:
- consul
alert_relabel_configs:
- action: keep
source_labels: [env]
regex: "(abc"
alert_relabel_configs:
- target_label: "foo"
replacement: "aaa"

View File

@@ -1,13 +0,0 @@
dns_sd_configs:
- names:
- cloudflare.com
type: 'A'
port: 9093
relabel_configs:
- source_labels: [__meta_dns_name]
replacement: '${1}'
target_label: dns_name
alert_relabel_configs:
- action: keep
source_labels: [env]
regex: "(abc"

View File

@@ -2,19 +2,12 @@ static_configs:
- targets:
- localhost:9093
- localhost:9095
alert_relabel_configs:
- action: keep
source_labels: [env]
regex: "static"
consul_sd_configs:
- server: localhost:8500
scheme: http
services:
- alertmanager
alert_relabel_configs:
- action: keep
source_labels: [env]
regex: "consul"
- server: localhost:8500
services:
- consul
@@ -24,10 +17,6 @@ dns_sd_configs:
- cloudflare.com
type: 'A'
port: 9093
alert_relabel_configs:
- action: keep
source_labels: [env]
regex: "dns"
relabel_configs:
- source_labels: [__meta_consul_tags]
@@ -36,4 +25,4 @@ relabel_configs:
target_label: __scheme__
- source_labels: [__meta_dns_name]
replacement: '${1}'
target_label: dns_name
target_label: dns_name

View File

@@ -1,14 +1,22 @@
headers:
- 'CustomHeader: foo'
static_configs:
- targets:
- http://192.168.0.101:9093
alert_relabel_configs:
- target_label: "foo"
replacement: "aaa"
- localhost:9093
- localhost:9095
- https://localhost:9093/test/api/v2/alerts
basic_auth:
username: foo
password: bar
- targets:
- http://192.168.0.101:9093
alert_relabel_configs:
- target_label: "foo"
replacement: "ccc"
- localhost:9096
- localhost:9097
basic_auth:
username: foo
password: baz
alert_relabel_configs:
- target_label: "foo"
replacement: "aaa"

View File

@@ -1,19 +0,0 @@
package notifier
// ApiNotifier represents a Notifier configuration for WEB view
type ApiNotifier struct {
// Kind is a Notifier type
Kind TargetType `json:"kind"`
// Targets is a list of Notifier targets
Targets []*ApiTarget `json:"targets"`
}
// ApiTarget represents a specific Notifier target for WEB view
type ApiTarget struct {
// Address is a URL for sending notifications
Address string `json:"address"`
// Labels is a list of labels to add to each sent notification
Labels map[string]string `json:"labels"`
// LastError contains the error faced while sending to notifier.
LastError string `json:"lastError"`
}

View File

@@ -14,9 +14,9 @@ import (
)
var (
addr = flag.String("remoteRead.url", "", "Optional URL to datasource compatible with MetricsQL. It can be single node VictoriaMetrics or vmselect. "+
"Remote read is used to restore alerts state. "+
"This configuration makes sense only if vmalert was configured with '-remoteWrite.url' before and has been successfully persisted its state. "+
addr = flag.String("remoteRead.url", "", "Optional URL to datasource compatible with MetricsQL. It can be single node VictoriaMetrics or vmselect."+
"Remote read is used to restore alerts state."+
"This configuration makes sense only if `vmalert` was configured with `remoteWrite.url` before and has been successfully persisted its state. "+
"Supports address in the form of IP address with a port (e.g., http://127.0.0.1:8428) or DNS SRV record. "+
"See also '-remoteRead.disablePathAppend', '-remoteRead.showURL'.")

View File

@@ -173,8 +173,9 @@ func (c *Client) run(ctx context.Context) {
cancel()
}
c.wg.Go(func() {
c.wg.Add(1)
go func() {
defer c.wg.Done()
defer ticker.Stop()
for {
select {
@@ -196,7 +197,7 @@ func (c *Client) run(ctx context.Context) {
}
}
}
})
}()
}
var (

View File

@@ -2,7 +2,6 @@ package rule
import (
"context"
"errors"
"fmt"
"hash/fnv"
"math"
@@ -188,54 +187,6 @@ func (ar *AlertingRule) ID() uint64 {
return ar.RuleID
}
// ToAPI returns ApiRule representation of ar
func (ar *AlertingRule) ToAPI() ApiRule {
state := ar.state
lastState := state.getLast()
r := ApiRule{
Type: TypeAlerting,
DatasourceType: ar.Type.String(),
Name: ar.Name,
Query: ar.Expr,
Duration: ar.For.Seconds(),
KeepFiringFor: ar.KeepFiringFor.Seconds(),
Labels: ar.Labels,
Annotations: ar.Annotations,
LastEvaluation: lastState.Time,
EvaluationTime: lastState.Duration.Seconds(),
Health: "ok",
State: "inactive",
Alerts: ar.AlertsToAPI(),
LastSamples: lastState.Samples,
LastSeriesFetched: lastState.SeriesFetched,
MaxUpdates: state.size(),
Updates: state.getAll(),
Debug: ar.Debug,
// encode as strings to avoid rounding in JSON
ID: fmt.Sprintf("%d", ar.ID()),
GroupID: fmt.Sprintf("%d", ar.GroupID),
GroupName: ar.GroupName,
File: ar.File,
}
if lastState.Err != nil {
r.LastError = lastState.Err.Error()
r.Health = "err"
}
// satisfy apiRule.State logic
if len(r.Alerts) > 0 {
r.State = notifier.StatePending.String()
stateFiring := notifier.StateFiring.String()
for _, a := range r.Alerts {
if a.State == stateFiring {
r.State = stateFiring
break
}
}
}
return r
}
// GetAlerts returns active alerts of rule
func (ar *AlertingRule) GetAlerts() []*notifier.Alert {
ar.alertsMu.RLock()
@@ -247,6 +198,16 @@ func (ar *AlertingRule) GetAlerts() []*notifier.Alert {
return alerts
}
// GetAlert returns alert if id exists
func (ar *AlertingRule) GetAlert(id uint64) *notifier.Alert {
ar.alertsMu.RLock()
defer ar.alertsMu.RUnlock()
if ar.alerts == nil {
return nil
}
return ar.alerts[id]
}
func (ar *AlertingRule) logDebugf(at time.Time, a *notifier.Alert, format string, args ...any) {
if !ar.Debug {
return
@@ -312,11 +273,6 @@ type labelSet struct {
// On k conflicts in origin set, the original value is preferred and copied
// to processed with `exported_%k` key. The copy happens only if passed v isn't equal to origin[k] value.
func (ls *labelSet) add(k, v string) {
// do not add label with empty value, since it has no meaning.
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9984
if v == "" {
return
}
ls.processed[k] = v
ov, ok := ls.origin[k]
if !ok {
@@ -346,13 +302,14 @@ func (ar *AlertingRule) toLabels(m datasource.Metric, qFn templates.QueryFn) (*l
ls.processed[l.Name] = l.Value
}
// labels only support limited templating variables,
// including `labels`, `value` and `expr`, to avoid breaking alert states or causing cardinality issue with results
extraLabels, err := notifier.ExecTemplate(qFn, ar.Labels, notifier.AlertTplData{
Labels: ls.origin,
Value: m.Values[0],
Expr: ar.Expr,
})
if err != nil {
return nil, fmt.Errorf("failed to expand labels: %w", err)
}
for k, v := range extraLabels {
ls.add(k, v)
}
@@ -363,7 +320,7 @@ func (ar *AlertingRule) toLabels(m datasource.Metric, qFn templates.QueryFn) (*l
if !*disableAlertGroupLabel && ar.GroupName != "" {
ls.add(alertGroupNameLabel, ar.GroupName)
}
return ls, err
return ls, nil
}
// execRange executes alerting rule on the given time range similarly to exec.
@@ -384,12 +341,16 @@ func (ar *AlertingRule) execRange(ctx context.Context, start, end time.Time) ([]
return []datasource.Metric{{Timestamps: []int64{0}, Values: []float64{math.NaN()}}}, nil
}
for _, s := range res.Data {
ls, err := ar.expandLabelTemplates(s, qFn)
ls, err := ar.expandLabelTemplates(s)
if err != nil {
return nil, err
}
alertID := hash(ls.processed)
a := ar.newAlert(s, time.Time{}, ls.processed, nil) // initial alert
as, err := ar.expandAnnotationTemplates(s, qFn, time.Time{}, ls)
if err != nil {
return nil, err
}
a := ar.newAlert(s, time.Time{}, ls.processed, as) // initial alert
prevT := time.Time{}
for i := range s.Values {
@@ -405,6 +366,8 @@ func (ar *AlertingRule) execRange(ctx context.Context, start, end time.Time) ([]
// reset to Pending if there are gaps > EvalInterval between DPs
a.State = notifier.StatePending
a.ActiveAt = at
// re-template the annotations as active timestamp is changed
a.Annotations, _ = ar.expandAnnotationTemplates(s, qFn, at, ls)
a.Start = time.Time{}
} else if at.Sub(a.ActiveAt) >= ar.For && a.State != notifier.StateFiring {
a.State = notifier.StateFiring
@@ -450,7 +413,7 @@ func (ar *AlertingRule) exec(ctx context.Context, ts time.Time, limit int) ([]pr
defer func() {
ar.state.add(curState)
if curState.Err != nil && !errors.Is(curState.Err, context.Canceled) {
if curState.Err != nil {
ar.metrics.errors.Inc()
}
}()
@@ -459,8 +422,7 @@ func (ar *AlertingRule) exec(ctx context.Context, ts time.Time, limit int) ([]pr
return nil, fmt.Errorf("failed to execute query %q: %w", ar.Expr, err)
}
isPartial := isPartialResponse(res)
ar.logDebugf(ts, nil, "query returned %d series (elapsed: %s, isPartial: %t)", curState.Samples, curState.Duration, isPartial)
ar.logDebugf(ts, nil, "query returned %d series (elapsed: %s, isPartial: %t)", curState.Samples, curState.Duration, isPartialResponse(res))
qFn := func(query string) ([]datasource.Metric, error) {
res, _, err := ar.q.Query(ctx, query, ts)
return res.Data, err
@@ -472,11 +434,10 @@ func (ar *AlertingRule) exec(ctx context.Context, ts time.Time, limit int) ([]pr
expandedLabels := make([]*labelSet, len(res.Data))
expandedAnnotations := make([]map[string]string, len(res.Data))
for i, m := range res.Data {
ls, err := ar.expandLabelTemplates(m, qFn)
ls, err := ar.expandLabelTemplates(m)
if err != nil {
// only set error in current state, but do not break alert processing
curState.Err = err
logger.Errorf("got templating error in rule %s: %q", ar.Name, err)
return nil, curState.Err
}
at := ts
alertID := hash(ls.processed)
@@ -486,11 +447,10 @@ func (ar *AlertingRule) exec(ctx context.Context, ts time.Time, limit int) ([]pr
at = a.ActiveAt
}
}
as, err := ar.expandAnnotationTemplates(m, qFn, at, ls, isPartial)
as, err := ar.expandAnnotationTemplates(m, qFn, at, ls)
if err != nil {
// only set error in current state, but do not break alert processing
curState.Err = err
logger.Errorf("got templating error in rule %s: %q", ar.Name, err)
return nil, curState.Err
}
expandedLabels[i] = ls
expandedAnnotations[i] = as
@@ -596,29 +556,31 @@ func (ar *AlertingRule) exec(ctx context.Context, ts time.Time, limit int) ([]pr
return append(tss, ar.toTimeSeries(ts.Unix())...), nil
}
func (ar *AlertingRule) expandLabelTemplates(m datasource.Metric, qFn templates.QueryFn) (*labelSet, error) {
func (ar *AlertingRule) expandLabelTemplates(m datasource.Metric) (*labelSet, error) {
qFn := func(_ string) ([]datasource.Metric, error) {
return nil, fmt.Errorf("`query` template isn't supported in rule label")
}
ls, err := ar.toLabels(m, qFn)
if err != nil {
return ls, fmt.Errorf("failed to expand label templates: %s", err)
return nil, fmt.Errorf("failed to expand label templates: %s", err)
}
return ls, nil
}
func (ar *AlertingRule) expandAnnotationTemplates(m datasource.Metric, qFn templates.QueryFn, activeAt time.Time, ls *labelSet, isPartial bool) (map[string]string, error) {
func (ar *AlertingRule) expandAnnotationTemplates(m datasource.Metric, qFn templates.QueryFn, activeAt time.Time, ls *labelSet) (map[string]string, error) {
tplData := notifier.AlertTplData{
Value: m.Values[0],
Type: ar.Type.String(),
Labels: ls.origin,
Expr: ar.Expr,
AlertID: hash(ls.processed),
GroupID: ar.GroupID,
ActiveAt: activeAt,
For: ar.For,
IsPartial: isPartial,
Value: m.Values[0],
Type: ar.Type.String(),
Labels: ls.origin,
Expr: ar.Expr,
AlertID: hash(ls.processed),
GroupID: ar.GroupID,
ActiveAt: activeAt,
For: ar.For,
}
as, err := notifier.ExecTemplate(qFn, ar.Annotations, tplData)
if err != nil {
return as, fmt.Errorf("failed to expand annotation templates: %s", err)
return nil, fmt.Errorf("failed to expand annotation templates: %s", err)
}
return as, nil
}
@@ -818,9 +780,7 @@ func (ar *AlertingRule) restore(ctx context.Context, q datasource.Querier, ts ti
expr := fmt.Sprintf("default_rollup(%s{%s%s}[%ds])",
alertForStateMetricName, nameStr, labelsFilter, int(lookback.Seconds()))
// query ALERTS_FOR_STATE at `ts-1s` instead `ts` to avoid retrieving data written in the current run,
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10335
res, _, err := q.Query(ctx, expr, ts.Add(-1*time.Second))
res, _, err := q.Query(ctx, expr, ts)
if err != nil {
return fmt.Errorf("failed to execute restore query %q: %w ", expr, err)
}

View File

@@ -1,106 +0,0 @@
//go:build synctest
package rule
import (
"context"
"strings"
"testing"
"testing/synctest"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
)
// TestAlertingRule_ActiveAtPreservedInAnnotations ensures that the fix for
// https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9543 is preserved
// while allowing query templates in labels (https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9783)
func TestAlertingRule_ActiveAtPreservedInAnnotations(t *testing.T) {
// wrap into synctest because of time manipulations
synctest.Test(t, func(t *testing.T) {
fq := &datasource.FakeQuerier{}
ar := &AlertingRule{
Name: "TestActiveAtPreservation",
Labels: map[string]string{
"test_query_in_label": `{{ "static_value" }}`,
},
Annotations: map[string]string{
"description": "Alert active since {{ $activeAt }}",
},
alerts: make(map[uint64]*notifier.Alert),
q: fq,
state: &ruleState{
entries: make([]StateEntry, 10),
},
}
// Mock query result - return empty result to make suppress_for_mass_alert = false
// (no need to add anything to fq for empty result)
// Add a metric that should trigger the alert
fq.Add(metricWithValueAndLabels(t, 1, "instance", "server1"))
// First execution - creates new alert
ts1 := time.Now()
_, err := ar.exec(context.TODO(), ts1, 0)
if err != nil {
t.Fatalf("unexpected error on first exec: %s", err)
}
if len(ar.alerts) != 1 {
t.Fatalf("expected 1 alert, got %d", len(ar.alerts))
}
firstAlert := ar.GetAlerts()[0]
// Verify first execution: activeAt should be ts1 and annotation should reflect it
if !firstAlert.ActiveAt.Equal(ts1) {
t.Fatalf("expected activeAt to be %v, got %v", ts1, firstAlert.ActiveAt)
}
// Extract time from annotation (format will be like "Alert active since 2025-09-30 08:55:13.638551611 -0400 EDT m=+0.002928464")
expectedTimeStr := ts1.Format("2006-01-02 15:04:05")
if !strings.Contains(firstAlert.Annotations["description"], expectedTimeStr) {
t.Fatalf("first exec annotation should contain time %s, got: %s", expectedTimeStr, firstAlert.Annotations["description"])
}
// Second execution - should preserve activeAt in annotation
// Ensure different timestamp with different seconds
// sleep is non-blocking thanks to synctest
time.Sleep(2 * time.Second)
ts2 := time.Now()
_, err = ar.exec(context.TODO(), ts2, 0)
if err != nil {
t.Fatalf("unexpected error on second exec: %s", err)
}
// Get the alert again (should be the same alert)
if len(ar.alerts) != 1 {
t.Fatalf("expected 1 alert, got %d", len(ar.alerts))
}
secondAlert := ar.GetAlerts()[0]
// Critical test: activeAt should still be ts1, not ts2
if !secondAlert.ActiveAt.Equal(ts1) {
t.Fatalf("activeAt should be preserved as %v, but got %v", ts1, secondAlert.ActiveAt)
}
// Critical test: annotation should still contain ts1 time, not ts2
if !strings.Contains(secondAlert.Annotations["description"], expectedTimeStr) {
t.Fatalf("second exec annotation should still contain original time %s, got: %s", expectedTimeStr, secondAlert.Annotations["description"])
}
// Additional verification: annotation should NOT contain ts2 time
ts2TimeStr := ts2.Format("2006-01-02 15:04:05")
if strings.Contains(secondAlert.Annotations["description"], ts2TimeStr) {
t.Fatalf("annotation should NOT contain new eval time %s, got: %s", ts2TimeStr, secondAlert.Annotations["description"])
}
// Verify query template in labels still works (this would fail if query templates were broken)
if firstAlert.Labels["test_query_in_label"] != "static_value" {
t.Fatalf("expected test_query_in_label=static_value, got %s", firstAlert.Labels["test_query_in_label"])
}
})
}

View File

@@ -663,7 +663,7 @@ func TestAlertingRuleExecRange(t *testing.T) {
Name: "for-pending",
Type: config.NewPrometheusType().String(),
Labels: map[string]string{"alertname": "for-pending"},
Annotations: map[string]string{},
Annotations: map[string]string{"activeAt": "5000"},
State: notifier.StatePending,
ActiveAt: time.Unix(5, 0),
Value: 1,
@@ -683,7 +683,7 @@ func TestAlertingRuleExecRange(t *testing.T) {
Name: "for-firing",
Type: config.NewPrometheusType().String(),
Labels: map[string]string{"alertname": "for-firing"},
Annotations: map[string]string{},
Annotations: map[string]string{"activeAt": "1000"},
State: notifier.StateFiring,
ActiveAt: time.Unix(1, 0),
Start: time.Unix(5, 0),
@@ -704,7 +704,7 @@ func TestAlertingRuleExecRange(t *testing.T) {
Name: "for-hold-pending",
Type: config.NewPrometheusType().String(),
Labels: map[string]string{"alertname": "for-hold-pending"},
Annotations: map[string]string{},
Annotations: map[string]string{"activeAt": "5000"},
State: notifier.StatePending,
ActiveAt: time.Unix(5, 0),
Value: 1,
@@ -826,9 +826,12 @@ func TestGroup_Restore(t *testing.T) {
fg := NewGroup(config.Group{Name: "TestRestore", Rules: rules}, fqr, time.Second, nil)
fg.Init()
wg := sync.WaitGroup{}
wg.Go(func() {
fg.Start(context.Background(), nil, fqr)
})
wg.Add(1)
go func() {
nts := func() []notifier.Notifier { return []notifier.Notifier{&notifier.FakeNotifier{}} }
fg.Start(context.Background(), nts, nil, fqr)
wg.Done()
}()
fg.Close()
wg.Wait()
@@ -1119,7 +1122,7 @@ func TestAlertingRuleLimit_Success(t *testing.T) {
}
func TestAlertingRule_Template(t *testing.T) {
f := func(rule *AlertingRule, metrics []datasource.Metric, isResponsePartial bool, alertsExpected map[uint64]*notifier.Alert) {
f := func(rule *AlertingRule, metrics []datasource.Metric, alertsExpected map[uint64]*notifier.Alert) {
t.Helper()
fakeGroup := Group{
@@ -1132,7 +1135,6 @@ func TestAlertingRule_Template(t *testing.T) {
entries: make([]StateEntry, 10),
}
fq.Add(metrics...)
fq.SetPartialResponse(isResponsePartial)
if _, err := rule.exec(context.TODO(), time.Now(), 0); err != nil {
t.Fatalf("unexpected error: %s", err)
@@ -1163,7 +1165,7 @@ func TestAlertingRule_Template(t *testing.T) {
}, []datasource.Metric{
metricWithValueAndLabels(t, 1, "instance", "foo"),
metricWithValueAndLabels(t, 1, "instance", "bar"),
}, false, map[uint64]*notifier.Alert{
}, map[uint64]*notifier.Alert{
hash(map[string]string{alertNameLabel: "common", "region": "east", "instance": "foo"}): {
Annotations: map[string]string{
"summary": `common: Too high connection number for "foo"`,
@@ -1192,14 +1194,14 @@ func TestAlertingRule_Template(t *testing.T) {
"instance": "{{ $labels.instance }}",
},
Annotations: map[string]string{
"summary": `{{ $labels.__name__ }}: Too high connection number for "{{ $labels.instance }}".{{ if $isPartial }} WARNING: Partial response detected - this alert may be incomplete. Please verify the results manually.{{ end }}`,
"summary": `{{ $labels.__name__ }}: Too high connection number for "{{ $labels.instance }}"`,
"description": `{{ $labels.alertname}}: It is {{ $value }} connections for "{{ $labels.instance }}"`,
},
alerts: make(map[uint64]*notifier.Alert),
}, []datasource.Metric{
metricWithValueAndLabels(t, 2, "__name__", "first", "instance", "foo", alertNameLabel, "override"),
metricWithValueAndLabels(t, 10, "__name__", "second", "instance", "bar", alertNameLabel, "override"),
}, false, map[uint64]*notifier.Alert{
}, map[uint64]*notifier.Alert{
hash(map[string]string{alertNameLabel: "override label", "exported_alertname": "override", "instance": "foo"}): {
Labels: map[string]string{
alertNameLabel: "override label",
@@ -1207,7 +1209,7 @@ func TestAlertingRule_Template(t *testing.T) {
"instance": "foo",
},
Annotations: map[string]string{
"summary": `first: Too high connection number for "foo".`,
"summary": `first: Too high connection number for "foo"`,
"description": `override: It is 2 connections for "foo"`,
},
},
@@ -1218,7 +1220,7 @@ func TestAlertingRule_Template(t *testing.T) {
"instance": "bar",
},
Annotations: map[string]string{
"summary": `second: Too high connection number for "bar".`,
"summary": `second: Too high connection number for "bar"`,
"description": `override: It is 10 connections for "bar"`,
},
},
@@ -1231,7 +1233,7 @@ func TestAlertingRule_Template(t *testing.T) {
"instance": "{{ $labels.instance }}",
},
Annotations: map[string]string{
"summary": `Alert "{{ $labels.alertname }}({{ $labels.alertgroup }})" for instance {{ $labels.instance }}.{{ if $isPartial }} WARNING: Partial response detected - this alert may be incomplete. Please verify the results manually.{{ end }}`,
"summary": `Alert "{{ $labels.alertname }}({{ $labels.alertgroup }})" for instance {{ $labels.instance }}`,
},
alerts: make(map[uint64]*notifier.Alert),
}, []datasource.Metric{
@@ -1239,7 +1241,7 @@ func TestAlertingRule_Template(t *testing.T) {
alertNameLabel, "originAlertname",
alertGroupNameLabel, "originGroupname",
"instance", "foo"),
}, true, map[uint64]*notifier.Alert{
}, map[uint64]*notifier.Alert{
hash(map[string]string{
alertNameLabel: "OriginLabels",
"exported_alertname": "originAlertname",
@@ -1255,7 +1257,7 @@ func TestAlertingRule_Template(t *testing.T) {
"instance": "foo",
},
Annotations: map[string]string{
"summary": `Alert "originAlertname(originGroupname)" for instance foo. WARNING: Partial response detected - this alert may be incomplete. Please verify the results manually.`,
"summary": `Alert "originAlertname(originGroupname)" for instance foo`,
},
},
})
@@ -1370,10 +1372,8 @@ func TestAlertingRule_ToLabels(t *testing.T) {
ar := &AlertingRule{
Labels: map[string]string{
"instance": "override", // this should override instance with new value
"group": "vmalert", // this shouldn't have effect since value in metric is equal
"invalid_label": "{{ .Values.mustRuntimeFail }}",
"empty_label": "", // this should be dropped
"instance": "override", // this should override instance with new value
"group": "vmalert", // this shouldn't have effect since value in metric is equal
},
Expr: "sum(vmalert_alerting_rules_error) by(instance, group, alertname) > 0",
Name: "AlertingRulesError",
@@ -1381,11 +1381,10 @@ func TestAlertingRule_ToLabels(t *testing.T) {
}
expectedOriginLabels := map[string]string{
"instance": "0.0.0.0:8800",
"group": "vmalert",
"alertname": "ConfigurationReloadFailure",
"alertgroup": "vmalert",
"invalid_label": `error evaluating template: template: :1:298: executing "" at <.Values.mustRuntimeFail>: can't evaluate field Values in type notifier.tplData`,
"instance": "0.0.0.0:8800",
"group": "vmalert",
"alertname": "ConfigurationReloadFailure",
"alertgroup": "vmalert",
}
expectedProcessedLabels := map[string]string{
@@ -1395,12 +1394,11 @@ func TestAlertingRule_ToLabels(t *testing.T) {
"exported_alertname": "ConfigurationReloadFailure",
"group": "vmalert",
"alertgroup": "vmalert",
"invalid_label": `error evaluating template: template: :1:298: executing "" at <.Values.mustRuntimeFail>: can't evaluate field Values in type notifier.tplData`,
}
ls, err := ar.toLabels(metric, nil)
if err == nil || !strings.Contains(err.Error(), "error evaluating template") {
t.Fatalf("unexpected error %q", err.Error())
if err != nil {
t.Fatalf("unexpected error: %s", err)
}
if !reflect.DeepEqual(ls.origin, expectedOriginLabels) {
@@ -1431,50 +1429,3 @@ func TestAlertingRuleExec_Partial(t *testing.T) {
t.Fatalf("unexpected error: %s", err)
}
}
func TestAlertingRule_QueryTemplateInLabels(t *testing.T) {
fq := &datasource.FakeQuerier{}
fakeGroup := Group{
Name: "TestQueryTemplateInLabels",
}
ar := &AlertingRule{
Name: "test_alert",
Labels: map[string]string{
"suppress_for_mass_alert": `{{ if (printf "ALERTS{alertname='SomeAlert', alertstate='firing', device='%s'} == 1" $labels.device | query) }}true{{ else }}false{{ end }}`,
},
Annotations: map[string]string{
"summary": "Test alert with query template in labels",
},
alerts: make(map[uint64]*notifier.Alert),
}
ar.GroupID = fakeGroup.GetID()
ar.q = fq
ar.state = &ruleState{
entries: make([]StateEntry, 10),
}
// Add a metric that should trigger the alert
fq.Add(metricWithValueAndLabels(t, 1, "device", "sda1"))
ts := time.Now()
_, err := ar.exec(context.TODO(), ts, 0)
if err != nil {
t.Fatalf("unexpected error with query template in labels: %s", err)
}
// Verify that the alert was created and the query template was executed
if len(ar.alerts) != 1 {
t.Fatalf("expected 1 alert, got %d", len(ar.alerts))
}
alert := ar.GetAlerts()[0]
suppressLabel, exists := alert.Labels["suppress_for_mass_alert"]
if !exists {
t.Fatalf("expected 'suppress_for_mass_alert' label to exist")
}
// The query template should have been executed (even if it returns false due to mock data)
if suppressLabel != "true" && suppressLabel != "false" {
t.Fatalf("expected 'suppress_for_mass_alert' label to be 'true' or 'false', got '%s'", suppressLabel)
}
}

View File

@@ -2,6 +2,7 @@ package rule
import (
"context"
"encoding/json"
"errors"
"flag"
"fmt"
@@ -24,10 +25,6 @@ import (
)
var (
ruleResultsLimit = flag.Int("rule.resultsLimit", 0, "Limits the number of alerts or recording results a single rule can produce. "+
"Can be overridden by the limit option under group if specified. "+
"If exceeded, the rule will be marked with an error and all its results will be discarded. "+
"0 means no limit.")
ruleUpdateEntriesLimit = flag.Int("rule.updateEntriesLimit", 20, "Defines the max number of rule's state updates stored in-memory. "+
"Rule's updates are available on rule's Details page and are used for debugging purposes. The number of stored updates can be overridden per rule via update_entries_limit param.")
resendDelay = flag.Duration("rule.resendDelay", 0, "MiniMum amount of time to wait before resending an alert to notifier.")
@@ -39,8 +36,6 @@ var (
disableAlertGroupLabel = flag.Bool("disableAlertgroupLabel", false, "Whether to disable adding group's Name as label to generated alerts and time series.")
remoteReadLookBack = flag.Duration("remoteRead.lookback", time.Hour, "Lookback defines how far to look into past for alerts timeseries. "+
"For example, if lookback=1h then range from now() to now()-1h will be scanned.")
maxStartDelay = flag.Duration("group.maxStartDelay", 5*time.Minute, "Defines the max delay before starting the group evaluation. Group's start is artificially delayed for random duration on interval"+
" [0..min(--group.maxStartDelay, group.interval)]. This helps smoothing out the load on the configured datasource, so evaluations aren't executed too close to each other.")
)
// Group is an entity for grouping rules
@@ -117,6 +112,7 @@ func NewGroup(cfg config.Group, qb datasource.QuerierBuilder, defaultInterval ti
Name: cfg.Name,
File: cfg.File,
Interval: cfg.Interval.Duration(),
Limit: cfg.Limit,
Concurrency: cfg.Concurrency,
checksum: cfg.Checksum,
Params: cfg.Params,
@@ -133,11 +129,6 @@ func NewGroup(cfg config.Group, qb datasource.QuerierBuilder, defaultInterval ti
if g.Interval == 0 {
g.Interval = defaultInterval
}
if cfg.Limit != nil {
g.Limit = *cfg.Limit
} else {
g.Limit = *ruleResultsLimit
}
if g.Concurrency < 1 {
g.Concurrency = 1
}
@@ -298,7 +289,7 @@ func (g *Group) InterruptEval() {
}
}
// Close stops the group and its rules, unregisters group metrics
// Close stops the group and it's rules, unregisters group metrics
func (g *Group) Close() {
if g.doneCh == nil {
return
@@ -307,6 +298,10 @@ func (g *Group) Close() {
g.InterruptEval()
<-g.finishedCh
g.closeGroupMetrics()
}
func (g *Group) closeGroupMetrics() {
metrics.UnregisterSet(g.metrics.set, true)
}
@@ -332,13 +327,13 @@ func (g *Group) Init() {
}
// Start starts group's evaluation
func (g *Group) Start(ctx context.Context, rw remotewrite.RWClient, rr datasource.QuerierBuilder) {
func (g *Group) Start(ctx context.Context, nts func() []notifier.Notifier, rw remotewrite.RWClient, rr datasource.QuerierBuilder) {
defer func() { close(g.finishedCh) }()
evalTS := time.Now()
// sleep random duration to spread group rules evaluation
// over maxStartDelay to reduce the load on datasource.
// over time in order to reduce load on datasource.
if !SkipRandSleepOnGroupStart {
sleepBeforeStart := g.delayBeforeStart(evalTS, *maxStartDelay)
sleepBeforeStart := delayBeforeStart(evalTS, g.GetID(), g.Interval, g.EvalOffset)
g.infof("will start in %v", sleepBeforeStart)
sleepTimer := time.NewTimer(sleepBeforeStart)
@@ -370,12 +365,13 @@ func (g *Group) Start(ctx context.Context, rw remotewrite.RWClient, rr datasourc
e := &executor{
Rw: rw,
Notifiers: nts,
notifierHeaders: g.NotifierHeaders,
}
g.infof("started")
eval := func(ctx context.Context, ts time.Time) time.Time {
eval := func(ctx context.Context, ts time.Time) {
g.metrics.iterationTotal.Inc()
start := time.Now()
@@ -383,7 +379,7 @@ func (g *Group) Start(ctx context.Context, rw remotewrite.RWClient, rr datasourc
if len(g.Rules) < 1 {
g.metrics.iterationDuration.UpdateDuration(start)
g.LastEvaluation = start
return ts
return
}
resolveDuration := getResolveDuration(g.Interval, *resendDelay, *maxResolveDuration)
@@ -397,7 +393,6 @@ func (g *Group) Start(ctx context.Context, rw remotewrite.RWClient, rr datasourc
}
g.metrics.iterationDuration.UpdateDuration(start)
g.LastEvaluation = start
return ts
}
evalCtx, cancel := context.WithCancel(ctx)
@@ -406,7 +401,7 @@ func (g *Group) Start(ctx context.Context, rw remotewrite.RWClient, rr datasourc
g.mu.Unlock()
defer g.evalCancel()
realEvalTS := eval(evalCtx, evalTS)
eval(evalCtx, evalTS)
t := time.NewTicker(g.Interval)
defer t.Stop()
@@ -414,7 +409,7 @@ func (g *Group) Start(ctx context.Context, rw remotewrite.RWClient, rr datasourc
// restore the rules state after the first evaluation
// so only active alerts can be restored.
if rr != nil {
err := g.restore(ctx, rr, realEvalTS, *remoteReadLookBack)
err := g.restore(ctx, rr, evalTS, *remoteReadLookBack)
if err != nil {
logger.Errorf("error while restoring ruleState for group %q: %s", g.Name, err)
}
@@ -477,31 +472,32 @@ func (g *Group) UpdateWith(newGroup *Group) {
g.updateCh <- newGroup
}
// delayBeforeStart returns duration for delaying the evaluation start
// based on given ts and Group settings. The delay can't exceed maxDelay.
// maxDelay is ignored if g.EvalOffset != nil.
//
// Delaying is important to smooth out the load on the datasource when all groups start at the same time.
// delayBeforeStart calculates delay based on Group ID, so all groups will start at different moments of time.
func (g *Group) delayBeforeStart(ts time.Time, maxDelay time.Duration) time.Duration {
if g.EvalOffset != nil {
// if offset is specified, ignore the maxDelay and return a duration aligned with offset
currentOffsetPoint := ts.Truncate(g.Interval).Add(*g.EvalOffset)
// DeepCopy returns a deep copy of group
func (g *Group) DeepCopy() *Group {
g.mu.RLock()
data, _ := json.Marshal(g)
g.mu.RUnlock()
newG := Group{}
_ = json.Unmarshal(data, &newG)
newG.Rules = g.Rules
newG.id = g.id
return &newG
}
// if offset is specified, delayBeforeStart returns a duration to help aligning timestamp with offset;
// otherwise, it returns a random duration between [0..interval] based on group key.
func delayBeforeStart(ts time.Time, key uint64, interval time.Duration, offset *time.Duration) time.Duration {
if offset != nil {
currentOffsetPoint := ts.Truncate(interval).Add(*offset)
if currentOffsetPoint.Before(ts) {
// wait until the next offset point
return currentOffsetPoint.Add(g.Interval).Sub(ts)
return currentOffsetPoint.Add(interval).Sub(ts)
}
return currentOffsetPoint.Sub(ts)
}
// otherwise, return a random duration between [0..min(interval, maxDelay)] based on group ID
interval := g.Interval
if interval > maxDelay {
// artificially limit interval, so groups with big intervals could start sooner.
interval = maxDelay
}
var randSleep time.Duration
randSleep = time.Duration(float64(interval) * (float64(g.GetID()) / (1 << 64)))
randSleep = time.Duration(float64(interval) * (float64(key) / (1 << 64)))
sleepOffset := time.Duration(ts.UnixNano() % interval.Nanoseconds())
if randSleep < sleepOffset {
randSleep += interval
@@ -563,13 +559,15 @@ func (g *Group) Replay(start, end time.Time, rw remotewrite.RWClient, maxDataPoi
if !disableProgressBar {
bar = pb.StartNew(iterations * len(g.Rules))
}
for i := range g.Rules {
rule := g.Rules[i]
for _, r := range g.Rules {
sem <- struct{}{}
wg.Go(func() {
res <- replayRuleRange(rule, ri, bar, rw, replayRuleRetryAttempts, ruleEvaluationConcurrency)
wg.Add(1)
go func(r Rule, ri rangeIterator) {
// pass ri as a copy, so it can be modified within the replayRuleRange
res <- replayRuleRange(r, ri, bar, rw, replayRuleRetryAttempts, ruleEvaluationConcurrency)
<-sem
})
wg.Done()
}(r, ri)
}
wg.Wait()
@@ -599,10 +597,10 @@ func replayRuleRange(r Rule, ri rangeIterator, bar *pb.ProgressBar, rw remotewri
res := make(chan int, int(ri.end.Sub(ri.start)/ri.step)+1)
for ri.next() {
sem <- struct{}{}
start := ri.s
end := ri.e
wg.Go(func() {
n, err := replayRule(r, start, end, rw, replayRuleRetryAttempts)
wg.Add(1)
go func(s, e time.Time) {
n, err := replayRule(r, s, e, rw, replayRuleRetryAttempts)
if err != nil {
logger.Fatalf("rule %q: %s", r, err)
}
@@ -611,7 +609,8 @@ func replayRuleRange(r Rule, ri rangeIterator, bar *pb.ProgressBar, rw remotewri
}
res <- n
<-sem
})
wg.Done()
}(ri.s, ri.e)
}
wg.Wait()
close(res)
@@ -625,9 +624,10 @@ func replayRuleRange(r Rule, ri rangeIterator, bar *pb.ProgressBar, rw remotewri
}
// ExecOnce evaluates all the rules under group for once with given timestamp.
func (g *Group) ExecOnce(ctx context.Context, rw remotewrite.RWClient, evalTS time.Time) chan error {
func (g *Group) ExecOnce(ctx context.Context, nts func() []notifier.Notifier, rw remotewrite.RWClient, evalTS time.Time) chan error {
e := &executor{
Rw: rw,
Notifiers: nts,
notifierHeaders: g.NotifierHeaders,
}
if len(g.Rules) < 1 {
@@ -702,6 +702,7 @@ func (g *Group) getEvalDelay() time.Duration {
// executor contains group's notify and rw configs
type executor struct {
Notifiers func() []notifier.Notifier
notifierHeaders map[string]string
Rw remotewrite.RWClient
@@ -722,13 +723,14 @@ func (e *executor) execConcurrently(ctx context.Context, rules []Rule, ts time.T
sem := make(chan struct{}, concurrency)
go func() {
wg := sync.WaitGroup{}
for i := range rules {
rule := rules[i]
for _, r := range rules {
sem <- struct{}{}
wg.Go(func() {
res <- e.exec(ctx, rule, ts, resolveDuration, limit)
wg.Add(1)
go func(r Rule) {
res <- e.exec(ctx, r, ts, resolveDuration, limit)
<-sem
})
wg.Done()
}(r)
}
wg.Wait()
close(res)
@@ -757,7 +759,6 @@ func (e *executor) exec(ctx context.Context, r Rule, ts time.Time, resolveDurati
return fmt.Errorf("rule %q: failed to execute: %w", r, err)
}
var errG vmalertutil.ErrGroup
if e.Rw != nil {
pushToRW := func(tss []prompb.TimeSeries) error {
var lastErr error
@@ -769,26 +770,31 @@ func (e *executor) exec(ctx context.Context, r Rule, ts time.Time, resolveDurati
return lastErr
}
if err := pushToRW(tss); err != nil {
errG.Add(err)
return err
}
}
ar, ok := r.(*AlertingRule)
if !ok {
return errG.Err()
return nil
}
alerts := ar.alertsToSend(resolveDuration, *resendDelay)
if len(alerts) < 1 {
return errG.Err()
return nil
}
notifierErr := notifier.Send(ctx, alerts, e.notifierHeaders)
for err := range notifierErr {
if err != nil {
errG.Add(fmt.Errorf("rule %q: notifier failure: %w", r, err))
}
wg := sync.WaitGroup{}
errGr := new(vmalertutil.ErrGroup)
for _, nt := range e.Notifiers() {
wg.Add(1)
go func(nt notifier.Notifier) {
if err := nt.Send(ctx, alerts, e.notifierHeaders); err != nil {
errGr.Add(fmt.Errorf("rule %q: failed to send alerts to addr %q: %w", r, nt.Addr(), err))
}
wg.Done()
}(nt)
}
return errG.Err()
wg.Wait()
return errGr.Err()
}

View File

@@ -262,7 +262,7 @@ func TestUpdateDuringRandSleep(t *testing.T) {
updateCh: make(chan *Group),
}
g.Init()
go g.Start(context.Background(), nil, nil)
go g.Start(context.Background(), nil, nil, nil)
rule1 := AlertingRule{
Name: "jobDown",
@@ -346,8 +346,7 @@ func TestGroupStart(t *testing.T) {
}
fs := &datasource.FakeQuerier{}
fn, cleanup := notifier.InitFakeNotifier()
defer cleanup()
fn := &notifier.FakeNotifier{}
const evalInterval = time.Millisecond
g := NewGroup(groups[0], fs, evalInterval, map[string]string{"cluster": "east-1"})
@@ -396,7 +395,7 @@ func TestGroupStart(t *testing.T) {
fs.Add(m2)
g.Init()
go func() {
g.Start(context.Background(), nil, fs)
g.Start(context.Background(), func() []notifier.Notifier { return []notifier.Notifier{fn} }, nil, fs)
close(finished)
}()
@@ -473,10 +472,15 @@ func TestFaultyNotifier(t *testing.T) {
r := newTestAlertingRule("instant", 0)
r.q = fq
fn, cleanup := notifier.InitFakeNotifier()
defer cleanup()
e := &executor{}
fn := &notifier.FakeNotifier{}
e := &executor{
Notifiers: func() []notifier.Notifier {
return []notifier.Notifier{
&notifier.FaultyNotifier{},
fn,
}
},
}
delay := 5 * time.Second
ctx, cancel := context.WithTimeout(context.Background(), delay)
defer cancel()
@@ -549,7 +553,7 @@ func TestCloseWithEvalInterruption(t *testing.T) {
g := NewGroup(groups[0], fq, evalInterval, nil)
g.Init()
go g.Start(context.Background(), nil, nil)
go g.Start(context.Background(), nil, nil, nil)
time.Sleep(evalInterval * 20)
@@ -567,10 +571,9 @@ func TestCloseWithEvalInterruption(t *testing.T) {
func TestGroupStartDelay(t *testing.T) {
g := &Group{}
g.id = uint64(math.MaxUint64 / 10)
// interval of 5min and key generate a static delay of 30s
g.Interval = time.Minute * 5
maxDelay := time.Minute * 5
key := uint64(math.MaxUint64 / 10)
f := func(atS, expS string) {
t.Helper()
@@ -582,7 +585,7 @@ func TestGroupStartDelay(t *testing.T) {
if err != nil {
t.Fatal(err)
}
delay := g.delayBeforeStart(at, maxDelay)
delay := delayBeforeStart(at, key, g.Interval, g.EvalOffset)
gotStart := at.Add(delay)
if expTS != gotStart {
t.Fatalf("expected to get %v; got %v instead", expTS, gotStart)
@@ -603,15 +606,6 @@ func TestGroupStartDelay(t *testing.T) {
f("2023-01-01T00:01:00.000+00:00", "2023-01-01T00:03:00.000+00:00")
f("2023-01-01T00:03:30.000+00:00", "2023-01-01T00:08:00.000+00:00")
f("2023-01-01T00:08:00.000+00:00", "2023-01-01T00:08:00.000+00:00")
maxDelay = time.Minute * 1
g.EvalOffset = nil
// test group with maxDelay, and offset disabled
f("2023-01-01T00:00:00.000+00:00", "2023-01-01T00:00:06.000+00:00")
f("2023-01-01T00:00:01.000+00:00", "2023-01-01T00:00:06.000+00:00")
f("2023-01-01T00:00:06.100+00:00", "2023-01-01T00:01:06.000+00:00")
f("2023-01-01T00:00:11.000+00:00", "2023-01-01T00:01:06.000+00:00")
}
func TestGetPrometheusReqTimestamp(t *testing.T) {

View File

@@ -2,7 +2,6 @@ package rule
import (
"context"
"errors"
"fmt"
"strings"
"time"
@@ -82,37 +81,6 @@ func (rr *RecordingRule) ID() uint64 {
return rr.RuleID
}
// ToAPI returns ApiRule representation of rr
func (rr *RecordingRule) ToAPI() ApiRule {
state := rr.state
lastState := state.getLast()
r := ApiRule{
Type: TypeRecording,
DatasourceType: rr.Type.String(),
Name: rr.Name,
Query: rr.Expr,
Labels: rr.Labels,
LastEvaluation: lastState.Time,
EvaluationTime: lastState.Duration.Seconds(),
Health: "ok",
LastSamples: lastState.Samples,
LastSeriesFetched: lastState.SeriesFetched,
MaxUpdates: state.size(),
Updates: state.getAll(),
// encode as strings to avoid rounding
ID: fmt.Sprintf("%d", rr.ID()),
GroupID: fmt.Sprintf("%d", rr.GroupID),
GroupName: rr.GroupName,
File: rr.File,
}
if lastState.Err != nil {
r.LastError = lastState.Err.Error()
r.Health = "err"
}
return r
}
// NewRecordingRule creates a new RecordingRule
func NewRecordingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rule) *RecordingRule {
debug := group.Debug
@@ -198,7 +166,7 @@ func (rr *RecordingRule) exec(ctx context.Context, ts time.Time, limit int) ([]p
defer func() {
rr.state.add(curState)
if curState.Err != nil && !errors.Is(curState.Err, context.Canceled) {
if curState.Err != nil {
rr.metrics.errors.Inc()
}
}()
@@ -237,8 +205,7 @@ func (rr *RecordingRule) exec(ctx context.Context, ts time.Time, limit int) ([]p
Labels: stringToLabels(k),
Samples: []prompb.Sample{
{Value: decimal.StaleNaN, Timestamp: ts.UnixNano() / 1e6},
},
})
}})
}
rr.lastEvaluation = curEvaluation
return tss, nil
@@ -293,11 +260,6 @@ func (rr *RecordingRule) toTimeSeries(m datasource.Metric) prompb.TimeSeries {
}
// add extra labels configured by user
for k := range rr.Labels {
// do not add label with empty value, since it has no meaning.
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9984
if rr.Labels[k] == "" {
continue
}
existingLabel := promrelabel.GetLabelByName(m.Labels, k)
if existingLabel != nil { // there is a conflict between extra and existing label
if existingLabel.Value == rr.Labels[k] {

View File

@@ -21,8 +21,6 @@ type Rule interface {
// ID returns unique ID that may be used for
// identifying this Rule among others.
ID() uint64
// ToAPI returns ApiRule representation of Rule
ToAPI() ApiRule
// exec executes the rule with given context at the given timestamp and limit.
// returns an err if number of resulting time series exceeds the limit.
exec(ctx context.Context, ts time.Time, limit int) ([]prompb.TimeSeries, error)
@@ -70,6 +68,39 @@ type StateEntry struct {
Curl string `json:"curl"`
}
// GetLastEntry returns latest stateEntry of rule
func GetLastEntry(r Rule) StateEntry {
if rule, ok := r.(*AlertingRule); ok {
return rule.state.getLast()
}
if rule, ok := r.(*RecordingRule); ok {
return rule.state.getLast()
}
return StateEntry{}
}
// GetRuleStateSize returns size of rule stateEntry
func GetRuleStateSize(r Rule) int {
if rule, ok := r.(*AlertingRule); ok {
return rule.state.size()
}
if rule, ok := r.(*RecordingRule); ok {
return rule.state.size()
}
return 0
}
// GetAllRuleState returns rule entire stateEntries
func GetAllRuleState(r Rule) []StateEntry {
if rule, ok := r.(*AlertingRule); ok {
return rule.state.getAll()
}
if rule, ok := r.(*RecordingRule); ok {
return rule.state.getAll()
}
return []StateEntry{}
}
func (s *ruleState) size() int {
s.RLock()
defer s.RUnlock()

View File

@@ -65,15 +65,17 @@ func TestRule_stateConcurrent(_ *testing.T) {
r := &AlertingRule{state: &ruleState{entries: make([]StateEntry, 20)}}
const workers = 50
const iterations = 100
var wg sync.WaitGroup
for range workers {
wg.Go(func() {
wg := sync.WaitGroup{}
wg.Add(workers)
for i := 0; i < workers; i++ {
go func() {
defer wg.Done()
for i := 0; i < iterations; i++ {
r.state.add(StateEntry{At: time.Now()})
r.state.getAll()
r.state.getLast()
}
})
}()
}
wg.Wait()
}

View File

@@ -19,13 +19,13 @@ func CompareRules(t *testing.T, a, b Rule) error {
case *AlertingRule:
br, ok := b.(*AlertingRule)
if !ok {
return fmt.Errorf("rule %d supposed to be of type AlertingRule", b.ID())
return fmt.Errorf("rule %q supposed to be of type AlertingRule", b.ID())
}
return compareAlertingRules(t, v, br)
case *RecordingRule:
br, ok := b.(*RecordingRule)
if !ok {
return fmt.Errorf("rule %d supposed to be of type RecordingRule", b.ID())
return fmt.Errorf("rule %q supposed to be of type RecordingRule", b.ID())
}
return compareRecordingRules(t, v, br)
default:

View File

@@ -34,12 +34,11 @@ body {
padding-top: 4.5rem;
}
.vm-group {
.group-items {
cursor: pointer;
padding: 5px;
margin-top: 5px;
position: relative;
display: none;
}
.btn svg, .dropdown-item svg {
@@ -56,22 +55,14 @@ body {
height: 38px;
}
.vm-item:not(.vm-found) {
display: none;
.group-items:not(:has(.sub-item:not(.d-none))) {
display: none !important;
}
.vm-group:has(.vm-item:is(.vm-found)), .vm-group:is(.vm-found) {
display: flex;
}
.vm-group:hover {
.group-items:hover {
background-color: #f8f9fa!important;
}
.vm-group:is(.vm-found) .vm-item {
display: table-row;
}
.table {
table-layout: fixed;
}
@@ -120,9 +111,3 @@ textarea.curl-area {
.w-60 {
width: 60%;
}
.annotations {
white-space: pre-wrap;
color: gray;
word-wrap: break-word;
}

View File

@@ -65,34 +65,32 @@ function getParamURL(key) {
return url.searchParams.get(key)
}
function matchText(search, item) {
const text = item.innerText.toLowerCase();
return text.indexOf(search) >= 0;
}
function filterRules(searchPhrase) {
document.querySelectorAll('.vm-group').forEach((group) => {
if (!searchPhrase) {
group.classList.add('vm-found');
return;
}
for (const item of group.querySelectorAll('.vm-group-search')) {
if (matchText(searchPhrase, item)) {
group.classList.add('vm-found');
return;
document.querySelectorAll('.sub-items').forEach((rules) => {
let found = false;
rules.querySelectorAll('.sub-item').forEach((rule) => {
if (searchPhrase) {
const ruleName = rule.innerText.toLowerCase();
const matches = []
const hasValue = ruleName.indexOf(searchPhrase) >= 0;
rule.querySelectorAll('.label').forEach((label) => {
const text = label.innerText.toLowerCase();
if (text.indexOf(searchPhrase) >= 0) {
matches.push(text);
}
});
if (!matches.length && !hasValue) {
rule.classList.add('d-none');
return;
}
}
}
group.classList.remove('vm-found');
for (const item of group.querySelectorAll('.vm-item')) {
if (matchText(searchPhrase, item)) {
item.classList.add('vm-found');
continue;
}
if (Array.from(item.querySelectorAll('.label')).find(l => matchText(searchPhrase, l))) {
item.classList.add('vm-found');
continue;
}
item.classList.remove('vm-found');
rule.classList.remove('d-none');
found = true;
});
if (found && searchPhrase || !searchPhrase) {
rules.classList.remove('d-none');
} else {
rules.classList.add('d-none');
}
});
}

View File

@@ -485,12 +485,6 @@ func templateFuncs() textTpl.FuncMap {
/* Helpers */
// now returns the Unix timestamp in seconds at the time of the template evaluation.
// For example: {{ (now | toTime).Sub $activeAt }} will return the duration the alert has been active.
"now": func() float64 {
return float64(time.Now().Unix())
},
// Converts a list of objects to a map with keys arg0, arg1 etc.
// This is intended to allow multiple arguments to be passed to templates.
"args": func(args ...any) map[string]any {

View File

@@ -45,7 +45,7 @@ func (eg *ErrGroup) Error() string {
return ""
}
var b strings.Builder
fmt.Fprintf(&b, "errors(%d): \n", len(eg.errs))
fmt.Fprintf(&b, "errors(%d): ", len(eg.errs))
for i, err := range eg.errs {
b.WriteString(err.Error())
if i != len(eg.errs)-1 {

View File

@@ -30,8 +30,8 @@ func TestErrGroup(t *testing.T) {
}
f(nil, "")
f([]error{errors.New("timeout")}, "errors(1): \ntimeout")
f([]error{errors.New("timeout"), errors.New("deadline")}, "errors(2): \ntimeout\ndeadline")
f([]error{errors.New("timeout")}, "errors(1): timeout")
f([]error{errors.New("timeout"), errors.New("deadline")}, "errors(2): timeout\ndeadline")
}
// TestErrGroupConcurrent supposed to test concurrent

View File

@@ -29,9 +29,7 @@ var (
{"api/v1/rules", "list all loaded groups and rules"},
{"api/v1/alerts", "list all active alerts"},
{"api/v1/notifiers", "list all notifiers"},
{fmt.Sprintf("api/v1/alert?%s=<int>&%s=<int>", rule.ParamGroupID, rule.ParamAlertID), "get alert status by group and alert ID"},
{fmt.Sprintf("api/v1/rule?%s=<int>&%s=<int>", rule.ParamGroupID, rule.ParamRuleID), "get rule status by group and rule ID"},
{fmt.Sprintf("api/v1/group?%s=<int>", rule.ParamGroupID), "get group status by group ID"},
{fmt.Sprintf("api/v1/alert?%s=<int>&%s=<int>", paramGroupID, paramAlertID), "get alert status by group and alert ID"},
}
systemLinks = [][2]string{
{"vmalert/groups", "UI"},
@@ -47,8 +45,8 @@ var (
{Name: "Docs", URL: "https://docs.victoriametrics.com/victoriametrics/vmalert/"},
}
ruleTypeMap = map[string]string{
"alert": rule.TypeAlerting,
"record": rule.TypeRecording,
"alert": ruleTypeAlerting,
"record": ruleTypeRecording,
}
)
@@ -114,7 +112,7 @@ func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
case "/rules":
// Grafana makes an extra request to `/rules`
// handler in addition to `/api/v1/rules` calls in alerts UI
var data []*rule.ApiGroup
var data []*apiGroup
rf, err := newRulesFilter(r)
if err != nil {
httpserver.Errorf(w, r, "%s", err)
@@ -180,14 +178,14 @@ func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
w.Write(data)
return true
case "/vmalert/api/v1/rule", "/api/v1/rule":
apiRule, err := rh.getRule(r)
rule, err := rh.getRule(r)
if err != nil {
httpserver.Errorf(w, r, "%s", err)
return true
}
rwu := rule.ApiRuleWithUpdates{
ApiRule: apiRule,
StateUpdates: apiRule.Updates,
rwu := apiRuleWithUpdates{
apiRule: rule,
StateUpdates: rule.Updates,
}
data, err := json.Marshal(rwu)
if err != nil {
@@ -197,20 +195,6 @@ func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
w.Header().Set("Content-Type", "application/json")
w.Write(data)
return true
case "/vmalert/api/v1/group", "/api/v1/group":
group, err := rh.getGroup(r)
if err != nil {
httpserver.Errorf(w, r, "%s", err)
return true
}
data, err := json.Marshal(group)
if err != nil {
httpserver.Errorf(w, r, "failed to marshal group: %s", err)
return true
}
w.Header().Set("Content-Type", "application/json")
w.Write(data)
return true
case "/-/reload":
if !httpserver.CheckAuthFlag(w, r, reloadAuthKey) {
return true
@@ -225,42 +209,30 @@ func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
}
}
func (rh *requestHandler) getGroup(r *http.Request) (*rule.ApiGroup, error) {
groupID, err := strconv.ParseUint(r.FormValue(rule.ParamGroupID), 10, 64)
func (rh *requestHandler) getRule(r *http.Request) (apiRule, error) {
groupID, err := strconv.ParseUint(r.FormValue(paramGroupID), 10, 64)
if err != nil {
return nil, fmt.Errorf("failed to read %q param: %w", rule.ParamGroupID, err)
return apiRule{}, fmt.Errorf("failed to read %q param: %w", paramGroupID, err)
}
obj, err := rh.m.groupAPI(groupID)
ruleID, err := strconv.ParseUint(r.FormValue(paramRuleID), 10, 64)
if err != nil {
return nil, errResponse(err, http.StatusNotFound)
}
return obj, nil
}
func (rh *requestHandler) getRule(r *http.Request) (rule.ApiRule, error) {
groupID, err := strconv.ParseUint(r.FormValue(rule.ParamGroupID), 10, 64)
if err != nil {
return rule.ApiRule{}, fmt.Errorf("failed to read %q param: %w", rule.ParamGroupID, err)
}
ruleID, err := strconv.ParseUint(r.FormValue(rule.ParamRuleID), 10, 64)
if err != nil {
return rule.ApiRule{}, fmt.Errorf("failed to read %q param: %w", rule.ParamRuleID, err)
return apiRule{}, fmt.Errorf("failed to read %q param: %w", paramRuleID, err)
}
obj, err := rh.m.ruleAPI(groupID, ruleID)
if err != nil {
return rule.ApiRule{}, errResponse(err, http.StatusNotFound)
return apiRule{}, errResponse(err, http.StatusNotFound)
}
return obj, nil
}
func (rh *requestHandler) getAlert(r *http.Request) (*rule.ApiAlert, error) {
groupID, err := strconv.ParseUint(r.FormValue(rule.ParamGroupID), 10, 64)
func (rh *requestHandler) getAlert(r *http.Request) (*apiAlert, error) {
groupID, err := strconv.ParseUint(r.FormValue(paramGroupID), 10, 64)
if err != nil {
return nil, fmt.Errorf("failed to read %q param: %w", rule.ParamGroupID, err)
return nil, fmt.Errorf("failed to read %q param: %w", paramGroupID, err)
}
alertID, err := strconv.ParseUint(r.FormValue(rule.ParamAlertID), 10, 64)
alertID, err := strconv.ParseUint(r.FormValue(paramAlertID), 10, 64)
if err != nil {
return nil, fmt.Errorf("failed to read %q param: %w", rule.ParamAlertID, err)
return nil, fmt.Errorf("failed to read %q param: %w", paramAlertID, err)
}
a, err := rh.m.alertAPI(groupID, alertID)
if err != nil {
@@ -272,7 +244,7 @@ func (rh *requestHandler) getAlert(r *http.Request) (*rule.ApiAlert, error) {
type listGroupsResponse struct {
Status string `json:"status"`
Data struct {
Groups []*rule.ApiGroup `json:"groups"`
Groups []*apiGroup `json:"groups"`
} `json:"data"`
}
@@ -338,19 +310,19 @@ func (rf *rulesFilter) matchesGroup(group *rule.Group) bool {
return true
}
func (rh *requestHandler) groups(rf *rulesFilter) []*rule.ApiGroup {
func (rh *requestHandler) groups(rf *rulesFilter) []*apiGroup {
rh.m.groupsMu.RLock()
defer rh.m.groupsMu.RUnlock()
groups := make([]*rule.ApiGroup, 0)
groups := make([]*apiGroup, 0)
for _, group := range rh.m.groups {
if !rf.matchesGroup(group) {
continue
}
g := group.ToAPI()
g := groupToAPI(group)
// the returned list should always be non-nil
// https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4221
filteredRules := make([]rule.ApiRule, 0)
filteredRules := make([]apiRule, 0)
for _, rule := range g.Rules {
if rf.ruleType != "" && rf.ruleType != rule.Type {
continue
@@ -378,7 +350,7 @@ func (rh *requestHandler) groups(rf *rulesFilter) []*rule.ApiGroup {
groups = append(groups, g)
}
// sort list of groups for deterministic output
slices.SortFunc(groups, func(a, b *rule.ApiGroup) int {
slices.SortFunc(groups, func(a, b *apiGroup) int {
if a.Name != b.Name {
return strings.Compare(a.Name, b.Name)
}
@@ -403,32 +375,32 @@ func (rh *requestHandler) listGroups(rf *rulesFilter) ([]byte, error) {
type listAlertsResponse struct {
Status string `json:"status"`
Data struct {
Alerts []*rule.ApiAlert `json:"alerts"`
Alerts []*apiAlert `json:"alerts"`
} `json:"data"`
}
func (rh *requestHandler) groupAlerts() []rule.GroupAlerts {
func (rh *requestHandler) groupAlerts() []groupAlerts {
rh.m.groupsMu.RLock()
defer rh.m.groupsMu.RUnlock()
var gAlerts []rule.GroupAlerts
for _, group := range rh.m.groups {
var alerts []*rule.ApiAlert
g := group.ToAPI()
var gAlerts []groupAlerts
for _, g := range rh.m.groups {
var alerts []*apiAlert
for _, r := range g.Rules {
if r.Type != rule.TypeAlerting {
a, ok := r.(*rule.AlertingRule)
if !ok {
continue
}
alerts = append(alerts, r.Alerts...)
alerts = append(alerts, ruleToAPIAlert(a)...)
}
if len(alerts) > 0 {
gAlerts = append(gAlerts, rule.GroupAlerts{
Group: g,
gAlerts = append(gAlerts, groupAlerts{
Group: groupToAPI(g),
Alerts: alerts,
})
}
}
slices.SortFunc(gAlerts, func(a, b rule.GroupAlerts) int {
slices.SortFunc(gAlerts, func(a, b groupAlerts) int {
return strings.Compare(a.Group.Name, b.Group.Name)
})
return gAlerts
@@ -439,22 +411,22 @@ func (rh *requestHandler) listAlerts(rf *rulesFilter) ([]byte, error) {
defer rh.m.groupsMu.RUnlock()
lr := listAlertsResponse{Status: "success"}
lr.Data.Alerts = make([]*rule.ApiAlert, 0)
lr.Data.Alerts = make([]*apiAlert, 0)
for _, group := range rh.m.groups {
if !rf.matchesGroup(group) {
continue
}
g := group.ToAPI()
for _, r := range g.Rules {
if r.Type != rule.TypeAlerting {
for _, r := range group.Rules {
a, ok := r.(*rule.AlertingRule)
if !ok {
continue
}
lr.Data.Alerts = append(lr.Data.Alerts, r.Alerts...)
lr.Data.Alerts = append(lr.Data.Alerts, ruleToAPIAlert(a)...)
}
}
// sort list of alerts for deterministic output
slices.SortFunc(lr.Data.Alerts, func(a, b *rule.ApiAlert) int {
slices.SortFunc(lr.Data.Alerts, func(a, b *apiAlert) int {
return strings.Compare(a.ID, b.ID)
})
@@ -471,7 +443,7 @@ func (rh *requestHandler) listAlerts(rf *rulesFilter) ([]byte, error) {
type listNotifiersResponse struct {
Status string `json:"status"`
Data struct {
Notifiers []*notifier.ApiNotifier `json:"notifiers"`
Notifiers []*apiNotifier `json:"notifiers"`
} `json:"data"`
}
@@ -479,20 +451,19 @@ func (rh *requestHandler) listNotifiers() ([]byte, error) {
targets := notifier.GetTargets()
lr := listNotifiersResponse{Status: "success"}
lr.Data.Notifiers = make([]*notifier.ApiNotifier, 0)
lr.Data.Notifiers = make([]*apiNotifier, 0)
for protoName, protoTargets := range targets {
nr := &notifier.ApiNotifier{
Kind: protoName,
Targets: make([]*notifier.ApiTarget, 0, len(protoTargets)),
notifier := &apiNotifier{
Kind: string(protoName),
Targets: make([]*apiTarget, 0, len(protoTargets)),
}
for _, target := range protoTargets {
nr.Targets = append(nr.Targets, &notifier.ApiTarget{
Address: target.Addr(),
Labels: target.Labels.ToMap(),
LastError: target.LastError(),
notifier.Targets = append(notifier.Targets, &apiTarget{
Address: target.Addr(),
Labels: target.Labels.ToMap(),
})
}
lr.Data.Notifiers = append(lr.Data.Notifiers, nr)
lr.Data.Notifiers = append(lr.Data.Notifiers, notifier)
}
b, err := json.Marshal(lr)

View File

@@ -8,8 +8,6 @@
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/tpl"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/vmalertutil"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/rule"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
) %}
{% func Controls(prefix, currentIcon, currentText string, icons, filters map[string]string, search bool) %}
@@ -79,8 +77,6 @@
{% func Welcome(r *http.Request) %}
{%= tpl.Header(r, navItems, "vmalert", getLastConfigError()) %}
<p>
Version {%s buildinfo.Version %} <br>
API:<br>
{% for _, p := range apiLinks %}
{%code p, doc := p[0], p[1] %}
@@ -97,7 +93,7 @@
{%= tpl.Footer(r) %}
{% endfunc %}
{% func ListGroups(r *http.Request, groups []*rule.ApiGroup, filter string) %}
{% func ListGroups(r *http.Request, groups []*apiGroup, filter string) %}
{%code
prefix := vmalertutil.Prefix(r.URL.Path)
filters := map[string]string{
@@ -117,17 +113,14 @@
{%= Controls(prefix, currentIcon, currentText, icons, filters, true) %}
{% if len(groups) > 0 %}
{% for _, g := range groups %}
<div id="group-{%s g.ID %}" class="w-100 border-0 flex-column vm-group{% if g.Unhealthy > 0 %} alert-danger{% endif %}">
<div id="group-{%s g.ID %}" class="d-flex w-100 border-0 flex-column group-items{% if g.Unhealthy > 0 %} alert-danger{% endif %}">
<span class="d-flex justify-content-between">
<a
class="vm-group-search"
href="#group-{%s g.ID %}"
>{%s g.Name %}{% if g.Type != "prometheus" %} ({%s g.Type %}){% endif %} (every {%f.0 g.Interval %}s) #</a>
<a href="#group-{%s g.ID %}">{%s g.Name %}{% if g.Type != "prometheus" %} ({%s g.Type %}){% endif %} (every {%f.0 g.Interval %}s) #</a>
<span
class="flex-grow-1 d-flex justify-content-end"
role="button"
data-bs-toggle="collapse"
data-bs-target="#item-{%s g.ID %}"
data-bs-target="#sub-{%s g.ID %}"
>
<span class="d-flex gap-2">
{% if g.Unhealthy > 0 %}<span class="badge bg-danger" title="Number of rules with status Error">{%d g.Unhealthy %}</span> {% endif %}
@@ -140,9 +133,9 @@
class="d-flex flex-column row-gap-2 mb-2"
role="button"
data-bs-toggle="collapse"
data-bs-target="#item-{%s g.ID %}"
data-bs-target="#sub-{%s g.ID %}"
>
<span class="fs-6 text-start vm-group-search w-100 fw-lighter">{%s g.File %}</span>
<span class="fs-6 text-start w-100 fw-lighter">{%s g.File %}</span>
{% if len(g.Params) > 0 %}
<span class="fs-6 text-start w-100 d-flex justify-content-between fw-lighter">
<span>Extra params</span>
@@ -164,7 +157,7 @@
</span>
{% endif %}
</span>
<div class="collapse" id="item-{%s g.ID %}">
<div class="collapse sub-items" id="sub-{%s g.ID %}">
<table class="table table-striped table-hover table-sm">
<thead>
<tr>
@@ -175,7 +168,7 @@
</thead>
<tbody>
{% for _, r := range g.Rules %}
<tr class="vm-item{% if r.LastError != "" %} alert-danger{% endif %}">
<tr class="sub-item{% if r.LastError != "" %} alert-danger{% endif %}">
<td>
<div class="row">
<div class="col-12 mb-2">
@@ -212,12 +205,7 @@
</div>
</td>
<td class="text-center">{%d r.LastSamples %}</td>
<td class="text-center">{% if r.LastEvaluation.IsZero() %}
Never
{% else %}
{%f.3 time.Since(r.LastEvaluation).Seconds() %}s ago
{% endif %}
</td>
<td class="text-center">{%f.3 time.Since(r.LastEvaluation).Seconds() %}s ago</td>
</tr>
{% endfor %}
</tbody>
@@ -234,7 +222,7 @@
{% endfunc %}
{% func ListAlerts(r *http.Request, groupAlerts []rule.GroupAlerts) %}
{% func ListAlerts(r *http.Request, groupAlerts []groupAlerts) %}
{%code prefix := vmalertutil.Prefix(r.URL.Path) %}
{%= tpl.Header(r, navItems, "Alerts", getLastConfigError()) %}
{%= Controls(prefix, "", "", nil, nil, true) %}
@@ -243,7 +231,7 @@
{%code
g := ga.Group
var keys []string
alertsByRule := make(map[string][]*rule.ApiAlert)
alertsByRule := make(map[string][]*apiAlert)
for _, alert := range ga.Alerts {
if len(alertsByRule[alert.RuleID]) < 1 {
keys = append(keys, alert.RuleID)
@@ -252,14 +240,14 @@
}
sort.Strings(keys)
%}
<div class="w-100 flex-column vm-group alert-danger">
<div class="d-flex w-100 flex-column group-items alert-danger">
<span id="group-{%s g.ID %}" class="d-flex justify-content-between">
<a href="#group-{%s g.ID %}">{%s g.Name %}{% if g.Type != "prometheus" %} ({%s g.Type %}){% endif %}</a>
<span
class="flex-grow-1 d-flex justify-content-end"
role="button"
data-bs-toggle="collapse"
data-bs-target="#item-{%s g.ID %}"
data-bs-target="#sub-{%s g.ID %}"
>
<span class="badge bg-danger" title="Number of active alerts">{%d len(ga.Alerts) %}</span>
</span>
@@ -269,10 +257,10 @@
class="fs-6 text-start w-100 fw-lighter"
role="button"
data-bs-toggle="collapse"
data-bs-target="#item-{%s g.ID %}"
data-bs-target="#sub-{%s g.ID %}"
>{%s g.File %}</span>
</span>
<div class="collapse" id="item-{%s g.ID %}">
<div class="collapse sub-items" id="sub-{%s g.ID %}">
{% for _, ruleID := range keys %}
{%code
defaultAR := alertsByRule[ruleID][0]
@@ -283,7 +271,7 @@
sort.Strings(labelKeys)
%}
<br>
<div class="vm-item">
<div class="sub-item">
<b>alert:</b> {%s defaultAR.Name %} ({%d len(alertsByRule[ruleID]) %})
| <span><a target="_blank" href="{%s defaultAR.SourceLink %}">Source</a></span>
<br>
@@ -348,20 +336,20 @@
typeK, ns := keys[i], targets[notifier.TargetType(keys[i])]
count := len(ns)
%}
<div class="w-100 flex-column vm-group">
<div class="d-flex w-100 flex-column group-items">
<span class="d-flex justify-content-between" id="group-{%s typeK %}">
<a href="#group-{%s typeK %}">{%s typeK %} ({%d count %})</a>
<span
class="flex-grow-1"
role="button"
data-bs-toggle="collapse"
data-bs-target="#item-{%s typeK %}"
data-bs-target="#sub-{%s typeK %}"
></span>
</span>
<div id="item-{%s typeK %}" class="collapse show">
<div id="sub-{%s typeK %}" class="collapse show sub-items">
<table class="table table-striped table-hover table-sm">
<thead>
<tr class="vm-item">
<tr class="sub-item">
<th scope="col">Labels</th>
<th scope="col">Address</th>
</tr>
@@ -390,7 +378,7 @@
{%= tpl.Footer(r) %}
{% endfunc %}
{% func Alert(r *http.Request, alert *rule.ApiAlert) %}
{% func Alert(r *http.Request, alert *apiAlert) %}
{%code prefix := vmalertutil.Prefix(r.URL.Path) %}
{%= tpl.Header(r, navItems, "", getLastConfigError()) %}
{%code
@@ -446,7 +434,7 @@
<div class="col">
{% for _, k := range annotationKeys %}
<b>{%s k %}:</b><br>
<p class="annotations">{%s alert.Annotations[k] %}</p>
<p>{%s alert.Annotations[k] %}</p>
{% endfor %}
</div>
</div>
@@ -476,7 +464,7 @@
{% endfunc %}
{% func RuleDetails(r *http.Request, rule rule.ApiRule) %}
{% func RuleDetails(r *http.Request, rule apiRule) %}
{%code prefix := vmalertutil.Prefix(r.URL.Path) %}
{%= tpl.Header(r, navItems, "", getLastConfigError()) %}
{%code
@@ -560,7 +548,7 @@
<div class="col">
{% for _, k := range annotationKeys %}
<b>{%s k %}:</b><br>
<p class="annotations">{%s rule.Annotations[k] %}</p>
<p>{%s rule.Annotations[k] %}</p>
{% endfor %}
</div>
</div>
@@ -605,11 +593,11 @@
<table class="table table-striped table-hover table-sm">
<thead>
<tr>
<th scope="col" title="The time when the rule was executed">Updated at</th>
<th scope="col" title="The time when event was created">Updated at</th>
<th scope="col" class="w-10 text-center" title="How many series expression returns. Each series will represent an alert.">Series returned</th>
{% if seriesFetchedEnabled %}<th scope="col" class="w-10 text-center" title="How many series were scanned by datasource during the evaluation">Series fetched</th>{% endif %}
<th scope="col" class="w-10 text-center" title="How many seconds request took">Duration</th>
<th scope="col" class="text-center" title="The time used in execution query request">Execution timestamp</th>
<th scope="col" class="text-center" title="Time used for rule execution">Executed at</th>
<th scope="col" class="text-center" title="cURL command with request example">cURL</th>
</tr>
</thead>
@@ -661,7 +649,7 @@
<span class="badge bg-warning text-dark" title="This firing state is kept because of `keep_firing_for`">stabilizing</span>
{% endfunc %}
{% func seriesFetchedWarn(prefix string, r rule.ApiRule) %}
{% func seriesFetchedWarn(prefix string, r apiRule) %}
{% if isNoMatch(r) %}
<svg
data-bs-toggle="tooltip"
@@ -675,7 +663,7 @@
{% endfunc %}
{%code
func isNoMatch (r rule.ApiRule) bool {
func isNoMatch (r apiRule) bool {
return r.LastSamples == 0 && r.LastSeriesFetched != nil && *r.LastSeriesFetched == 0
}
%}

File diff suppressed because it is too large Load Diff

View File

@@ -23,12 +23,8 @@ func TestHandler(t *testing.T) {
Timestamps: []int64{0},
})
m := &manager{groups: map[uint64]*rule.Group{}}
_, cleanup := notifier.InitFakeNotifier()
defer cleanup()
var ar *rule.AlertingRule
var rr *rule.RecordingRule
var groupIDs []uint64
for _, dsType := range []string{"prometheus", "", "graphite"} {
g := rule.NewGroup(config.Group{
Name: "group",
@@ -48,10 +44,8 @@ func TestHandler(t *testing.T) {
}, fq, 1*time.Minute, nil)
ar = g.Rules[0].(*rule.AlertingRule)
rr = g.Rules[1].(*rule.RecordingRule)
g.ExecOnce(context.Background(), nil, time.Time{})
id := g.CreateID()
m.groups[id] = g
groupIDs = append(groupIDs, id)
g.ExecOnce(context.Background(), func() []notifier.Notifier { return nil }, nil, time.Time{})
m.groups[g.CreateID()] = g
}
rh := &requestHandler{m: m}
@@ -88,22 +82,22 @@ func TestHandler(t *testing.T) {
})
t.Run("/vmalert/rule", func(t *testing.T) {
a := ar.ToAPI()
a := ruleToAPI(ar)
getResp(t, ts.URL+"/vmalert/"+a.WebLink(), nil, 200)
r := rr.ToAPI()
r := ruleToAPI(rr)
getResp(t, ts.URL+"/vmalert/"+r.WebLink(), nil, 200)
})
t.Run("/vmalert/alert", func(t *testing.T) {
alerts := ar.AlertsToAPI()
alerts := ruleToAPIAlert(ar)
for _, a := range alerts {
getResp(t, ts.URL+"/vmalert/"+a.WebLink(), nil, 200)
}
})
t.Run("/vmalert/rule?badParam", func(t *testing.T) {
params := fmt.Sprintf("?%s=0&%s=1", rule.ParamGroupID, rule.ParamRuleID)
params := fmt.Sprintf("?%s=0&%s=1", paramGroupID, paramRuleID)
getResp(t, ts.URL+"/vmalert/rule"+params, nil, 404)
params = fmt.Sprintf("?%s=1&%s=0", rule.ParamGroupID, rule.ParamRuleID)
params = fmt.Sprintf("?%s=1&%s=0", paramGroupID, paramRuleID)
getResp(t, ts.URL+"/vmalert/rule"+params, nil, 404)
})
@@ -130,14 +124,14 @@ func TestHandler(t *testing.T) {
}
})
t.Run("/api/v1/alert?alertID&groupID", func(t *testing.T) {
expAlert := rule.NewAlertAPI(ar, ar.GetAlerts()[0])
alert := &rule.ApiAlert{}
expAlert := newAlertAPI(ar, ar.GetAlerts()[0])
alert := &apiAlert{}
getResp(t, ts.URL+"/"+expAlert.APILink(), alert, 200)
if !reflect.DeepEqual(alert, expAlert) {
t.Fatalf("expected %v is equal to %v", alert, expAlert)
}
alert = &rule.ApiAlert{}
alert = &apiAlert{}
getResp(t, ts.URL+"/vmalert/"+expAlert.APILink(), alert, 200)
if !reflect.DeepEqual(alert, expAlert) {
t.Fatalf("expected %v is equal to %v", alert, expAlert)
@@ -145,16 +139,16 @@ func TestHandler(t *testing.T) {
})
t.Run("/api/v1/alert?badParams", func(t *testing.T) {
params := fmt.Sprintf("?%s=0&%s=1", rule.ParamGroupID, rule.ParamAlertID)
params := fmt.Sprintf("?%s=0&%s=1", paramGroupID, paramAlertID)
getResp(t, ts.URL+"/api/v1/alert"+params, nil, 404)
getResp(t, ts.URL+"/vmalert/api/v1/alert"+params, nil, 404)
params = fmt.Sprintf("?%s=1&%s=0", rule.ParamGroupID, rule.ParamAlertID)
params = fmt.Sprintf("?%s=1&%s=0", paramGroupID, paramAlertID)
getResp(t, ts.URL+"/api/v1/alert"+params, nil, 404)
getResp(t, ts.URL+"/vmalert/api/v1/alert"+params, nil, 404)
// bad request, alertID is missing
params = fmt.Sprintf("?%s=1", rule.ParamGroupID)
params = fmt.Sprintf("?%s=1", paramGroupID)
getResp(t, ts.URL+"/api/v1/alert"+params, nil, 400)
getResp(t, ts.URL+"/vmalert/api/v1/alert"+params, nil, 400)
})
@@ -173,42 +167,27 @@ func TestHandler(t *testing.T) {
}
})
t.Run("/api/v1/rule?ruleID&groupID", func(t *testing.T) {
expRule := ar.ToAPI()
gotRule := rule.ApiRule{}
expRule := ruleToAPI(ar)
gotRule := apiRule{}
getResp(t, ts.URL+"/"+expRule.APILink(), &gotRule, 200)
if expRule.ID != gotRule.ID {
t.Fatalf("expected to get Rule %q; got %q instead", expRule.ID, gotRule.ID)
}
gotRule = rule.ApiRule{}
gotRule = apiRule{}
getResp(t, ts.URL+"/vmalert/"+expRule.APILink(), &gotRule, 200)
if expRule.ID != gotRule.ID {
t.Fatalf("expected to get Rule %q; got %q instead", expRule.ID, gotRule.ID)
}
gotRuleWithUpdates := rule.ApiRuleWithUpdates{}
gotRuleWithUpdates := apiRuleWithUpdates{}
getResp(t, ts.URL+"/"+expRule.APILink(), &gotRuleWithUpdates, 200)
if len(gotRuleWithUpdates.StateUpdates) < 1 {
t.Fatalf("expected %+v to have state updates field not empty", gotRuleWithUpdates.StateUpdates)
}
})
t.Run("/api/v1/group?groupID", func(t *testing.T) {
id := groupIDs[0]
g := m.groups[id]
expGroup := g.ToAPI()
gotGroup := rule.ApiGroup{}
getResp(t, ts.URL+"/"+expGroup.APILink(), &gotGroup, 200)
if expGroup.ID != gotGroup.ID {
t.Fatalf("expected to get Group %q; got %q instead", expGroup.ID, gotGroup.ID)
}
gotGroup = rule.ApiGroup{}
getResp(t, ts.URL+"/vmalert/"+expGroup.APILink(), &gotGroup, 200)
if expGroup.ID != gotGroup.ID {
t.Fatalf("expected to get Group %q; got %q instead", expGroup.ID, gotGroup.ID)
}
})
t.Run("/api/v1/rules&filters", func(t *testing.T) {
check := func(url string, statusCode, expGroups, expRules int) {

View File

@@ -1,4 +1,4 @@
package rule
package main
import (
"fmt"
@@ -8,28 +8,79 @@ import (
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/rule"
)
const (
// ParamGroupID is group id key in url parameter
ParamGroupID = "group_id"
paramGroupID = "group_id"
// ParamAlertID is alert id key in url parameter
ParamAlertID = "alert_id"
paramAlertID = "alert_id"
// ParamRuleID is rule id key in url parameter
ParamRuleID = "rule_id"
// TypeRecording is a RecordingRule type
TypeRecording = "recording"
// TypeAlerting is an AlertingRule type
TypeAlerting = "alerting"
paramRuleID = "rule_id"
)
// ApiGroup represents a Group for web view
type ApiGroup struct {
type apiNotifier struct {
Kind string `json:"kind"`
Targets []*apiTarget `json:"targets"`
}
type apiTarget struct {
Address string `json:"address"`
Labels map[string]string `json:"labels"`
}
// apiAlert represents a notifier.AlertingRule state
// for WEB view
// https://github.com/prometheus/compliance/blob/main/alert_generator/specification.md#get-apiv1rules
type apiAlert struct {
State string `json:"state"`
Name string `json:"name"`
Value string `json:"value"`
Labels map[string]string `json:"labels,omitempty"`
Annotations map[string]string `json:"annotations"`
ActiveAt time.Time `json:"activeAt"`
// Additional fields
// ID is an unique Alert's ID within a group
ID string `json:"id"`
// RuleID is an unique Rule's ID within a group
RuleID string `json:"rule_id"`
// GroupID is an unique Group's ID
GroupID string `json:"group_id"`
// Expression contains the PromQL/MetricsQL expression
// for Rule's evaluation
Expression string `json:"expression"`
// SourceLink contains a link to a system which should show
// why Alert was generated
SourceLink string `json:"source"`
// Restored shows whether Alert's state was restored on restart
Restored bool `json:"restored"`
// Stabilizing shows when firing state is kept because of
// `keep_firing_for` instead of real alert
Stabilizing bool `json:"stabilizing"`
}
// WebLink returns a link to the alert which can be used in UI.
func (aa *apiAlert) WebLink() string {
return fmt.Sprintf("alert?%s=%s&%s=%s",
paramGroupID, aa.GroupID, paramAlertID, aa.ID)
}
// APILink returns a link to the alert's JSON representation.
func (aa *apiAlert) APILink() string {
return fmt.Sprintf("api/v1/alert?%s=%s&%s=%s",
paramGroupID, aa.GroupID, paramAlertID, aa.ID)
}
// apiGroup represents Group for web view
// https://github.com/prometheus/compliance/blob/main/alert_generator/specification.md#get-apiv1rules
type apiGroup struct {
// Name is the group name as present in the config
Name string `json:"name"`
// Rules contains both recording and alerting rules
Rules []ApiRule `json:"rules"`
Rules []apiRule `json:"rules"`
// Interval is the Group's evaluation interval in float seconds as present in the file.
Interval float64 `json:"interval"`
// LastEvaluation is the timestamp of the last time the Group was executed
@@ -65,20 +116,15 @@ type ApiGroup struct {
NoMatch int
}
// APILink returns a link to the group's JSON representation.
func (ag *ApiGroup) APILink() string {
return fmt.Sprintf("api/v1/group?%s=%s", ParamGroupID, ag.ID)
// groupAlerts represents a group of alerts for WEB view
type groupAlerts struct {
Group *apiGroup
Alerts []*apiAlert
}
// GroupAlerts represents a Group with its Alerts for web view
type GroupAlerts struct {
Group *ApiGroup
Alerts []*ApiAlert
}
// ApiRule represents a Rule for web view
// apiRule represents a Rule for web view
// see https://github.com/prometheus/compliance/blob/main/alert_generator/specification.md#get-apiv1rules
type ApiRule struct {
type apiRule struct {
// State must be one of these under following scenarios
// "pending": at least 1 alert in the rule in pending state and no other alert in firing ruleState.
// "firing": at least 1 alert in the rule in firing state.
@@ -100,7 +146,7 @@ type ApiRule struct {
// LastEvaluation is the timestamp of the last time the rule was executed
LastEvaluation time.Time `json:"lastEvaluation"`
// Alerts is the list of all the alerts in this rule that are currently pending or firing
Alerts []*ApiAlert `json:"alerts,omitempty"`
Alerts []*apiAlert `json:"alerts,omitempty"`
// Health is the health of rule evaluation.
// It MUST be one of "ok", "err", "unknown"
Health string `json:"health"`
@@ -131,87 +177,143 @@ type ApiRule struct {
// MaxUpdates is the max number of recorded ruleStateEntry objects
MaxUpdates int `json:"max_updates_entries"`
// Updates contains the ordered list of recorded ruleStateEntry objects
Updates []StateEntry `json:"-"`
Updates []rule.StateEntry `json:"-"`
}
// ApiAlert represents a notifier.AlertingRule state
// for WEB view
// https://github.com/prometheus/compliance/blob/main/alert_generator/specification.md#get-apiv1rules
type ApiAlert struct {
State string `json:"state"`
Name string `json:"name"`
Value string `json:"value"`
Labels map[string]string `json:"labels,omitempty"`
Annotations map[string]string `json:"annotations"`
ActiveAt time.Time `json:"activeAt"`
// Additional fields
// ID is an unique Alert's ID within a group
ID string `json:"id"`
// RuleID is an unique Rule's ID within a group
RuleID string `json:"rule_id"`
// GroupID is an unique Group's ID
GroupID string `json:"group_id"`
// Expression contains the PromQL/MetricsQL expression
// for Rule's evaluation
Expression string `json:"expression"`
// SourceLink contains a link to a system which should show
// why Alert was generated
SourceLink string `json:"source"`
// Restored shows whether Alert's state was restored on restart
Restored bool `json:"restored"`
// Stabilizing shows when firing state is kept because of
// `keep_firing_for` instead of real alert
Stabilizing bool `json:"stabilizing"`
}
// WebLink returns a link to the alert which can be used in UI.
func (aa *ApiAlert) WebLink() string {
return fmt.Sprintf("alert?%s=%s&%s=%s",
ParamGroupID, aa.GroupID, ParamAlertID, aa.ID)
}
// APILink returns a link to the alert's JSON representation.
func (aa *ApiAlert) APILink() string {
return fmt.Sprintf("api/v1/alert?%s=%s&%s=%s",
ParamGroupID, aa.GroupID, ParamAlertID, aa.ID)
}
// ApiRuleWithUpdates represents ApiRule but with extra fields for marshalling
type ApiRuleWithUpdates struct {
ApiRule
// apiRuleWithUpdates represents apiRule but with extra fields for marshalling
type apiRuleWithUpdates struct {
apiRule
// Updates contains the ordered list of recorded ruleStateEntry objects
StateUpdates []StateEntry `json:"updates,omitempty"`
StateUpdates []rule.StateEntry `json:"updates,omitempty"`
}
// APILink returns a link to the rule's JSON representation.
func (ar ApiRule) APILink() string {
func (ar apiRule) APILink() string {
return fmt.Sprintf("api/v1/rule?%s=%s&%s=%s",
ParamGroupID, ar.GroupID, ParamRuleID, ar.ID)
paramGroupID, ar.GroupID, paramRuleID, ar.ID)
}
// WebLink returns a link to the alert which can be used in UI.
func (ar ApiRule) WebLink() string {
func (ar apiRule) WebLink() string {
return fmt.Sprintf("rule?%s=%s&%s=%s",
ParamGroupID, ar.GroupID, ParamRuleID, ar.ID)
paramGroupID, ar.GroupID, paramRuleID, ar.ID)
}
// AlertsToAPI returns list of ApiAlert objects from existing alerts
func (ar *AlertingRule) AlertsToAPI() []*ApiAlert {
var alerts []*ApiAlert
func ruleToAPI(r any) apiRule {
if ar, ok := r.(*rule.AlertingRule); ok {
return alertingToAPI(ar)
}
if rr, ok := r.(*rule.RecordingRule); ok {
return recordingToAPI(rr)
}
return apiRule{}
}
const (
ruleTypeRecording = "recording"
ruleTypeAlerting = "alerting"
)
func recordingToAPI(rr *rule.RecordingRule) apiRule {
lastState := rule.GetLastEntry(rr)
r := apiRule{
Type: ruleTypeRecording,
DatasourceType: rr.Type.String(),
Name: rr.Name,
Query: rr.Expr,
Labels: rr.Labels,
LastEvaluation: lastState.Time,
EvaluationTime: lastState.Duration.Seconds(),
Health: "ok",
LastSamples: lastState.Samples,
LastSeriesFetched: lastState.SeriesFetched,
MaxUpdates: rule.GetRuleStateSize(rr),
Updates: rule.GetAllRuleState(rr),
// encode as strings to avoid rounding
ID: fmt.Sprintf("%d", rr.ID()),
GroupID: fmt.Sprintf("%d", rr.GroupID),
GroupName: rr.GroupName,
File: rr.File,
}
if lastState.Err != nil {
r.LastError = lastState.Err.Error()
r.Health = "err"
}
return r
}
// alertingToAPI returns Rule representation in form of apiRule
func alertingToAPI(ar *rule.AlertingRule) apiRule {
lastState := rule.GetLastEntry(ar)
r := apiRule{
Type: ruleTypeAlerting,
DatasourceType: ar.Type.String(),
Name: ar.Name,
Query: ar.Expr,
Duration: ar.For.Seconds(),
KeepFiringFor: ar.KeepFiringFor.Seconds(),
Labels: ar.Labels,
Annotations: ar.Annotations,
LastEvaluation: lastState.Time,
EvaluationTime: lastState.Duration.Seconds(),
Health: "ok",
State: "inactive",
Alerts: ruleToAPIAlert(ar),
LastSamples: lastState.Samples,
LastSeriesFetched: lastState.SeriesFetched,
MaxUpdates: rule.GetRuleStateSize(ar),
Updates: rule.GetAllRuleState(ar),
Debug: ar.Debug,
// encode as strings to avoid rounding in JSON
ID: fmt.Sprintf("%d", ar.ID()),
GroupID: fmt.Sprintf("%d", ar.GroupID),
GroupName: ar.GroupName,
File: ar.File,
}
if lastState.Err != nil {
r.LastError = lastState.Err.Error()
r.Health = "err"
}
// satisfy apiRule.State logic
if len(r.Alerts) > 0 {
r.State = notifier.StatePending.String()
stateFiring := notifier.StateFiring.String()
for _, a := range r.Alerts {
if a.State == stateFiring {
r.State = stateFiring
break
}
}
}
return r
}
// ruleToAPIAlert generates list of apiAlert objects from existing alerts
func ruleToAPIAlert(ar *rule.AlertingRule) []*apiAlert {
var alerts []*apiAlert
for _, a := range ar.GetAlerts() {
if a.State == notifier.StateInactive {
continue
}
alerts = append(alerts, NewAlertAPI(ar, a))
alerts = append(alerts, newAlertAPI(ar, a))
}
return alerts
}
// alertToAPI generates apiAlert object from alert by its id(hash)
func alertToAPI(ar *rule.AlertingRule, id uint64) *apiAlert {
a := ar.GetAlert(id)
if a == nil {
return nil
}
return newAlertAPI(ar, a)
}
// NewAlertAPI creates apiAlert for notifier.Alert
func NewAlertAPI(ar *AlertingRule, a *notifier.Alert) *ApiAlert {
aa := &ApiAlert{
func newAlertAPI(ar *rule.AlertingRule, a *notifier.Alert) *apiAlert {
aa := &apiAlert{
// encode as strings to avoid rounding
ID: fmt.Sprintf("%d", a.ID),
GroupID: fmt.Sprintf("%d", a.GroupID),
@@ -226,8 +328,8 @@ func NewAlertAPI(ar *AlertingRule, a *notifier.Alert) *ApiAlert {
Restored: a.Restored,
Value: strconv.FormatFloat(a.Value, 'f', -1, 32),
}
if notifier.AlertURLGeneratorFn != nil {
aa.SourceLink = notifier.AlertURLGeneratorFn(*a)
if alertURLGeneratorFn != nil {
aa.SourceLink = alertURLGeneratorFn(*a)
}
if a.State == notifier.StateFiring && !a.KeepFiringSince.IsZero() {
aa.Stabilizing = true
@@ -235,11 +337,9 @@ func NewAlertAPI(ar *AlertingRule, a *notifier.Alert) *ApiAlert {
return aa
}
// ToAPI returns ApiGroup representation of g
func (g *Group) ToAPI() *ApiGroup {
g.mu.RLock()
defer g.mu.RUnlock()
ag := ApiGroup{
func groupToAPI(g *rule.Group) *apiGroup {
g = g.DeepCopy()
ag := apiGroup{
// encode as string to avoid rounding
ID: strconv.FormatUint(g.GetID(), 10),
Name: g.Name,
@@ -259,9 +359,9 @@ func (g *Group) ToAPI() *ApiGroup {
if g.EvalDelay != nil {
ag.EvalDelay = g.EvalDelay.Seconds()
}
ag.Rules = make([]ApiRule, 0)
ag.Rules = make([]apiRule, 0)
for _, r := range g.Rules {
ag.Rules = append(ag.Rules, r.ToAPI())
ag.Rules = append(ag.Rules, ruleToAPI(r))
}
return &ag
}

View File

@@ -1,4 +1,4 @@
package rule
package main
import (
"fmt"
@@ -8,6 +8,7 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/rule"
)
func TestRecordingToApi(t *testing.T) {
@@ -16,7 +17,7 @@ func TestRecordingToApi(t *testing.T) {
Values: []float64{1}, Timestamps: []int64{0},
})
entriesLimit := 44
g := NewGroup(config.Group{
g := rule.NewGroup(config.Group{
Name: "group",
File: "rules.yaml",
Concurrency: 1,
@@ -30,24 +31,24 @@ func TestRecordingToApi(t *testing.T) {
},
},
}, fq, 1*time.Minute, nil)
rr := g.Rules[0].(*RecordingRule)
rr := g.Rules[0].(*rule.RecordingRule)
expectedRes := ApiRule{
expectedRes := apiRule{
Name: "record_name",
Query: "up",
Labels: map[string]string{"label": "value"},
Health: "ok",
Type: TypeRecording,
Type: ruleTypeRecording,
DatasourceType: "prometheus",
ID: "1248",
GroupID: fmt.Sprintf("%d", g.CreateID()),
GroupName: "group",
File: "rules.yaml",
MaxUpdates: 44,
Updates: make([]StateEntry, 0),
Updates: make([]rule.StateEntry, 0),
}
res := rr.ToAPI()
res := recordingToAPI(rr)
if !reflect.DeepEqual(res, expectedRes) {
t.Fatalf("expected to have: \n%v;\ngot: \n%v", expectedRes, res)

View File

@@ -27,9 +27,6 @@ vmauth-linux-ppc64le-prod:
vmauth-linux-386-prod:
APP_NAME=vmauth $(MAKE) app-via-docker-linux-386
vmauth-linux-s390x-prod:
APP_NAME=vmauth $(MAKE) app-via-docker-linux-s390x
vmauth-darwin-amd64-prod:
APP_NAME=vmauth $(MAKE) app-via-docker-darwin-amd64

View File

@@ -4,7 +4,6 @@ import (
"bytes"
"context"
"encoding/base64"
"errors"
"flag"
"fmt"
"math"
@@ -42,9 +41,6 @@ var (
"See https://docs.victoriametrics.com/victoriametrics/vmauth/#load-balancing for details")
defaultLoadBalancingPolicy = flag.String("loadBalancingPolicy", "least_loaded", "The default load balancing policy to use for backend urls specified inside url_prefix section. "+
"Supported policies: least_loaded, first_available. See https://docs.victoriametrics.com/victoriametrics/vmauth/#load-balancing")
defaultMergeQueryArgs = flagutil.NewArrayString("mergeQueryArgs", "An optional list of client query arg names, which must be merged with args at backend urls. "+
"The rest of client query args are replaced by the corresponding query args from backend urls for security reasons; "+
"see https://docs.victoriametrics.com/victoriametrics/vmauth/#query-args-handling")
discoverBackendIPsGlobal = flag.Bool("discoverBackendIPs", false, "Whether to discover backend IPs via periodic DNS queries to hostnames specified in url_prefix. "+
"This may be useful when url_prefix points to a hostname with dynamically scaled instances behind it. See https://docs.victoriametrics.com/victoriametrics/vmauth/#discovering-backend-ips")
discoverBackendIPsInterval = flag.Duration("discoverBackendIPsInterval", 10*time.Second, "The interval for re-discovering backend IPs if -discoverBackendIPs command-line flag is set. "+
@@ -79,7 +75,6 @@ type UserInfo struct {
DefaultURL *URLPrefix `yaml:"default_url,omitempty"`
RetryStatusCodes []int `yaml:"retry_status_codes,omitempty"`
LoadBalancingPolicy string `yaml:"load_balancing_policy,omitempty"`
MergeQueryArgs []string `yaml:"merge_query_args,omitempty"`
DropSrcPathPrefixParts *int `yaml:"drop_src_path_prefix_parts,omitempty"`
TLSCAFile string `yaml:"tls_ca_file,omitempty"`
TLSCertFile string `yaml:"tls_cert_file,omitempty"`
@@ -95,8 +90,6 @@ type UserInfo struct {
rt http.RoundTripper
requests *metrics.Counter
requestErrors *metrics.Counter
backendRequests *metrics.Counter
backendErrors *metrics.Counter
requestsDuration *metrics.Summary
}
@@ -108,29 +101,13 @@ type HeadersConf struct {
KeepOriginalHost *bool `yaml:"keep_original_host,omitempty"`
}
func (ui *UserInfo) beginConcurrencyLimit(ctx context.Context) error {
func (ui *UserInfo) beginConcurrencyLimit() error {
select {
case ui.concurrencyLimitCh <- struct{}{}:
return nil
default:
// The number of concurrently executed requests for the given user equals the limt.
// Wait until some of the currently executed requests are finished, so the current request could be executed.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10078
select {
case ui.concurrencyLimitCh <- struct{}{}:
return nil
case <-ctx.Done():
err := ctx.Err()
if errors.Is(err, context.DeadlineExceeded) {
// The current request couldn't be executed until the request timeout.
ui.concurrencyLimitReached.Inc()
return fmt.Errorf("cannot start executing the request during -maxQueueDuration=%s because %d concurrent requests from the user %s are executed",
*maxQueueDuration, ui.getMaxConcurrentRequests(), ui.name())
}
return fmt.Errorf("cannot start executing the request because %d concurrent requests from the user %s are executed: %w",
ui.getMaxConcurrentRequests(), ui.name(), err)
}
ui.concurrencyLimitReached.Inc()
return fmt.Errorf("cannot handle more than %d concurrent requests from user %s", ui.getMaxConcurrentRequests(), ui.name())
}
}
@@ -146,28 +123,6 @@ func (ui *UserInfo) getMaxConcurrentRequests() int {
return mcr
}
func (ui *UserInfo) stopHealthChecks() {
if ui == nil {
return
}
if ui.URLPrefix != nil {
bus := ui.URLPrefix.bus.Load()
bus.stopHealthChecks()
}
if ui.DefaultURL != nil {
bus := ui.DefaultURL.bus.Load()
bus.stopHealthChecks()
}
for i := range ui.URLMaps {
um := &ui.URLMaps[i]
if um.URLPrefix != nil {
bus := um.URLPrefix.bus.Load()
bus.stopHealthChecks()
}
}
}
// Header is `Name: Value` http header, which must be added to the proxied request.
type Header struct {
Name string
@@ -227,11 +182,6 @@ type URLMap struct {
// LoadBalancingPolicy is load balancing policy among UrlPrefix backends.
LoadBalancingPolicy string `yaml:"load_balancing_policy,omitempty"`
// MergeQueryArgs is a list of client query args, which must be merged with the existing backend query args.
//
// The rest of client query args are replaced with the corresponding backend query args for security reasons.
MergeQueryArgs []string `yaml:"merge_query_args,omitempty"`
// DropSrcPathPrefixParts is the number of `/`-delimited request path prefix parts to drop before proxying the request to backend.
DropSrcPathPrefixParts *int `yaml:"drop_src_path_prefix_parts,omitempty"`
}
@@ -278,7 +228,7 @@ func (qa *QueryArg) MarshalYAML() (any, error) {
return qa.sOriginal, nil
}
// URLPrefix represents the `url_prefix` from auth config.
// URLPrefix represents passed `url_prefix`
type URLPrefix struct {
// requests are re-tried on other backend urls for these http response status codes
retryStatusCodes []int
@@ -286,11 +236,6 @@ type URLPrefix struct {
// load balancing policy used
loadBalancingPolicy string
// the list of client query args, which must be merged with backend query args.
//
// By default backend query args replace all the client query args for security reasons.
mergeQueryArgs []string
// how many request path prefix parts to drop before routing the request to backendURL
dropSrcPathPrefixParts int
@@ -303,7 +248,7 @@ type URLPrefix struct {
// the list of backend urls
//
// the list can be dynamically updated if `discover_backend_ips` option is set.
bus atomic.Pointer[backendURLs]
bus atomic.Pointer[[]*backendURL]
// if this option is set, then backend ips for busOriginal are periodically re-discovered and put to bus.
discoverBackendIPs bool
@@ -327,91 +272,21 @@ func (up *URLPrefix) setLoadBalancingPolicy(loadBalancingPolicy string) error {
}
}
type backendURLs struct {
healthChecksContext context.Context
healthChecksCancel func()
healthChecksWG sync.WaitGroup
bus []*backendURL
}
func newBackendURLs() *backendURLs {
ctx, cancel := context.WithCancel(context.Background())
return &backendURLs{
healthChecksContext: ctx,
healthChecksCancel: cancel,
}
}
func (bus *backendURLs) add(u *url.URL) {
bus.bus = append(bus.bus, &backendURL{
url: u,
healthCheckContext: bus.healthChecksContext,
healthCheckWG: &bus.healthChecksWG,
})
}
func (bus *backendURLs) stopHealthChecks() {
bus.healthChecksCancel()
bus.healthChecksWG.Wait()
}
type backendURL struct {
broken atomic.Bool
healthCheckContext context.Context
healthCheckWG *sync.WaitGroup
brokenDeadline atomic.Uint64
concurrentRequests atomic.Int32
url *url.URL
}
func (bu *backendURL) isBroken() bool {
return bu.broken.Load()
ct := fasttime.UnixTimestamp()
return ct < bu.brokenDeadline.Load()
}
func (bu *backendURL) setBroken() {
if bu.broken.CompareAndSwap(false, true) {
bu.healthCheckWG.Go(func() {
bu.runHealthCheck()
bu.broken.Store(false)
})
}
}
func (bu *backendURL) runHealthCheck() {
port := bu.url.Port()
if port == "" {
port = "80"
}
addr := net.JoinHostPort(bu.url.Hostname(), port)
t := time.NewTicker(*failTimeout)
defer t.Stop()
for {
select {
case <-t.C:
// Verify network connectivity via TCP dial before marking backend healthy.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9997
ctx, cancel := context.WithTimeout(bu.healthCheckContext, time.Second)
c, err := netutil.Dialer.DialContext(ctx, "tcp", addr)
cancel()
if err != nil {
if errors.Is(bu.healthCheckContext.Err(), context.Canceled) {
return
}
logger.Warnf("ignoring the backend at %s for %s because of dial error: %s", addr, *failTimeout, err)
continue
}
_ = c.Close()
return
case <-bu.healthCheckContext.Done():
return
}
}
deadline := fasttime.UnixTimestamp() + uint64((*failTimeout).Seconds())
bu.brokenDeadline.Store(deadline)
}
func (bu *backendURL) get() {
@@ -423,8 +298,8 @@ func (bu *backendURL) put() {
}
func (up *URLPrefix) getBackendsCount() int {
bus := up.bus.Load()
return len(bus.bus)
pbus := up.bus.Load()
return len(*pbus)
}
// getBackendURL returns the backendURL depending on the load balance policy.
@@ -435,15 +310,16 @@ func (up *URLPrefix) getBackendsCount() int {
func (up *URLPrefix) getBackendURL() *backendURL {
up.discoverBackendAddrsIfNeeded()
bus := up.bus.Load()
if len(bus.bus) == 0 {
pbus := up.bus.Load()
bus := *pbus
if len(bus) == 0 {
return nil
}
if up.loadBalancingPolicy == "first_available" {
return getFirstAvailableBackendURL(bus.bus)
return getFirstAvailableBackendURL(bus)
}
return getLeastLoadedBackendURL(bus.bus, &up.n)
return getLeastLoadedBackendURL(bus, &up.n)
}
func (up *URLPrefix) discoverBackendAddrsIfNeeded() {
@@ -517,24 +393,25 @@ func (up *URLPrefix) discoverBackendAddrsIfNeeded() {
cancel()
// generate new backendURLs for the resolved IPs
busNew := newBackendURLs()
var busNew []*backendURL
for _, bu := range up.busOriginal {
host := bu.Hostname()
for _, addr := range hostToAddrs[host] {
buCopy := *bu
buCopy.Host = addr
busNew.add(&buCopy)
busNew = append(busNew, &backendURL{
url: &buCopy,
})
}
}
bus := up.bus.Load()
if areEqualBackendURLs(bus.bus, busNew.bus) {
pbus := up.bus.Load()
if areEqualBackendURLs(*pbus, busNew) {
return
}
// Store new backend urls
up.bus.Store(busNew)
bus.stopHealthChecks()
up.bus.Store(&busNew)
}
func areEqualBackendURLs(a, b []*backendURL) bool {
@@ -565,23 +442,20 @@ func getFirstAvailableBackendURL(bus []*backendURL) *backendURL {
for i := 1; i < len(bus); i++ {
if !bus[i].isBroken() {
bu = bus[i]
bu.get()
return bu
break
}
}
return nil
bu.get()
return bu
}
// getLeastLoadedBackendURL returns a non-broken backendURL with the lowest number of concurrent requests.
// getLeastLoadedBackendURL returns the backendURL with the minimum number of concurrent requests.
//
// backendURL.put() must be called on the returned backendURL after the request is complete.
func getLeastLoadedBackendURL(bus []*backendURL, atomicCounter *atomic.Uint32) *backendURL {
if len(bus) == 1 {
// Fast path - return the only backend url.
bu := bus[0]
if bu.isBroken() {
return nil
}
bu.get()
return bu
}
@@ -594,37 +468,27 @@ func getLeastLoadedBackendURL(bus []*backendURL, atomicCounter *atomic.Uint32) *
if bu.isBroken() {
continue
}
// The Load() in front of CompareAndSwap() avoids CAS overhead for items with values bigger than 0.
if bu.concurrentRequests.Load() == 0 && bu.concurrentRequests.CompareAndSwap(0, 1) {
atomicCounter.CompareAndSwap(n+1, idx+1)
// There is no need in the call bu.get(), because we already incremented bu.concrrentRequests above.
if bu.concurrentRequests.Load() == 0 {
// Fast path - return the backend with zero concurrently executed requests.
// Do not use CompareAndSwap() instead of Load(), since it is much slower on systems with many CPU cores.
bu.concurrentRequests.Add(1)
return bu
}
}
// Slow path - return the backend with the minimum number of concurrently executed requests.
buMinIdx := n % uint32(len(bus))
minRequests := bus[buMinIdx].concurrentRequests.Load()
for i := uint32(1); i < uint32(len(bus)); i++ {
idx := (n + i) % uint32(len(bus))
bu := bus[idx]
buMin := bus[n%uint32(len(bus))]
minRequests := buMin.concurrentRequests.Load()
for _, bu := range bus {
if bu.isBroken() {
continue
}
reqs := bu.concurrentRequests.Load()
if reqs < minRequests || bus[buMinIdx].isBroken() {
buMinIdx = idx
minRequests = reqs
if n := bu.concurrentRequests.Load(); n < minRequests || buMin.isBroken() {
buMin = bu
minRequests = n
}
}
buMin := bus[buMinIdx]
if buMin.isBroken() {
return nil
}
buMin.get()
atomicCounter.CompareAndSwap(n+1, buMinIdx+1)
return buMin
}
@@ -741,9 +605,11 @@ func initAuthConfig() {
configTimestamp.Set(fasttime.UnixTimestamp())
stopCh = make(chan struct{})
authConfigWG.Go(func() {
authConfigWG.Add(1)
go func() {
defer authConfigWG.Done()
authConfigReloader(sighupCh)
})
}()
}
func stopAuthConfig() {
@@ -815,7 +681,7 @@ func reloadAuthConfig() (bool, error) {
ok, err := reloadAuthConfigData(data)
if err != nil {
return false, fmt.Errorf("failed to parse -auth.config=%q: %w", *authConfigPath, err)
return false, fmt.Errorf("failed to pars -auth.config=%q: %w", *authConfigPath, err)
}
if !ok {
return false, nil
@@ -845,11 +711,6 @@ func reloadAuthConfigData(data []byte) (bool, error) {
acPrev := authConfig.Load()
if acPrev != nil {
acPrev.UnauthorizedUser.stopHealthChecks()
for i := range acPrev.Users {
acPrev.Users[i].stopHealthChecks()
}
metrics.UnregisterSet(acPrev.ms, true)
}
metrics.RegisterSet(ac.ms)
@@ -896,8 +757,6 @@ func parseAuthConfig(data []byte) (*AuthConfig, error) {
return nil, fmt.Errorf("cannot parse metric_labels for unauthorized_user: %w", err)
}
ui.requests = ac.ms.NewCounter(`vmauth_unauthorized_user_requests_total` + metricLabels)
ui.requestErrors = ac.ms.NewCounter(`vmauth_unauthorized_user_request_errors_total` + metricLabels)
ui.backendRequests = ac.ms.NewCounter(`vmauth_unauthorized_user_request_backend_requests_total` + metricLabels)
ui.backendErrors = ac.ms.NewCounter(`vmauth_unauthorized_user_request_backend_errors_total` + metricLabels)
ui.requestsDuration = ac.ms.NewSummary(`vmauth_unauthorized_user_request_duration_seconds` + metricLabels)
ui.concurrencyLimitCh = make(chan struct{}, ui.getMaxConcurrentRequests())
@@ -946,8 +805,6 @@ func parseAuthConfigUsers(ac *AuthConfig) (map[string]*UserInfo, error) {
return nil, fmt.Errorf("cannot parse metric_labels: %w", err)
}
ui.requests = ac.ms.GetOrCreateCounter(`vmauth_user_requests_total` + metricLabels)
ui.requestErrors = ac.ms.GetOrCreateCounter(`vmauth_user_request_errors_total` + metricLabels)
ui.backendRequests = ac.ms.GetOrCreateCounter(`vmauth_user_request_backend_requests_total` + metricLabels)
ui.backendErrors = ac.ms.GetOrCreateCounter(`vmauth_user_request_backend_errors_total` + metricLabels)
ui.requestsDuration = ac.ms.GetOrCreateSummary(`vmauth_user_request_duration_seconds` + metricLabels)
mcr := ui.getMaxConcurrentRequests()
@@ -999,7 +856,6 @@ func (ui *UserInfo) getMetricLabels() (string, error) {
func (ui *UserInfo) initURLs() error {
retryStatusCodes := defaultRetryStatusCodes.Values()
loadBalancingPolicy := *defaultLoadBalancingPolicy
mergeQueryArgs := *defaultMergeQueryArgs
dropSrcPathPrefixParts := 0
discoverBackendIPs := *discoverBackendIPsGlobal
if ui.RetryStatusCodes != nil {
@@ -1008,9 +864,6 @@ func (ui *UserInfo) initURLs() error {
if ui.LoadBalancingPolicy != "" {
loadBalancingPolicy = ui.LoadBalancingPolicy
}
if len(ui.MergeQueryArgs) != 0 {
mergeQueryArgs = ui.MergeQueryArgs
}
if ui.DropSrcPathPrefixParts != nil {
dropSrcPathPrefixParts = *ui.DropSrcPathPrefixParts
}
@@ -1018,18 +871,16 @@ func (ui *UserInfo) initURLs() error {
discoverBackendIPs = *ui.DiscoverBackendIPs
}
up := ui.URLPrefix
if up != nil {
if err := up.sanitizeAndInitialize(); err != nil {
if ui.URLPrefix != nil {
if err := ui.URLPrefix.sanitizeAndInitialize(); err != nil {
return err
}
up.retryStatusCodes = retryStatusCodes
up.dropSrcPathPrefixParts = dropSrcPathPrefixParts
up.discoverBackendIPs = discoverBackendIPs
if err := up.setLoadBalancingPolicy(loadBalancingPolicy); err != nil {
ui.URLPrefix.retryStatusCodes = retryStatusCodes
ui.URLPrefix.dropSrcPathPrefixParts = dropSrcPathPrefixParts
ui.URLPrefix.discoverBackendIPs = discoverBackendIPs
if err := ui.URLPrefix.setLoadBalancingPolicy(loadBalancingPolicy); err != nil {
return err
}
up.mergeQueryArgs = mergeQueryArgs
}
if ui.DefaultURL != nil {
if err := ui.DefaultURL.sanitizeAndInitialize(); err != nil {
@@ -1048,7 +899,6 @@ func (ui *UserInfo) initURLs() error {
}
rscs := retryStatusCodes
lbp := loadBalancingPolicy
mqa := mergeQueryArgs
dsp := dropSrcPathPrefixParts
dbd := discoverBackendIPs
if e.RetryStatusCodes != nil {
@@ -1057,9 +907,6 @@ func (ui *UserInfo) initURLs() error {
if e.LoadBalancingPolicy != "" {
lbp = e.LoadBalancingPolicy
}
if len(e.MergeQueryArgs) != 0 {
mqa = e.MergeQueryArgs
}
if e.DropSrcPathPrefixParts != nil {
dsp = *e.DropSrcPathPrefixParts
}
@@ -1070,7 +917,6 @@ func (ui *UserInfo) initURLs() error {
if err := e.URLPrefix.setLoadBalancingPolicy(lbp); err != nil {
return err
}
e.URLPrefix.mergeQueryArgs = mqa
e.URLPrefix.dropSrcPathPrefixParts = dsp
e.URLPrefix.discoverBackendIPs = dbd
}
@@ -1182,11 +1028,13 @@ func (up *URLPrefix) sanitizeAndInitialize() error {
}
// Initialize up.bus
bus := newBackendURLs()
for _, bu := range up.busOriginal {
bus.add(bu)
bus := make([]*backendURL, len(up.busOriginal))
for i, bu := range up.busOriginal {
bus[i] = &backendURL{
url: bu,
}
}
up.bus.Store(bus)
up.bus.Store(&bus)
return nil
}

View File

@@ -280,7 +280,7 @@ users:
}
func TestParseAuthConfigSuccess(t *testing.T) {
f := func(s string, expectedAuthConfig map[string]*UserInfo, expectedUnauthorizedUserConfig *UserInfo) {
f := func(s string, expectedAuthConfig map[string]*UserInfo) {
t.Helper()
ac, err := parseAuthConfig([]byte(s))
if err != nil {
@@ -294,19 +294,15 @@ func TestParseAuthConfigSuccess(t *testing.T) {
if err := areEqualConfigs(m, expectedAuthConfig); err != nil {
t.Fatal(err)
}
if err := areEqualConfigs(ac.UnauthorizedUser, expectedUnauthorizedUserConfig); err != nil {
t.Fatal(err)
}
}
insecureSkipVerifyTrue := true
// Empty config
f(``, map[string]*UserInfo{}, nil)
f(``, map[string]*UserInfo{})
// Empty users
f(`users: []`, map[string]*UserInfo{}, nil)
f(`users: []`, map[string]*UserInfo{})
// Single user
f(`
@@ -324,7 +320,7 @@ users:
MaxConcurrentRequests: 5,
TLSInsecureSkipVerify: &insecureSkipVerifyTrue,
},
}, nil)
})
// Single user with auth_token
f(`
@@ -348,7 +344,7 @@ users:
TLSCertFile: "foo/baz",
TLSKeyFile: "foo/foo",
},
}, nil)
})
// Multiple url_prefix entries
insecureSkipVerifyFalse := false
@@ -363,7 +359,6 @@ users:
tls_insecure_skip_verify: false
retry_status_codes: [500, 501]
load_balancing_policy: first_available
merge_query_args: [foo, bar]
drop_src_path_prefix_parts: 1
discover_backend_ips: true
`, map[string]*UserInfo{
@@ -377,11 +372,10 @@ users:
TLSInsecureSkipVerify: &insecureSkipVerifyFalse,
RetryStatusCodes: []int{500, 501},
LoadBalancingPolicy: "first_available",
MergeQueryArgs: []string{"foo", "bar"},
DropSrcPathPrefixParts: intp(1),
DiscoverBackendIPs: &discoverBackendIPsTrue,
},
}, nil)
})
// Multiple users
f(`
@@ -399,7 +393,7 @@ users:
Username: "bar",
URLPrefix: mustParseURL("https://bar/x/"),
},
}, nil)
})
// non-empty URLMap
sharedUserInfo := &UserInfo{
@@ -449,7 +443,7 @@ users:
`, map[string]*UserInfo{
getHTTPAuthBearerToken("foo"): sharedUserInfo,
getHTTPAuthBasicToken("foo", ""): sharedUserInfo,
}, nil)
})
// Multiple users with the same name - this should work, since these users have different passwords
f(`
@@ -471,7 +465,7 @@ users:
Password: "bar",
URLPrefix: mustParseURL("https://bar/x"),
},
}, nil)
})
// with default url
keepOriginalHost := true
@@ -487,8 +481,6 @@ users:
- "foo: bar"
- "xxx: y"
keep_original_host: true
load_balancing_policy: first_available
merge_query_args: [foo, bar]
default_url:
- http://default1/select/0/prometheus
- http://default2/select/0/prometheus
@@ -513,8 +505,6 @@ users:
},
KeepOriginalHost: &keepOriginalHost,
},
LoadBalancingPolicy: "first_available",
MergeQueryArgs: []string{"foo", "bar"},
},
},
DefaultURL: mustParseURLs([]string{
@@ -542,8 +532,6 @@ users:
},
KeepOriginalHost: &keepOriginalHost,
},
LoadBalancingPolicy: "first_available",
MergeQueryArgs: []string{"foo", "bar"},
},
},
DefaultURL: mustParseURLs([]string{
@@ -551,7 +539,7 @@ users:
"http://default2/select/0/prometheus",
}),
},
}, nil)
})
// With metric_labels
f(`
@@ -603,23 +591,6 @@ users:
},
},
},
}, nil)
// unauthorized_user
f(`
unauthorized_user:
merge_query_args: [extra_filters]
url_map:
- src_paths: ["/select/.+"]
url_prefix: 'http://victoria-logs:9428/?extra_filters={env="prod"}'
`, nil, &UserInfo{
MergeQueryArgs: []string{"extra_filters"},
URLMaps: []URLMap{
{
SrcPaths: getRegexs([]string{"/select/.+"}),
URLPrefix: mustParseURL(`http://victoria-logs:9428/?extra_filters={env="prod"}`),
},
},
})
}
@@ -752,12 +723,10 @@ func TestGetLeastLoadedBackendURL(t *testing.T) {
})
up.loadBalancingPolicy = "least_loaded"
pbus := up.bus.Load()
bus := pbus.bus
fn := func(ns ...int) {
t.Helper()
pbus := up.bus.Load()
bus := *pbus
for i, b := range bus {
got := int(b.concurrentRequests.Load())
exp := ns[i]
@@ -769,52 +738,45 @@ func TestGetLeastLoadedBackendURL(t *testing.T) {
up.getBackendURL()
fn(1, 0, 0)
up.getBackendURL()
fn(1, 1, 0)
up.getBackendURL()
fn(1, 1, 1)
bus[1].put()
bus[2].put()
fn(1, 0, 0)
up.getBackendURL()
up.getBackendURL()
fn(2, 2, 1)
bus := up.bus.Load()
pbus := *bus
pbus[0].concurrentRequests.Add(2)
pbus[2].concurrentRequests.Add(5)
fn(4, 2, 6)
up.getBackendURL()
fn(1, 1, 0)
fn(4, 3, 6)
bus[1].put()
up.getBackendURL()
fn(1, 0, 1)
fn(4, 4, 6)
up.getBackendURL()
fn(4, 5, 6)
up.getBackendURL()
fn(5, 5, 6)
up.getBackendURL()
fn(6, 5, 6)
up.getBackendURL()
fn(6, 6, 6)
up.getBackendURL()
fn(6, 6, 7)
up.getBackendURL()
up.getBackendURL()
fn(1, 1, 2)
bus[0].concurrentRequests.Add(2)
bus[2].concurrentRequests.Add(2)
fn(3, 1, 4)
up.getBackendURL()
fn(3, 2, 4)
up.getBackendURL()
fn(3, 3, 4)
up.getBackendURL()
fn(4, 3, 4)
up.getBackendURL()
fn(4, 4, 4)
bus[0].put()
bus[2].put()
up.getBackendURL()
fn(3, 4, 4)
up.getBackendURL()
fn(4, 4, 4)
fn(7, 7, 7)
}
func TestBrokenBackend(t *testing.T) {
@@ -825,7 +787,7 @@ func TestBrokenBackend(t *testing.T) {
})
up.loadBalancingPolicy = "least_loaded"
pbus := up.bus.Load()
bus := pbus.bus
bus := *pbus
// explicitly mark one of the backends as broken
bus[1].setBroken()
@@ -848,7 +810,7 @@ func TestDiscoverBackendIPsWithIPV6(t *testing.T) {
up.discoverBackendAddrsIfNeeded()
pbus := up.bus.Load()
bus := pbus.bus
bus := *pbus
if len(bus) != 1 {
t.Fatalf("expected url list to be of size 1; got %d instead", len(bus))
@@ -922,7 +884,7 @@ func removeMetrics(m map[string]*UserInfo) {
}
}
func areEqualConfigs(a, b any) error {
func areEqualConfigs(a, b map[string]*UserInfo) error {
aData, err := yaml.Marshal(a)
if err != nil {
return fmt.Errorf("cannot marshal a: %w", err)
@@ -942,14 +904,16 @@ func mustParseURL(u string) *URLPrefix {
}
func mustParseURLs(us []string) *URLPrefix {
bus := newBackendURLs()
bus := make([]*backendURL, len(us))
urls := make([]*url.URL, len(us))
for i, u := range us {
pu, err := url.Parse(u)
if err != nil {
panic(fmt.Errorf("BUG: cannot parse %q: %w", u, err))
}
bus.add(pu)
bus[i] = &backendURL{
url: pu,
}
urls[i] = pu
}
up := &URLPrefix{}
@@ -958,7 +922,7 @@ func mustParseURLs(us []string) *URLPrefix {
} else {
up.vOriginal = us
}
up.bus.Store(bus)
up.bus.Store(&bus)
up.busOriginal = urls
return up
}

View File

@@ -24,7 +24,6 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httputil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/ioutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
@@ -41,38 +40,22 @@ var (
useProxyProtocol = flagutil.NewArrayBool("httpListenAddr.useProxyProtocol", "Whether to use proxy protocol for connections accepted at the corresponding -httpListenAddr . "+
"See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt . "+
"With enabled proxy protocol http server cannot serve regular /metrics endpoint. Use -pushmetrics.url for metrics pushing")
maxIdleConnsPerBackend = flag.Int("maxIdleConnsPerBackend", 100, "The maximum number of idle connections vmauth can open per each backend host")
idleConnTimeout = flag.Duration("idleConnTimeout", 50*time.Second, "The timeout for HTTP keep-alive connections to backend services. "+
maxIdleConnsPerBackend = flag.Int("maxIdleConnsPerBackend", 100, "The maximum number of idle connections vmauth can open per each backend host. "+
"See also -maxConcurrentRequests")
idleConnTimeout = flag.Duration("idleConnTimeout", 50*time.Second, "The timeout for HTTP keep-alive connections to backend services. "+
"It is recommended setting this value to values smaller than -http.idleConnTimeout set at backend services")
responseTimeout = flag.Duration("responseTimeout", 5*time.Minute, "The timeout for receiving a response from backend")
requestBufferSize = flagutil.NewBytes("requestBufferSize", 32*1024, "The size of the buffer for reading the request body before proxying the request to backends. "+
"This allows reducing the comsumption of backend resources when processing requests from clients connected via slow networks. "+
"Set to 0 to disable request buffering. See https://docs.victoriametrics.com/victoriametrics/vmauth/#request-body-buffering")
maxRequestBodySizeToRetry = flagutil.NewBytes("maxRequestBodySizeToRetry", 16*1024, "The maximum request body size to buffer in memory for potential retries at other backends. "+
"Request bodies larger than this size cannot be retried if the backend fails. Zero or negative value disables request body buffering and retries. "+
"See also -requestBufferSize")
maxConcurrentRequests = flag.Int("maxConcurrentRequests", 1000, "The maximum number of concurrent requests vmauth can process simultaneously. "+
"Requests exceeding this limit are queued for up to -maxQueueDuration and then rejected with '429 Too Many Requests' http status code if the limit is still reached. "+
"This protects vmauth itself from overloading and out-of-memory (OOM) failures. See also -maxConcurrentPerUserRequests "+
"and https://docs.victoriametrics.com/victoriametrics/vmauth/#concurrency-limiting")
maxConcurrentPerUserRequests = flag.Int("maxConcurrentPerUserRequests", 100, "The maximum number of concurrent requests vmauth can process per each configured user. "+
"Requests exceeding this limit are queued for up to -maxQueueDuration and then rejected with '429 Too Many Requests' http status code if the limit is still reached. "+
"This provides fairness and isolation between users, preventing a single user from consuming all the available resources. "+
"It works in conjunction with -maxConcurrentRequests, which sets the global limit across all users. "+
"This default can be overridden for individual users via max_concurrent_requests option in per-user config. "+
"See https://docs.victoriametrics.com/victoriametrics/vmauth/#concurrency-limiting")
maxQueueDuration = flag.Duration("maxQueueDuration", 10*time.Second, "The maximum duration to wait before rejecting incoming requests if concurrency limit "+
"specified via -maxConcurrentRequests or -maxConcurrentPerUserRequests command-line flags is reached. "+
"Requests are rejected with '429 Too Many Requests' http status code if the limit is still reached after the -maxQueueDuration duration. "+
"This allows graceful handling of short spikes in concurrent requests. See https://docs.victoriametrics.com/victoriametrics/vmauth/#concurrency-limiting")
responseTimeout = flag.Duration("responseTimeout", 5*time.Minute, "The timeout for receiving a response from backend")
maxConcurrentRequests = flag.Int("maxConcurrentRequests", 1000, "The maximum number of concurrent requests vmauth can process. Other requests are rejected with "+
"'429 Too Many Requests' http status code. See also -maxConcurrentPerUserRequests and -maxIdleConnsPerBackend command-line options")
maxConcurrentPerUserRequests = flag.Int("maxConcurrentPerUserRequests", 300, "The maximum number of concurrent requests vmauth can process per each configured user. "+
"Other requests are rejected with '429 Too Many Requests' http status code. See also -maxConcurrentRequests command-line option and max_concurrent_requests option "+
"in per-user config")
reloadAuthKey = flagutil.NewPassword("reloadAuthKey", "Auth key for /-/reload http endpoint. It must be passed via authKey query arg. It overrides -httpAuth.*")
logInvalidAuthTokens = flag.Bool("logInvalidAuthTokens", false, "Whether to log requests with invalid auth tokens. "+
`Such requests are always counted at vmauth_http_request_errors_total{reason="invalid_auth_token"} metric, which is exposed at /metrics page`)
failTimeout = flag.Duration("failTimeout", 3*time.Second, "Sets a delay period for load balancing to skip a malfunctioning backend")
failTimeout = flag.Duration("failTimeout", 3*time.Second, "Sets a delay period for load balancing to skip a malfunctioning backend")
maxRequestBodySizeToRetry = flagutil.NewBytes("maxRequestBodySizeToRetry", 16*1024, "The maximum request body size, which can be cached and re-tried at other backends. "+
"Bigger values may require more memory. Zero or negative value disables caching of request body. This may be useful when proxying data ingestion requests")
backendTLSInsecureSkipVerify = flag.Bool("backend.tlsInsecureSkipVerify", false, "Whether to skip TLS verification when connecting to backends over HTTPS. "+
"See https://docs.victoriametrics.com/victoriametrics/vmauth/#backend-tls-setup")
backendTLSCAFile = flag.String("backend.TLSCAFile", "", "Optional path to TLS root CA file, which is used for TLS verification when connecting to backends over HTTPS. "+
@@ -168,6 +151,7 @@ func requestHandlerWithInternalRoutes(w http.ResponseWriter, r *http.Request) bo
}
func requestHandler(w http.ResponseWriter, r *http.Request) bool {
ats := getAuthTokensFromRequest(r)
if len(ats) == 0 {
// Process requests for unauthorized users
@@ -224,124 +208,26 @@ func processUserRequest(w http.ResponseWriter, r *http.Request, ui *UserInfo) {
ui.requests.Inc()
ctx, cancel := context.WithTimeout(r.Context(), *maxQueueDuration)
defer cancel()
// Acquire global concurrency limit.
if err := beginConcurrencyLimit(ctx); err != nil {
handleConcurrencyLimitError(w, r, err)
return
}
defer endConcurrencyLimit()
// Set read deadline for reading the initial chunk for the request body.
rc := http.NewResponseController(w)
deadline, ok := ctx.Deadline()
if !ok {
logger.Panicf("BUG: expecting valid deadline for the context")
}
if err := rc.SetReadDeadline(deadline); err != nil {
logger.Panicf("BUG: cannot set read deadline: %s", err)
}
// Read the initial chunk for the request body.
userName := ui.name()
if userName == "" {
userName = "unauthorized"
}
bb, err := bufferRequestBody(ctx, r.Body, userName)
if err != nil {
httpserver.Errorf(w, r, "%s", err)
return
}
r.Body = bb
// Disable the read deadline for the rest of the request body.
if err := rc.SetReadDeadline(time.Time{}); err != nil {
logger.Panicf("BUG: cannot reset read deadline: %s", err)
}
// Acquire concurrency limit for the given user.
if err := ui.beginConcurrencyLimit(ctx); err != nil {
handleConcurrencyLimitError(w, r, err)
return
}
defer ui.endConcurrencyLimit()
// Process the request.
processRequest(w, r, ui)
}
func beginConcurrencyLimit(ctx context.Context) error {
// Limit the concurrency of requests to backends
concurrencyLimitOnce.Do(concurrencyLimitInit)
select {
case concurrencyLimitCh <- struct{}{}:
return nil
if err := ui.beginConcurrencyLimit(); err != nil {
handleConcurrencyLimitError(w, r, err)
<-concurrencyLimitCh
return
}
default:
// The -maxConcurrentRequests are executed. Wait until some of the requests are finished,
// so the current request could be executed.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10078
select {
case concurrencyLimitCh <- struct{}{}:
return nil
case <-ctx.Done():
err := ctx.Err()
if errors.Is(err, context.DeadlineExceeded) {
// The current request couldn't be executed until the request timeout.
concurrentRequestsLimitReached.Inc()
return fmt.Errorf("cannot start executing the request during -maxQueueDuration=%s because -maxConcurrentRequests=%d concurrent requests are executed",
*maxQueueDuration, cap(concurrencyLimitCh))
}
return fmt.Errorf("cannot start executing the request because -maxConcurrentRequests=%d concurrent requests are executed: %w", cap(concurrencyLimitCh), err)
}
concurrentRequestsLimitReached.Inc()
err := fmt.Errorf("cannot serve more than -maxConcurrentRequests=%d concurrent requests", cap(concurrencyLimitCh))
handleConcurrencyLimitError(w, r, err)
return
}
}
func endConcurrencyLimit() {
processRequest(w, r, ui)
ui.endConcurrencyLimit()
<-concurrencyLimitCh
}
func bufferRequestBody(ctx context.Context, r io.ReadCloser, userName string) (io.ReadCloser, error) {
if r == nil {
// This is a GET request with nil reader.
return nil, nil
}
maxBufSize := max(requestBufferSize.IntN(), maxRequestBodySizeToRetry.IntN())
if maxBufSize <= 0 {
return r, nil
}
lr := ioutil.GetLimitedReader(r, int64(maxBufSize))
defer ioutil.PutLimitedReader(lr)
start := time.Now()
buf, err := io.ReadAll(lr)
bufferRequestBodyDuration.UpdateDuration(start)
if err != nil {
if errors.Is(ctx.Err(), context.DeadlineExceeded) {
rejectSlowClientRequests.Inc()
d := time.Since(start)
return nil, &httpserver.ErrorWithStatusCode{
Err: fmt.Errorf("reject request from the user %s because the request body couldn't be read in -maxQueueDuration=%s; read %d bytes in %s",
userName, *maxQueueDuration, len(buf), d.Truncate(time.Second)),
StatusCode: http.StatusBadRequest,
}
}
return nil, &httpserver.ErrorWithStatusCode{
Err: fmt.Errorf("cannot read request body: %w", err),
StatusCode: http.StatusBadRequest,
}
}
bb := newBufferedBody(r, buf, maxBufSize)
return bb, nil
}
func processRequest(w http.ResponseWriter, r *http.Request, ui *UserInfo) {
u := normalizeURL(r.URL)
up, hc := ui.getURLPrefixAndHeaders(u, r.Host, r.Header)
@@ -367,6 +253,9 @@ func processRequest(w http.ResponseWriter, r *http.Request, ui *UserInfo) {
isDefault = true
}
rtb := newReadTrackingBody(r.Body, maxRequestBodySizeToRetry.IntN())
r.Body = rtb
maxAttempts := up.getBackendsCount()
for i := 0; i < maxAttempts; i++ {
bu := up.getBackendURL()
@@ -374,19 +263,18 @@ func processRequest(w http.ResponseWriter, r *http.Request, ui *UserInfo) {
break
}
targetURL := bu.url
// Don't change path and add request_path query param for default route.
if isDefault {
// Don't change path and add request_path query param for default route.
query := targetURL.Query()
query.Set("request_path", u.String())
targetURL.RawQuery = query.Encode()
} else {
// Update path for regular routes.
targetURL = mergeURLs(targetURL, u, up.dropSrcPathPrefixParts, up.mergeQueryArgs)
} else { // Update path for regular routes.
targetURL = mergeURLs(targetURL, u, up.dropSrcPathPrefixParts)
}
wasLocalRetry := false
again:
ok, needLocalRetry := tryProcessingRequest(w, r, targetURL, hc, up.retryStatusCodes, ui, bu)
ok, needLocalRetry := tryProcessingRequest(w, r, targetURL, hc, up.retryStatusCodes, ui)
if needLocalRetry && !wasLocalRetry {
wasLocalRetry = true
goto again
@@ -396,20 +284,17 @@ func processRequest(w http.ResponseWriter, r *http.Request, ui *UserInfo) {
if ok {
return
}
bu.setBroken()
ui.backendErrors.Inc()
}
err := &httpserver.ErrorWithStatusCode{
Err: fmt.Errorf("all the %d backends for the user %q are unavailable", up.getBackendsCount(), ui.name()),
StatusCode: http.StatusBadGateway,
}
httpserver.Errorf(w, r, "%s", err)
ui.requestErrors.Inc()
ui.backendErrors.Inc()
}
func tryProcessingRequest(w http.ResponseWriter, r *http.Request, targetURL *url.URL, hc HeadersConf, retryStatusCodes []int, ui *UserInfo, bu *backendURL) (bool, bool) {
ui.backendRequests.Inc()
func tryProcessingRequest(w http.ResponseWriter, r *http.Request, targetURL *url.URL, hc HeadersConf, retryStatusCodes []int, ui *UserInfo) (bool, bool) {
req := sanitizeRequestHeaders(r)
req.URL = targetURL
@@ -423,19 +308,21 @@ func tryProcessingRequest(w http.ResponseWriter, r *http.Request, targetURL *url
}
}
bb, bbOK := req.Body.(*bufferedBody)
canRetry := !bbOK || bb.canRetry()
rtb, rtbOK := req.Body.(*readTrackingBody)
res, err := ui.rt.RoundTrip(req)
if errors.Is(r.Context().Err(), context.Canceled) {
// Do not retry canceled requests.
clientCanceledRequests.Inc()
return true, false
}
if err != nil {
if !canRetry {
if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) {
// Do not retry canceled or timed out requests
remoteAddr := httpserver.GetQuotedRemoteAddr(r)
requestURI := httpserver.GetRequestURI(r)
logger.Warnf("remoteAddr: %s; requestURI: %s; error when proxying response body from %s: %s", remoteAddr, requestURI, targetURL, err)
if errors.Is(err, context.DeadlineExceeded) {
// Timed out request must be counted as errors, since this usually means that the backend is slow.
ui.backendErrors.Inc()
}
return false, false
}
if !rtbOK || !rtb.canRetry() {
// Request body cannot be re-sent to another backend. Return the error to the client then.
err = &httpserver.ErrorWithStatusCode{
Err: fmt.Errorf("cannot proxy the request to %s: %w", targetURL, err),
@@ -443,51 +330,41 @@ func tryProcessingRequest(w http.ResponseWriter, r *http.Request, targetURL *url
}
httpserver.Errorf(w, r, "%s", err)
ui.backendErrors.Inc()
ui.requestErrors.Inc()
bu.setBroken()
return true, false
}
if netutil.IsTrivialNetworkError(err) {
// Retry request at the same backend on trivial network errors, such as proxy idle timeout misconfiguration or socket close by OS
if bbOK {
bb.resetReader()
}
return false, true
}
// Retry the request at another backend
// Retry the request if its body wasn't read yet. This usually means that the backend isn't reachable.
remoteAddr := httpserver.GetQuotedRemoteAddr(r)
requestURI := httpserver.GetRequestURI(r)
logger.Warnf("remoteAddr: %s; requestURI: %s; request to %s failed: %s, retrying the request at another backend", remoteAddr, requestURI, targetURL, err)
if bbOK {
bb.resetReader()
}
// NOTE: do not use httpserver.GetRequestURI
// it explicitly reads request body, which may fail retries.
logger.Warnf("remoteAddr: %s; requestURI: %s; retrying the request to %s because of response error: %s", remoteAddr, req.URL, targetURL, err)
return false, false
}
if slices.Contains(retryStatusCodes, res.StatusCode) {
if !canRetry {
_ = res.Body.Close()
if !rtbOK || !rtb.canRetry() {
// If we get an error from the retry_status_codes list, but cannot execute retry,
// we consider such a request an error as well.
err := &httpserver.ErrorWithStatusCode{
Err: fmt.Errorf("got response status code=%d from %s, but cannot retry the request at another backend, because the request body has been already consumed",
Err: fmt.Errorf("got response status code=%d from %s, but cannot retry the request on another backend, because the request has been already consumed",
res.StatusCode, targetURL),
StatusCode: http.StatusServiceUnavailable,
}
httpserver.Errorf(w, r, "%s", err)
ui.backendErrors.Inc()
ui.requestErrors.Inc()
return true, false
}
// Retry requests at other backends if it matches retryStatusCodes.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4893
remoteAddr := httpserver.GetQuotedRemoteAddr(r)
requestURI := httpserver.GetRequestURI(r)
logger.Warnf("remoteAddr: %s; requestURI: %s; request to %s failed, retrying the request at another backend because response status code=%d belongs to retry_status_codes=%d",
remoteAddr, requestURI, targetURL, res.StatusCode, retryStatusCodes)
if bbOK {
bb.resetReader()
}
// NOTE: do not use httpserver.GetRequestURI
// it explicitly reads request body, which may fail retries.
logger.Warnf("remoteAddr: %s; requestURI: %s; retrying the request to %s because response status code=%d belongs to retry_status_codes=%d",
remoteAddr, req.URL, targetURL, res.StatusCode, retryStatusCodes)
return false, false
}
removeHopHeaders(res.Header)
@@ -495,61 +372,20 @@ func tryProcessingRequest(w http.ResponseWriter, r *http.Request, targetURL *url
updateHeadersByConfig(w.Header(), hc.ResponseHeaders)
w.WriteHeader(res.StatusCode)
err = copyStreamToClient(w, res.Body)
copyBuf := copyBufPool.Get()
copyBuf.B = bytesutil.ResizeNoCopyNoOverallocate(copyBuf.B, 16*1024)
_, err = io.CopyBuffer(w, res.Body, copyBuf.B)
copyBufPool.Put(copyBuf)
_ = res.Body.Close()
if errors.Is(r.Context().Err(), context.Canceled) {
// Do not retry canceled requests.
clientCanceledRequests.Inc()
return true, false
}
if err != nil && !netutil.IsTrivialNetworkError(err) {
remoteAddr := httpserver.GetQuotedRemoteAddr(r)
requestURI := httpserver.GetRequestURI(r)
logger.Warnf("remoteAddr: %s; requestURI: %s; error when proxying response body from %s: %s", remoteAddr, requestURI, targetURL, err)
ui.requestErrors.Inc()
return true, false
}
return true, false
}
func copyStreamToClient(client io.Writer, backend io.Reader) error {
copyBuf := copyBufPool.Get()
copyBuf.B = bytesutil.ResizeNoCopyNoOverallocate(copyBuf.B, 16*1024)
defer copyBufPool.Put(copyBuf)
buf := copyBuf.B
flusher, ok := client.(http.Flusher)
if !ok {
logger.Panicf("BUG: client must implement net/http.Flusher interface; got %T", client)
}
for {
n, backendErr := backend.Read(buf)
if n > 0 {
data := buf[:n]
n, clientErr := client.Write(data)
if clientErr != nil {
return fmt.Errorf("cannot write data to client: %w", clientErr)
}
if n != len(data) {
logger.Panicf("BUG: unexpected number of bytes written returned by client.Write; got %d; want %d", n, len(data))
}
// Flush the read data from the backend to the client as fast as possible
// in order to reduce delays for data propagation.
// See https://github.com/VictoriaMetrics/VictoriaLogs/issues/667
flusher.Flush()
}
if backendErr != nil {
if backendErr == io.EOF {
return nil
}
return fmt.Errorf("cannot read data from backend: %w", backendErr)
}
}
}
var copyBufPool bytesutil.ByteBufferPool
func copyHeader(dst, src http.Header) {
@@ -636,10 +472,6 @@ var (
configReloadRequests = metrics.NewCounter(`vmauth_http_requests_total{path="/-/reload"}`)
invalidAuthTokenRequests = metrics.NewCounter(`vmauth_http_request_errors_total{reason="invalid_auth_token"}`)
missingRouteRequests = metrics.NewCounter(`vmauth_http_request_errors_total{reason="missing_route"}`)
clientCanceledRequests = metrics.NewCounter(`vmauth_http_request_errors_total{reason="client_canceled"}`)
rejectSlowClientRequests = metrics.NewCounter(`vmauth_http_request_errors_total{reason="reject_slow_client"}`)
bufferRequestBodyDuration = metrics.NewSummary(`vmauth_buffer_request_body_duration_seconds`)
)
func newRoundTripper(caFileOpt, certFileOpt, keyFileOpt, serverNameOpt string, insecureSkipVerifyP *bool) (http.RoundTripper, error) {
@@ -723,13 +555,6 @@ func handleMissingAuthorizationError(w http.ResponseWriter) {
}
func handleConcurrencyLimitError(w http.ResponseWriter, r *http.Request, err error) {
if errors.Is(r.Context().Err(), context.Canceled) {
// Do not return any response for the request canceled by the client,
// since the connection to the client is already closed.
clientCanceledRequests.Inc()
return
}
w.Header().Add("Retry-After", "10")
err = &httpserver.ErrorWithStatusCode{
Err: err,
@@ -738,76 +563,120 @@ func handleConcurrencyLimitError(w http.ResponseWriter, r *http.Request, err err
httpserver.Errorf(w, r, "%s", err)
}
// bufferedBody serves two purposes:
// 1. Enables request retries when the body size does not exceed maxBodySize
// by fully buffering the body in memory.
// 2. Prevents slow clients from reducing effective server capacity by
// buffering the request body before acquiring a per-user concurrency slot.
//
// See bufferRequestBody for details on how bufferedBody is used.
type bufferedBody struct {
// r contains reader for reading the data after buf is read.
// readTrackingBody must be obtained via getReadTrackingBody()
type readTrackingBody struct {
// maxBodySize is the maximum body size to cache in buf.
//
// r is nil if buf contains all the data.
// Bigger bodies cannot be retried.
maxBodySize int
// r contains reader for initial data reading
r io.ReadCloser
// buf contains the initial buffer read from r.
// buf is a buffer for data read from r. Buf size is limited by maxBodySize.
// If more than maxBodySize is read from r, then cannotRetry is set to true.
buf []byte
// bufOffset is the offset at buf for already read bytes.
bufOffset int
// readBuf points to the cached data at buf, which must be read in the next call to Read().
readBuf []byte
// cannotRetry is set to true after Close() call on non-nil r.
// cannotRetry is set to true when more than maxBodySize bytes are read from r.
// In this case the read data cannot fit buf, so it cannot be re-read from buf.
cannotRetry bool
// bufComplete is set to true when buf contains complete request body read from r.
bufComplete bool
}
func newBufferedBody(r io.ReadCloser, buf []byte, maxBufSize int) *bufferedBody {
// Do not use sync.Pool here, since http.RoundTrip may still use request body after return.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/8051
if len(buf) < maxBufSize {
// Read the full request body into buf.
r = nil
func newReadTrackingBody(r io.ReadCloser, maxBodySize int) *readTrackingBody {
// do not use sync.Pool there
// since http.RoundTrip may still use request body after return
// See this issue for details https://github.com/VictoriaMetrics/VictoriaMetrics/issues/8051
rtb := &readTrackingBody{}
if maxBodySize < 0 {
maxBodySize = 0
}
rtb.maxBodySize = maxBodySize
return &bufferedBody{
r: r,
buf: buf,
if r == nil {
// This is GET request without request body
r = (*zeroReader)(nil)
}
rtb.r = r
return rtb
}
// Read implements io.Reader interface.
func (bb *bufferedBody) Read(p []byte) (int, error) {
if bb.cannotRetry {
return 0, fmt.Errorf("cannot read already closed body")
}
if bb.bufOffset < len(bb.buf) {
n := copy(p, bb.buf[bb.bufOffset:])
bb.bufOffset += n
return n, nil
}
if bb.r == nil {
return 0, io.EOF
}
return bb.r.Read(p)
}
type zeroReader struct{}
func (bb *bufferedBody) canRetry() bool {
return bb.r == nil
func (r *zeroReader) Read(_ []byte) (int, error) {
return 0, io.EOF
}
// Close implements io.Closer interface.
func (bb *bufferedBody) Close() error {
bb.resetReader()
if bb.r != nil {
bb.cannotRetry = true
return bb.r.Close()
}
func (r *zeroReader) Close() error {
return nil
}
func (bb *bufferedBody) resetReader() {
bb.bufOffset = 0
// Read implements io.Reader interface.
func (rtb *readTrackingBody) Read(p []byte) (int, error) {
if len(rtb.readBuf) > 0 {
n := copy(p, rtb.readBuf)
rtb.readBuf = rtb.readBuf[n:]
return n, nil
}
if rtb.r == nil {
if rtb.bufComplete {
return 0, io.EOF
}
return 0, fmt.Errorf("cannot read client request body after closing client reader")
}
n, err := rtb.r.Read(p)
if rtb.cannotRetry {
return n, err
}
if len(rtb.buf)+n > rtb.maxBodySize {
rtb.cannotRetry = true
return n, err
}
rtb.buf = append(rtb.buf, p[:n]...)
if err == io.EOF {
rtb.bufComplete = true
}
return n, err
}
func (rtb *readTrackingBody) canRetry() bool {
if rtb.cannotRetry {
return false
}
if rtb.bufComplete {
return true
}
return rtb.r != nil
}
// Close implements io.Closer interface.
func (rtb *readTrackingBody) Close() error {
if !rtb.cannotRetry {
rtb.readBuf = rtb.buf
} else {
rtb.readBuf = nil
}
// Close rtb.r only if the request body is completely read or if it is too big.
// http.Roundtrip performs body.Close call even without any Read calls,
// so this hack allows us to reuse request body.
if rtb.bufComplete || rtb.cannotRetry {
if rtb.r == nil {
return nil
}
err := rtb.r.Close()
rtb.r = nil
return err
}
return nil
}
func debugInfo(u *url.URL, r *http.Request) string {

View File

@@ -2,7 +2,6 @@ package main
import (
"bytes"
"context"
"fmt"
"io"
"net"
@@ -11,7 +10,6 @@ import (
"strings"
"sync/atomic"
"testing"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil"
)
@@ -516,11 +514,6 @@ func (w *fakeResponseWriter) getResponse() string {
return w.bb.String()
}
// Flush implements net/http.Flusher
func (w *fakeResponseWriter) Flush() {
// Nothing to do.
}
func (w *fakeResponseWriter) Header() http.Header {
if w.h == nil {
w.h = http.Header{}
@@ -548,300 +541,28 @@ func (w *fakeResponseWriter) WriteHeader(statusCode int) {
}
}
// This is needed for net/http.ResponseController
func (w *fakeResponseWriter) SetReadDeadline(deadline time.Time) error {
return nil
}
func TestBufferRequestBody_Success(t *testing.T) {
defaultRequestBufferSize := requestBufferSize.String()
defer func() {
if err := requestBufferSize.Set(defaultRequestBufferSize); err != nil {
t.Fatalf("cannot reset requestBufferSize: %s", err)
}
}()
defaultMaxRequestBodySizeToRetry := maxRequestBodySizeToRetry.String()
defer func() {
if err := maxRequestBodySizeToRetry.Set(defaultMaxRequestBodySizeToRetry); err != nil {
t.Fatalf("cannot reset maxRequestBodySizeToRetry: %s", err)
}
}()
f := func(body *bytes.Buffer, requestBufferSizeFlag, maxRequestBodySizeToRetryFlag string) {
t.Helper()
expectedResponse := "statusCode=200"
if body.Len() > 0 {
expectedResponse += "\n" + body.String()
}
if err := requestBufferSize.Set(requestBufferSizeFlag); err != nil {
t.Fatalf("cannot set requestBufferSize: %s", err)
}
if err := maxRequestBodySizeToRetry.Set(maxRequestBodySizeToRetryFlag); err != nil {
t.Fatalf("cannot set maxRequestBodySizeToRetry: %s", err)
}
var backendCalled bool
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
backendCalled = true
b, err := io.ReadAll(r.Body)
if err != nil {
http.Error(w, fmt.Sprintf("cannot read body: %s", err), http.StatusBadRequest)
return
}
if _, err := w.Write(b); err != nil {
http.Error(w, fmt.Sprintf("cannot write body: %s", err), http.StatusInternalServerError)
return
}
}))
defer ts.Close()
// regular url_prefix
cfgStr := strings.ReplaceAll(`
unauthorized_user:
url_prefix: {BACKEND}/foo`, "{BACKEND}", ts.URL)
cfgOrigP := authConfigData.Load()
if _, err := reloadAuthConfigData([]byte(cfgStr)); err != nil {
t.Fatalf("cannot load config data: %s", err)
}
defer func() {
cfgOrig := []byte("unauthorized_user:\n url_prefix: http://foo/bar")
if cfgOrigP != nil {
cfgOrig = *cfgOrigP
}
_, err := reloadAuthConfigData(cfgOrig)
if err != nil {
t.Fatalf("cannot load the original config: %s", err)
}
}()
r, err := http.NewRequest(http.MethodPost, `http://some-host.com`, body)
if err != nil {
t.Fatalf("cannot initialize http request: %s", err)
}
w := &fakeResponseWriter{}
if !requestHandlerWithInternalRoutes(w, r) {
t.Fatalf("unexpected false is returned from requestHandler")
}
response := w.getResponse()
response = strings.ReplaceAll(response, "\r\n", "\n")
response = strings.TrimSpace(response)
if response != expectedResponse {
t.Fatalf("unexpected response\ngot\n%s\nwant\n%s", response, expectedResponse)
}
if !backendCalled {
t.Fatalf("backend is not called")
}
}
// no body, no buffering, no retry
f(bytes.NewBuffer(nil), "0", "0")
// no body, buffering on, no retry
f(bytes.NewBuffer(nil), "100", "0")
// no body, no buffering, retry on
f(bytes.NewBuffer(nil), "0", "100")
// no body, buffering on, retry on
f(bytes.NewBuffer(nil), "100", "100")
// body smaller than buffer, retry max on
f(bytes.NewBufferString(strings.Repeat("abcdf", 100)), "101", "101")
// body smaller than buffer
f(bytes.NewBufferString(strings.Repeat("abcdf", 100)), "501", "0")
// body same size as buffer
f(bytes.NewBufferString(strings.Repeat("abcdf", 100)), "500", "0")
// body bigger than a buffer
f(bytes.NewBufferString(strings.Repeat("abcdf", 100)), "499", "0")
// body bigger than tmpBuf 8KiB used in buffering
f(bytes.NewBufferString(strings.Repeat("a", 32*1024)), "16384", "")
f(bytes.NewBufferString(strings.Repeat("a", 32*1024)), "16385", "")
f(bytes.NewBufferString(strings.Repeat("a", 32*1024)), "16383", "")
}
func TestBufferRequestBody_Failure(t *testing.T) {
defaultRequestBufferSize := requestBufferSize.String()
defer func() {
if err := requestBufferSize.Set(defaultRequestBufferSize); err != nil {
t.Fatalf("cannot reset requestBufferSize: %s", err)
}
}()
defaultMaxRequestBodySizeToRetry := maxRequestBodySizeToRetry.String()
defer func() {
if err := maxRequestBodySizeToRetry.Set(defaultMaxRequestBodySizeToRetry); err != nil {
t.Fatalf("cannot reset maxRequestBodySizeToRetry: %s", err)
}
}()
defaultMaxQueueDuration := *maxQueueDuration
defer func() {
*maxQueueDuration = defaultMaxQueueDuration
}()
f := func(body *mockBody, expectedResponse string) {
t.Helper()
if err := maxRequestBodySizeToRetry.Set("0"); err != nil {
t.Fatalf("cannot set maxRequestBodySizeToRetry: %s", err)
}
if err := requestBufferSize.Set("2048"); err != nil {
t.Fatalf("cannot set requestBufferSize: %s", err)
}
*maxQueueDuration = 100 * time.Millisecond
var backendCalled bool
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
backendCalled = true
b, err := io.ReadAll(r.Body)
if err != nil {
http.Error(w, fmt.Sprintf("cannot read body: %s", err), http.StatusBadRequest)
return
}
if _, err := w.Write(b); err != nil {
http.Error(w, fmt.Sprintf("cannot write body: %s", err), http.StatusInternalServerError)
return
}
}))
defer ts.Close()
// regular url_prefix
cfgStr := strings.ReplaceAll(`
unauthorized_user:
url_prefix: {BACKEND}/foo`, "{BACKEND}", ts.URL)
cfgOrigP := authConfigData.Load()
if _, err := reloadAuthConfigData([]byte(cfgStr)); err != nil {
t.Fatalf("cannot load config data: %s", err)
}
defer func() {
cfgOrig := []byte("unauthorized_user:\n url_prefix: http://foo/bar")
if cfgOrigP != nil {
cfgOrig = *cfgOrigP
}
_, err := reloadAuthConfigData(cfgOrig)
if err != nil {
t.Fatalf("cannot load the original config: %s", err)
}
}()
r, err := http.NewRequest(http.MethodPost, `http://some-host.com`, body)
if err != nil {
t.Fatalf("cannot initialize http request: %s", err)
}
w := &fakeResponseWriter{}
if !requestHandlerWithInternalRoutes(w, r) {
t.Fatalf("unexpected false is returned from requestHandler")
}
response := w.getResponse()
response = strings.ReplaceAll(response, "\r\n", "\n")
response = strings.TrimSpace(response)
if response != expectedResponse {
t.Fatalf("unexpected response\ngot\n%s\nwant\n%s", response, expectedResponse)
}
if backendCalled {
t.Fatalf("backend is called")
}
}
// an error at the beginning of reading
f(&mockBody{err: fmt.Errorf("an error")}, `statusCode=400
cannot read request body: an error`)
// an error after reading 1024 bytes, buffer size is 2048 bytes
f(&mockBody{head: make([]byte, 1024), err: fmt.Errorf("an error")}, `statusCode=400
cannot read request body: an error`)
}
type mockBody struct {
head []byte
err error
tail []byte
}
func (r *mockBody) Read(p []byte) (n int, err error) {
if len(r.head) > 0 {
n = copy(p, r.head)
r.head = r.head[n:]
return n, nil
}
if r.err != nil {
return 0, r.err
}
if len(r.tail) > 0 {
n = copy(p, r.tail)
r.tail = r.tail[n:]
return n, nil
}
return 0, io.EOF
}
func TestBufferedBody_RetrySuccess(t *testing.T) {
func TestReadTrackingBody_RetrySuccess(t *testing.T) {
f := func(s string, maxBodySize int) {
t.Helper()
defaultRequestBufferSize := requestBufferSize.String()
defer func() {
if err := requestBufferSize.Set(defaultRequestBufferSize); err != nil {
t.Fatalf("cannot reset requestBufferSize: %s", err)
}
}()
if err := requestBufferSize.Set(fmt.Sprintf("%d", maxBodySize)); err != nil {
t.Fatalf("cannot set requestBufferSize: %s", err)
}
rtb := newReadTrackingBody(io.NopCloser(bytes.NewBufferString(s)), maxBodySize)
defaultMaxRequestBodySizeToRetry := maxRequestBodySizeToRetry.String()
defer func() {
if err := maxRequestBodySizeToRetry.Set(defaultMaxRequestBodySizeToRetry); err != nil {
t.Fatalf("cannot reset maxRequestBodySizeToRetry: %s", err)
}
}()
if err := maxRequestBodySizeToRetry.Set("0"); err != nil {
t.Fatalf("cannot set maxRequestBodySizeToRetry: %s", err)
}
ctx := context.Background()
rb, err := bufferRequestBody(ctx, io.NopCloser(bytes.NewBufferString(s)), "foo")
if err != nil {
t.Fatalf("unexpected error: %s", err)
}
bb, ok := rb.(*bufferedBody)
canRetry := !ok || bb.canRetry()
if !canRetry {
if !rtb.canRetry() {
t.Fatalf("canRetry() must return true before reading anything")
}
for i := 0; i < 5; i++ {
data, err := io.ReadAll(rb)
data, err := io.ReadAll(rtb)
if err != nil {
t.Fatalf("unexpected error when reading all the data at iteration %d: %s", i, err)
}
if string(data) != s {
t.Fatalf("unexpected data read at iteration %d\ngot\n%s\nwant\n%s", i, data, s)
}
if err := rb.Close(); err != nil {
t.Fatalf("unexpected error when closing bufferedBody at iteration %d: %s", i, err)
if err := rtb.Close(); err != nil {
t.Fatalf("unexpected error when closing readTrackingBody at iteration %d: %s", i, err)
}
if !rtb.canRetry() {
t.Fatalf("canRetry() must return true at iteration %d", i)
}
}
}
@@ -851,48 +572,19 @@ func TestBufferedBody_RetrySuccess(t *testing.T) {
f("", 100)
f("foo", 100)
f("foobar", 100)
f(newTestString(1000), 1001)
f(newTestString(1000), 1000)
}
func TestBufferedBody_RetrySuccessPartialRead(t *testing.T) {
func TestReadTrackingBody_RetrySuccessPartialRead(t *testing.T) {
f := func(s string, maxBodySize int) {
t.Helper()
// Check the case with partial read
defaultRequestBufferSize := requestBufferSize.String()
defer func() {
if err := requestBufferSize.Set(defaultRequestBufferSize); err != nil {
t.Fatalf("cannot reset requestBufferSize: %s", err)
}
}()
if err := requestBufferSize.Set(fmt.Sprintf("%d", maxBodySize)); err != nil {
t.Fatalf("cannot set requestBufferSize: %s", err)
}
rtb := newReadTrackingBody(io.NopCloser(bytes.NewBufferString(s)), maxBodySize)
defaultMaxRequestBodySizeToRetry := maxRequestBodySizeToRetry.String()
defer func() {
if err := maxRequestBodySizeToRetry.Set(defaultMaxRequestBodySizeToRetry); err != nil {
t.Fatalf("cannot reset maxRequestBodySizeToRetry: %s", err)
}
}()
if err := maxRequestBodySizeToRetry.Set("0"); err != nil {
t.Fatalf("cannot set maxRequestBodySizeToRetry: %s", err)
}
ctx := context.Background()
rb, err := bufferRequestBody(ctx, io.NopCloser(bytes.NewBufferString(s)), "foo")
if err != nil {
t.Fatalf("unexpected error: %s", err)
}
bb, ok := rb.(*bufferedBody)
canRetry := !ok || bb.canRetry()
if !canRetry {
t.Fatalf("canRetry must return true")
}
for i := 0; i < len(s); i++ {
buf := make([]byte, i)
n, err := io.ReadFull(rb, buf)
n, err := io.ReadFull(rtb, buf)
if err != nil {
t.Fatalf("unexpected error when reading %d bytes: %s", i, err)
}
@@ -902,20 +594,26 @@ func TestBufferedBody_RetrySuccessPartialRead(t *testing.T) {
if string(buf) != s[:i] {
t.Fatalf("unexpected data read with the length %d\ngot\n%s\nwant\n%s", i, buf, s[:i])
}
if err := rb.Close(); err != nil {
if err := rtb.Close(); err != nil {
t.Fatalf("unexpected error when closing reader after reading %d bytes", i)
}
if !rtb.canRetry() {
t.Fatalf("canRetry() must return true after closing the reader after reading %d bytes", i)
}
}
data, err := io.ReadAll(rb)
data, err := io.ReadAll(rtb)
if err != nil {
t.Fatalf("unexpected error when reading all the data: %s", err)
}
if string(data) != s {
t.Fatalf("unexpected data read\ngot\n%s\nwant\n%s", data, s)
}
if err := rb.Close(); err != nil {
t.Fatalf("unexpected error when closing bufferedBody: %s", err)
if err := rtb.Close(); err != nil {
t.Fatalf("unexpected error when closing readTrackingBody: %s", err)
}
if !rtb.canRetry() {
t.Fatalf("canRetry() must return true after closing the reader after reading all the input")
}
}
@@ -924,53 +622,30 @@ func TestBufferedBody_RetrySuccessPartialRead(t *testing.T) {
f("", 100)
f("foo", 100)
f("foobar", 100)
f(newTestString(1000), 1001)
f(newTestString(1000), 1000)
}
func TestBufferedBody_RetryFailureTooBigBody(t *testing.T) {
func TestReadTrackingBody_RetryFailureTooBigBody(t *testing.T) {
f := func(s string, maxBodySize int) {
t.Helper()
defaultRequestBufferSize := requestBufferSize.String()
defer func() {
if err := requestBufferSize.Set(defaultRequestBufferSize); err != nil {
t.Fatalf("cannot reset requestBufferSize: %s", err)
}
}()
if err := requestBufferSize.Set("0"); err != nil {
t.Fatalf("cannot set requestBufferSize: %s", err)
}
rtb := newReadTrackingBody(io.NopCloser(bytes.NewBufferString(s)), maxBodySize)
defaultMaxRequestBodySizeToRetry := maxRequestBodySizeToRetry.String()
defer func() {
if err := maxRequestBodySizeToRetry.Set(defaultMaxRequestBodySizeToRetry); err != nil {
t.Fatalf("cannot reset maxRequestBodySizeToRetry: %s", err)
}
}()
if err := maxRequestBodySizeToRetry.Set(fmt.Sprintf("%d", maxBodySize)); err != nil {
t.Fatalf("cannot set maxRequestBodySizeToRetry: %s", err)
}
ctx := context.Background()
rb, err := bufferRequestBody(ctx, io.NopCloser(bytes.NewBufferString(s)), "foo")
if err != nil {
t.Fatalf("unexpected error: %s", err)
}
bb, ok := rb.(*bufferedBody)
canRetry := !ok || bb.canRetry()
if canRetry {
t.Fatalf("canRetry() must return false because of too big request body")
if !rtb.canRetry() {
t.Fatalf("canRetry() must return true before reading anything")
}
buf := make([]byte, 1)
n, err := io.ReadFull(rb, buf)
n, err := io.ReadFull(rtb, buf)
if err != nil {
t.Fatalf("unexpected error when reading a single byte: %s", err)
}
if n != 1 {
t.Fatalf("unexpected number of bytes read; got %d; want 1", n)
}
data, err := io.ReadAll(rb)
if !rtb.canRetry() {
t.Fatalf("canRetry() must return true after reading one byte")
}
data, err := io.ReadAll(rtb)
if err != nil {
t.Fatalf("unexpected error when reading all the data: %s", err)
}
@@ -978,11 +653,14 @@ func TestBufferedBody_RetryFailureTooBigBody(t *testing.T) {
if dataRead != s {
t.Fatalf("unexpected data read\ngot\n%s\nwant\n%s", dataRead, s)
}
if err := rb.Close(); err != nil {
t.Fatalf("unexpected error when closing bufferedBody: %s", err)
if err := rtb.Close(); err != nil {
t.Fatalf("unexpected error when closing readTrackingBody: %s", err)
}
if rtb.canRetry() {
t.Fatalf("canRetry() must return false after closing the reader")
}
data, err = io.ReadAll(rb)
data, err = io.ReadAll(rtb)
if err == nil {
t.Fatalf("expecting non-nil error")
}
@@ -996,48 +674,35 @@ func TestBufferedBody_RetryFailureTooBigBody(t *testing.T) {
f(newTestString(2*maxBodySize), maxBodySize)
}
func TestBufferedBody_RetryFailureZeroOrNegativeMaxBodySize(t *testing.T) {
func TestReadTrackingBody_RetryFailureZeroOrNegativeMaxBodySize(t *testing.T) {
f := func(s string, maxBodySize int) {
t.Helper()
defaultRequestBufferSize := requestBufferSize.String()
defer func() {
if err := requestBufferSize.Set(defaultRequestBufferSize); err != nil {
t.Fatalf("cannot reset requestBufferSize: %s", err)
}
}()
if err := requestBufferSize.Set(fmt.Sprintf("%d", maxBodySize)); err != nil {
t.Fatalf("cannot set requestBufferSize: %s", err)
}
rtb := newReadTrackingBody(io.NopCloser(bytes.NewBufferString(s)), maxBodySize)
ctx := context.Background()
rb, err := bufferRequestBody(ctx, io.NopCloser(bytes.NewBufferString(s)), "foo")
if err != nil {
t.Fatalf("unexpected error: %s", err)
}
bb, ok := rb.(*bufferedBody)
canRetry := !ok || bb.canRetry()
if !canRetry {
if !rtb.canRetry() {
t.Fatalf("canRetry() must return true before reading anything")
}
data, err := io.ReadAll(rb)
data, err := io.ReadAll(rtb)
if err != nil {
t.Fatalf("unexpected error when reading all the data: %s", err)
}
if string(data) != s {
t.Fatalf("unexpected data read\ngot\n%s\nwant\n%s", data, s)
}
if err := rb.Close(); err != nil {
t.Fatalf("unexpected error when closing bufferedBody: %s", err)
if err := rtb.Close(); err != nil {
t.Fatalf("unexpected error when closing readTrackingBody: %s", err)
}
data, err = io.ReadAll(rb)
if err != nil {
t.Fatalf("unexpected error in io.ReadAll: %s", err)
if rtb.canRetry() {
t.Fatalf("canRetry() must return false after closing the reader")
}
if string(data) != s {
t.Fatalf("unexpected data read\ngot\n%s\nwant\n%s", data, s)
data, err = io.ReadAll(rtb)
if err == nil {
t.Fatalf("expecting non-nil error")
}
if len(data) != 0 {
t.Fatalf("unexpected non-empty data read: %q", data)
}
}

View File

@@ -8,42 +8,29 @@ import (
"strings"
)
func mergeURLs(uiURL, requestURI *url.URL, dropSrcPathPrefixParts int, mergeQueryArgs []string) *url.URL {
func mergeURLs(uiURL, requestURI *url.URL, dropSrcPathPrefixParts int) *url.URL {
targetURL := *uiURL
srcPath := dropPrefixParts(requestURI.Path, dropSrcPathPrefixParts)
if strings.HasPrefix(srcPath, "/") {
targetURL.Path = strings.TrimSuffix(targetURL.Path, "/")
}
targetURL.Path += srcPath
requestParams := requestURI.Query()
// fast path
if len(requestParams) == 0 {
return &targetURL
}
// Merge client query args with backend query args
targetParams := targetURL.Query()
uiParams := url.Values{}
// Copy all the target query args
for k, v := range targetParams {
for i := range v {
uiParams.Add(k, v[i])
}
}
// Copy the client query args if they do not clash with target args.
// merge query parameters from requests.
uiParams := targetURL.Query()
for k, v := range requestParams {
if targetParams.Has(k) && !slices.Contains(mergeQueryArgs, k) {
// Skip clashed client query params for security reasons
// skip clashed query params from original request
if exist := uiParams.Get(k); len(exist) > 0 {
continue
}
for i := range v {
uiParams.Add(k, v[i])
}
}
targetURL.RawQuery = uiParams.Encode()
return &targetURL
}

View File

@@ -101,7 +101,7 @@ func TestCreateTargetURLSuccess(t *testing.T) {
return
}
bu := up.getBackendURL()
target := mergeURLs(bu.url, u, up.dropSrcPathPrefixParts, up.mergeQueryArgs)
target := mergeURLs(bu.url, u, up.dropSrcPathPrefixParts)
bu.put()
gotTarget := target.String()
@@ -352,7 +352,7 @@ func TestUserInfoGetBackendURL_SRV(t *testing.T) {
return
}
bu := up.getBackendURL()
target := mergeURLs(bu.url, u, up.dropSrcPathPrefixParts, up.mergeQueryArgs)
target := mergeURLs(bu.url, u, up.dropSrcPathPrefixParts)
bu.put()
gotTarget := target.String()
@@ -528,43 +528,3 @@ func (r *fakeResolver) LookupIPAddr(_ context.Context, host string) ([]net.IPAdd
func (r *fakeResolver) LookupMX(_ context.Context, _ string) ([]*net.MX, error) {
return nil, nil
}
func TestMergeURLs(t *testing.T) {
f := func(clientURL, backendURL string, dropSrcPathPrefixParts int, mergeQueryArgs []string, resultURLExpected string) {
t.Helper()
cu, err := url.Parse(clientURL)
if err != nil {
t.Fatalf("cannot parse client url %q: %s", clientURL, err)
}
cu = normalizeURL(cu)
bu, err := url.Parse(backendURL)
if err != nil {
t.Fatalf("cannot parse backend url %q: %s", backendURL, err)
}
ru := mergeURLs(bu, cu, dropSrcPathPrefixParts, mergeQueryArgs)
resultURL := ru.String()
if resultURL != resultURLExpected {
t.Fatalf("unexpected resultURL\ngot\n%s\nwant\n%s", resultURL, resultURLExpected)
}
}
f("http://foo:1234", "https://backend/foo/bar?baz=abc&de", 0, nil, "https://backend/foo/bar?baz=abc&de")
f("http://foo:1234", "https://backend/foo/bar/?baz=abc&de", 0, nil, "https://backend/foo/bar/?baz=abc&de")
f("https://foo:1234/", "https://backend/foo/bar?baz=abc&de", 0, nil, "https://backend/foo/bar?baz=abc&de")
f("https://foo:1234/", "http://backend:8888/foo/bar/?baz=abc&de", 0, nil, "http://backend:8888/foo/bar/?baz=abc&de")
// merge paths
f("http://foo:1234/x/y?z=xxx", "https://backend/foo/bar?baz=abc&de", 0, nil, "https://backend/foo/bar/x/y?baz=abc&de=&z=xxx")
// "hacky" url
f("http://foo:1234/../../x/../y?z=xxx", "https://backend/foo/bar?baz=abc&de", 0, nil, "https://backend/foo/bar/y?baz=abc&de=&z=xxx")
// make sure that the client args are overridden by server args by default
f("http://foo:1234/x/y?password=hack&qqq=www", "https://backend/foo/bar?password=abc", 0, nil, "https://backend/foo/bar/x/y?password=abc&qqq=www")
// allow overriding the selected query args
f("http://foo:1234/x/y?baz=xxx&qqq=www", "https://backend/foo/bar?baz=abc", 0, []string{"baz"}, "https://backend/foo/bar/x/y?baz=abc&baz=xxx&qqq=www")
}

View File

@@ -31,9 +31,6 @@ vmbackup-linux-ppc64le-prod:
vmbackup-linux-386-prod:
APP_NAME=vmbackup EXTRA_GO_BUILD_TAGS=$(VMBACKUP_GO_BUILD_TAGS) $(MAKE) app-via-docker-linux-386
vmbackup-linux-s390x-prod:
APP_NAME=vmbackup EXTRA_GO_BUILD_TAGS=$(VMBACKUP_GO_BUILD_TAGS) $(MAKE) app-via-docker-linux-s390x
vmbackup-darwin-amd64-prod:
APP_NAME=vmbackup EXTRA_GO_BUILD_TAGS=$(VMBACKUP_GO_BUILD_TAGS) $(MAKE) app-via-docker-darwin-amd64

View File

@@ -115,7 +115,7 @@ func main() {
if err != nil {
logger.Fatalf("cannot create backup: %s", err)
}
pushmetrics.StopAndPush()
pushmetrics.Stop()
startTime := time.Now()
logger.Infof("gracefully shutting down http server for metrics at %q", listenAddrs)
@@ -212,7 +212,7 @@ func newSrcFS() (*fslocal.FS, error) {
}
func newDstFS(ctx context.Context) (common.RemoteFS, error) {
fs, err := actions.NewRemoteFS(ctx, *dst, nil)
fs, err := actions.NewRemoteFS(ctx, *dst)
if err != nil {
return nil, fmt.Errorf("cannot parse `-dst`=%q: %w", *dst, err)
}
@@ -255,7 +255,7 @@ func newOriginFS(ctx context.Context) (common.OriginFS, error) {
if len(*origin) == 0 {
return &fsnil.FS{}, nil
}
fs, err := actions.NewRemoteFS(ctx, *origin, nil)
fs, err := actions.NewRemoteFS(ctx, *origin)
if err != nil {
return nil, fmt.Errorf("cannot parse `-origin`=%q: %w", *origin, err)
}
@@ -266,7 +266,7 @@ func newRemoteOriginFS(ctx context.Context) (common.RemoteFS, error) {
if len(*origin) == 0 {
return nil, fmt.Errorf("-origin cannot be empty when -snapshotName and -snapshot.createURL aren't set")
}
fs, err := actions.NewRemoteFS(ctx, *origin, nil)
fs, err := actions.NewRemoteFS(ctx, *origin)
if err != nil {
return nil, fmt.Errorf("cannot parse `-origin`=%q: %w", *origin, err)
}

View File

@@ -27,9 +27,6 @@ vmctl-linux-ppc64le-prod:
vmctl-linux-386-prod:
APP_NAME=vmctl $(MAKE) app-via-docker-linux-386
vmctl-linux-s390x-prod:
APP_NAME=vmctl $(MAKE) app-via-docker-linux-s390x
vmctl-darwin-amd64-prod:
APP_NAME=vmctl $(MAKE) app-via-docker-darwin-amd64

View File

@@ -7,8 +7,6 @@ import (
"math"
"time"
"github.com/VictoriaMetrics/metrics"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
)
@@ -57,7 +55,6 @@ func (b *Backoff) Retry(ctx context.Context, cb retryableFunc) (uint64, error) {
return attempt, err // fail fast if not recoverable
}
attempt++
retriesTotal.Inc()
backoff := float64(b.minDuration) * math.Pow(b.factor, float64(i))
dur := time.Duration(backoff)
logger.Errorf("got error: %s on attempt: %d; will retry in %v", err, attempt, dur)
@@ -77,7 +74,3 @@ func (b *Backoff) Retry(ctx context.Context, cb retryableFunc) (uint64, error) {
}
return attempt, fmt.Errorf("execution failed after %d retry attempts", b.retries)
}
var (
retriesTotal = metrics.NewCounter(`vmctl_backoff_retries_total`)
)

View File

@@ -14,12 +14,6 @@ const (
globalSilent = "s"
globalVerbose = "verbose"
globalDisableProgressBar = "disable-progress-bar"
globalPushMetricsURL = "pushmetrics.url"
globalPushMetricsInterval = "pushmetrics.interval"
globalPushExtraLabels = "pushmetrics.extraLabel"
globalPushHeaders = "pushmetrics.header"
globalPushDisableCompression = "pushmetrics.disableCompression"
)
var (
@@ -39,29 +33,6 @@ var (
Value: false,
Usage: "Whether to disable progress bar during the import.",
},
&cli.StringSliceFlag{
Name: globalPushMetricsURL,
Usage: "Optional URL to push metrics. See https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#push-metrics",
},
&cli.DurationFlag{
Name: globalPushMetricsInterval,
Value: 10 * time.Second,
Usage: "Interval for pushing metrics to every -pushmetrics.url",
},
&cli.StringSliceFlag{
Name: globalPushExtraLabels,
Usage: "Extra labels to add to pushed metrics. In case of collision, label value defined by flag will have priority. " +
"Flag can be set multiple times, to add few additional labels. " +
"For example, -pushmetrics.extraLabel='instance=\"foo\"' adds instance=\"foo\" label to all the metrics pushed to every -pushmetrics.url",
},
&cli.StringSliceFlag{
Name: globalPushHeaders,
Usage: "Optional HTTP headers to add to pushed metrics. Flag can be set multiple times, to add few additional headers.",
},
&cli.BoolFlag{
Name: globalPushDisableCompression,
Usage: "Whether to disable compression when pushing metrics.",
},
}
)
@@ -152,32 +123,32 @@ var (
Name: vmExtraLabel,
Value: nil,
Usage: "Extra labels, that will be added to imported timeseries. In case of collision, label value defined by flag" +
" will have priority. Flag can be set multiple times, to add few additional labels.",
"will have priority. Flag can be set multiple times, to add few additional labels.",
},
&cli.Int64Flag{
Name: vmRateLimit,
Usage: "Optional data transfer rate limit in bytes per second.\n" +
"By default, the rate limit is disabled. It can be useful for limiting load on configured via '--vm-addr' destination.",
"By default, the rate limit is disabled. It can be useful for limiting load on configured via '--vmAddr' destination.",
},
&cli.StringFlag{
Name: vmCertFile,
Usage: "Optional path to client-side TLS certificate file to use when connecting to '--vm-addr'",
Usage: "Optional path to client-side TLS certificate file to use when connecting to '--vmAddr'",
},
&cli.StringFlag{
Name: vmKeyFile,
Usage: "Optional path to client-side TLS key to use when connecting to '--vm-addr'",
Usage: "Optional path to client-side TLS key to use when connecting to '--vmAddr'",
},
&cli.StringFlag{
Name: vmCAFile,
Usage: "Optional path to TLS CA file to use for verifying connections to '--vm-addr'. By default, system CA is used",
Usage: "Optional path to TLS CA file to use for verifying connections to '--vmAddr'. By default, system CA is used",
},
&cli.StringFlag{
Name: vmServerName,
Usage: "Optional TLS server name to use for connections to '--vm-addr'. By default, the server name from '--vm-addr' is used",
Usage: "Optional TLS server name to use for connections to '--vmAddr'. By default, the server name from '--vmAddr' is used",
},
&cli.BoolFlag{
Name: vmInsecureSkipVerify,
Usage: "Whether to skip tls verification when connecting to '--vm-addr'",
Usage: "Whether to skip tls verification when connecting to '--vmAddr'",
Value: false,
},
&cli.IntFlag{
@@ -497,7 +468,7 @@ var (
Name: vmNativeFilterMatch,
Usage: "Time series selector to match series for export. For example, select {instance!=\"localhost\"} will " +
"match all series with \"instance\" label different to \"localhost\".\n" +
" See more details here https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-export-data-in-native-format",
" See more details here https://github.com/VictoriaMetrics/VictoriaMetrics#how-to-export-data-in-native-format",
Value: `{__name__!=""}`,
},
&cli.StringFlag{
@@ -627,7 +598,7 @@ var (
Name: vmExtraLabel,
Value: nil,
Usage: "Extra labels, that will be added to imported timeseries. In case of collision, label value defined by flag" +
" will have priority. Flag can be set multiple times, to add few additional labels.",
"will have priority. Flag can be set multiple times, to add few additional labels.",
},
&cli.Int64Flag{
Name: vmRateLimit,
@@ -654,8 +625,8 @@ var (
&cli.BoolFlag{
Name: vmNativeDisableBinaryProtocol,
Usage: "Whether to use https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-export-data-in-json-line-format " +
"instead of https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-export-data-in-native-format API. " +
"Binary export/import API protocol implies less network and resource usage, as it transfers compressed binary data blocks. " +
"instead of https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-export-data-in-native-format API." +
"Binary export/import API protocol implies less network and resource usage, as it transfers compressed binary data blocks." +
"Non-binary export/import API is less efficient, but supports deduplication if it is configured on vm-native-src-addr side.",
Value: false,
},
@@ -718,15 +689,15 @@ var (
Usage: "The time filter in RFC3339 format to select timeseries with timestamp equal or lower than provided value. E.g. '2020-01-01T20:07:00Z'",
Layout: time.RFC3339,
},
&cli.StringSliceFlag{
Name: remoteReadFilterLabel,
Usage: "Prometheus label name to filter timeseries by. E.g. '__name__' will filter timeseries by name.",
DefaultText: "__name__",
&cli.StringFlag{
Name: remoteReadFilterLabel,
Usage: "Prometheus label name to filter timeseries by. E.g. '__name__' will filter timeseries by name.",
Value: "__name__",
},
&cli.StringSliceFlag{
Name: remoteReadFilterLabelValue,
Usage: fmt.Sprintf("Prometheus regular expression to filter label from %q flag.", remoteReadFilterLabelValue),
DefaultText: ".*",
&cli.StringFlag{
Name: remoteReadFilterLabelValue,
Usage: fmt.Sprintf("Prometheus regular expression to filter label from %q flag.", remoteReadFilterLabelValue),
Value: ".*",
},
&cli.BoolFlag{
Name: remoteRead,

Some files were not shown because too many files have changed in this diff Show More